summaryrefslogtreecommitdiffstats
path: root/vnet
diff options
context:
space:
mode:
Diffstat (limited to 'vnet')
-rw-r--r--vnet/Makefile.am116
-rw-r--r--vnet/etc/scripts/arp4-mpls24
-rw-r--r--vnet/etc/scripts/lfib/ip4-to-mpls26
-rw-r--r--vnet/etc/scripts/lfib/mpls-pop-to-mpls28
-rw-r--r--vnet/etc/scripts/lfib/mpls-to-ip427
-rw-r--r--vnet/etc/scripts/lfib/mpls-to-mpls26
-rw-r--r--vnet/etc/scripts/mpls-o-ethernet/pg10
-rw-r--r--vnet/etc/scripts/mpls-o-ethernet/single.conf17
-rw-r--r--vnet/etc/scripts/source_and_port_range_check63
-rw-r--r--vnet/vnet/adj/adj.c343
-rw-r--r--vnet/vnet/adj/adj.h100
-rw-r--r--vnet/vnet/adj/adj_alloc.c (renamed from vnet/vnet/ip/adj_alloc.c)93
-rw-r--r--vnet/vnet/adj/adj_alloc.h (renamed from vnet/vnet/ip/adj_alloc.h)10
-rw-r--r--vnet/vnet/adj/adj_glean.c246
-rw-r--r--vnet/vnet/adj/adj_glean.h56
-rw-r--r--vnet/vnet/adj/adj_internal.h97
-rw-r--r--vnet/vnet/adj/adj_midchain.c226
-rw-r--r--vnet/vnet/adj/adj_midchain.h71
-rw-r--r--vnet/vnet/adj/adj_nbr.c835
-rw-r--r--vnet/vnet/adj/adj_nbr.h116
-rw-r--r--vnet/vnet/adj/adj_rewrite.c52
-rw-r--r--vnet/vnet/adj/adj_rewrite.h49
-rw-r--r--vnet/vnet/adj/adj_types.h38
-rw-r--r--vnet/vnet/classify/ip_classify.c32
-rw-r--r--vnet/vnet/classify/vnet_classify.c4
-rw-r--r--vnet/vnet/config.h4
-rw-r--r--vnet/vnet/cop/ip4_whitelist.c175
-rw-r--r--vnet/vnet/cop/ip6_whitelist.c57
-rw-r--r--vnet/vnet/devices/dpdk/cli.c2
-rw-r--r--vnet/vnet/devices/dpdk/node.c15
-rw-r--r--vnet/vnet/devices/ssvm/node.c2
-rw-r--r--vnet/vnet/dhcp/client.c129
-rw-r--r--vnet/vnet/dhcp/proxy_node.c32
-rw-r--r--vnet/vnet/dhcpv6/proxy_node.c8
-rw-r--r--vnet/vnet/dpo/classify_dpo.c120
-rw-r--r--vnet/vnet/dpo/classify_dpo.h56
-rw-r--r--vnet/vnet/dpo/dpo.c424
-rw-r--r--vnet/vnet/dpo/dpo.h354
-rw-r--r--vnet/vnet/dpo/drop_dpo.c100
-rw-r--r--vnet/vnet/dpo/drop_dpo.h37
-rw-r--r--vnet/vnet/dpo/load_balance.c760
-rw-r--r--vnet/vnet/dpo/load_balance.h203
-rw-r--r--vnet/vnet/dpo/load_balance_map.c566
-rw-r--r--vnet/vnet/dpo/load_balance_map.h78
-rw-r--r--vnet/vnet/dpo/lookup_dpo.c802
-rw-r--r--vnet/vnet/dpo/lookup_dpo.h108
-rw-r--r--vnet/vnet/dpo/mpls_label_dpo.c263
-rw-r--r--vnet/vnet/dpo/mpls_label_dpo.h66
-rw-r--r--vnet/vnet/dpo/punt_dpo.c100
-rw-r--r--vnet/vnet/dpo/punt_dpo.h30
-rw-r--r--vnet/vnet/dpo/receive_dpo.c155
-rw-r--r--vnet/vnet/dpo/receive_dpo.h62
-rw-r--r--vnet/vnet/ethernet/arp.c1038
-rw-r--r--vnet/vnet/ethernet/ethernet.h8
-rw-r--r--vnet/vnet/ethernet/interface.c14
-rw-r--r--vnet/vnet/fib/fib.c41
-rw-r--r--vnet/vnet/fib/fib.h652
-rw-r--r--vnet/vnet/fib/fib_attached_export.c524
-rw-r--r--vnet/vnet/fib/fib_attached_export.h57
-rw-r--r--vnet/vnet/fib/fib_entry.c1493
-rw-r--r--vnet/vnet/fib/fib_entry.h514
-rw-r--r--vnet/vnet/fib/fib_entry_cover.c206
-rw-r--r--vnet/vnet/fib/fib_entry_cover.h47
-rw-r--r--vnet/vnet/fib/fib_entry_src.c1278
-rw-r--r--vnet/vnet/fib/fib_entry_src.h289
-rw-r--r--vnet/vnet/fib/fib_entry_src_adj.c207
-rw-r--r--vnet/vnet/fib/fib_entry_src_api.c119
-rw-r--r--vnet/vnet/fib/fib_entry_src_default.c121
-rw-r--r--vnet/vnet/fib/fib_entry_src_default_route.c58
-rw-r--r--vnet/vnet/fib/fib_entry_src_interface.c195
-rw-r--r--vnet/vnet/fib/fib_entry_src_lisp.c130
-rw-r--r--vnet/vnet/fib/fib_entry_src_mpls.c201
-rw-r--r--vnet/vnet/fib/fib_entry_src_rr.c247
-rw-r--r--vnet/vnet/fib/fib_entry_src_special.c71
-rw-r--r--vnet/vnet/fib/fib_internal.h69
-rw-r--r--vnet/vnet/fib/fib_node.c207
-rw-r--r--vnet/vnet/fib/fib_node.h317
-rw-r--r--vnet/vnet/fib/fib_node_list.c385
-rw-r--r--vnet/vnet/fib/fib_node_list.h61
-rw-r--r--vnet/vnet/fib/fib_path.c1744
-rw-r--r--vnet/vnet/fib/fib_path.h154
-rw-r--r--vnet/vnet/fib/fib_path_ext.c182
-rw-r--r--vnet/vnet/fib/fib_path_ext.h67
-rw-r--r--vnet/vnet/fib/fib_path_list.c1100
-rw-r--r--vnet/vnet/fib/fib_path_list.h154
-rw-r--r--vnet/vnet/fib/fib_table.c1052
-rw-r--r--vnet/vnet/fib/fib_table.h732
-rw-r--r--vnet/vnet/fib/fib_test.c6330
-rw-r--r--vnet/vnet/fib/fib_types.c305
-rw-r--r--vnet/vnet/fib/fib_types.h331
-rw-r--r--vnet/vnet/fib/fib_walk.c775
-rw-r--r--vnet/vnet/fib/fib_walk.h58
-rw-r--r--vnet/vnet/fib/ip4_fib.c542
-rw-r--r--vnet/vnet/fib/ip4_fib.h141
-rw-r--r--vnet/vnet/fib/ip6_fib.c698
-rw-r--r--vnet/vnet/fib/ip6_fib.h130
-rw-r--r--vnet/vnet/fib/mpls_fib.c439
-rw-r--r--vnet/vnet/fib/mpls_fib.h106
-rw-r--r--vnet/vnet/gre/gre.c192
-rw-r--r--vnet/vnet/gre/gre.h42
-rw-r--r--vnet/vnet/gre/interface.c480
-rw-r--r--vnet/vnet/gre/node.c211
-rw-r--r--vnet/vnet/handoff.c10
-rw-r--r--vnet/vnet/handoff.h2
-rw-r--r--vnet/vnet/interface.c12
-rw-r--r--vnet/vnet/interface.h3
-rw-r--r--vnet/vnet/interface_cli.c6
-rw-r--r--vnet/vnet/interface_funcs.h1
-rw-r--r--vnet/vnet/ip/format.h6
-rw-r--r--vnet/vnet/ip/ip4.h217
-rw-r--r--vnet/vnet/ip/ip4_forward.c2133
-rw-r--r--vnet/vnet/ip/ip4_mtrie.c74
-rw-r--r--vnet/vnet/ip/ip4_mtrie.h9
-rw-r--r--vnet/vnet/ip/ip4_source_and_port_range_check.c994
-rw-r--r--vnet/vnet/ip/ip4_source_check.c134
-rw-r--r--vnet/vnet/ip/ip4_test.c3
-rw-r--r--vnet/vnet/ip/ip6.h240
-rw-r--r--vnet/vnet/ip/ip6_forward.c1444
-rw-r--r--vnet/vnet/ip/ip6_hop_by_hop.c65
-rw-r--r--vnet/vnet/ip/ip6_neighbor.c479
-rw-r--r--vnet/vnet/ip/ip6_packet.h18
-rw-r--r--vnet/vnet/ip/ip_feature_registration.c34
-rw-r--r--vnet/vnet/ip/ip_feature_registration.h3
-rw-r--r--vnet/vnet/ip/ip_source_and_port_range_check.h66
-rw-r--r--vnet/vnet/ip/lookup.c2193
-rw-r--r--vnet/vnet/ip/lookup.h333
-rw-r--r--vnet/vnet/ip/ping.c22
-rw-r--r--vnet/vnet/ip/udp.h38
-rw-r--r--vnet/vnet/ipsec-gre/ipsec_gre.c12
-rw-r--r--vnet/vnet/lisp-cp/control.c140
-rw-r--r--vnet/vnet/lisp-cp/control.h3
-rw-r--r--vnet/vnet/lisp-cp/lisp_cp_dpo.c93
-rw-r--r--vnet/vnet/lisp-cp/lisp_cp_dpo.h38
-rw-r--r--vnet/vnet/lisp-cp/lisp_types.c36
-rw-r--r--vnet/vnet/lisp-cp/lisp_types.h15
-rw-r--r--vnet/vnet/lisp-gpe/interface.c656
-rw-r--r--vnet/vnet/lisp-gpe/ip_forward.c1593
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe.c855
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe.h232
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c437
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h134
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c286
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h157
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c289
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h89
-rw-r--r--vnet/vnet/map/map.c218
-rw-r--r--vnet/vnet/map/map.h61
-rw-r--r--vnet/vnet/map/map_dpo.c191
-rw-r--r--vnet/vnet/map/map_dpo.h67
-rw-r--r--vnet/vnet/mcast/mcast_test.c160
-rw-r--r--vnet/vnet/misc.c12
-rw-r--r--vnet/vnet/mpls-gre/node.c363
-rw-r--r--vnet/vnet/mpls-gre/packet.h49
-rw-r--r--vnet/vnet/mpls/error.def (renamed from vnet/vnet/mpls-gre/error.def)3
-rw-r--r--vnet/vnet/mpls/interface.c (renamed from vnet/vnet/mpls-gre/interface.c)1095
-rw-r--r--vnet/vnet/mpls/mpls.c (renamed from vnet/vnet/mpls-gre/mpls.c)293
-rw-r--r--vnet/vnet/mpls/mpls.h (renamed from vnet/vnet/mpls-gre/mpls.h)129
-rw-r--r--vnet/vnet/mpls/mpls_features.c254
-rw-r--r--vnet/vnet/mpls/mpls_lookup.c278
-rw-r--r--vnet/vnet/mpls/mpls_output.c343
-rw-r--r--vnet/vnet/mpls/mpls_types.h39
-rw-r--r--vnet/vnet/mpls/node.c223
-rw-r--r--vnet/vnet/mpls/packet.h125
-rw-r--r--vnet/vnet/mpls/pg.c (renamed from vnet/vnet/mpls-gre/pg.c)2
-rw-r--r--vnet/vnet/mpls/policy_encap.c (renamed from vnet/vnet/mpls-gre/policy_encap.c)2
-rw-r--r--vnet/vnet/pg/stream.c6
-rw-r--r--vnet/vnet/rewrite.c12
-rw-r--r--vnet/vnet/sr/sr.c232
-rw-r--r--vnet/vnet/sr/sr.h3
-rw-r--r--vnet/vnet/sr/sr_replicate.c9
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan_gpe.c60
-rw-r--r--vnet/vnet/vxlan/vxlan.c13
172 files changed, 39115 insertions, 10658 deletions
diff --git a/vnet/Makefile.am b/vnet/Makefile.am
index 1c47c658ac7..41568e06045 100644
--- a/vnet/Makefile.am
+++ b/vnet/Makefile.am
@@ -13,7 +13,7 @@
AUTOMAKE_OPTIONS = foreign subdir-objects
-AM_CFLAGS = -Wall @DPDK@ @IPSEC@ @IPV6SR@
+AM_CFLAGS = -Wall -Werror @DPDK@ @IPSEC@ @IPV6SR@
libvnet_la_SOURCES =
libvnetplugin_la_SOURCES =
@@ -264,7 +264,6 @@ nobase_include_HEADERS += \
# Layer 3 protocol: IP v4/v6
########################################
libvnet_la_SOURCES += \
- vnet/ip/adj_alloc.c \
vnet/ip/format.c \
vnet/ip/icmp4.c \
vnet/ip/icmp6.c \
@@ -296,7 +295,6 @@ libvnet_la_SOURCES += \
vnet/ip/ip_frag.c
nobase_include_HEADERS += \
- vnet/ip/adj_alloc.h \
vnet/ip/format.h \
vnet/ip/icmp46_packet.h \
vnet/ip/icmp4.h \
@@ -369,13 +367,15 @@ nobase_include_HEADERS += \
########################################
libvnet_la_SOURCES += \
vnet/map/map.c \
+ vnet/map/map_dpo.c \
vnet/map/ip4_map.c \
vnet/map/ip6_map.c \
vnet/map/ip4_map_t.c \
vnet/map/ip6_map_t.c
nobase_include_HEADERS += \
- vnet/map/map.h
+ vnet/map/map.h \
+ vnet/map/map_dpo.h
if ENABLE_TESTS
TESTS += test_map
@@ -422,16 +422,20 @@ nobase_include_HEADERS += \
# Tunnel protocol: gre+mpls
########################################
libvnet_la_SOURCES += \
- vnet/mpls-gre/mpls.c \
- vnet/mpls-gre/node.c \
- vnet/mpls-gre/interface.c \
- vnet/mpls-gre/policy_encap.c \
- vnet/mpls-gre/pg.c
+ vnet/mpls/mpls.c \
+ vnet/mpls/mpls_lookup.c \
+ vnet/mpls/mpls_output.c \
+ vnet/mpls/mpls_features.c \
+ vnet/mpls/node.c \
+ vnet/mpls/interface.c \
+ vnet/mpls/policy_encap.c \
+ vnet/mpls/pg.c
nobase_include_HEADERS += \
- vnet/mpls-gre/mpls.h \
- vnet/mpls-gre/packet.h \
- vnet/mpls-gre/error.def
+ vnet/mpls/mpls.h \
+ vnet/mpls/mpls_types.h \
+ vnet/mpls/packet.h \
+ vnet/mpls/error.def
########################################
@@ -466,6 +470,7 @@ nobase_include_HEADERS += \
libvnet_la_SOURCES += \
vnet/lisp-cp/lisp_types.c \
+ vnet/lisp-cp/lisp_cp_dpo.c \
vnet/lisp-cp/control.c \
vnet/lisp-cp/gid_dictionary.c \
vnet/lisp-cp/lisp_msg_serdes.c \
@@ -513,6 +518,9 @@ endif
libvnet_la_SOURCES += \
vnet/lisp-gpe/lisp_gpe.c \
+ vnet/lisp-gpe/lisp_gpe_sub_interface.c \
+ vnet/lisp-gpe/lisp_gpe_adjacency.c \
+ vnet/lisp-gpe/lisp_gpe_tunnel.c \
vnet/lisp-gpe/interface.c \
vnet/lisp-gpe/ip_forward.c \
vnet/lisp-gpe/decap.c
@@ -720,6 +728,90 @@ nobase_include_HEADERS += \
vnet/unix/tapcli.h
########################################
+# FIB
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/fib/fib.c \
+ vnet/fib/fib_test.c \
+ vnet/fib/ip4_fib.c \
+ vnet/fib/ip6_fib.c \
+ vnet/fib/mpls_fib.c \
+ vnet/fib/fib_table.c \
+ vnet/fib/fib_walk.c \
+ vnet/fib/fib_types.c \
+ vnet/fib/fib_node.c \
+ vnet/fib/fib_node_list.c \
+ vnet/fib/fib_entry.c \
+ vnet/fib/fib_entry_src.c \
+ vnet/fib/fib_entry_src_rr.c \
+ vnet/fib/fib_entry_src_interface.c \
+ vnet/fib/fib_entry_src_default_route.c \
+ vnet/fib/fib_entry_src_special.c \
+ vnet/fib/fib_entry_src_api.c \
+ vnet/fib/fib_entry_src_adj.c \
+ vnet/fib/fib_entry_src_mpls.c \
+ vnet/fib/fib_entry_src_lisp.c \
+ vnet/fib/fib_entry_cover.c \
+ vnet/fib/fib_path_list.c \
+ vnet/fib/fib_path.c \
+ vnet/fib/fib_path_ext.c \
+ vnet/fib/fib_attached_export.c
+
+nobase_include_HEADERS += \
+ vnet/fib/fib.h \
+ vnet/fib/ip4_fib.h \
+ vnet/fib/ip6_fib.h \
+ vnet/fib/fib_types.h \
+ vnet/fib/fib_table.h \
+ vnet/fib/fib_node.h \
+ vnet/fib/fib_node_list.h \
+ vnet/fib/fib_entry.h
+
+########################################
+# ADJ
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/adj/adj_alloc.c \
+ vnet/adj/adj_nbr.c \
+ vnet/adj/adj_rewrite.c \
+ vnet/adj/adj_glean.c \
+ vnet/adj/adj_midchain.c \
+ vnet/adj/adj.c
+
+nobase_include_HEADERS += \
+ vnet/adj/adj.h \
+ vnet/adj/adj_types.h \
+ vnet/adj/adj_rewrite.h \
+ vnet/adj/adj_glean.h \
+ vnet/adj/adj_nbr.h
+
+########################################
+# Data-Plane Objects
+########################################
+
+libvnet_la_SOURCES += \
+ vnet/dpo/dpo.c \
+ vnet/dpo/drop_dpo.c \
+ vnet/dpo/punt_dpo.c \
+ vnet/dpo/receive_dpo.c \
+ vnet/dpo/load_balance.c \
+ vnet/dpo/load_balance_map.c \
+ vnet/dpo/lookup_dpo.c \
+ vnet/dpo/classify_dpo.c \
+ vnet/dpo/mpls_label_dpo.c
+
+nobase_include_HEADERS += \
+ vnet/dpo/load_balance.h \
+ vnet/dpo/drop_dpo.h \
+ vnet/dpo/lookup_dpo.h \
+ vnet/dpo/punt_dpo.h \
+ vnet/dpo/classify_dpo.h \
+ vnet/dpo/receive_dpo.h \
+ vnet/dpo/dpo.h
+
+########################################
# Plugin client library
########################################
diff --git a/vnet/etc/scripts/arp4-mpls b/vnet/etc/scripts/arp4-mpls
new file mode 100644
index 00000000000..d3d39f3b921
--- /dev/null
+++ b/vnet/etc/scripts/arp4-mpls
@@ -0,0 +1,24 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.2.2.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+trace add pg-input 100
diff --git a/vnet/etc/scripts/lfib/ip4-to-mpls b/vnet/etc/scripts/lfib/ip4-to-mpls
new file mode 100644
index 00000000000..85753797751
--- /dev/null
+++ b/vnet/etc/scripts/lfib/ip4-to-mpls
@@ -0,0 +1,26 @@
+packet-generator new {
+ name x
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ ICMP: 1.0.0.2 -> 2.2.2.2
+ ICMP echo_request
+ incrementing 100
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+trace add pg-input 100
diff --git a/vnet/etc/scripts/lfib/mpls-pop-to-mpls b/vnet/etc/scripts/lfib/mpls-pop-to-mpls
new file mode 100644
index 00000000000..2818ac133e1
--- /dev/null
+++ b/vnet/etc/scripts/lfib/mpls-pop-to-mpls
@@ -0,0 +1,28 @@
+packet-generator new {
+ name x
+ limit 1
+ node mpls-input
+ size 72-72
+ no-recycle
+ data {
+ hex 0x0001e0ff0001f1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+mpls local-label add 30 non-eos mpls-lookup-in-table 0
+mpls local-label add 31 2.2.2.2/32
+
+trace add pg-input 100
diff --git a/vnet/etc/scripts/lfib/mpls-to-ip4 b/vnet/etc/scripts/lfib/mpls-to-ip4
new file mode 100644
index 00000000000..24e235e01db
--- /dev/null
+++ b/vnet/etc/scripts/lfib/mpls-to-ip4
@@ -0,0 +1,27 @@
+packet-generator new {
+ name x
+ limit 1
+ node mpls-input
+ size 68-68
+ no-recycle
+ data {
+ hex 0x0001e1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+
+mpls local-label add 30 eos ip4-lookup-in-table 0
+
+trace add pg-input 100
diff --git a/vnet/etc/scripts/lfib/mpls-to-mpls b/vnet/etc/scripts/lfib/mpls-to-mpls
new file mode 100644
index 00000000000..497dbab324f
--- /dev/null
+++ b/vnet/etc/scripts/lfib/mpls-to-mpls
@@ -0,0 +1,26 @@
+packet-generator new {
+ name x
+ limit 1
+ node mpls-input
+ size 68-68
+ no-recycle
+ data {
+ hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+}
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+set ip arp static loop1 2.0.0.2 dead.beef.babe
+set int mpls loop1 enable
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33
+mpls local-label add 30 2.2.2.2/32
+
+trace add pg-input 100
diff --git a/vnet/etc/scripts/mpls-o-ethernet/pg b/vnet/etc/scripts/mpls-o-ethernet/pg
new file mode 100644
index 00000000000..ba5397f7648
--- /dev/null
+++ b/vnet/etc/scripts/mpls-o-ethernet/pg
@@ -0,0 +1,10 @@
+packet-generator new {
+ name x
+ limit 1
+ node mpls-ethernet-input
+ size 68-68
+ no-recycle
+ data {
+ hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
+ }
+} \ No newline at end of file
diff --git a/vnet/etc/scripts/mpls-o-ethernet/single.conf b/vnet/etc/scripts/mpls-o-ethernet/single.conf
new file mode 100644
index 00000000000..2a25d35512c
--- /dev/null
+++ b/vnet/etc/scripts/mpls-o-ethernet/single.conf
@@ -0,0 +1,17 @@
+comment { single node configuration }
+
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+set int ip address loop0 1.0.0.1/24
+set int ip address loop1 2.0.0.1/24
+
+
+ip route add 2.2.2.2/32 via 2.0.0.2 loop1
+
+mpls encap add label 30 fib 0 dest 2.2.2.2
+mpls decap add label 30 fib 0
+
+create mpls ethernet tunnel dst 00:50:56:b7:05:cb adj 2.2.2.2/32 tx-intfc loop1 fib-id 0
diff --git a/vnet/etc/scripts/source_and_port_range_check b/vnet/etc/scripts/source_and_port_range_check
new file mode 100644
index 00000000000..dce227b4315
--- /dev/null
+++ b/vnet/etc/scripts/source_and_port_range_check
@@ -0,0 +1,63 @@
+
+create loop int
+
+set int state loop0 up
+set int ip addr loop0 10.10.10.10/32
+
+packet-generator new {
+ name deny-from-default-route
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.2.3.4 -> 5.6.7.8
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name allow
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.1.1.1 -> 5.6.7.8
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+packet-generator new {
+ name deny-from-port-range
+ limit 1
+ node ip4-input
+ size 64-64
+ no-recycle
+ data {
+ UDP: 1.1.1.1 -> 5.6.7.8
+ UDP: 6000 -> 6001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+set ip source-and-port-range-check 1.1.1.0/24 range 2000 - 3000 vrf 99
+
+set interface ip source-and-port-range-check pg0 udp-out-vrf 99
+
+ show ip source-and-port-range-check vrf 99 1.1.1.1
+
+set ip source-and-port-range-check 1.1.1.0/24 range 4000 - 5000 vrf 99
+
+set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99
+
+show ip source-and-port-range-check vrf 99 1.1.1.1
+show ip source-and-port-range-check vrf 99 1.1.2.1
+
+set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99 del
+
+show ip source-and-port-range-check vrf 99 1.1.2.1
+
+tr add pg-input 100
diff --git a/vnet/vnet/adj/adj.c b/vnet/vnet/adj/adj.c
new file mode 100644
index 00000000000..b552fdb2bbc
--- /dev/null
+++ b/vnet/vnet/adj/adj.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_alloc.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_glean.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/fib/fib_node_list.h>
+
+/*
+ * Special Adj with index zero. we need to define this since the v4 mtrie
+ * assumes an index of 0 implies the ply is empty. therefore all 'real'
+ * adjs need a non-zero index.
+ */
+static ip_adjacency_t *special_v4_miss_adj_with_index_zero;
+
+/* Adjacency packet/byte counters indexed by adjacency index. */
+vlib_combined_counter_main_t adjacency_counters;
+
+/*
+ * In debug images (CLIB_DEBUG > 0) fill the whole adjacency with the 0xfe
+ * pattern so that any field the creator forgets to initialise stands out.
+ * The heap handle is the one field that is preserved across the fill.
+ * In non-debug images this is a no-op.
+ */
+always_inline void
+adj_poison (ip_adjacency_t * adj)
+{
+    u32 handle;
+
+    if (!(CLIB_DEBUG > 0))
+        return;
+
+    handle = adj->heap_handle;
+    memset (adj, 0xfe, sizeof (*adj));
+    adj->heap_handle = handle;
+}
+
+/*
+ * Allocate one adjacency from the adjacency heap and give it its
+ * baseline state for the requested next-hop protocol.
+ *
+ * @param proto - the next-hop protocol recorded in ia_nh_proto
+ * @return the newly allocated adjacency
+ */
+ip_adjacency_t *
+adj_alloc (fib_protocol_t proto)
+{
+    ip_adjacency_t *new_adj = aa_alloc();
+
+    /* debug images only: scribble over everything except the heap handle */
+    adj_poison(new_adj);
+
+    /* make sure the counter vectors cover this adjacency's index */
+    vlib_validate_combined_counter(&adjacency_counters,
+                                   new_adj->heap_handle);
+
+    /* fields that must always hold a defined value */
+    new_adj->rewrite_header.sw_if_index = ~0;
+    new_adj->mcast_group_index = ~0;
+    new_adj->saved_lookup_next_index = 0;
+    new_adj->n_adj = 1;
+
+    /* the adjacency is a node in the FIB graph */
+    fib_node_init(&new_adj->ia_node, FIB_NODE_TYPE_ADJ);
+    new_adj->ia_nh_proto = proto;
+
+    return (new_adj);
+}
+
+/*
+ * An adjacency index is 'special' when it is ADJ_INDEX_INVALID. Callers in
+ * this file use this to skip lock and child operations for such an index.
+ *
+ * @return non-zero if the index is special, zero otherwise
+ */
+static int
+adj_index_is_special (adj_index_t adj_index)
+{
+    return (ADJ_INDEX_INVALID == adj_index);
+}
+
+/**
+ * @brief Pretty print helper function for formatting specific adjacencies.
+ * @param s - the string (vector) to append the formatted output to
+ * @param args - the variadic arguments, consumed in this order:
+ * - vnet_main_t * : used to format the interface name of a glean adj
+ * - u32 : the index of the adjacency to format
+ * - format_ip_adjacency_flags_t : when FORMAT_IP_ADJACENCY_DETAIL is
+ * set the lock count and the list of children are printed too
+ * @return the string with the adjacency's description appended
+ *
+ * Note: no ip_lookup_main_t is read from the argument list.
+ */
+u8 *
+format_ip_adjacency (u8 * s, va_list * args)
+{
+ vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
+ u32 adj_index = va_arg (*args, u32);
+ format_ip_adjacency_flags_t fiaf = va_arg (*args, format_ip_adjacency_flags_t);
+ ip_adjacency_t * adj = adj_get(adj_index);
+
+ /* the sub-type of the adjacency selects the formatter to use */
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ s = format (s, "%U", format_adj_nbr, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ /* a glean is described only by the interface it is on */
+ s = format (s, " %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index));
+ break;
+
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ s = format (s, "%U", format_adj_midchain, adj_index, 2);
+ break;
+ default:
+ /* other types contribute only the index printed below */
+ break;
+ }
+ s = format (s, " index:%d", adj_index);
+
+ if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
+ {
+ s = format (s, " locks:%d", adj->ia_node.fn_locks);
+ s = format(s, "\nchildren:\n ");
+ s = fib_node_children_format(adj->ia_node.fn_children, s);
+ }
+
+ return s;
+}
+
+/*
+ * adj_last_lock_gone
+ *
+ * The final lock/reference on the adjacency has been released. Remove it
+ * from whichever DB holds it (if any), then return it to the adj heap.
+ * The adjacency is expected to have no children left at this point.
+ */
+static void
+adj_last_lock_gone (ip_adjacency_t *adj)
+{
+    ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children));
+    ADJ_DBG(adj, "last-lock-gone");
+
+    if (IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index)
+    {
+        adj_glean_remove(adj->ia_nh_proto,
+                         adj->rewrite_header.sw_if_index);
+    }
+    else if (IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index ||
+             IP_LOOKUP_NEXT_ARP == adj->lookup_next_index ||
+             IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
+    {
+        /* a midchain additionally holds a DPO that must be released */
+        if (IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index)
+        {
+            dpo_reset(&adj->sub_type.midchain.next_dpo);
+        }
+
+        /*
+         * midchain, complete and incomplete nbr adjs are all
+         * removed from the nbr DB
+         */
+        adj_nbr_remove(adj->ia_nh_proto,
+                       adj->ia_link,
+                       &adj->sub_type.nbr.next_hop,
+                       adj->rewrite_header.sw_if_index);
+    }
+    /*
+     * else: type not stored in any DB from which we need to remove it
+     */
+
+    fib_node_deinit(&adj->ia_node);
+    aa_free(adj);
+}
+
+/*
+ * Take a reference counting lock on the adjacency.
+ * A special (invalid) index is silently ignored.
+ */
+void
+adj_lock (adj_index_t adj_index)
+{
+    if (!adj_index_is_special(adj_index))
+    {
+        ip_adjacency_t *adj = adj_get(adj_index);
+
+        ASSERT(adj);
+        ASSERT(adj->heap_handle!=0);
+
+        ADJ_DBG(adj, "lock");
+        fib_node_lock(&adj->ia_node);
+    }
+}
+
+/*
+ * Release a reference counting lock on the adjacency.
+ * A special (invalid) index is silently ignored.
+ */
+void
+adj_unlock (adj_index_t adj_index)
+{
+    ip_adjacency_t *adj;
+
+    if (adj_index_is_special(adj_index))
+    {
+        return;
+    }
+
+    adj = adj_get(adj_index);
+    ASSERT(adj);
+    ASSERT(adj->heap_handle!=0);
+
+    ADJ_DBG(adj, "unlock");
+
+    fib_node_unlock(&adj->ia_node);
+}
+
+/*
+ * Add a child dependent of the given type and index to the adjacency.
+ *
+ * @return the value returned by fib_node_child_add for the new child,
+ *         or ~0 if the adjacency index is special (invalid).
+ */
+u32
+adj_child_add (adj_index_t adj_index,
+               fib_node_type_t child_type,
+               fib_node_index_t child_index)
+{
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    if (!adj_index_is_special(adj_index))
+    {
+        return (fib_node_child_add(FIB_NODE_TYPE_ADJ,
+                                   adj_index,
+                                   child_type,
+                                   child_index));
+    }
+
+    return (~0);
+}
+
+/*
+ * Remove a child dependent, identified by its sibling index, from the
+ * adjacency. A special (invalid) adjacency index is silently ignored.
+ */
+void
+adj_child_remove (adj_index_t adj_index,
+                  u32 sibling_index)
+{
+    if (!adj_index_is_special(adj_index))
+    {
+        fib_node_child_remove(FIB_NODE_TYPE_ADJ,
+                              adj_index,
+                              sibling_index);
+    }
+}
+
+/*
+ * Graph node VFT 'get' function: map an adjacency index to the fib_node_t
+ * embedded in that adjacency.
+ */
+static fib_node_t *
+adj_get_node (fib_node_index_t index)
+{
+    return (&adj_get(index)->ia_node);
+}
+
+/*
+ * Recover the adjacency that contains the given fib_node_t, by stepping
+ * back over the offset of the ia_node member. The argument is parenthesised
+ * so that the cast applies to the whole argument expression.
+ */
+#define ADJ_FROM_NODE(_node) \
+    ((ip_adjacency_t*)((char*)(_node) - STRUCT_OFFSET_OF(ip_adjacency_t, ia_node)))
+
+/*
+ * Graph node VFT 'last lock' function: convert the node back to its
+ * containing adjacency and hand it to adj_last_lock_gone.
+ */
+static void
+adj_node_last_lock_gone (fib_node_t *node)
+{
+    ip_adjacency_t *adj = ADJ_FROM_NODE(node);
+
+    adj_last_lock_gone(adj);
+}
+
+/*
+ * Graph node VFT 'back walk' function. This is not expected to be called
+ * for an adjacency, hence the unconditional assert.
+ */
+static fib_node_back_walk_rc_t
+adj_back_walk_notify (fib_node_t *node,
+                      fib_node_back_walk_ctx_t *ctx)
+{
+    /*
+     * What's going on? I am at the end!
+     */
+    ASSERT(0);
+
+    return FIB_NODE_BACK_WALK_CONTINUE;
+}
+
+/*
+ * Adjacency's graph node virtual function table.
+ * Registered against FIB_NODE_TYPE_ADJ in adj_module_init.
+ * - fnv_get: adjacency index to embedded fib_node_t
+ * - fnv_last_lock: called when the last lock on the node is released
+ * - fnv_back_walk: back-walk notification; asserts if ever invoked
+ */
+static const fib_node_vft_t adj_vft = {
+ .fnv_get = adj_get_node,
+ .fnv_last_lock = adj_node_last_lock_gone,
+ .fnv_back_walk = adj_back_walk_notify,
+};
+
+/*
+ * Module initialisation: register the adjacency node type with the FIB
+ * graph, initialise the adjacency sub-type modules, then bootstrap the
+ * adjacency heap and allocate the special adjacency from it.
+ */
+static clib_error_t *
+adj_module_init (vlib_main_t * vm)
+{
+    fib_node_register_type(FIB_NODE_TYPE_ADJ, &adj_vft);
+
+    /* the sub-type modules */
+    adj_nbr_module_init();
+    adj_glean_module_init();
+    adj_midchain_module_init();
+
+    /*
+     * pre-allocate room for 8 adjs: 4 special adjs for v4 and v6 resp.
+     */
+    aa_bootstrap(8);
+
+    /* the first adj allocated is kept as the v4 miss special */
+    special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4);
+
+    return NULL;
+}
+
+VLIB_INIT_FUNCTION (adj_module_init);
+
+/*
+ * DEPRECATED: DO NOT USE
+ *
+ * Create a new adjacency, optionally initialised as a copy of another.
+ *
+ * Only a single adjacency can be created per call: aa_alloc() hands out
+ * exactly one adjacency, so n_adj must be 1. The body therefore only ever
+ * touches that one adjacency, rather than indexing n_adj elements off the
+ * returned pointer, which would write beyond the allocation if the assert
+ * is compiled out and a caller passes n_adj > 1.
+ *
+ * @param lm - unused
+ * @param copy_adj - if non-NULL, the adjacency to copy the contents from
+ * @param n_adj - number of adjacencies requested; must be 1
+ * @param adj_index_return - set to the index of the new adjacency
+ * @return the new adjacency
+ */
+ip_adjacency_t *
+ip_add_adjacency (ip_lookup_main_t * lm,
+                  ip_adjacency_t * copy_adj,
+                  u32 n_adj,
+                  u32 * adj_index_return)
+{
+    ip_adjacency_t * adj;
+    u32 ai;
+
+    ASSERT(1==n_adj);
+
+    adj = aa_alloc ();
+    ai = adj->heap_handle;
+
+    /* Validate adjacency counters. */
+    vlib_validate_combined_counter (&adjacency_counters, ai);
+
+    /* Make sure certain fields are always initialized. */
+    adj->rewrite_header.sw_if_index = ~0;
+    adj->mcast_group_index = ~0;
+    adj->saved_lookup_next_index = 0;
+
+    if (copy_adj)
+        *adj = *copy_adj;
+
+    /* the copy overwrites these; restore the allocator's values */
+    adj->heap_handle = ai;
+    adj->n_adj = 1;
+
+    /* Zero possibly stale counters for re-used adjacencies. */
+    vlib_zero_combined_counter (&adjacency_counters, ai);
+
+    *adj_index_return = ai;
+    return adj;
+}
diff --git a/vnet/vnet/adj/adj.h b/vnet/vnet/adj/adj.h
new file mode 100644
index 00000000000..3a1236497e1
--- /dev/null
+++ b/vnet/vnet/adj/adj.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * An adjacency is a representation of an attached L3 peer.
+ *
+ * Adjacency Sub-types:
+ * - neighbour: a representation of an attached L3 peer.
+ * Key:{addr,interface,link/ether-type}
+ * SHARED
+ * - glean: used to drive ARP/ND for packets destined to a local sub-net.
+ * 'glean' means use the packet's destination address as the target
+ * address in the ARP packet.
+ * UNSHARED. Only one per-interface.
+ * - midchain: a neighbour adj on a virtual/tunnel interface.
+ * - rewrite: an adj with no key, but with a rewrite string.
+ *
+ * The API to create and update the adjacency is very sub-type specific. This
+ * is intentional as it encourages the user to carefully consider which adjacency
+ * sub-type they are really using, and hence assign it data in the appropriate
+ * sub-type space in the union of sub-types. This prevents the adj becoming a
+ * disorganised dumping ground for 'my feature needs a u16 somewhere' data. It
+ * is important to enforce this approach as space in the adjacency is at a premium,
+ * as we need it to fit in 1 cache line.
+ *
+ * The API is also based around an index to an adjacency, not a raw pointer. This
+ * is so the user doesn't suffer the same limp inducing firearm injuries that
+ * the author suffered as the adjacencies can realloc.
+ */
+
+#ifndef __ADJ_H__
+#define __ADJ_H__
+
+#include <vnet/ip/lookup.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_rewrite.h>
+#include <vnet/adj/adj_glean.h>
+
+/**
+ * @brief
+ * Take a reference counting lock on the adjacency
+ */
+extern void adj_lock(adj_index_t adj_index);
+/**
+ * @brief
+ * Release a reference counting lock on the adjacency
+ */
+extern void adj_unlock(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Add a child dependent to an adjacency. The child will
+ * thus be informed via its registered back-walk function
+ * when the adjacency state changes.
+ */
+extern u32 adj_child_add(adj_index_t adj_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+/**
+ * @brief
+ * Remove a child dependent
+ */
+extern void adj_child_remove(adj_index_t adj_index,
+ u32 sibling_index);
+
+/**
+ * @brief
+ * The global adjacency heap. Exposed for fast/inline data-plane access
+ */
+extern ip_adjacency_t *adj_heap;
+
+/**
+ * @brief
+ * Adjacency packet counters
+ */
+extern vlib_combined_counter_main_t adjacency_counters;
+
+/**
+ * @brief
+ * Translate an adjacency index into a pointer to the adjacency object
+ * held in the global adjacency heap. The heap can be reallocated, so
+ * the returned pointer should not be held across adjacency allocations.
+ */
+static inline ip_adjacency_t *
+adj_get (adj_index_t adj_index)
+{
+    ip_adjacency_t *adj = vec_elt_at_index(adj_heap, adj_index);
+
+    return adj;
+}
+
+#endif
diff --git a/vnet/vnet/ip/adj_alloc.c b/vnet/vnet/adj/adj_alloc.c
index 3ae7a199f19..5cc8cf6ef04 100644
--- a/vnet/vnet/ip/adj_alloc.c
+++ b/vnet/vnet/adj/adj_alloc.c
@@ -13,14 +13,18 @@
* limitations under the License.
*/
-#include <vnet/ip/adj_alloc.h>
+#include <vnet/adj/adj_alloc.h>
#include <vnet/ip/ip.h>
+/*
+ * the single adj heap
+ */
+ip_adjacency_t *adj_heap;
+
/*
* any operation which could cause the adj vector to be reallocated
* must have a worker thread barrier
*/
-
static inline int will_reallocate (ip_adjacency_t * adjs, u32 n)
{
uword aligned_header_bytes, new_data_bytes;
@@ -45,13 +49,14 @@ static inline int will_reallocate (ip_adjacency_t * adjs, u32 n)
}
ip_adjacency_t *
-aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n)
+aa_alloc (void)
{
vlib_main_t * vm = &vlib_global_main;
- aa_header_t * ah = aa_header (adjs);
+ aa_header_t * ah = aa_header (adj_heap);
ip_adjacency_t * adj_block;
u32 freelist_length;
int need_barrier_sync = 0;
+ u32 n = 1;
ASSERT(os_get_cpu_number() == 0);
ASSERT (clib_mem_is_heap_object (_vec_find(ah)));
@@ -59,14 +64,14 @@ aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n)
/* If we don't have a freelist of size N, fresh allocation is required */
if (vec_len (ah->free_indices_by_size) <= n)
{
- if (will_reallocate (adjs, n))
+ if (will_reallocate (adj_heap, n))
{
need_barrier_sync = 1;
vlib_worker_thread_barrier_sync (vm);
}
/* Workers wont look at the freelists... */
vec_validate (ah->free_indices_by_size, n);
- vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes,
+ vec_add2_ha (adj_heap, adj_block, n, aa_aligned_header_bytes,
CLIB_CACHE_LINE_BYTES);
if (need_barrier_sync)
vlib_worker_thread_barrier_release (vm);
@@ -77,17 +82,17 @@ aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n)
{
u32 index = ah->free_indices_by_size[n][freelist_length-1];
- adj_block = &adjs[index];
+ adj_block = &adj_heap[index];
_vec_len(ah->free_indices_by_size[n]) -= 1;
goto out;
}
/* Allocate a new block of size N */
- if (will_reallocate (adjs, n))
+ if (will_reallocate (adj_heap, n))
{
need_barrier_sync = 1;
vlib_worker_thread_barrier_sync (vm);
}
- vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes,
+ vec_add2_ha (adj_heap, adj_block, n, aa_aligned_header_bytes,
CLIB_CACHE_LINE_BYTES);
if (need_barrier_sync)
@@ -95,40 +100,45 @@ aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n)
out:
memset (adj_block, 0, n * (sizeof(*adj_block)));
- adj_block->heap_handle = adj_block - adjs;
+ adj_block->heap_handle = adj_block - adj_heap;
adj_block->n_adj = n;
- *blockp = adj_block;
- return adjs;
+
+ /*
+ * the adj heap may have realloc'd. recache.
+ */
+ ip4_main.lookup_main.adjacency_heap = adj_heap;
+ ip6_main.lookup_main.adjacency_heap = adj_heap;
+
+ return (adj_block);
}
-void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj)
+void aa_free (ip_adjacency_t * adj)
{
- aa_header_t * ah = aa_header (adjs);
+ aa_header_t * ah = aa_header (adj_heap);
- ASSERT (adjs && adj && (adj->heap_handle < vec_len (adjs)));
- ASSERT (adj->n_adj < vec_len (ah->free_indices_by_size));
+ ASSERT (adj_heap && adj && (adj->heap_handle < vec_len (adj_heap)));
ASSERT (adj->heap_handle != 0);
vec_add1 (ah->free_indices_by_size[adj->n_adj], adj->heap_handle);
adj->heap_handle = 0;
}
-ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n)
+void aa_bootstrap (u32 n)
{
ip_adjacency_t * adj_block;
aa_header_t * ah;
int i;
- vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes,
+ vec_add2_ha (adj_heap, adj_block, n, aa_aligned_header_bytes,
CLIB_CACHE_LINE_BYTES);
memset (adj_block, 0, n * sizeof(*adj_block));
- ah = aa_header (adjs);
+ ah = aa_header (adj_heap);
memset (ah, 0, sizeof (*ah));
vec_validate (ah->free_indices_by_size, 1);
- for (i = 0 ; i < vec_len (adjs); i++)
+ for (i = 0 ; i < vec_len (adj_heap); i++)
{
adj_block->n_adj = 1;
adj_block->heap_handle = ~0;
@@ -136,24 +146,23 @@ ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n)
vec_add1 (ah->free_indices_by_size[1], n - (i+1));
}
- return adjs;
+ ip4_main.lookup_main.adjacency_heap = adj_heap;
+ ip6_main.lookup_main.adjacency_heap = adj_heap;
}
u8 * format_adjacency_alloc (u8 * s, va_list * args)
{
vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
- ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
- ip_adjacency_t * adjs = va_arg (*args, ip_adjacency_t *);
int verbose = va_arg (*args, int);
ip_adjacency_t * adj;
u32 inuse = 0, freed = 0;
u32 on_freelist = 0;
int i, j;
- aa_header_t * ah = aa_header (adjs);
+ aa_header_t * ah = aa_header (adj_heap);
- for (i = 0; i < vec_len (adjs); i += adj->n_adj)
+ for (i = 0; i < vec_len (adj_heap); i += adj->n_adj)
{
- adj = adjs + i;
+ adj = adj_heap + i;
if ((i == 0) || adj->heap_handle)
inuse += adj->n_adj;
else
@@ -164,19 +173,19 @@ u8 * format_adjacency_alloc (u8 * s, va_list * args)
{
for (j = 0; j < vec_len(ah->free_indices_by_size[i]); j++)
{
- adj = adjs + ah->free_indices_by_size[i][j];
+ adj = adj_heap + ah->free_indices_by_size[i][j];
ASSERT(adj->heap_handle == 0);
on_freelist += adj->n_adj;
}
}
- s = format (s, "adjs: %d total, %d in use, %d free, %d on freelists\n",
- vec_len(adjs), inuse, freed, on_freelist);
+ s = format (s, "adj_heap: %d total, %d in use, %d free, %d on freelists\n",
+ vec_len(adj_heap), inuse, freed, on_freelist);
if (verbose)
{
- for (i = 0; i < vec_len (adjs); i += adj->n_adj)
+ for (i = 0; i < vec_len (adj_heap); i += adj->n_adj)
{
- adj = adjs + i;
+ adj = adj_heap + i;
if ((i == 0) || adj->heap_handle)
{
if (adj->n_adj > 1)
@@ -190,7 +199,7 @@ u8 * format_adjacency_alloc (u8 * s, va_list * args)
s = format (s, " ");
s = format(s, "%U\n", format_ip_adjacency,
- vnm, lm, i+j);
+ vnm, i+j, FORMAT_IP_ADJACENCY_NONE);
}
}
}
@@ -200,36 +209,22 @@ u8 * format_adjacency_alloc (u8 * s, va_list * args)
static clib_error_t *
show_adjacency_alloc_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
{
int verbose = 0;
vnet_main_t *vnm = vnet_get_main();
- ip_lookup_main_t *lm = 0;
- ip_adjacency_t * adjs = 0;
- int is_ip4 = 1;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "verbose"))
verbose = 1;
- else if (unformat (input, "ip4"))
- ;
- else if (unformat (input, "ip6"))
- is_ip4 = 0;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- if (is_ip4)
- lm = &ip4_main.lookup_main;
- else
- lm = &ip6_main.lookup_main;
-
- adjs = lm->adjacency_heap;
-
- vlib_cli_output (vm, "%U", format_adjacency_alloc, vnm, lm, adjs, verbose);
+ vlib_cli_output (vm, "%U", format_adjacency_alloc, vnm, verbose);
return 0;
}
diff --git a/vnet/vnet/ip/adj_alloc.h b/vnet/vnet/adj/adj_alloc.h
index a10146c53a5..7d1a3fb3133 100644
--- a/vnet/vnet/ip/adj_alloc.h
+++ b/vnet/vnet/adj/adj_alloc.h
@@ -16,7 +16,8 @@
#ifndef __adj_alloc_h__
#define __adj_alloc_h__
-/*
+/**
+ * @brief
* Adjacency allocator: heap-like in that the code
* will dole out contiguous chunks of n items. In the interests of
* thread safety, we don't bother about coalescing free blocks of size r
@@ -43,10 +44,9 @@ static inline aa_header_t * aa_header (void * v)
return vec_aligned_header (v, sizeof (aa_header_t), sizeof (void *));
}
-ip_adjacency_t *
-aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n);
-void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj);
-ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n);
+extern ip_adjacency_t *aa_alloc(void);
+extern void aa_free (ip_adjacency_t * adj);
+extern void aa_bootstrap (u32 n);
format_function_t format_adj_allocation;
diff --git a/vnet/vnet/adj/adj_glean.c b/vnet/vnet/adj/adj_glean.c
new file mode 100644
index 00000000000..6eb6718e216
--- /dev/null
+++ b/vnet/vnet/adj/adj_glean.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_alloc.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * The 'DB' of all glean adjs.
+ * There is only one glean per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_gleans[FIB_PROTOCOL_MAX];
+
+/**
+ * Map a FIB protocol to the registration of its glean graph node.
+ * Only IPv4 and IPv6 have one; for anything else (MPLS included) the
+ * ASSERT fires and NULL is returned.
+ */
+static inline vlib_node_registration_t*
+adj_get_glean_node (fib_protocol_t proto)
+{
+    vlib_node_registration_t *glean_node = NULL;
+
+    switch (proto) {
+    case FIB_PROTOCOL_IP4:
+        glean_node = &ip4_glean_node;
+        break;
+    case FIB_PROTOCOL_IP6:
+        glean_node = &ip6_glean_node;
+        break;
+    case FIB_PROTOCOL_MPLS:
+        /* no glean node for MPLS */
+        break;
+    }
+
+    ASSERT(NULL != glean_node);
+    return (glean_node);
+}
+
+/*
+ * adj_glean_add_or_lock
+ *
+ * Return the index of the glean adjacency for this protocol and interface,
+ * creating it if the DB has none, and take a lock on it in either case.
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The glean adj is added to an interface's connected prefix, the next-hop
+ * passed here is the local prefix on the same interface. It may be NULL, in
+ * which case no receive address is written.
+ */
+adj_index_t
+adj_glean_add_or_lock (fib_protocol_t proto,
+                       u32 sw_if_index,
+                       const ip46_address_t *nh_addr)
+{
+    ip_adjacency_t *glean;
+    adj_index_t existing;
+
+    /* make sure the per-protocol DB has a slot for this interface */
+    vec_validate_init_empty(adj_gleans[proto], sw_if_index, ADJ_INDEX_INVALID);
+    existing = adj_gleans[proto][sw_if_index];
+
+    if (ADJ_INDEX_INVALID != existing)
+    {
+        /* already present; re-use it */
+        glean = adj_get(existing);
+    }
+    else
+    {
+        glean = adj_alloc(proto);
+
+        glean->lookup_next_index = IP_LOOKUP_NEXT_GLEAN;
+        glean->ia_nh_proto = proto;
+        adj_gleans[proto][sw_if_index] = glean->heap_handle;
+
+        if (NULL != nh_addr)
+        {
+            glean->sub_type.glean.receive_addr = *nh_addr;
+        }
+
+        glean->rewrite_header.data_bytes = 0;
+
+        /* build the broadcast rewrite that leads to the glean node */
+        vnet_rewrite_for_sw_interface(vnet_get_main(),
+                                      adj_fib_proto_2_nd(proto),
+                                      sw_if_index,
+                                      adj_get_glean_node(proto)->index,
+                                      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+                                      &glean->rewrite_header,
+                                      sizeof (glean->rewrite_data));
+    }
+
+    adj_lock(glean->heap_handle);
+
+    return (glean->heap_handle);
+}
+
+/**
+ * Forget the glean adjacency recorded for this protocol/interface pair by
+ * resetting its DB slot to ADJ_INDEX_INVALID. The slot must already exist.
+ */
+void
+adj_glean_remove (fib_protocol_t proto,
+                  u32 sw_if_index)
+{
+    adj_index_t *gleans = adj_gleans[proto];
+
+    ASSERT(sw_if_index < vec_len(gleans));
+
+    gleans[sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+/**
+ * Interface admin up/down callback.
+ * For each of the IPv4 and IPv6 glean adjacencies recorded against the
+ * interface, trigger a synchronous walk back to the children, with the
+ * reason set to interface-up or interface-down according to 'flags'.
+ * Always returns NULL.
+ */
+static clib_error_t *
+adj_glean_interface_state_change (vnet_main_t * vnm,
+                                  u32 sw_if_index,
+                                  u32 flags)
+{
+    const int is_admin_up = (0 != (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP));
+    fib_protocol_t proto;
+
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        /* only interfaces with a glean recorded are of interest */
+        if (sw_if_index < vec_len(adj_gleans[proto]) &&
+            ADJ_INDEX_INVALID != adj_gleans[proto][sw_if_index])
+        {
+            ip_adjacency_t *glean = adj_get(adj_gleans[proto][sw_if_index]);
+
+            fib_node_back_walk_ctx_t bw_ctx = {
+                .fnbw_reason = (is_admin_up ?
+                                FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+                                FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+            };
+
+            fib_walk_sync(FIB_NODE_TYPE_ADJ, glean->heap_handle, &bw_ctx);
+        }
+    }
+
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_glean_interface_state_change);
+
+/**
+ * Interface add/delete callback.
+ * On delete, trigger a synchronous walk back to the children of each
+ * IPv4/IPv6 glean adjacency recorded against the interface, with reason
+ * INTERFACE_DELETE. Additions are ignored. Always returns NULL.
+ */
+static clib_error_t *
+adj_glean_interface_delete (vnet_main_t * vnm,
+                            u32 sw_if_index,
+                            u32 is_add)
+{
+    fib_protocol_t proto;
+
+    /*
+     * Interface additions are deliberately not back-walked. The control
+     * plane is expected to add interfaces first and routes second, so a
+     * path that already names a just-created interface must have resolved
+     * through an interface that was deleted and not yet cleaned up. A new
+     * interface that happens to re-use the same sw_if_index is NO GUARANTEE
+     * of being the interface that path needs. If the control plane wants
+     * such routes to resolve it must remove and add them again.
+     */
+    if (!is_add)
+    {
+        for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+        {
+            if (sw_if_index < vec_len(adj_gleans[proto]) &&
+                ADJ_INDEX_INVALID != adj_gleans[proto][sw_if_index])
+            {
+                ip_adjacency_t *glean =
+                    adj_get(adj_gleans[proto][sw_if_index]);
+
+                fib_node_back_walk_ctx_t bw_ctx = {
+                    .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+                };
+
+                fib_walk_sync(FIB_NODE_TYPE_ADJ, glean->heap_handle, &bw_ctx);
+            }
+        }
+    }
+
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete);
+
+/**
+ * Format a glean adjacency as " glean: <interface name>".
+ *
+ * @param s   The vector being formatted into.
+ * @param ap  Pointer to the caller's argument list, holding in order the
+ *            index_t of the adjacency and a u32 indent (consumed, unused).
+ * @return    The result of format() on 's'.
+ */
+u8*
+format_adj_glean (u8* s, va_list *ap)
+{
+    /* 'ap' points at the va_list, so va_arg must be given '*ap' */
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    ip_adjacency_t * adj = adj_get(index);
+
+    return (format(s, " glean: %U",
+                   format_vnet_sw_interface_name,
+                   vnm,
+                   vnet_get_sw_interface(vnm,
+                                         adj->rewrite_header.sw_if_index)));
+}
+
+
+/** DPO lock callback: lock the adjacency that the DPO's index refers to. */
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+    const adj_index_t ai = dpo->dpoi_index;
+
+    adj_lock(ai);
+}
+/** DPO unlock callback: unlock the adjacency that the DPO's index refers to. */
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+    const adj_index_t ai = dpo->dpoi_index;
+
+    adj_unlock(ai);
+}
+
+const static dpo_vft_t adj_glean_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_glean,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a glean
+ * object.
+ *
+ * this means that these graph nodes are ones from which a glean is the
+ * parent object in the DPO-graph.
+ */
+const static char* const glean_ip4_nodes[] =
+{
+ "ip4-glean",
+ NULL,
+};
+const static char* const glean_ip6_nodes[] =
+{
+ "ip6-glean",
+ NULL,
+};
+
+const static char* const * const glean_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = glean_ip4_nodes,
+ [DPO_PROTO_IP6] = glean_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * Module initialisation: register the DPO_ADJACENCY_GLEAN type with its
+ * virtual function table and per-protocol node names via dpo_register().
+ */
+void
+adj_glean_module_init (void)
+{
+    dpo_register(DPO_ADJACENCY_GLEAN, &adj_glean_dpo_vft, glean_nodes);
+}
diff --git a/vnet/vnet/adj/adj_glean.h b/vnet/vnet/adj/adj_glean.h
new file mode 100644
index 00000000000..ce3534ecee6
--- /dev/null
+++ b/vnet/vnet/adj/adj_glean.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Glean Adjacency
+ *
+ * A glean adjacency represents the need to discover new peers on an
+ * attached link. Packets that hit a glean adjacency will generate an
+ * ARP/ND packet addressed to the packet's destination address.
+ * Note this is different to an incomplete neighbour adjacency, which
+ * does not send ARP/ND requests to the packet's destination address,
+ * but instead to the next-hop address of the adjacency itself.
+ */
+
+#ifndef __ADJ_GLEAN_H__
+#define __ADJ_GLEAN_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing glean adjacency
+ *
+ * @param proto
+ * The protocol for the neighbours that we wish to glean
+ *
+ * @param sw_if_index
+ * The interface on which to glean
+ *
+ * @param nh_addr
+ * The address applied to the interface on which to glean. This is used
+ * as the source address in packets when the ARP/ND packet is sent.
+ */
+extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_glean_module_init(void);
+
+#endif
diff --git a/vnet/vnet/adj/adj_internal.h b/vnet/vnet/adj/adj_internal.h
new file mode 100644
index 00000000000..79042d1fd2a
--- /dev/null
+++ b/vnet/vnet/adj/adj_internal.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_INTERNAL_H__
+#define __ADJ_INTERNAL_H__
+
+#include <vnet/adj/adj.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+
+
+/**
+ * big switch to turn on Adjacency debugging
+ */
+#undef ADJ_DEBUG
+
+/*
+ * Debug macro.
+ * With ADJ_DEBUG defined it logs the adjacency's heap handle and address
+ * followed by the caller's formatted message; otherwise it expands to
+ * nothing. The enabled form is a single do/while(0) statement, so that
+ * 'if (x) ADJ_DBG(...); else ...' parses as intended, and the adjacency
+ * argument is parenthesised so any pointer expression may be passed.
+ */
+#ifdef ADJ_DEBUG
+#define ADJ_DBG(_adj, _fmt, _args...)                   \
+do {                                                    \
+    clib_warning("adj:[%d:%p]:" _fmt,                   \
+                 (_adj)->heap_handle, (_adj),           \
+                 ##_args);                              \
+} while (0)
+#else
+#define ADJ_DBG(_adj, _fmt, _args...)
+#endif
+
+/**
+ * Map a link type to the registration of the graph node that performs the
+ * rewrite for it: ip4-rewrite, ip6-rewrite or mpls-output. An unknown link
+ * type trips the ASSERT and yields NULL.
+ */
+static inline vlib_node_registration_t*
+adj_get_rewrite_node (fib_link_t linkt)
+{
+    vlib_node_registration_t *rewrite_node = NULL;
+
+    switch (linkt) {
+    case FIB_LINK_IP4:
+        rewrite_node = &ip4_rewrite_node;
+        break;
+    case FIB_LINK_IP6:
+        rewrite_node = &ip6_rewrite_node;
+        break;
+    case FIB_LINK_MPLS:
+        rewrite_node = &mpls_output_node;
+        break;
+    }
+
+    ASSERT(NULL != rewrite_node);
+    return (rewrite_node);
+}
+
+/**
+ * Convert a FIB link type into the VNET L3 packet type carried on that
+ * link. Returns 0 for a link type that is not listed.
+ */
+static inline vnet_l3_packet_type_t
+adj_fib_link_2_vnet (fib_link_t linkt)
+{
+    vnet_l3_packet_type_t l3_type = 0;
+
+    switch (linkt)
+    {
+    case FIB_LINK_IP4:
+        l3_type = VNET_L3_PACKET_TYPE_IP4;
+        break;
+    case FIB_LINK_IP6:
+        l3_type = VNET_L3_PACKET_TYPE_IP6;
+        break;
+    case FIB_LINK_MPLS:
+        l3_type = VNET_L3_PACKET_TYPE_MPLS_UNICAST;
+        break;
+    }
+
+    return (l3_type);
+}
+
+/**
+ * Convert a FIB protocol into the VNET L3 packet type used for the rewrite
+ * of its neighbour-discovery traffic: ARP for IPv4, IP6 for IPv6 and
+ * MPLS-unicast for MPLS. Returns 0 for a protocol that is not listed.
+ */
+static inline vnet_l3_packet_type_t
+adj_fib_proto_2_nd (fib_protocol_t fp)
+{
+    vnet_l3_packet_type_t nd_type = 0;
+
+    switch (fp)
+    {
+    case FIB_PROTOCOL_IP4:
+        nd_type = VNET_L3_PACKET_TYPE_ARP;
+        break;
+    case FIB_PROTOCOL_IP6:
+        nd_type = VNET_L3_PACKET_TYPE_IP6;
+        break;
+    case FIB_PROTOCOL_MPLS:
+        nd_type = VNET_L3_PACKET_TYPE_MPLS_UNICAST;
+        break;
+    }
+
+    return (nd_type);
+}
+
+extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
+
+extern void adj_nbr_remove(fib_protocol_t nh_proto,
+ fib_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+extern void adj_glean_remove(fib_protocol_t proto,
+ u32 sw_if_index);
+
+#endif
diff --git a/vnet/vnet/adj/adj_midchain.c b/vnet/vnet/adj/adj_midchain.c
new file mode 100644
index 00000000000..4b9b6a414d2
--- /dev/null
+++ b/vnet/vnet/adj/adj_midchain.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/fib/fib_walk.h>
+
+/**
+ * Return the graph node index of the midchain node for the link type.
+ * An unknown link type trips the ASSERT and yields 0.
+ */
+static inline u32
+adj_get_midchain_node (fib_link_t link)
+{
+    if (FIB_LINK_IP4 == link)
+    {
+        return (ip4_midchain_node.index);
+    }
+    if (FIB_LINK_IP6 == link)
+    {
+        return (ip6_midchain_node.index);
+    }
+    if (FIB_LINK_MPLS == link)
+    {
+        return (mpls_midchain_node.index);
+    }
+
+    ASSERT(0);
+    return (0);
+}
+
+/**
+ * adj_nbr_midchain_update_rewrite
+ *
+ * Turn the adjacency into a midchain and update its rewrite string, then
+ * back-walk its children with reason ADJ_UPDATE.
+ * A NULL string implies the rewrite is reset (i.e. when ARP/ND entry is
+ * gone).
+ * NOTE(review): the NULL case is not implemented below; it hits ASSERT(0).
+ * NB: the adj being updated may be handling traffic in the DP.
+ *
+ * @param adj_index          index of the adjacency; must not be invalid.
+ * @param post_rewrite_node  node index stored as the midchain's
+ *                           tx_function_node.
+ * @param rewrite            vector holding the new rewrite string.
+ */
+void
+adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
+                                 u32 post_rewrite_node,
+                                 u8 *rewrite)
+{
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+    adj->lookup_next_index = IP_LOOKUP_NEXT_MIDCHAIN;
+    adj->sub_type.midchain.tx_function_node = post_rewrite_node;
+
+    if (NULL != rewrite)
+    {
+        /*
+         * new rewrite provided.
+         * use a dummy rewrite header to get the interface to print into.
+         * NOTE(review): 'dummy' is not initialised before use; presumably
+         * vnet_rewrite_for_tunnel() fills everything that is copied out
+         * of it below - verify.
+         */
+        ip_adjacency_t dummy;
+        dpo_id_t tmp = DPO_NULL;
+
+        vnet_rewrite_for_tunnel(vnet_get_main(),
+                                adj->rewrite_header.sw_if_index,
+                                adj_get_midchain_node(adj->ia_link),
+                                adj->sub_type.midchain.tx_function_node,
+                                &dummy.rewrite_header,
+                                rewrite,
+                                vec_len(rewrite));
+
+        /*
+         * this is an update of an existing rewrite.
+         * packets are in flight. we'll need to briefly stack on the drop DPO
+         * whilst the rewrite is written, so any packets that see the partial update
+         * are binned.
+         */
+        if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo))
+        {
+            /*
+             * not stacked yet. stack on the drop
+             */
+            dpo_stack(DPO_ADJACENCY_MIDCHAIN,
+                      fib_proto_to_dpo(adj->ia_nh_proto),
+                      &adj->sub_type.midchain.next_dpo,
+                      drop_dpo_get(fib_proto_to_dpo(adj->ia_nh_proto)));
+        }
+
+        /* remember the current parent in 'tmp', then point at the drop */
+        dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo);
+        dpo_stack(DPO_ADJACENCY_MIDCHAIN,
+                  fib_proto_to_dpo(adj->ia_nh_proto),
+                  &adj->sub_type.midchain.next_dpo,
+                  drop_dpo_get(fib_proto_to_dpo(adj->ia_nh_proto)));
+
+        CLIB_MEMORY_BARRIER();
+
+        /*
+         * NOTE(review): the copy length is VLIB_BUFFER_PRE_DATA_SIZE, not a
+         * sizeof of the source/destination; confirm this does not exceed
+         * the rewrite area of ip_adjacency_t.
+         */
+        clib_memcpy(&adj->rewrite_header,
+                    &dummy.rewrite_header,
+                    VLIB_BUFFER_PRE_DATA_SIZE);
+
+        CLIB_MEMORY_BARRIER();
+
+        /*
+         * The graph arc used/created here is from the post-rewrite node to the
+         * child's registered node. This is because post adj processing the next
+         * node is the interface's specific node, then the post-write-node (aka
+         * the interface's tx-function) - from there we need to get to the stacked
+         * child's node.
+         */
+        dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
+                            &adj->sub_type.midchain.next_dpo,
+                            &tmp);
+        dpo_reset(&tmp);
+    }
+    else
+    {
+        /* resetting the rewrite (NULL string) is not supported here */
+        ASSERT(0);
+    }
+
+    /*
+     * time for walkies fido.
+     */
+    fib_node_back_walk_ctx_t bw_ctx = {
+        .fnbw_reason = FIB_NODE_BW_REASON_ADJ_UPDATE,
+    };
+
+    fib_walk_sync(FIB_NODE_TYPE_ADJ, adj->heap_handle, &bw_ctx);
+}
+
+/**
+ * adj_nbr_midchain_stack
+ *
+ * Stack the midchain adjacency's next DPO onto 'next', starting from the
+ * midchain's tx function node. The adjacency index must be valid and the
+ * adjacency must already be a midchain.
+ */
+void
+adj_nbr_midchain_stack (adj_index_t adj_index,
+                        const dpo_id_t *next)
+{
+    ip_adjacency_t *midchain;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    midchain = adj_get(adj_index);
+
+    ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == midchain->lookup_next_index);
+
+    dpo_stack_from_node(midchain->sub_type.midchain.tx_function_node,
+                        &midchain->sub_type.midchain.next_dpo,
+                        next);
+}
+
+/**
+ * Format a midchain adjacency: its link type, next-hop address, rewrite,
+ * and the DPO it is stacked on.
+ *
+ * @param s   The vector being formatted into.
+ * @param ap  Pointer to the caller's argument list, holding in order the
+ *            index_t of the adjacency and a u32 indent.
+ * @return    The result of the successive format() calls on 's'.
+ */
+u8*
+format_adj_midchain (u8* s, va_list *ap)
+{
+    /* 'ap' points at the va_list, so va_arg must be given '*ap' */
+    index_t index = va_arg(*ap, index_t);
+    u32 indent = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    ip_adjacency_t * adj = adj_get(index);
+
+    s = format (s, "%U", format_fib_link, adj->ia_link);
+    s = format (s, " via %U ",
+                format_ip46_address, &adj->sub_type.nbr.next_hop);
+    s = format (s, " %U",
+                format_vnet_rewrite,
+                vnm->vlib_main, &adj->rewrite_header,
+                sizeof (adj->rewrite_data), indent);
+    s = format (s, "\n%Ustacked-on:\n%U%U",
+                format_white_space, indent,
+                format_white_space, indent+2,
+                format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+    return (s);
+}
+
+/** DPO lock callback: lock the adjacency that the DPO's index refers to. */
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+    const adj_index_t ai = dpo->dpoi_index;
+
+    adj_lock(ai);
+}
+/** DPO unlock callback: unlock the adjacency that the DPO's index refers to. */
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+    const adj_index_t ai = dpo->dpoi_index;
+
+    adj_unlock(ai);
+}
+
+const static dpo_vft_t adj_midchain_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_midchain,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a midchain
+ * object.
+ *
+ * this means that these graph nodes are ones from which a midchain is the
+ * parent object in the DPO-graph.
+ */
+const static char* const midchain_ip4_nodes[] =
+{
+ "ip4-midchain",
+ NULL,
+};
+const static char* const midchain_ip6_nodes[] =
+{
+ "ip6-midchain",
+ NULL,
+};
+const static char* const midchain_mpls_nodes[] =
+{
+ "mpls-midchain",
+ NULL,
+};
+
+const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = midchain_ip4_nodes,
+ [DPO_PROTO_IP6] = midchain_ip6_nodes,
+ [DPO_PROTO_MPLS] = midchain_mpls_nodes,
+};
+
+/**
+ * Module initialisation: register the DPO_ADJACENCY_MIDCHAIN type with its
+ * virtual function table and per-protocol node names via dpo_register().
+ */
+void
+adj_midchain_module_init (void)
+{
+    dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);
+}
diff --git a/vnet/vnet/adj/adj_midchain.h b/vnet/vnet/adj/adj_midchain.h
new file mode 100644
index 00000000000..adf86f1d007
--- /dev/null
+++ b/vnet/vnet/adj/adj_midchain.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Midchain Adjacency sub-type. These adjs represent an L3 peer on a
+ * tunnel interface. The tunnel's adjacency is thus not the end of the chain,
+ * and needs to stack on/link to another chain (or portion of the graph) to
+ * reach the tunnel's destination.
+ */
+
+#ifndef __ADJ_MIDCHAIN_H__
+#define __ADJ_MIDCHAIN_H__
+
+#include <vnet/adj/adj.h>
+
+/**
+ * @brief
+ * Convert an existing neighbour adjacency into a midchain
+ *
+ * @param adj_index
+ * The index of the neighbour adjacency.
+ *
+ * @param post_rewrite_node
+ * The VLIB graph node that provides the post-encap fixup.
+ * where 'fixup' is e.g., correcting chksum, length, etc.
+ *
+ * @param rewrite
+ * The rewrite.
+ */
+extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index,
+ u32 post_rewrite_node,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * [re]stack a midchain. 'Stacking' is the act of forming parent-child
+ * relationships in the data-plane graph.
+ *
+ * @param adj_index
+ * The index of the midchain to stack
+ *
+ * @param dpo
+ * The parent DPO to stack onto (i.e. become a child of).
+ */
+extern void adj_nbr_midchain_stack(adj_index_t adj_index,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_midchain_module_init(void);
+
+/**
+ * @brief
+ * Format a midchain adjacency
+ */
+extern u8* format_adj_midchain(u8* s, va_list *ap);
+
+#endif
diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c
new file mode 100644
index 00000000000..7da1becd4c1
--- /dev/null
+++ b/vnet/vnet/adj/adj_nbr.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * Vector Hash tables of neighbour (traditional) adjacencies
+ * Key: interface(for the vector index), address (and its proto),
+ * link-type/ether-type.
+ */
+static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
+
+// FIXME SIZE APPROPRIATELY. ASK DAVEB.
+#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
+#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+
+/*
+ * Fill a bihash key from the next-hop address (both 64-bit halves) and the
+ * link type. Wrapped in do/while(0) so it is a single statement wherever it
+ * is used; the key argument is parenthesised so any lvalue expression works.
+ */
+#define ADJ_NBR_SET_KEY(_key, _lt, _nh)         \
+do {                                            \
+    (_key).key[0] = (_nh)->as_u64[0];           \
+    (_key).key[1] = (_nh)->as_u64[1];           \
+    (_key).key[2] = (_lt);                      \
+} while (0)
+
+/*
+ * True when the per-protocol table vector has a slot for interface _itf
+ * and a hash table has been allocated in that slot. Both clauses use the
+ * macro's own _itf argument rather than a variable that happens to be
+ * named sw_if_index at the call site.
+ */
+#define ADJ_NBR_ITF_OK(_proto, _itf)                        \
+    (((_itf) < vec_len(adj_nbr_tables[_proto])) &&          \
+     (NULL != adj_nbr_tables[_proto][(_itf)]))
+
+/**
+ * Record an adjacency index in the neighbour DB under the key
+ * {link-type, next-hop address}, in the hash table belonging to the given
+ * protocol and interface. The per-interface vector slot and its hash table
+ * are created on first use.
+ */
+static void
+adj_nbr_insert (fib_protocol_t nh_proto,
+                fib_link_t link_type,
+                const ip46_address_t *nh_addr,
+                u32 sw_if_index,
+                adj_index_t adj_index)
+{
+    BVT(clib_bihash) *table;
+    BVT(clib_bihash_kv) kv;
+
+    if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
+    {
+        vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
+    }
+
+    table = adj_nbr_tables[nh_proto][sw_if_index];
+
+    if (NULL == table)
+    {
+        /* first adjacency on this interface: allocate and init its table */
+        table = clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
+                                       CLIB_CACHE_LINE_BYTES);
+        adj_nbr_tables[nh_proto][sw_if_index] = table;
+        memset(table, 0, sizeof(BVT(clib_bihash)));
+
+        BV(clib_bihash_init) (table,
+                              "Adjacency Neighbour table",
+                              ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
+                              ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+    }
+
+    ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+    kv.value = adj_index;
+
+    BV(clib_bihash_add_del) (table, &kv, 1);
+}
+
+/**
+ * Delete the {link-type, next-hop address} entry from the neighbour DB of
+ * the given protocol and interface. Does nothing when that interface has
+ * no table.
+ */
+void
+adj_nbr_remove (fib_protocol_t nh_proto,
+                fib_link_t link_type,
+                const ip46_address_t *nh_addr,
+                u32 sw_if_index)
+{
+    BVT(clib_bihash_kv) kv;
+
+    if (ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+    {
+        ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+
+        /* final argument 0 selects deletion */
+        BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index],
+                                 &kv, 0);
+    }
+}
+
+/**
+ * Look up the adjacency index stored under {link-type, next-hop address}
+ * for the given protocol and interface.
+ *
+ * @return the stored index, or ADJ_INDEX_INVALID when the interface has no
+ *         table or the search fails.
+ */
+static adj_index_t
+adj_nbr_find (fib_protocol_t nh_proto,
+              fib_link_t link_type,
+              const ip46_address_t *nh_addr,
+              u32 sw_if_index)
+{
+    adj_index_t found = ADJ_INDEX_INVALID;
+    BVT(clib_bihash_kv) kv;
+
+    ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+
+    /* 'kv' serves as both the search key and the result */
+    if (ADJ_NBR_ITF_OK(nh_proto, sw_if_index) &&
+        BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
+                               &kv, &kv) >= 0)
+    {
+        found = kv.value;
+    }
+
+    return (found);
+}
+
+/**
+ * Map a FIB protocol to the registration of its neighbour-discovery graph
+ * node: ip4_arp_node for IPv4, ip6_discover_neighbor_node for IPv6. Any
+ * other protocol (MPLS included) trips the ASSERT and yields NULL.
+ */
+static inline vlib_node_registration_t*
+adj_get_nd_node (fib_protocol_t proto)
+{
+    vlib_node_registration_t *nd_node = NULL;
+
+    switch (proto) {
+    case FIB_PROTOCOL_IP4:
+        nd_node = &ip4_arp_node;
+        break;
+    case FIB_PROTOCOL_IP6:
+        nd_node = &ip6_discover_neighbor_node;
+        break;
+    case FIB_PROTOCOL_MPLS:
+        /* no neighbour discovery node for MPLS */
+        break;
+    }
+
+    ASSERT(NULL != nd_node);
+    return (nd_node);
+}
+
+/**
+ * Build and send one ARP request for the adjacency's IPv4 next-hop on the
+ * adjacency's interface.
+ * Returns without sending when the interface is not admin-up, or when
+ * ip4_interface_address_matching_destination() finds no source address.
+ */
+static void
+adj_ip4_nbr_probe (ip_adjacency_t *adj)
+{
+    vnet_main_t * vnm = vnet_get_main();
+    ip4_main_t * im = &ip4_main;
+    ip_interface_address_t * ia;
+    ethernet_arp_header_t * h;
+    vnet_hw_interface_t * hi;
+    vnet_sw_interface_t * si;
+    ip4_address_t * src;
+    vlib_buffer_t * b;
+    vlib_main_t * vm;
+    u32 bi = 0;
+
+    vm = vlib_get_main();
+
+    si = vnet_get_sw_interface (vnm,
+                                adj->rewrite_header.sw_if_index);
+
+    /* nothing can be sent on an interface that is admin-down */
+    if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+        return;
+    }
+
+    src =
+        ip4_interface_address_matching_destination(im,
+                                                   &adj->sub_type.nbr.next_hop.ip4,
+                                                   adj->rewrite_header.sw_if_index,
+                                                   &ia);
+    if (! src)
+    {
+        return;
+    }
+
+    /* NOTE(review): 'h' is used unchecked; confirm it cannot be NULL */
+    h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
+
+    hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+    /* entry [0]: our hardware address and the matching source address */
+    clib_memcpy (h->ip4_over_ethernet[0].ethernet,
+                 hi->hw_address,
+                 sizeof (h->ip4_over_ethernet[0].ethernet));
+
+    h->ip4_over_ethernet[0].ip4 = src[0];
+    /* entry [1]: the next-hop address being probed */
+    h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
+
+    b = vlib_get_buffer (vm, bi);
+    vnet_buffer (b)->sw_if_index[VLIB_RX] =
+        vnet_buffer (b)->sw_if_index[VLIB_TX] =
+        adj->rewrite_header.sw_if_index;
+
+    /* Add encapsulation string for software interface (e.g. ethernet header). */
+    vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+    vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+    /* hand the single buffer to the hardware interface's output node */
+    {
+        vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
+        u32 * to_next = vlib_frame_vector_args (f);
+        to_next[0] = bi;
+        f->n_vectors = 1;
+        vlib_put_frame_to_node (vm, hi->output_node_index, f);
+    }
+}
+
+/**
+ * Build and send one IPv6 neighbour solicitation for the adjacency's IPv6
+ * next-hop on the adjacency's interface.
+ * Returns without sending when the interface is not admin-up, or when
+ * ip6_interface_address_matching_destination() finds no source address.
+ */
+static void
+adj_ip6_nbr_probe (ip_adjacency_t *adj)
+{
+    icmp6_neighbor_solicitation_header_t * h;
+    vnet_main_t * vnm = vnet_get_main();
+    ip6_main_t * im = &ip6_main;
+    ip_interface_address_t * ia;
+    ip6_address_t * dst, *src;
+    vnet_hw_interface_t * hi;
+    vnet_sw_interface_t * si;
+    vlib_buffer_t * b;
+    int bogus_length;
+    vlib_main_t * vm;
+    u32 bi = 0;
+
+    vm = vlib_get_main();
+
+    si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index);
+    dst = &adj->sub_type.nbr.next_hop.ip6;
+
+    /* nothing can be sent on an interface that is admin-down */
+    if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+        return;
+    }
+    src = ip6_interface_address_matching_destination(im, dst,
+                                                     adj->rewrite_header.sw_if_index,
+                                                     &ia);
+    if (! src)
+    {
+        return;
+    }
+
+    /* NOTE(review): 'h' is used unchecked; confirm it cannot be NULL */
+    h = vlib_packet_template_get_packet(vm,
+                                        &im->discover_neighbor_packet_template,
+                                        &bi);
+
+    hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index);
+
+    /* only the last three bytes of the template's destination are patched */
+    h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+    h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+    h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+    h->ip.src_address = src[0];
+    h->neighbor.target_address = dst[0];
+
+    /*
+     * Bound the copy by the destination field as well as by the source
+     * vector, so that a hardware address longer than the option's address
+     * field cannot overrun the packet.
+     */
+    clib_memcpy (h->link_layer_option.ethernet_address,
+                 hi->hw_address,
+                 clib_min(vec_len(hi->hw_address),
+                          sizeof (h->link_layer_option.ethernet_address)));
+
+    h->neighbor.icmp.checksum =
+        ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length);
+    ASSERT(bogus_length == 0);
+
+    b = vlib_get_buffer (vm, bi);
+    vnet_buffer (b)->sw_if_index[VLIB_RX] =
+        vnet_buffer (b)->sw_if_index[VLIB_TX] =
+        adj->rewrite_header.sw_if_index;
+
+    /* Add encapsulation string for software interface (e.g. ethernet header). */
+    vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t));
+    vlib_buffer_advance(b, -adj->rewrite_header.data_bytes);
+
+    /* hand the single buffer to the hardware interface's output node */
+    {
+        vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index);
+        u32 * to_next = vlib_frame_vector_args(f);
+        to_next[0] = bi;
+        f->n_vectors = 1;
+        vlib_put_frame_to_node(vm, hi->output_node_index, f);
+    }
+}
+
+/**
+ * Allocate a neighbour adjacency, enter it in the neighbour DB and set its
+ * initial state: next index ARP, the given next-hop, link type and
+ * protocol, and a zeroed midchain next-DPO.
+ */
+static ip_adjacency_t*
+adj_nbr_alloc (fib_protocol_t nh_proto,
+               fib_link_t link_type,
+               const ip46_address_t *nh_addr,
+               u32 sw_if_index)
+{
+    ip_adjacency_t *nbr = adj_alloc(nh_proto);
+
+    adj_nbr_insert(nh_proto, link_type, nh_addr,
+                   sw_if_index,
+                   nbr->heap_handle);
+
+    /*
+     * a freshly added adj has no rewrite string yet, so packets that
+     * reach it must go to ARP
+     */
+    nbr->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+    nbr->sub_type.nbr.next_hop = *nh_addr;
+    nbr->ia_link = link_type;
+    nbr->ia_nh_proto = nh_proto;
+    memset(&nbr->sub_type.midchain.next_dpo, 0,
+           sizeof(nbr->sub_type.midchain.next_dpo));
+
+    return (nbr);
+}
+
+/*
+ * adj_nbr_add_or_lock
+ *
+ * Find the adjacency for the neighbour requested, or add one if none
+ * exists yet. Either way the adjacency is locked and its index returned.
+ *
+ * The key for an adj is:
+ * - the Next-hops protocol (i.e. v4 or v6)
+ * - the link type of the packets that will use the adj
+ * - the address of the next-hop
+ * - the interface the next-hop is reachable through
+ * - fib_index; this is broken. i will fix it.
+ * the adj lookup currently occurs in the FIB.
+ * (the fib_index remark is the original author's note; fib_index is not
+ * a parameter of this function.)
+ */
+adj_index_t
+adj_nbr_add_or_lock (fib_protocol_t nh_proto,
+ fib_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+
+ /*
+ * If there is no next-hop, this is the 'auto-adj' used on p2p
+ * links instead of a glean. It is made a REWRITE adj straight away.
+ * Otherwise the adj stays incomplete (as set by adj_nbr_alloc): a
+ * rewrite for the ARP/ND node is built and, for IP4 and IP6
+ * next-hops, a probe is sent.
+ */
+ if (ip46_address_is_zero(nh_addr))
+ {
+ adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+
+ vnet_rewrite_for_sw_interface(vnet_get_main(),
+ adj_fib_link_2_vnet(link_type),
+ sw_if_index,
+ adj_get_rewrite_node(link_type)->index,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+ &adj->rewrite_header,
+ sizeof (adj->rewrite_data));
+ }
+ else
+ {
+ vnet_rewrite_for_sw_interface(vnet_get_main(),
+ adj_fib_proto_2_nd(nh_proto),
+ sw_if_index,
+ adj_get_nd_node(nh_proto)->index,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+ &adj->rewrite_header,
+ sizeof (adj->rewrite_data));
+
+ switch (nh_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ adj_ip4_nbr_probe(adj);
+ break;
+ case FIB_PROTOCOL_IP6:
+ adj_ip6_nbr_probe(adj);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ }
+ }
+ else
+ {
+ adj = adj_get(adj_index);
+ }
+
+ adj_lock(adj->heap_handle);
+
+ return (adj->heap_handle);
+}
+
+/**
+ * adj_nbr_add_or_lock_w_rewrite
+ *
+ * Find the neighbour adjacency for the key, allocating it when it does
+ * not exist. The adjacency is locked and then given the caller supplied
+ * rewrite string via adj_nbr_update_rewrite().
+ */
+adj_index_t
+adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
+                               fib_link_t link_type,
+                               const ip46_address_t *nh_addr,
+                               u32 sw_if_index,
+                               u8 *rewrite)
+{
+    ip_adjacency_t *adj;
+    adj_index_t found;
+
+    found = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+    if (ADJ_INDEX_INVALID != found)
+    {
+        adj = adj_get(found);
+    }
+    else
+    {
+        adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+        /*
+         * adj_nbr_update_rewrite reads the interface from the adj's
+         * rewrite header, so it must be set before the update below.
+         */
+        adj->rewrite_header.sw_if_index = sw_if_index;
+    }
+
+    adj_lock(adj->heap_handle);
+    adj_nbr_update_rewrite(adj->heap_handle, rewrite);
+
+    return (adj->heap_handle);
+}
+
+/**
+ * adj_nbr_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ *
+ * @param adj_index
+ *  The index of the adj to update. Must not be ADJ_INDEX_INVALID.
+ *
+ * @param rewrite
+ *  The new rewrite string, or NULL to clear the existing one.
+ *
+ * Once the adj has been updated a synchronous back-walk is started from
+ * it (or, for an MPLS link-type adj, from its IP sibling) with reason
+ * FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE.
+ */
+void
+adj_nbr_update_rewrite (adj_index_t adj_index,
+                        u8 *rewrite)
+{
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+
+    if (NULL != rewrite)
+    {
+        /*
+         * new rewrite provided.
+         * use a dummy rewrite header to get the interface to print into.
+         */
+        ip_adjacency_t dummy;
+
+        vnet_rewrite_for_sw_interface(vnet_get_main(),
+                                      adj_fib_link_2_vnet(adj->ia_link),
+                                      adj->rewrite_header.sw_if_index,
+                                      adj_get_rewrite_node(adj->ia_link)->index,
+                                      rewrite,
+                                      &dummy.rewrite_header,
+                                      sizeof (dummy.rewrite_data));
+
+        if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
+        {
+            /*
+             * this is an update of an existing rewrite.
+             * we can't just paste in the new rewrite as that is not atomic.
+             * So we briefly swap the ADJ to ARP type, paste, then swap back.
+             */
+            adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+            CLIB_MEMORY_BARRIER();
+        }
+        /*
+         * else
+         * this is the first time the rewrite is added.
+         * paste it on then swap the next type.
+         *
+         * NOTE(review): the copy length is VLIB_BUFFER_PRE_DATA_SIZE
+         * starting at the rewrite header - confirm this matches the size
+         * of the rewrite area declared in ip_adjacency_t.
+         */
+        clib_memcpy(&adj->rewrite_header,
+                    &dummy.rewrite_header,
+                    VLIB_BUFFER_PRE_DATA_SIZE);
+
+        /*
+         * the pasted rewrite must be visible before the adj is seen as
+         * a REWRITE type, otherwise the DP could use a partially written
+         * string.
+         */
+        CLIB_MEMORY_BARRIER();
+
+        adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+    }
+    else
+    {
+        /*
+         * clear the rewrite.
+         */
+        adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+        CLIB_MEMORY_BARRIER();
+
+        adj->rewrite_header.data_bytes = 0;
+    }
+
+    /*
+     * time for walkies fido.
+     * The link type MPLS Adj never has children. So if it is this adj
+     * that is updated, we need to walk from its IP sibling.
+     */
+    if (FIB_LINK_MPLS == adj->ia_link)
+    {
+        adj_index = adj_nbr_find(adj->ia_nh_proto,
+                                 fib_proto_to_link(adj->ia_nh_proto),
+                                 &adj->sub_type.nbr.next_hop,
+                                 adj->rewrite_header.sw_if_index);
+
+        ASSERT(ADJ_INDEX_INVALID != adj_index);
+    }
+
+    fib_node_back_walk_ctx_t bw_ctx = {
+        .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+        /*
+         * This walk only needs to go back one level, but there is no control here.
+         * the first receiving fib_entry_t will quash the walk
+         */
+    };
+
+    fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx);
+}
+
+/**
+ * Context for the walk that counts the entries in the neighbour tables
+ */
+typedef struct adj_db_count_ctx_t_ {
+    /**
+     * Number of key/value pairs visited so far
+     */
+    u64 count;
+} adj_db_count_ctx_t;
+
+/**
+ * Table walk callback; tallies one more entry in the context passed.
+ */
+static void
+adj_db_count (BVT(clib_bihash_kv) * kvp,
+              void *arg)
+{
+    adj_db_count_ctx_t *tally = arg;
+
+    tally->count += 1;
+}
+
+/**
+ * adj_nbr_db_size
+ *
+ * Count the entries in every non-NULL per-interface table of the IP4
+ * and IP6 neighbour DBs.
+ */
+u32
+adj_nbr_db_size (void)
+{
+    adj_db_count_ctx_t total = { .count = 0 };
+    fib_protocol_t proto;
+    u32 sw_if_index = 0;
+
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+        {
+            /* no table for this interface, nothing to count */
+            if (NULL == adj_nbr_tables[proto][sw_if_index])
+                continue;
+
+            BV(clib_bihash_foreach_key_value_pair) (
+                adj_nbr_tables[proto][sw_if_index],
+                adj_db_count,
+                &total);
+        }
+    }
+
+    return (total.count);
+}
+
+/**
+ * Context for the state change walk of the DB
+ */
+typedef struct adj_nbr_interface_state_change_ctx_t_
+{
+ /**
+ * Flags passed from the vnet notify function; tested against
+ * VNET_SW_INTERFACE_FLAG_ADMIN_UP by the walk callback.
+ */
+ int flags;
+} adj_nbr_interface_state_change_ctx_t;
+
+/**
+ * Table walk callback for an interface admin state change.
+ *
+ * Back walk the graph from the adj stored in this entry so that the
+ * forwarding entries learn the interface state has changed.
+ */
+static void
+adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp,
+                                    void *arg)
+{
+    adj_nbr_interface_state_change_ctx_t *state = arg;
+
+    /* assume down, then correct the reason if the admin-up flag is set */
+    fib_node_back_walk_ctx_t bw_ctx = {
+        .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN,
+    };
+
+    if (state->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    {
+        bw_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_UP;
+    }
+
+    fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
+}
+
+/**
+ * adj_nbr_interface_state_change
+ *
+ * Registered to receive interface admin up/down notifications. Every
+ * adj in the interface's IP4 and IP6 neighbour tables triggers a back
+ * walk carrying the new state.
+ */
+static clib_error_t *
+adj_nbr_interface_state_change (vnet_main_t * vnm,
+                                u32 sw_if_index,
+                                u32 flags)
+{
+    /* the callback only reads the context, so one serves all the walks */
+    adj_nbr_interface_state_change_ctx_t walk_ctx = {
+        .flags = flags,
+    };
+    fib_protocol_t proto;
+
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        /* nothing to walk unless the table for this interface is OK */
+        if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
+            continue;
+
+        BV(clib_bihash_foreach_key_value_pair) (
+            adj_nbr_tables[proto][sw_if_index],
+            adj_nbr_interface_state_change_one,
+            &walk_ctx);
+    }
+
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_nbr_interface_state_change);
+
+/**
+ * Table walk callback for an interface delete.
+ *
+ * Back walk the graph from the adj stored in this entry so that the
+ * forwarding entries learn the interface has been deleted.
+ */
+static void
+adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp,
+                              void *arg)
+{
+    fib_node_back_walk_ctx_t delete_ctx = {
+        .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+    };
+
+    fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &delete_ctx);
+}
+
+/**
+ * adj_nbr_interface_add_del
+ *
+ * Registered to receive interface Add and delete notifications.
+ * Only deletes are acted on: each adj in the interface's IP4 and IP6
+ * neighbour tables triggers an interface-delete back walk.
+ */
+static clib_error_t *
+adj_nbr_interface_add_del (vnet_main_t * vnm,
+                           u32 sw_if_index,
+                           u32 is_add)
+{
+    fib_protocol_t proto;
+
+    if (is_add)
+    {
+        /*
+         * Interface additions are deliberately ignored; no back walk is
+         * done to resolve paths through a newly added interface.
+         * The control plane is expected to add interfaces first and
+         * routes second. So if a path already names an interface that
+         * has only just been created, that path resolved through an
+         * interface that was since deleted, and the path was never
+         * removed. Re-use of the same sw_if_index is NO GUARANTEE that
+         * the new interface is the one the path needs. So tough!
+         * A control plane that wants such routes to resolve must remove
+         * and add them again.
+         */
+        return (NULL);
+    }
+
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        /* nothing to walk unless the table for this interface is OK */
+        if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
+            continue;
+
+        BV(clib_bihash_foreach_key_value_pair) (
+            adj_nbr_tables[proto][sw_if_index],
+            adj_nbr_interface_delete_one,
+            NULL);
+    }
+
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
+
+
+/**
+ * Table walk callback for the show command; prints the adj stored in
+ * this entry. The walk argument is the vlib main to print to.
+ */
+static void
+adj_nbr_show_one (BVT(clib_bihash_kv) * kvp,
+                  void *arg)
+{
+    vlib_main_t *vm = arg;
+
+    vlib_cli_output (vm, "[@%d] %U",
+                     kvp->value,
+                     format_ip_adjacency,
+                     vnet_get_main(), kvp->value,
+                     FORMAT_IP_ADJACENCY_NONE);
+}
+
+/**
+ * CLI handler: show one neighbour adj in detail when an index is given,
+ * otherwise show every adj in the IP4 and IP6 neighbour tables.
+ */
+static clib_error_t *
+adj_nbr_show (vlib_main_t * vm,
+              unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+    adj_index_t ai = ADJ_INDEX_INVALID;
+    fib_protocol_t proto;
+    u32 sw_if_index;
+
+    /* consume adj indices until something else, or the end, is found */
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (!unformat (input, "%d", &ai))
+            break;
+    }
+
+    if (ADJ_INDEX_INVALID != ai)
+    {
+        /* a specific adj was requested */
+        vlib_cli_output (vm, "[@%d] %U",
+                         ai,
+                         format_ip_adjacency,
+                         vnet_get_main(), ai,
+                         FORMAT_IP_ADJACENCY_DETAIL);
+
+        return 0;
+    }
+
+    /* no index given; walk all the tables */
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+        {
+            if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
+                continue;
+
+            BV(clib_bihash_foreach_key_value_pair) (
+                adj_nbr_tables[proto][sw_if_index],
+                adj_nbr_show_one,
+                vm);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * CLI: 'show adj nbr [<adj_index>]'
+ * With an index the single adj is shown in detail, without one all the
+ * neighbour adjs are listed. The handler parses nothing but the index,
+ * so the help must not advertise any other option.
+ */
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+    .path = "show adj nbr",
+    .short_help = "show adj nbr [<adj_index>]",
+    .function = adj_nbr_show,
+};
+
+/**
+ * Format an incomplete (ARP) neighbour adjacency: its link type, the
+ * next-hop address and the name of the interface it is on.
+ * Arguments consumed: the adj's index and an indent (unused).
+ */
+u8*
+format_adj_nbr_incomplete (u8* s, va_list *ap)
+{
+    index_t adj_index = va_arg(ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
+    ip_adjacency_t *adj = adj_get(adj_index);
+    vnet_main_t *vnm = vnet_get_main();
+
+    s = format (s, "arp-%U", format_fib_link, adj->ia_link);
+
+    /*
+     * NOTE(review): only the address is passed to format_ip46_address;
+     * confirm it does not also expect an address type argument.
+     */
+    s = format (s, ": via %U",
+                format_ip46_address, &adj->sub_type.nbr.next_hop);
+
+    s = format (s, " %U",
+                format_vnet_sw_interface_name,
+                vnm,
+                vnet_get_sw_interface(vnm,
+                                      adj->rewrite_header.sw_if_index));
+
+    return (s);
+}
+
+/**
+ * Format a complete (REWRITE) neighbour adjacency: its link type, the
+ * next-hop address and the rewrite it applies.
+ * Arguments consumed: the adj's index and an indent (unused).
+ */
+u8*
+format_adj_nbr (u8* s, va_list *ap)
+{
+    index_t adj_index = va_arg(ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
+    ip_adjacency_t *adj = adj_get(adj_index);
+    vnet_main_t *vnm = vnet_get_main();
+
+    s = format (s, "%U", format_fib_link, adj->ia_link);
+
+    /*
+     * NOTE(review): only the address is passed to format_ip46_address;
+     * confirm it does not also expect an address type argument.
+     */
+    s = format (s, " via %U ",
+                format_ip46_address, &adj->sub_type.nbr.next_hop);
+
+    s = format (s, "%U",
+                format_vnet_rewrite,
+                vnm->vlib_main,
+                &adj->rewrite_header,
+                sizeof (adj->rewrite_data),
+                0);
+
+    return (s);
+}
+
+/**
+ * DPO lock function; locks the adj the DPO refers to.
+ */
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+    adj_index_t ai = dpo->dpoi_index;
+
+    adj_lock(ai);
+}
+/**
+ * DPO unlock function; unlocks the adj the DPO refers to.
+ */
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+    adj_index_t ai = dpo->dpoi_index;
+
+    adj_unlock(ai);
+}
+
+/**
+ * Virtual function table registered for the DPO_ADJACENCY type
+ */
+static const dpo_vft_t adj_nbr_dpo_vft = {
+    .dv_lock = adj_dpo_lock,
+    .dv_unlock = adj_dpo_unlock,
+    .dv_format = format_adj_nbr,
+};
+
+/**
+ * Virtual function table registered for the DPO_ADJACENCY_INCOMPLETE
+ * type; differs from the above only in the format function.
+ */
+static const dpo_vft_t adj_nbr_incompl_dpo_vft = {
+    .dv_lock = adj_dpo_lock,
+    .dv_unlock = adj_dpo_unlock,
+    .dv_format = format_adj_nbr_incomplete,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
+ * object.
+ *
+ * this means that these graph nodes are ones from which a nbr is the
+ * parent object in the DPO-graph.
+ *
+ * Each list is NULL terminated.
+ */
+static const char* const nbr_ip4_nodes[] =
+{
+    "ip4-rewrite-transit",
+    NULL,
+};
+static const char* const nbr_ip6_nodes[] =
+{
+    "ip6-rewrite",
+    NULL,
+};
+static const char* const nbr_mpls_nodes[] =
+{
+    "mpls-output",
+    NULL,
+};
+
+/**
+ * The node lists above, indexed by DPO protocol
+ */
+static const char* const * const nbr_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4] = nbr_ip4_nodes,
+    [DPO_PROTO_IP6] = nbr_ip6_nodes,
+    [DPO_PROTO_MPLS] = nbr_mpls_nodes,
+};
+
+/**
+ * The per-protocol VLIB graph nodes registered for the incomplete
+ * adjacency DPO type. Each list is NULL terminated.
+ */
+static const char* const nbr_incomplete_ip4_nodes[] =
+{
+    "ip4-arp",
+    NULL,
+};
+static const char* const nbr_incomplete_ip6_nodes[] =
+{
+    "ip6-discover-neighbor",
+    NULL,
+};
+static const char* const nbr_incomplete_mpls_nodes[] =
+{
+    "mpls-adj-incomplete",
+    NULL,
+};
+
+/**
+ * The node lists above, indexed by DPO protocol
+ */
+static const char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
+    [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
+    [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
+};
+
+/**
+ * adj_nbr_module_init
+ *
+ * Register the complete and incomplete adjacency DPO types, each with
+ * its function table and its per-protocol graph nodes.
+ */
+void
+adj_nbr_module_init (void)
+{
+    /* complete (rewrite) adjacencies */
+    dpo_register(DPO_ADJACENCY, &adj_nbr_dpo_vft, nbr_nodes);
+
+    /* incomplete (ARP/ND) adjacencies */
+    dpo_register(DPO_ADJACENCY_INCOMPLETE,
+                 &adj_nbr_incompl_dpo_vft,
+                 nbr_incomplete_nodes);
+}
diff --git a/vnet/vnet/adj/adj_nbr.h b/vnet/vnet/adj/adj_nbr.h
new file mode 100644
index 00000000000..331423bd036
--- /dev/null
+++ b/vnet/vnet/adj/adj_nbr.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * Neighbour Adjacency sub-type. These adjs represent an L3 peer on a
+ * connected link.
+ */
+
+#ifndef __ADJ_NBR_H__
+#define __ADJ_NBR_H__
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to. An all zeros
+ * address requests the 'auto-adj' used on p2p links.
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @return
+ * The index of the (locked) adjacency
+ */
+extern adj_index_t adj_nbr_add_or_lock(fib_protocol_t nh_proto,
+ fib_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency, and
+ * update it with the rewrite string provided
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @param rewrite
+ * The rewrite to prepend to packets
+ *
+ * @return
+ * The index of the (locked) adjacency
+ */
+extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
+ fib_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacency.
+ *
+ * @param adj_index
+ * The index of the adj to update
+ *
+ * @param rewrite
+ * The new rewrite. NULL resets (clears) the adjacency's rewrite.
+ */
+extern void adj_nbr_update_rewrite(adj_index_t adj_index,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * Format an incomplete neighbour (ARP) adjacency
+ */
+extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Format a neighbour (REWRITE) adjacency
+ */
+extern u8* format_adj_nbr(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Module initialisation. Registers the adjacency DPO types.
+ */
+extern void adj_nbr_module_init(void);
+
+/**
+ * @brief
+ * Return the size of the adjacency database, i.e. the number of entries
+ * in the neighbour tables. For testing purposes.
+ */
+extern u32 adj_nbr_db_size(void);
+
+#endif
diff --git a/vnet/vnet/adj/adj_rewrite.c b/vnet/vnet/adj/adj_rewrite.c
new file mode 100644
index 00000000000..db802e33665
--- /dev/null
+++ b/vnet/vnet/adj/adj_rewrite.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_alloc.h>
+#include <vnet/adj/adj_internal.h>
+
+/**
+ * adj_rewrite_add_and_lock
+ *
+ * A rewrite sub-type has the rewrite string provided, but no key. There
+ * is hence no lookup; each call allocates a new adjacency.
+ *
+ * @param nh_proto
+ *  The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ *  The link type of the packets that will use the adj
+ *
+ * @param sw_if_index
+ *  The interface the rewrite is built for
+ *
+ * @param rewrite
+ *  The rewrite to prepend to packets. Must not be NULL.
+ *
+ * @return
+ *  The index of the new (locked) adjacency
+ */
+adj_index_t
+adj_rewrite_add_and_lock (fib_protocol_t nh_proto,
+                          fib_link_t link_type,
+                          u32 sw_if_index,
+                          u8 *rewrite)
+{
+    ip_adjacency_t *adj;
+
+    /* check the arguments before anything is allocated */
+    ASSERT(NULL != rewrite);
+
+    adj = adj_alloc(nh_proto);
+
+    adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+    adj->ia_link = link_type;
+    /*
+     * record the next-hop protocol, as is done for the neighbour
+     * sub-type, so that code reading ia_nh_proto sees a defined value.
+     */
+    adj->ia_nh_proto = nh_proto;
+    adj->rewrite_header.sw_if_index = sw_if_index;
+
+    vnet_rewrite_for_sw_interface(vnet_get_main(),
+                                  adj_fib_link_2_vnet(link_type),
+                                  adj->rewrite_header.sw_if_index,
+                                  adj_get_rewrite_node(link_type)->index,
+                                  rewrite,
+                                  &adj->rewrite_header,
+                                  sizeof (adj->rewrite_data));
+
+    adj_lock(adj->heap_handle);
+
+    return (adj->heap_handle);
+}
diff --git a/vnet/vnet/adj/adj_rewrite.h b/vnet/vnet/adj/adj_rewrite.h
new file mode 100644
index 00000000000..f8df255150d
--- /dev/null
+++ b/vnet/vnet/adj/adj_rewrite.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A rewrite adjacency has no key, and thus cannot be 'found' from the
+ * FIB resolution code. The client therefore needs to manage these adjacencies
+ */
+
+#ifndef __ADJ_REWRITE_H__
+#define __ADJ_REWRITE_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ * Add (and lock) a new rewrite adjacency. A rewrite adjacency has no
+ * key, so an existing one is never found; each call allocates a new one.
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @param rewrite
+ * The rewrite to prepend to packets. Must not be NULL.
+ *
+ * @return
+ * The index of the new (locked) adjacency
+ */
+extern adj_index_t adj_rewrite_add_and_lock(fib_protocol_t nh_proto,
+ fib_link_t link_type,
+ u32 sw_if_index,
+ u8 *rewrite);
+
+#endif
diff --git a/vnet/vnet/adj/adj_types.h b/vnet/vnet/adj/adj_types.h
new file mode 100644
index 00000000000..a7234663d29
--- /dev/null
+++ b/vnet/vnet/adj/adj_types.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_TYPES_H__
+#define __ADJ_TYPES_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an adj_index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo bar(adj_index_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 adj_index_t;
+
+/**
+ * @brief Invalid ADJ index - used when no adj is known.
+ * Likewise, blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define ADJ_INDEX_INVALID ((u32)~0)
+
+#endif
diff --git a/vnet/vnet/classify/ip_classify.c b/vnet/vnet/classify/ip_classify.c
index c44f25e2add..44973ae5e99 100644
--- a/vnet/vnet/classify/ip_classify.c
+++ b/vnet/vnet/classify/ip_classify.c
@@ -15,6 +15,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
#include <vnet/classify/vnet_classify.h>
+#include <vnet/dpo/classify_dpo.h>
typedef struct {
u32 next_index;
@@ -63,7 +64,6 @@ ip_classify_inline (vlib_main_t * vm,
u32 n_left_from, * from, * to_next;
ip_lookup_next_t next_index;
vnet_classify_main_t * vcm = &vnet_classify_main;
- ip_lookup_main_t * lm;
f64 now = vlib_time_now (vm);
u32 hits = 0;
u32 misses = 0;
@@ -71,10 +71,8 @@ ip_classify_inline (vlib_main_t * vm,
u32 n_next;
if (is_ip4) {
- lm = &ip4_main.lookup_main;
n_next = IP4_LOOKUP_N_NEXT;
} else {
- lm = &ip6_main.lookup_main;
n_next = IP6_LOOKUP_N_NEXT;
}
@@ -88,8 +86,8 @@ ip_classify_inline (vlib_main_t * vm,
vlib_buffer_t * b0, * b1;
u32 bi0, bi1;
u8 * h0, * h1;
- u32 adj_index0, adj_index1;
- ip_adjacency_t * adj0, * adj1;
+ u32 cd_index0, cd_index1;
+ classify_dpo_t *cd0, * cd1;
u32 table_index0, table_index1;
vnet_classify_table_t * t0, * t1;
@@ -116,13 +114,13 @@ ip_classify_inline (vlib_main_t * vm,
h1 = (void *)vlib_buffer_get_current(b1) -
ethernet_buffer_header_size(b1);
- adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- adj0 = ip_get_adjacency (lm, adj_index0);
- table_index0 = adj0->classify.table_index;
+ cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ cd0 = classify_dpo_get(cd_index0);
+ table_index0 = cd0->cd_table_index;
- adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
- adj1 = ip_get_adjacency (lm, adj_index1);
- table_index1 = adj1->classify.table_index;
+ cd_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ cd1 = classify_dpo_get(cd_index1);
+ table_index1 = cd1->cd_table_index;
t0 = pool_elt_at_index (vcm->tables, table_index0);
@@ -151,8 +149,8 @@ ip_classify_inline (vlib_main_t * vm,
vlib_buffer_t * b0;
u32 bi0;
u8 * h0;
- u32 adj_index0;
- ip_adjacency_t * adj0;
+ u32 cd_index0;
+ classify_dpo_t *cd0;
u32 table_index0;
vnet_classify_table_t * t0;
@@ -161,9 +159,9 @@ ip_classify_inline (vlib_main_t * vm,
h0 = (void *)vlib_buffer_get_current(b0) -
ethernet_buffer_header_size(b0);
- adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- adj0 = ip_get_adjacency (lm, adj_index0);
- table_index0 = adj0->classify.table_index;
+ cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ cd0 = classify_dpo_get(cd_index0);
+ table_index0 = cd0->cd_table_index;
t0 = pool_elt_at_index (vcm->tables, table_index0);
vnet_buffer(b0)->l2_classify.hash =
@@ -192,7 +190,7 @@ ip_classify_inline (vlib_main_t * vm,
{
u32 bi0;
vlib_buffer_t * b0;
- u32 next0 = IP_LOOKUP_NEXT_MISS;
+ u32 next0 = IP_LOOKUP_NEXT_DROP;
u32 table_index0;
vnet_classify_table_t * t0;
vnet_classify_entry_t * e0;
diff --git a/vnet/vnet/classify/vnet_classify.c b/vnet/vnet/classify/vnet_classify.c
index 2eee0f5671e..7716fc986f2 100644
--- a/vnet/vnet/classify/vnet_classify.c
+++ b/vnet/vnet/classify/vnet_classify.c
@@ -1106,9 +1106,7 @@ uword unformat_l2_output_next_index (unformat_input_t * input, va_list * args)
}
#define foreach_ip_next \
-_(miss, MISS) \
_(drop, DROP) \
-_(local, LOCAL) \
_(rewrite, REWRITE)
uword unformat_ip_next_index (unformat_input_t * input, va_list * args)
@@ -2121,7 +2119,7 @@ test_classify_command_fn (vlib_main_t * vm,
memory_size,
0 /* skip */,
3 /* vectors to match */);
- t->miss_next_index = IP_LOOKUP_NEXT_LOCAL;
+ t->miss_next_index = IP_LOOKUP_NEXT_DROP;
vlib_cli_output (vm, "Create table %d", t - cm->tables);
}
diff --git a/vnet/vnet/config.h b/vnet/vnet/config.h
index d80ff19ec28..b77a7794a6e 100644
--- a/vnet/vnet/config.h
+++ b/vnet/vnet/config.h
@@ -161,6 +161,10 @@ u32 vnet_config_del_feature (vlib_main_t * vm,
void *feature_config,
u32 n_feature_config_bytes);
+u8 *vnet_config_format_features (vlib_main_t * vm,
+ vnet_config_main_t * cm,
+ u32 config_index, u8 * s);
+
#endif /* included_vnet_config_h */
/*
diff --git a/vnet/vnet/cop/ip4_whitelist.c b/vnet/vnet/cop/ip4_whitelist.c
index 5578558c4b1..d5121e72980 100644
--- a/vnet/vnet/cop/ip4_whitelist.c
+++ b/vnet/vnet/cop/ip4_whitelist.c
@@ -13,6 +13,8 @@
* limitations under the License.
*/
#include <vnet/cop/cop.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
typedef struct {
u32 next_index;
@@ -57,9 +59,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
u32 n_left_from, * from, * to_next;
cop_feature_type_t next_index;
cop_main_t *cm = &cop_main;
- ip4_main_t * im4 = &ip4_main;
- ip_lookup_main_t * lm4 = &im4->lookup_main;
- vlib_combined_counter_main_t * vcm = &im4->lookup_main.adjacency_counters;
+ vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
u32 cpu_index = vm->cpu_index;
from = vlib_frame_vector_args (frame);
@@ -74,7 +74,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
to_next, n_left_to_next);
while (n_left_from >= 4 && n_left_to_next >= 2)
- {
+ {
u32 bi0, bi1;
vlib_buffer_t * b0, * b1;
u32 next0, next1;
@@ -82,147 +82,142 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
ip4_header_t * ip0, * ip1;
cop_config_main_t * ccm0, * ccm1;
cop_config_data_t * c0, * c1;
- ip4_fib_mtrie_t * mtrie0, * mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- u32 adj_index0, adj_index1;
- ip_adjacency_t * adj0, * adj1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ u32 lb_index0, lb_index1;
+ const load_balance_t * lb0, *lb1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
- CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
- CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
- }
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
/* speculatively enqueue b0 and b1 to the current next frame */
- to_next[0] = bi0 = from[0];
- to_next[1] = bi1 = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
- ip0 = vlib_buffer_get_current (b0);
+ ip0 = vlib_buffer_get_current (b0);
- ccm0 = cm->cop_config_mains + VNET_COP_IP4;
+ ccm0 = cm->cop_config_mains + VNET_COP_IP4;
- c0 = vnet_get_config_data
+ c0 = vnet_get_config_data
(&ccm0->config_main,
&vnet_buffer (b0)->cop.current_config_index,
&next0,
sizeof (c0[0]));
- mtrie0 = &vec_elt_at_index (im4->fibs, c0->fib_index)->mtrie;
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 0);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 1);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 2);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 3);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- ASSERT (adj_index0
- == ip4_fib_lookup_with_table (im4, c0->fib_index,
- &ip0->src_address,
- 1 /* no_default_route */));
- adj0 = ip_get_adjacency (lm4, adj_index0);
- if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
+ ASSERT (lb_index0
+ == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
+ &ip0->src_address));
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
{
b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
next0 = RX_COP_DROP;
}
- b1 = vlib_get_buffer (vm, bi1);
+ b1 = vlib_get_buffer (vm, bi1);
sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
- ip1 = vlib_buffer_get_current (b1);
+ ip1 = vlib_buffer_get_current (b1);
- ccm1 = cm->cop_config_mains + VNET_COP_IP4;
+ ccm1 = cm->cop_config_mains + VNET_COP_IP4;
- c1 = vnet_get_config_data
+ c1 = vnet_get_config_data
(&ccm1->config_main,
&vnet_buffer (b1)->cop.current_config_index,
&next1,
sizeof (c1[0]));
+ mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
- mtrie1 = &vec_elt_at_index (im4->fibs, c1->fib_index)->mtrie;
-
- leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+ leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
&ip1->src_address, 0);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
&ip1->src_address, 1);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
&ip1->src_address, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
&ip1->src_address, 3);
- adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
-
- ASSERT (adj_index1
- == ip4_fib_lookup_with_table (im4, c1->fib_index,
- &ip1->src_address,
- 1 /* no_default_route */));
- adj1 = ip_get_adjacency (lm4, adj_index1);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ ASSERT (lb_index1
+ == ip4_fib_table_lookup_lb (ip4_fib_get(c1->fib_index),
+ &ip1->src_address));
+ lb1 = load_balance_get (lb_index1);
+ dpo1 = load_balance_get_bucket_i(lb1, 0);
- vlib_increment_combined_counter
- (vcm, cpu_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, b0)
+ vlib_increment_combined_counter
+ (vcm, cpu_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, b0)
+ sizeof(ethernet_header_t));
- vlib_increment_combined_counter
- (vcm, cpu_index, adj_index1, 1,
+ vlib_increment_combined_counter
+ (vcm, cpu_index, lb_index1, 1,
vlib_buffer_length_in_chain (vm, b1)
+ sizeof(ethernet_header_t));
- if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
- {
- b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
- next0 = RX_COP_DROP;
- }
- if (PREDICT_FALSE(adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
+ if (PREDICT_FALSE(dpo1->dpoi_type != DPO_RECEIVE))
{
b1->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
next1 = RX_COP_DROP;
}
- if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
{
- ip4_cop_whitelist_trace_t *t =
+ ip4_cop_whitelist_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
t->sw_if_index = sw_if_index0;
t->next_index = next0;
}
- if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
{
- ip4_cop_whitelist_trace_t *t =
+ ip4_cop_whitelist_trace_t *t =
vlib_add_trace (vm, node, b1, sizeof (*t));
t->sw_if_index = sw_if_index1;
t->next_index = next1;
@@ -245,8 +240,9 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
cop_config_data_t *c0;
ip4_fib_mtrie_t * mtrie0;
ip4_fib_mtrie_leaf_t leaf0;
- u32 adj_index0;
- ip_adjacency_t * adj0;
+ u32 lb_index0;
+ const load_balance_t * lb0;
+ const dpo_id_t *dpo0;
/* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
@@ -269,7 +265,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
&next0,
sizeof (c0[0]));
- mtrie0 = &vec_elt_at_index (im4->fibs, c0->fib_index)->mtrie;
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
@@ -285,20 +281,21 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 3);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ ASSERT (lb_index0
+ == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
+ &ip0->src_address));
- ASSERT (adj_index0
- == ip4_fib_lookup_with_table (im4, c0->fib_index,
- &ip0->src_address,
- 1 /* no_default_route */));
- adj0 = ip_get_adjacency (lm4, adj_index0);
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
vlib_increment_combined_counter
- (vcm, cpu_index, adj_index0, 1,
+ (vcm, cpu_index, lb_index0, 1,
vlib_buffer_length_in_chain (vm, b0)
+ sizeof(ethernet_header_t));
- if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
{
b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED];
next0 = RX_COP_DROP;
diff --git a/vnet/vnet/cop/ip6_whitelist.c b/vnet/vnet/cop/ip6_whitelist.c
index 4a8f33fb727..c2e16ccfe54 100644
--- a/vnet/vnet/cop/ip6_whitelist.c
+++ b/vnet/vnet/cop/ip6_whitelist.c
@@ -13,6 +13,8 @@
* limitations under the License.
*/
#include <vnet/cop/cop.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
typedef struct {
u32 next_index;
@@ -58,8 +60,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
cop_feature_type_t next_index;
cop_main_t *cm = &cop_main;
ip6_main_t * im6 = &ip6_main;
- ip_lookup_main_t * lm6 = &im6->lookup_main;
- vlib_combined_counter_main_t * vcm = &im6->lookup_main.adjacency_counters;
+ vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
u32 cpu_index = vm->cpu_index;
from = vlib_frame_vector_args (frame);
@@ -82,9 +83,10 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
ip6_header_t * ip0, * ip1;
cop_config_main_t * ccm0, * ccm1;
cop_config_data_t * c0, * c1;
- u32 adj_index0, adj_index1;
- ip_adjacency_t * adj0, * adj1;
-
+ u32 lb_index0, lb_index1;
+ const load_balance_t * lb0, *lb1;
+ const dpo_id_t *dpo0, *dpo1;
+
/* Prefetch next iteration. */
{
vlib_buffer_t * p2, * p3;
@@ -120,10 +122,12 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
&next0,
sizeof (c0[0]));
- adj_index0 = ip6_fib_lookup_with_table (im6, c0->fib_index,
- &ip0->src_address);
- adj0 = ip_get_adjacency (lm6, adj_index0);
- if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
+ lb_index0 = ip6_fib_table_fwding_lookup (im6, c0->fib_index,
+ &ip0->src_address);
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
{
b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
next0 = RX_COP_DROP;
@@ -142,28 +146,23 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
&next1,
sizeof (c1[0]));
- adj_index1 = ip6_fib_lookup_with_table (im6, c1->fib_index,
- &ip1->src_address);
+ lb_index1 = ip6_fib_table_fwding_lookup (im6, c1->fib_index,
+ &ip1->src_address);
- adj1 = ip_get_adjacency (lm6, adj_index1);
+ lb1 = load_balance_get (lb_index1);
+ dpo1 = load_balance_get_bucket_i(lb1, 0);
vlib_increment_combined_counter
- (vcm, cpu_index, adj_index0, 1,
+ (vcm, cpu_index, lb_index0, 1,
vlib_buffer_length_in_chain (vm, b0)
+ sizeof(ethernet_header_t));
vlib_increment_combined_counter
- (vcm, cpu_index, adj_index1, 1,
+ (vcm, cpu_index, lb_index1, 1,
vlib_buffer_length_in_chain (vm, b1)
+ sizeof(ethernet_header_t));
- if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
- {
- b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
- next0 = RX_COP_DROP;
- }
-
- if (PREDICT_FALSE(adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
+ if (PREDICT_FALSE(dpo1->dpoi_type != DPO_RECEIVE))
{
b1->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
next1 = RX_COP_DROP;
@@ -202,8 +201,9 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
ip6_header_t * ip0;
cop_config_main_t *ccm0;
cop_config_data_t *c0;
- u32 adj_index0;
- ip_adjacency_t * adj0;
+ u32 lb_index0;
+ const load_balance_t * lb0;
+ const dpo_id_t *dpo0;
/* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
@@ -226,17 +226,18 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
&next0,
sizeof (c0[0]));
- adj_index0 = ip6_fib_lookup_with_table (im6, c0->fib_index,
- &ip0->src_address);
+ lb_index0 = ip6_fib_table_fwding_lookup (im6, c0->fib_index,
+ &ip0->src_address);
- adj0 = ip_get_adjacency (lm6, adj_index0);
+ lb0 = load_balance_get (lb_index0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
vlib_increment_combined_counter
- (vcm, cpu_index, adj_index0, 1,
+ (vcm, cpu_index, lb_index0, 1,
vlib_buffer_length_in_chain (vm, b0)
+ sizeof(ethernet_header_t));
- if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL))
+ if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE))
{
b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED];
next0 = RX_COP_DROP;
diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c
index 2ffb95884d3..9e8fed44efb 100644
--- a/vnet/vnet/devices/dpdk/cli.c
+++ b/vnet/vnet/devices/dpdk/cli.c
@@ -21,7 +21,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/dpdk/dpdk.h>
#include <vnet/classify/vnet_classify.h>
-#include <vnet/mpls-gre/packet.h>
+#include <vnet/mpls/packet.h>
#include "dpdk_priv.h"
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index a9e286e56eb..63e7e559286 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -21,7 +21,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/dpdk/dpdk.h>
#include <vnet/classify/vnet_classify.h>
-#include <vnet/mpls-gre/packet.h>
+#include <vnet/mpls/packet.h>
#include <vnet/handoff.h>
#include "dpdk_priv.h"
@@ -687,7 +687,7 @@ poll_rate_limit (dpdk_main_t * dm)
<em>Next Nodes:</em>
- Static arcs to: error-drop, ethernet-input,
- ip4-input-no-checksum, ip6-input, mpls-gre-input
+ ip4-input-no-checksum, ip6-input, mpls-input
- per-interface redirection, controlled by
<code>xd->per_interface_next_index</code>
*/
@@ -791,7 +791,7 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
[DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
[DPDK_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
[DPDK_RX_NEXT_IP6_INPUT] = "ip6-input",
- [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input",
+ [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-input",
},
};
@@ -805,7 +805,6 @@ VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd)
CLIB_MULTIARCH_SELECT_FN(dpdk_input);
CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss);
CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd);
-/* *INDENT-ON* */
/*
* Override the next nodes for the dpdk input nodes.
@@ -876,11 +875,3 @@ efd_config (u32 enabled,
set_efd_bitmap (&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op);
set_efd_bitmap (&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op);
}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/ssvm/node.c b/vnet/vnet/devices/ssvm/node.c
index e7d9792bd65..e613cc9cb01 100644
--- a/vnet/vnet/devices/ssvm/node.c
+++ b/vnet/vnet/devices/ssvm/node.c
@@ -330,7 +330,7 @@ VLIB_REGISTER_NODE (ssvm_eth_input_node) = {
[SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input",
[SSVM_ETH_INPUT_NEXT_IP4_INPUT] = "ip4-input",
[SSVM_ETH_INPUT_NEXT_IP6_INPUT] = "ip6-input",
- [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-gre-input",
+ [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-input",
},
};
diff --git a/vnet/vnet/dhcp/client.c b/vnet/vnet/dhcp/client.c
index 5916cfdb2fa..ffe6e8dab7c 100644
--- a/vnet/vnet/dhcp/client.c
+++ b/vnet/vnet/dhcp/client.c
@@ -14,19 +14,12 @@
*/
#include <vlib/vlib.h>
#include <vnet/dhcp/proxy.h>
+#include <vnet/fib/fib_table.h>
dhcp_client_main_t dhcp_client_main;
static u8 * format_dhcp_client_state (u8 * s, va_list * va);
static vlib_node_registration_t dhcp_client_process_node;
-void __attribute__((weak))
-api_config_default_ip_route (u8 is_ipv6, u8 is_add, u32 vrf_id,
- u32 sw_if_index, u8 *next_hop_addr)
-{
- /* dummy function */
- return;
-}
-
static void
dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
{
@@ -214,14 +207,34 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b,
/*
* Configure default IP route:
- * - vrf_id is 0 by default.
*/
if (c->router_address.as_u32)
- api_config_default_ip_route (0 /* is_ipv6 */,
- 1 /* is_add */,
- 0 /* vrf_id */,
- c->sw_if_index,
- (u8 *)&c->router_address);
+ {
+ fib_prefix_t all_0s =
+ {
+ .fp_len = 0,
+ .fp_addr.ip4.as_u32 = 0x0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ ip46_address_t nh =
+ {
+ .ip4 = c->router_address,
+ };
+
+ fib_table_entry_path_add (fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_0s,
+ FIB_SOURCE_DHCP,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh,
+ c->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
/*
* Call the user's event callback to report DHCP information
@@ -496,11 +509,29 @@ dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
if (now > c->lease_expires)
{
if (c->router_address.as_u32)
- api_config_default_ip_route (0 /* is_ipv6 */,
- 0 /* is_add */,
- 0 /* vrf_id */,
- c->sw_if_index,
- (u8 *)&c->router_address);
+ {
+ fib_prefix_t all_0s =
+ {
+ .fp_len = 0,
+ .fp_addr.ip4.as_u32 = 0x0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ ip46_address_t nh = {
+ .ip4 = c->router_address,
+ };
+
+ fib_table_entry_path_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_0s,
+ FIB_SOURCE_DHCP,
+ FIB_PROTOCOL_IP4,
+ &nh,
+ c->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
dhcp_client_release_address (dcm, c);
c->state = DHCP_DISCOVER;
@@ -689,7 +720,7 @@ show_dhcp_client_command_fn (vlib_main_t * vm,
p = hash_get (dcm->client_by_sw_if_index, sw_if_index);
if (p == 0)
return clib_error_return (0, "dhcp client not configured");
- c = pool_elt_at_index (dcm->clients, sw_if_index);
+ c = pool_elt_at_index (dcm->clients, p[0]);
vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose);
return 0;
}
@@ -715,6 +746,18 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a)
vlib_main_t * vm = dcm->vlib_main;
dhcp_client_t * c;
uword * p;
+ fib_prefix_t all_1s =
+ {
+ .fp_len = 32,
+ .fp_addr.ip4.as_u32 = 0xffffffff,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ fib_prefix_t all_0s =
+ {
+ .fp_len = 0,
+ .fp_addr.ip4.as_u32 = 0x0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
p = hash_get (dcm->client_by_sw_if_index, a->sw_if_index);
@@ -738,6 +781,22 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a)
} while (c->transaction_id == 0);
set_l2_rewrite (dcm, c);
hash_set (dcm->client_by_sw_if_index, a->sw_if_index, c - dcm->clients);
+
+ /* this add is ref counted by FIB so we can add for each itf */
+ fib_table_entry_special_add(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_1s,
+ FIB_SOURCE_DHCP,
+ FIB_ENTRY_FLAG_LOCAL,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * enable the interface to RX IPv4 packets
+ * this is also ref counted
+ */
+ ip4_sw_interface_enable_disable (c->sw_if_index, 1);
+
vlib_process_signal_event (vm, dhcp_client_process_node.index,
EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients);
}
@@ -745,12 +804,32 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a)
{
c = pool_elt_at_index (dcm->clients, p[0]);
+ fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_1s,
+ FIB_SOURCE_DHCP);
+
if (c->router_address.as_u32)
- api_config_default_ip_route (0 /* is_ipv6 */,
- 0 /* is_add */,
- 0 /* vrf_id */,
- c->sw_if_index,
- (u8 *)&c->router_address);
+ {
+ ip46_address_t nh = {
+ .ip4 = c->router_address,
+ };
+
+ fib_table_entry_path_remove(fib_table_get_index_for_sw_if_index(
+ FIB_PROTOCOL_IP4,
+ c->sw_if_index),
+ &all_0s,
+ FIB_SOURCE_DHCP,
+ FIB_PROTOCOL_IP4,
+ &nh,
+ c->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ ip4_sw_interface_enable_disable (c->sw_if_index, 0);
+
vec_free (c->option_55_data);
vec_free (c->hostname);
vec_free (c->client_identifier);
diff --git a/vnet/vnet/dhcp/proxy_node.c b/vnet/vnet/dhcp/proxy_node.c
index 2073b3f7bf6..7018fc3958b 100644
--- a/vnet/vnet/dhcp/proxy_node.c
+++ b/vnet/vnet/dhcp/proxy_node.c
@@ -18,6 +18,7 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/dhcp/proxy.h>
+#include <vnet/fib/ip4_fib.h>
static char * dhcp_proxy_error_strings[] = {
#define dhcp_proxy_error(n,s) s,
@@ -225,7 +226,7 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
fib_index = im->fib_index_by_sw_if_index
[vnet_buffer(b0)->sw_if_index[VLIB_RX]];
- fib = vec_elt_at_index (im->fibs, fib_index);
+ fib = ip4_fib_get (fib_index);
fib_id = fib->table_id;
end = b0->data + b0->current_data + b0->current_length;
@@ -699,9 +700,7 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
int insert_option_82, int is_del)
{
dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
- ip4_main_t * im = &ip4_main;
dhcp_server_t * server = 0;
- ip4_fib_t *rx_fib, *server_fib;
u32 server_index = 0;
u32 rx_fib_index = 0;
@@ -711,18 +710,11 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
if (src_address->as_u32 == 0)
return VNET_API_ERROR_INVALID_SRC_ADDRESS;
- rx_fib = find_ip4_fib_by_table_index_or_id
- (&ip4_main, rx_fib_id, IP4_ROUTE_FLAG_TABLE_ID);
-
- if (rx_fib == 0)
- return VNET_API_ERROR_NO_SUCH_INNER_FIB;
-
- server_fib = find_ip4_fib_by_table_index_or_id
- (&ip4_main, server_fib_id, IP4_ROUTE_FLAG_TABLE_ID);
-
- if (server_fib == 0)
- return VNET_API_ERROR_NO_SUCH_FIB;
-
+ rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+ rx_fib_id);
+ server_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+ server_fib_id);
+
if (rx_fib_id == 0)
{
server = pool_elt_at_index (dpm->dhcp_servers, 0);
@@ -735,8 +727,6 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
goto initialize_it;
}
- rx_fib_index = rx_fib - im->fibs;
-
if (is_del)
{
if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index))
@@ -768,7 +758,7 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
initialize_it:
server->dhcp_server.as_u32 = addr->as_u32;
- server->server_fib_index = server_fib - im->fibs;
+ server->server_fib_index = server_index;
server->dhcp_src_address.as_u32 = src_address->as_u32;
server->insert_option_82 = insert_option_82;
server->valid = 1;
@@ -883,14 +873,12 @@ u8 * format_dhcp_proxy_server (u8 * s, va_list * args)
return s;
}
- server_fib = find_ip4_fib_by_table_index_or_id
- (&ip4_main, server->server_fib_index, IP4_ROUTE_FLAG_FIB_INDEX);
+ server_fib = ip4_fib_get(server->server_fib_index);
if (server_fib)
server_fib_id = server_fib->table_id;
- rx_fib = find_ip4_fib_by_table_index_or_id
- (&ip4_main, rx_fib_index, IP4_ROUTE_FLAG_FIB_INDEX);
+ rx_fib = ip4_fib_get(rx_fib_index);
if (rx_fib)
rx_fib_id = rx_fib->table_id;
diff --git a/vnet/vnet/dhcpv6/proxy_node.c b/vnet/vnet/dhcpv6/proxy_node.c
index 4dc746f6936..323bdf9b730 100644
--- a/vnet/vnet/dhcpv6/proxy_node.c
+++ b/vnet/vnet/dhcpv6/proxy_node.c
@@ -18,6 +18,7 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/dhcpv6/proxy.h>
+#include <vnet/fib/ip6_fib.h>
static char * dhcpv6_proxy_error_strings[] = {
#define dhcpv6_proxy_error(n,s) s,
@@ -323,7 +324,7 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
fib_index = im->fib_index_by_sw_if_index
[vnet_buffer(b0)->sw_if_index[VLIB_RX]];
- fib = vec_elt_at_index (im->fibs, fib_index);
+ fib = ip6_fib_get (fib_index);
fib_id = fib->table_id;
p_vss = hash_get (dpm->vss_index_by_vrf_id,
@@ -573,7 +574,7 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
svr_fib_index = im->fib_index_by_sw_if_index
[vnet_buffer(b0)->sw_if_index[VLIB_RX]];
- svr_fib = vec_elt_at_index (im->fibs, svr_fib_index);
+ svr_fib = ip6_fib_get (svr_fib_index);
svr_fib_id = svr_fib->table_id;
if (svr_fib_id != dpm->server_fib_index ||
@@ -831,8 +832,7 @@ u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args)
return s;
}
- f = find_ip6_fib_by_table_index_or_id (&ip6_main, dm->server_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
+ f = ip6_fib_get (dm->server_fib_index);
if (f)
fib_id = f->table_id;
diff --git a/vnet/vnet/dpo/classify_dpo.c b/vnet/vnet/dpo/classify_dpo.c
new file mode 100644
index 00000000000..3b7b98f9da8
--- /dev/null
+++ b/vnet/vnet/dpo/classify_dpo.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all classify DPOs
+ */
+classify_dpo_t *classify_dpo_pool;
+
+/**
+ * Take a cache-line aligned element from the classify DPO pool and
+ * hand it back zeroed.
+ */
+static classify_dpo_t *
+classify_dpo_alloc (void)
+{
+    classify_dpo_t *new_cd;
+
+    pool_get_aligned(classify_dpo_pool, new_cd, CLIB_CACHE_LINE_BYTES);
+    memset(new_cd, 0, sizeof(classify_dpo_t));
+
+    return (new_cd);
+}
+
+/**
+ * Convert a pointer to a classify DPO into its index in the pool.
+ */
+static index_t
+classify_dpo_get_index (classify_dpo_t *cd)
+{
+    const index_t cdi = cd - classify_dpo_pool;
+
+    return (cdi);
+}
+
+/**
+ * Allocate a classify DPO and record the protocol and classify table
+ * index it was created with.
+ *
+ * @param proto                protocol stored in the new object
+ * @param classify_table_index classify table index stored in the new object
+ * @return the pool index of the new classify DPO
+ */
+index_t
+classify_dpo_create (fib_protocol_t proto,
+                     u32 classify_table_index)
+{
+    classify_dpo_t *new_cd = classify_dpo_alloc();
+
+    new_cd->cd_table_index = classify_table_index;
+    new_cd->cd_proto = proto;
+
+    return (classify_dpo_get_index(new_cd));
+}
+
+/**
+ * Format function for a classify DPO.
+ * Consumes two arguments: the index of the DPO in the pool and an
+ * indent, the latter being unused.
+ */
+u8*
+format_classify_dpo (u8 *s, va_list *args)
+{
+    index_t cdi = va_arg (*args, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+    const classify_dpo_t *cd = classify_dpo_get(cdi);
+
+    s = format(s, "classify:[%d]:table:%d", cdi, cd->cd_table_index);
+
+    return (s);
+}
+
+/**
+ * Take a lock on the classify DPO the ID refers to.
+ */
+static void
+classify_dpo_lock (dpo_id_t *dpo)
+{
+    classify_dpo_t *locked = classify_dpo_get(dpo->dpoi_index);
+
+    locked->cd_locks += 1;
+}
+
+/**
+ * Release a lock on the classify DPO the ID refers to. The object is
+ * returned to the pool once the lock count drops to zero.
+ */
+static void
+classify_dpo_unlock (dpo_id_t *dpo)
+{
+    classify_dpo_t *cd = classify_dpo_get(dpo->dpoi_index);
+
+    cd->cd_locks -= 1;
+
+    if (0 != cd->cd_locks)
+    {
+        /* still in use */
+        return;
+    }
+
+    pool_put(classify_dpo_pool, cd);
+}
+
+/*
+ * Virtual function table handed to dpo_register() for the classify DPO type
+ */
+const static dpo_vft_t cd_vft = {
+ .dv_lock = classify_dpo_lock,
+ .dv_unlock = classify_dpo_unlock,
+ .dv_format = format_classify_dpo,
+};
+
+/*
+ * NULL terminated lists of graph node names for the classify DPO,
+ * one list per data-path protocol
+ */
+const static char* const classify_ip4_nodes[] =
+{
+ "ip4-classify",
+ NULL,
+};
+const static char* const classify_ip6_nodes[] =
+{
+ "ip6-classify",
+ NULL,
+};
+/*
+ * Table indexed by DPO protocol, handed to dpo_register();
+ * no nodes are listed for MPLS
+ */
+const static char* const * const classify_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = classify_ip4_nodes,
+ [DPO_PROTO_IP6] = classify_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * Register the classify DPO type, together with its virtual function
+ * table and graph node names.
+ */
+void
+classify_dpo_module_init (void)
+{
+    dpo_register(DPO_CLASSIFY,
+                 &cd_vft,
+                 classify_nodes);
+}
diff --git a/vnet/vnet/dpo/classify_dpo.h b/vnet/vnet/dpo/classify_dpo.h
new file mode 100644
index 00000000000..cd35c3c440b
--- /dev/null
+++ b/vnet/vnet/dpo/classify_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLASSIFY_DPO_H__
+#define __CLASSIFY_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of a classify object in the data-path. It holds the
+ * protocol and classify table index given to classify_dpo_create()
+ */
+typedef struct classify_dpo_t
+{
+ /**
+ * Protocol given to classify_dpo_create()
+ */
+ fib_protocol_t cd_proto;
+
+ /**
+ * Classify table index given to classify_dpo_create()
+ */
+ u32 cd_table_index;
+
+ /**
+ * Number of locks/users of the classify object
+ */
+ u16 cd_locks;
+} classify_dpo_t;
+
+extern index_t classify_dpo_create(fib_protocol_t proto,
+ u32 classify_table_index);
+
+extern u8* format_classify_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern classify_dpo_t *classify_dpo_pool;
+
+/**
+ * Look up a classify DPO in the pool by its index.
+ */
+static inline classify_dpo_t *
+classify_dpo_get (index_t index)
+{
+    classify_dpo_t *cd = pool_elt_at_index(classify_dpo_pool, index);
+
+    return (cd);
+}
+
+extern void classify_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/dpo.c b/vnet/vnet/dpo/dpo.c
new file mode 100644
index 00000000000..5eff52b7b8a
--- /dev/null
+++ b/vnet/vnet/dpo/dpo.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP.
+ *
+ * The DPO is a base class that is specialised by other objects to provide
+ * concrete actions
+ *
+ * The VLIB graph nodes are a graph of types, the DPO graph is a graph of instances.
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/format.h>
+#include <vnet/adj/adj.h>
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+
+/**
+ * Array of char* names for the DPO types and protos
+ */
+static const char* dpo_type_names[] = DPO_TYPES;
+static const char* dpo_proto_names[] = DPO_PROTOS;
+
+/**
+ * @brief Vector of virtual function tables for the DPO types
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static dpo_vft_t *dpo_vfts;
+
+/**
+ * @brief vector of graph node names associated with each DPO type and protocol.
+ *
+ * dpo_nodes[child_type][child_proto][node_X] = node_name;
+ * i.e.
+ * dpo_nodes[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup"
+ * dpo_nodes[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance"
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static const char* const * const ** dpo_nodes;
+
+/**
+ * @brief Vector of edge indices from parent DPO nodes to child
+ *
+ * dpo_edges[child_type][child_proto][parent_type] = edge_index
+ *
+ * This array is derived at init time from the dpo_nodes above. Note that
+ * the third dimension in dpo_nodes is lost, hence, the edge index from each
+ * node MUST be the same.
+ *
+ * Note that this array is child type specific, not child instance specific.
+ */
+static u32 ***dpo_edges;
+
+/**
+ * @brief The DPO type value that can be assigned to the next dynamic
+ * type registration.
+ */
+static dpo_type_t dpo_dynamic = DPO_LAST;
+
+/**
+ * Format function for a DPO type; consumes one argument, the type,
+ * and prints its name.
+ */
+u8 *
+format_dpo_type (u8 * s, va_list * args)
+{
+    dpo_type_t type = va_arg (*args, int);
+    const char *name = dpo_type_names[type];
+
+    return (format(s, "%s", name));
+}
+
+/**
+ * Format function for a DPO ID; consumes two arguments, a pointer to
+ * the DPO ID and an indent.
+ * The next-node is printed first, followed by the output of the format
+ * function registered for the DPO's type. When none is registered the
+ * DPO is shown as "unset" (type DPO_FIRST) or "unknown".
+ */
+u8 *
+format_dpo_id (u8 * s, va_list * args)
+{
+    dpo_id_t *dpo = va_arg (*args, dpo_id_t*);
+    u32 indent = va_arg (*args, u32);
+
+    s = format(s, "[@%d]: ", dpo->dpoi_next_node);
+
+    if (NULL == dpo_vfts[dpo->dpoi_type].dv_format)
+    {
+        /* no type specific format function */
+        if (DPO_FIRST == dpo->dpoi_type)
+        {
+            return (format(s, "unset"));
+        }
+        return (format(s, "unknown"));
+    }
+
+    return (format(s, "%U",
+                   dpo_vfts[dpo->dpoi_type].dv_format,
+                   dpo->dpoi_index,
+                   indent));
+}
+
+/**
+ * Format function for a DPO protocol; consumes one argument, the
+ * protocol, and prints its name.
+ */
+u8 *
+format_dpo_proto (u8 * s, va_list * args)
+{
+    dpo_proto_t proto = va_arg (*args, int);
+
+    s = format(s, "%s", dpo_proto_names[proto]);
+
+    return (s);
+}
+
+/**
+ * @brief Set a DPO ID to reference the object described by the type,
+ * protocol and index passed.
+ *
+ * The new object is locked before the lock held on the object the ID
+ * previously referenced is released.
+ *
+ * @param dpo   the DPO ID to update
+ * @param type  the type of the object to reference
+ * @param proto the data-path protocol of the object
+ * @param index the type specific index of the object
+ */
+void
+dpo_set (dpo_id_t *dpo,
+         dpo_type_t type,
+         dpo_proto_t proto,
+         index_t index)
+{
+    /* remember the old value so it can be unlocked once replaced */
+    dpo_id_t tmp = *dpo;
+
+    dpo->dpoi_type = type;
+    dpo->dpoi_proto = proto;
+    dpo->dpoi_index = index;
+
+    if (DPO_ADJACENCY == type)
+    {
+        /*
+         * set the adj subtype
+         */
+        ip_adjacency_t *adj;
+
+        adj = adj_get(index);
+
+        switch (adj->lookup_next_index)
+        {
+        case IP_LOOKUP_NEXT_ARP:
+            dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE;
+            break;
+        case IP_LOOKUP_NEXT_MIDCHAIN:
+            dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN;
+            break;
+        default:
+            break;
+        }
+    }
+    dpo_lock(dpo);
+    dpo_unlock(&tmp);
+}
+
+/**
+ * Set a DPO ID back to type DPO_FIRST with no protocol and an invalid
+ * index. This is done via dpo_set().
+ */
+void
+dpo_reset (dpo_id_t *dpo)
+{
+    dpo_set(dpo,
+            DPO_FIRST,
+            DPO_PROTO_NONE,
+            INDEX_INVALID);
+}
+
+/**
+ * \brief
+ * Compare two Data-path objects by type and then by index
+ *
+ * like memcmp, return 0 if matching, !0 otherwise.
+ */
+int
+dpo_cmp (const dpo_id_t *dpo1,
+         const dpo_id_t *dpo2)
+{
+    int res = dpo1->dpoi_type - dpo2->dpoi_type;
+
+    if (0 == res)
+    {
+        /* same type, so the index decides */
+        res = dpo1->dpoi_index - dpo2->dpoi_index;
+    }
+
+    return (res);
+}
+
+/**
+ * @brief Copy one DPO ID over another. The object now referenced by the
+ * destination is locked, then the object it referenced before is unlocked.
+ *
+ * NOTE(review): the single u64 store presumes a dpo_id_t occupies exactly
+ * 8 bytes - confirm against the definition in dpo.h.
+ */
+void
+dpo_copy (dpo_id_t *dst,
+ const dpo_id_t *src)
+{
+ dpo_id_t tmp = *dst;
+
+ /*
+ * the destination is written in a single u64 write - hence atomically w.r.t
+ * any packets inflight.
+ */
+ *((u64*)dst) = *(u64*)src;
+
+ dpo_lock(dst);
+ dpo_unlock(&tmp);
+}
+
+/**
+ * Return non-zero if the DPO is one of the adjacency types
+ * (complete, incomplete, midchain or glean), 0 otherwise.
+ */
+int
+dpo_is_adj (const dpo_id_t *dpo)
+{
+    const dpo_type_t type = dpo->dpoi_type;
+
+    if ((DPO_ADJACENCY == type) ||
+        (DPO_ADJACENCY_INCOMPLETE == type))
+    {
+        return (1);
+    }
+
+    return ((DPO_ADJACENCY_MIDCHAIN == type) ||
+            (DPO_ADJACENCY_GLEAN == type));
+}
+
+/**
+ * Record the virtual function table (by copy) and the graph node names
+ * (by reference) for a DPO type. Both vectors are grown as needed to
+ * hold the type.
+ *
+ * @param type  the DPO type being registered
+ * @param vft   the type's virtual function table
+ * @param nodes the type's per-protocol graph node names
+ */
+void
+dpo_register (dpo_type_t type,
+              const dpo_vft_t *vft,
+              const char * const * const * nodes)
+{
+    vec_validate(dpo_nodes, type);
+    vec_validate(dpo_vfts, type);
+
+    dpo_nodes[type] = nodes;
+    dpo_vfts[type] = *vft;
+}
+
+/**
+ * Allocate the next dynamic DPO type value and register the virtual
+ * function table and graph node names against it.
+ *
+ * @return the newly assigned DPO type
+ */
+dpo_type_t
+dpo_register_new_type (const dpo_vft_t *vft,
+                       const char * const * const * nodes)
+{
+    const dpo_type_t new_type = dpo_dynamic;
+
+    dpo_dynamic++;
+    dpo_register(new_type, vft, nodes);
+
+    return (new_type);
+}
+
+/**
+ * Take a lock on the object the DPO ID refers to, using the lock function
+ * registered for its type. An invalid DPO ID is ignored.
+ */
+void
+dpo_lock (dpo_id_t *dpo)
+{
+    if (dpo_id_is_valid(dpo))
+    {
+        dpo_vfts[dpo->dpoi_type].dv_lock(dpo);
+    }
+}
+
+/**
+ * Release a lock on the object the DPO ID refers to, using the unlock
+ * function registered for its type. An invalid DPO ID is ignored.
+ */
+void
+dpo_unlock (dpo_id_t *dpo)
+{
+    if (dpo_id_is_valid(dpo))
+    {
+        dpo_vfts[dpo->dpoi_type].dv_unlock(dpo);
+    }
+}
+
+
+/**
+ * @brief Get, creating it on first use, the VLIB graph edge index used to
+ * get from the child DPO's graph nodes to the parent DPO's graph nodes.
+ *
+ * An arc is added from each graph node registered for the child's type and
+ * protocol to each graph node registered for the parent's type and protocol.
+ * Only one edge index is stored per [child_type][child_proto][parent_type],
+ * so each of the arcs added must yield the same index.
+ *
+ * @param child_type  the type of the child DPO
+ * @param child_proto the data-path protocol of the child DPO
+ * @param parent_dpo  the DPO the child is being stacked on
+ * @return the edge index for the child to use as its next-node
+ */
+static u32
+dpo_get_next_node (dpo_type_t child_type,
+                   dpo_proto_t child_proto,
+                   const dpo_id_t *parent_dpo)
+{
+    dpo_proto_t parent_proto;
+    dpo_type_t parent_type;
+
+    parent_type = parent_dpo->dpoi_type;
+    parent_proto = parent_dpo->dpoi_proto;
+
+    vec_validate(dpo_edges, child_type);
+    vec_validate(dpo_edges[child_type], child_proto);
+    vec_validate_init_empty(dpo_edges[child_type][child_proto],
+                            parent_type, ~0);
+
+    /*
+     * if the edge index has not yet been created for this node to node transition
+     */
+    if (~0 == dpo_edges[child_type][child_proto][parent_type])
+    {
+        vlib_node_t *from_node, *to_node;
+        vlib_main_t *vm;
+        u32 edge, cc, pp;
+
+        vm = vlib_get_main();
+
+        ASSERT(NULL != dpo_nodes[child_type]);
+        ASSERT(NULL != dpo_nodes[child_type][child_proto]);
+        ASSERT(NULL != dpo_nodes[parent_type]);
+        ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+
+        cc = 0;
+
+        /*
+         * create a graph arc from each of the child's registered node types,
+         * to each of the parent's.
+         */
+        while (NULL != dpo_nodes[child_type][child_proto][cc])
+        {
+            from_node =
+                vlib_get_node_by_name(vm,
+                                      (u8*) dpo_nodes[child_type][child_proto][cc]);
+
+            pp = 0;
+
+            /*
+             * the parent's node list is selected by the parent's protocol,
+             * the same list that was ASSERTed non-NULL above.
+             */
+            while (NULL != dpo_nodes[parent_type][parent_proto][pp])
+            {
+                to_node =
+                    vlib_get_node_by_name(vm,
+                                          (u8*) dpo_nodes[parent_type][parent_proto][pp]);
+
+                edge = vlib_node_add_next(vm,
+                                          from_node->index,
+                                          to_node->index);
+
+                if (~0 == dpo_edges[child_type][child_proto][parent_type])
+                {
+                    dpo_edges[child_type][child_proto][parent_type] = edge;
+                }
+                else
+                {
+                    ASSERT(dpo_edges[child_type][child_proto][parent_type] == edge);
+                }
+                pp++;
+            }
+            cc++;
+        }
+    }
+
+    return (dpo_edges[child_type][child_proto][parent_type]);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. The VLIB graph edge index to use is passed by the caller.
+ */
+static void
+dpo_stack_i (u32 edge,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ /*
+ * in order to get an atomic update of the child we create a temporary,
+ * from a copy of the parent, and add the next_node. then we copy to the child
+ */
+ dpo_id_t tmp = DPO_NULL;
+ dpo_copy(&tmp, parent);
+
+ /*
+ * record the edge index for the child to parent VLIB graph transition
+ */
+ tmp.dpoi_next_node = edge;
+
+ /*
+ * this update is atomic.
+ */
+ dpo_copy(dpo, &tmp);
+
+ /* drop the lock the temporary took on the parent when it was copied */
+ dpo_reset(&tmp);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child-parent
+ * relationship. The VLIB graph arc used is derived from the child type and
+ * protocol passed and the type of the parent.
+ */
+void
+dpo_stack (dpo_type_t child_type,
+           dpo_proto_t child_proto,
+           dpo_id_t *dpo,
+           const dpo_id_t *parent)
+{
+    u32 edge;
+
+    edge = dpo_get_next_node(child_type, child_proto, parent);
+
+    dpo_stack_i(edge, dpo, parent);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. A VLIB graph arc is added from the child node passed to the
+ * first node registered for the parent's type and protocol, and the edge
+ * returned is used for the stacking.
+ */
+void
+dpo_stack_from_node (u32 child_node_index,
+                     dpo_id_t *dpo,
+                     const dpo_id_t *parent)
+{
+    const dpo_type_t parent_type = parent->dpoi_type;
+    const dpo_proto_t parent_proto = parent->dpoi_proto;
+    vlib_main_t *vm = vlib_get_main();
+    const char *parent_node_name;
+    vlib_node_t *parent_node;
+    u32 edge;
+
+    ASSERT(NULL != dpo_nodes[parent_type]);
+    ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+
+    parent_node_name = dpo_nodes[parent_type][parent_proto][0];
+    parent_node = vlib_get_node_by_name(vm, (u8*) parent_node_name);
+
+    edge = vlib_node_add_next(vm, child_node_index, parent_node->index);
+
+    dpo_stack_i(edge, dpo, parent);
+}
+
+/**
+ * Initialise the DPO module by calling the module init function of each of
+ * the DPO types listed below. Always returns NULL, i.e. no error.
+ * Registered as a VLIB init function below.
+ */
+static clib_error_t *
+dpo_module_init (vlib_main_t * vm)
+{
+ drop_dpo_module_init();
+ punt_dpo_module_init();
+ receive_dpo_module_init();
+ load_balance_module_init();
+ mpls_label_dpo_module_init();
+ classify_dpo_module_init();
+ lookup_dpo_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION(dpo_module_init);
diff --git a/vnet/vnet/dpo/dpo.h b/vnet/vnet/dpo/dpo.h
new file mode 100644
index 00000000000..8c22f00b091
--- /dev/null
+++ b/vnet/vnet/dpo/dpo.h
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP's data-path.
+ *
+ * The DPO can be considered to be like a base class that is specialised
+ * by other objects to provide concrete actions.
+ *
+ * The VLIB graph nodes are a graph of DPO types, the DPO graph is a graph of
+ * instances.
+ */
+
+#ifndef __DPO_H__
+#define __DPO_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo bar(index_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 index_t;
+
+/**
+ * @brief Invalid index - used when no index is known
+ * blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define INDEX_INVALID ((index_t)(~0))
+
+/**
+ * @brief Data path protocol.
+ * Actions performed on packets in the data-plane can be described and represented
+ * by protocol independent objects, i.e. ADJACENCY, but the specific actions
+ * required during ADJACENCY processing can be protocol dependent. For example,
+ * the adjacency rewrite node performs an ip4 checksum calculation, ip6 and MPLS
+ * do not, all 3 perform a TTL decrement. The VLIB graph nodes are thus protocol
+ * dependent, and thus each graph edge/arc is too.
+ * When programming a DPO's next node arc from child to parent it is thus required
+ * to know the parent's data-path protocol so the correct arc index can be used.
+ */
+typedef enum dpo_proto_t_
+{
+ /*
+ * In debug images (CLIB_DEBUG > 0) the first protocol is 1, not 0.
+ * NOTE(review): presumably so that a zeroed, i.e. uninitialised, protocol
+ * value does not alias IP4 - confirm.
+ */
+#if CLIB_DEBUG > 0
+ DPO_PROTO_IP4 = 1,
+#else
+ DPO_PROTO_IP4 = 0,
+#endif
+ DPO_PROTO_IP6,
+ DPO_PROTO_MPLS,
+} __attribute__((packed)) dpo_proto_t;
+
+/*
+ * One more than the highest protocol value; used as the dimension of arrays
+ * that are indexed by dpo_proto_t.
+ */
+#define DPO_PROTO_NUM (DPO_PROTO_MPLS+1)
+/* A value that lies outside the range of the protocols defined above. */
+#define DPO_PROTO_NONE (DPO_PROTO_NUM+1)
+
+/* Initialiser for a table of protocol names, indexed by dpo_proto_t. */
+#define DPO_PROTOS { \
+ [DPO_PROTO_IP4] = "ip4", \
+ [DPO_PROTO_IP6] = "ip6", \
+ [DPO_PROTO_MPLS] = "mpls", \
+}
+
+/**
+ * @brief Common types of data-path objects
+ * New types can be dynamically added using dpo_register_new_type()
+ */
+typedef enum dpo_type_t_ {
+ /**
+ * Reserve the value zero for 'no type'. All real types are therefore
+ * non-zero, so a zeroed (uninitialised) DPO ID can be spotted.
+ */
+ DPO_FIRST,
+ DPO_DROP,
+ DPO_PUNT,
+ /**
+ * @brief load-balancing over a choice of [un]equal cost paths
+ */
+ DPO_LOAD_BALANCE,
+ DPO_ADJACENCY,
+ DPO_ADJACENCY_INCOMPLETE,
+ DPO_ADJACENCY_MIDCHAIN,
+ DPO_ADJACENCY_GLEAN,
+ DPO_RECEIVE,
+ DPO_LOOKUP,
+ DPO_LISP_CP,
+ DPO_CLASSIFY,
+ DPO_MPLS_LABEL,
+ /**
+ * Marks the end of the statically defined types; must remain last.
+ */
+ DPO_LAST,
+} __attribute__((packed)) dpo_type_t;
+
+/* The number of statically defined types, counting the DPO_FIRST placeholder */
+#define DPO_TYPE_NUM DPO_LAST
+
+/*
+ * Initialiser for a table of type names, indexed by dpo_type_t.
+ * Entries are listed in the same order as the enum.
+ */
+#define DPO_TYPES {                                                     \
+    [DPO_FIRST] = "dpo-invalid",                                        \
+    [DPO_DROP] = "dpo-drop",                                            \
+    [DPO_PUNT] = "dpo-punt",                                            \
+    [DPO_LOAD_BALANCE] = "dpo-load-balance",                            \
+    [DPO_ADJACENCY] = "dpo-adjacency",                                  \
+    [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete",            \
+    [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midchain",                \
+    [DPO_ADJACENCY_GLEAN] = "dpo-glean",                                \
+    [DPO_RECEIVE] = "dpo-receive",                                      \
+    [DPO_LOOKUP] = "dpo-lookup",                                        \
+    [DPO_LISP_CP] = "dpo-lisp-cp",                                      \
+    [DPO_CLASSIFY] = "dpo-classify",                                    \
+    [DPO_MPLS_LABEL] = "dpo-mpls-label",                                \
+}
+
+/**
+ * @brief The identity of a DPO is a combination of its type and its
+ * instance number/index of objects of that type
+ */
+typedef struct dpo_id_t_ {
+ /**
+ * the type
+ */
+ dpo_type_t dpoi_type;
+ /**
+ * the data-path protocol of the type.
+ */
+ dpo_proto_t dpoi_proto;
+ /**
+ * The next VLIB node to follow.
+ * This is the edge/arc index set when the DPO is stacked on its parent.
+ */
+ u16 dpoi_next_node;
+ /**
+ * the index of objects of that type
+ */
+ index_t dpoi_index;
+} __attribute__ ((aligned(sizeof(u64)))) dpo_id_t;
+
+/*
+ * The ID is aligned to, and must not be larger than, a u64. The message
+ * below records why: the atomic update of a DPO ID relies on it.
+ */
+_Static_assert(sizeof(dpo_id_t) <= sizeof(u64),
+ "DPO ID is greater than sizeof u64 "
+ "atomic updates need to be revisited");
+
+/**
+ * @brief An initialiser for DPOs declared on the stack.
+ */
+#define DPO_NULL {0}
+
+/**
+ * @brief Test whether a DPO ID has been initialised.
+ *
+ * @param dpoi
+ * The DPO ID to test.
+ *
+ * @return non-zero if the ID has a type other than DPO_FIRST and an index
+ * other than INDEX_INVALID, zero otherwise.
+ */
+static inline int
+dpo_id_is_valid (const dpo_id_t *dpoi)
+{
+    if (DPO_FIRST == dpoi->dpoi_type)
+    {
+        /* still carries the placeholder type */
+        return (0);
+    }
+
+    return (INDEX_INVALID != dpoi->dpoi_index);
+}
+
+/**
+ * @brief
+ * Take a reference counting lock on the DPO
+ */
+extern void dpo_lock(dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Release a reference counting lock on the DPO
+ */
+extern void dpo_unlock(dpo_id_t *dpo);
+
+/**
+ * @brief Set/create a DPO ID
+ * The DPO will be locked.
+ *
+ * @param dpo
+ * The DPO object to configure
+ *
+ * @param type
+ * The dpo_type_t of the DPO
+ *
+ * @param proto
+ * The dpo_proto_t of the DPO
+ *
+ * @param index
+ * The type specific index of the DPO
+ */
+extern void dpo_set(dpo_id_t *dpo,
+ dpo_type_t type,
+ dpo_proto_t proto,
+ index_t index);
+
+/**
+ * @brief reset a DPO ID
+ * The DPO will be unlocked.
+ *
+ * @param dpo
+ * The DPO object to reset
+ */
+extern void dpo_reset(dpo_id_t *dpo);
+
+/**
+ * @brief compare two DPOs for equality
+ */
+extern int dpo_cmp(const dpo_id_t *dpo1,
+ const dpo_id_t *dpo2);
+
+/**
+ * @brief
+ * atomic copy a data-plane object.
+ * This is safe to use when the dst DPO is currently switching packets
+ */
+extern void dpo_copy(dpo_id_t *dst,
+ const dpo_id_t *src);
+
+/**
+ * @brief Return TRUE if the DPO is any type of adjacency
+ */
+extern int dpo_is_adj(const dpo_id_t *dpo);
+
+/**
+ * @brief Format a dpo_id_t object
+ */
+extern u8 *format_dpo_id(u8 * s, va_list * args);
+
+/**
+ * @brief format a DPO type
+ */
+extern u8 *format_dpo_type(u8 * s, va_list * args);
+
+/**
+ * @brief format a DPO protocol
+ */
+extern u8 *format_dpo_proto(u8 * s, va_list * args);
+
+/**
+ * @brief
+ * Set and stack a DPO.
+ * The DPO passed is set to the parent DPO and the necessary
+ * VLIB graph arcs are created. The child_type and child_proto
+ * are used to get the VLIB nodes from which the arcs are added.
+ *
+ * @param child_type
+ * Child DPO type.
+ *
+ * @param child_proto
+ * Child DPO proto
+ *
+ * @param dpo
+ * This is the DPO to stack and set.
+ *
+ * @param parent_dpo
+ * The parent DPO to stack onto.
+ */
+extern void dpo_stack(dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent_dpo);
+
+/**
+ * @brief
+ * Set and stack a DPO.
+ * The DPO passed is set to the parent DPO and the necessary
+ * VLIB graph arcs are created, from the child_node passed.
+ *
+ * @param child_node
+ * The VLIB graph node index to create an arc from to the parent
+ *
+ * @param dpo
+ * This is the DPO to stack and set.
+ *
+ * @param parent
+ * The parent DPO to stack onto.
+ */
+extern void dpo_stack_from_node(u32 child_node,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent);
+
+/**
+ * @brief A lock function registered for a DPO type
+ */
+typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief An unlock function registered for a DPO type
+ */
+typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief A virtual function table registered for a DPO type
+ */
+typedef struct dpo_vft_t_
+{
+    /**
+     * A reference counting lock function
+     */
+    dpo_lock_fn_t dv_lock;
+    /**
+     * A reference counting unlock function
+     */
+    dpo_unlock_fn_t dv_unlock;
+    /**
+     * A format function
+     */
+    format_function_t *dv_format;
+} dpo_vft_t;
+
+
+/**
+ * @brief For a given DPO type register:
+ * - a virtual function table
+ * - a NULL terminated array of graph nodes from which that object type
+ * will originate packets, i.e. the nodes in which the object type will be
+ * the parent DPO in the DP graph. The nodes are per-data-path protocol
+ * (see above).
+ *
+ * @param type
+ * The type being registered.
+ *
+ * @param vft
+ * The virtual function table to register for the type.
+ *
+ * @param nodes
+ * The string description of the per-protocol VLIB graph nodes.
+ */
+void dpo_register(dpo_type_t type,
+ const dpo_vft_t *vft,
+ const char * const * const * nodes);
+
+/**
+ * @brief Create and register a new DPO type.
+ *
+ * This can be used by plugins to create new DPO types that are not listed
+ * in dpo_type_t enum
+ *
+ * @param vft
+ * The virtual function table to register for the type.
+ *
+ * @param nodes
+ * The string description of the per-protocol VLIB graph nodes.
+ *
+ * @return The new dpo_type_t
+ */
+dpo_type_t dpo_register_new_type(const dpo_vft_t *vft,
+ const char * const * const * nodes);
+
+#endif
diff --git a/vnet/vnet/dpo/drop_dpo.c b/vnet/vnet/dpo/drop_dpo.c
new file mode 100644
index 00000000000..62f56488a01
--- /dev/null
+++ b/vnet/vnet/dpo/drop_dpo.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing dropping the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t drop_dpos[DPO_PROTO_NUM];
+
+/**
+ * @brief Get the drop DPO for a data-path protocol.
+ * The per-protocol entry in drop_dpos[] is (re)set on every call.
+ *
+ * @param proto
+ * The data-path protocol; used to index drop_dpos[].
+ *
+ * @return a pointer to the static drop DPO ID for that protocol.
+ */
+const dpo_id_t *
+drop_dpo_get (dpo_proto_t proto)
+{
+    dpo_id_t *drop = &drop_dpos[proto];
+
+    dpo_set(drop, DPO_DROP, proto, 1);
+
+    return (drop);
+}
+
+/**
+ * @brief Return non-zero if the DPO passed has the drop type.
+ */
+int
+dpo_is_drop (const dpo_id_t *dpo)
+{
+    return (DPO_DROP == dpo->dpoi_type);
+}
+
+/**
+ * @brief Lock function registered for the drop type. Intentionally empty.
+ */
+static void
+drop_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * not maintaining a lock count on the drop
+ * more trouble than it's worth.
+ * There always needs to be one around. no point in managing its lifetime
+ */
+}
+/**
+ * @brief Unlock function registered for the drop type. Intentionally empty,
+ * since the matching lock function keeps no count.
+ */
+static void
+drop_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+/**
+ * @brief Format function registered for the drop type.
+ * Expects an index_t and a u32 indent in the argument list; both are
+ * consumed but not used.
+ */
+static u8*
+format_drop_dpo (u8 *s, va_list *ap)
+{
+    CLIB_UNUSED(index_t index) = va_arg(ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
+
+    s = format(s, "dpo-drop");
+
+    return (s);
+}
+
+/* The virtual function table registered for the drop type */
+static const dpo_vft_t drop_vft = {
+    .dv_format = format_drop_dpo,
+    .dv_lock = drop_dpo_lock,
+    .dv_unlock = drop_dpo_unlock,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a drop
+ * object.
+ *
+ * These are the graph nodes for which a drop is the parent object in the
+ * DPO-graph. Each per-protocol list is NULL terminated.
+ */
+static const char* const drop_ip4_nodes[] =
+{
+    "ip4-drop",
+    NULL,
+};
+static const char* const drop_ip6_nodes[] =
+{
+    "ip6-drop",
+    NULL,
+};
+static const char* const drop_mpls_nodes[] =
+{
+    "mpls-drop",
+    NULL,
+};
+static const char* const * const drop_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4] = drop_ip4_nodes,
+    [DPO_PROTO_IP6] = drop_ip6_nodes,
+    [DPO_PROTO_MPLS] = drop_mpls_nodes,
+};
+
+/**
+ * @brief Register the drop type's virtual function table and its
+ * per-protocol graph nodes with the DPO infra.
+ */
+void
+drop_dpo_module_init (void)
+{
+ dpo_register(DPO_DROP, &drop_vft, drop_nodes);
+}
diff --git a/vnet/vnet/dpo/drop_dpo.h b/vnet/vnet/dpo/drop_dpo.h
new file mode 100644
index 00000000000..e7bd8f5156e
--- /dev/null
+++ b/vnet/vnet/dpo/drop_dpo.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP.
+ *
+ * The DPO is a base class that is specialised by other objects to provide
+ * concrete actions.
+ *
+ * The VLIB graph nodes are a graph of types, the DPO graph is a graph of instances.
+ */
+
+#ifndef __DROP_DPO_H__
+#define __DROP_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_drop(const dpo_id_t *dpo);
+
+extern const dpo_id_t *drop_dpo_get(dpo_proto_t proto);
+
+extern void drop_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/load_balance.c b/vnet/vnet/dpo/load_balance.c
new file mode 100644
index 00000000000..963ff0ba160
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance.c
@@ -0,0 +1,760 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vppinfra/math.h> /* for fabs */
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_alloc.h>
+#include <vnet/adj/adj_internal.h>
+
+/*
+ * distribution error tolerance for load-balancing
+ */
+const f64 multipath_next_hop_error_tolerance = 0.1;
+
+#undef LB_DEBUG
+
+#ifdef LB_DEBUG
+/*
+ * Emit a warning prefixed with the formatted load-balance.
+ * The formatted text is a vppinfra vector, not a C string, so it is printed
+ * with %v and freed once the message has been emitted.
+ */
+#define LB_DBG(_lb, _fmt, _args...)                                     \
+{                                                                       \
+    u8* _tmp = NULL;                                                    \
+    _tmp = load_balance_format(load_balance_get_index((_lb)),           \
+                               LOAD_BALANCE_FORMAT_NONE,                \
+                               0, _tmp);                                \
+    clib_warning("lb:[%v]:" _fmt, _tmp, ##_args);                       \
+    vec_free(_tmp);                                                     \
+}
+#else
+#define LB_DBG(_p, _fmt, _args...)
+#endif
+
+
+/**
+ * Pool of all load-balance DPOs. It's not static so the DP can have fast access
+ */
+load_balance_t *load_balance_pool;
+
+/**
+ * The one instance of load-balance main
+ */
+load_balance_main_t load_balance_main;
+
+/**
+ * @brief Return the distribution error tolerance used when normalising
+ * the weights of a multipath set.
+ */
+f64
+load_balance_get_multipath_tolerance (void)
+{
+ return (multipath_next_hop_error_tolerance);
+}
+
+/**
+ * @brief Return the index of a load-balance object, i.e. its offset from
+ * the start of load_balance_pool.
+ */
+static inline index_t
+load_balance_get_index (const load_balance_t *lb)
+{
+ return (lb - load_balance_pool);
+}
+
+/**
+ * @brief Return the bucket array currently in use by the load-balance;
+ * either the inline storage or the out-of-line vector.
+ */
+static inline dpo_id_t*
+load_balance_get_buckets (load_balance_t *lb)
+{
+    return (LB_HAS_INLINE_BUCKETS(lb) ?
+            lb->lb_buckets_inline :
+            lb->lb_buckets);
+}
+
+/**
+ * @brief Allocate a zeroed load-balance object from the pool, with no map,
+ * and make sure its 'to' and 'via' counters exist and start from zero.
+ */
+static load_balance_t *
+load_balance_alloc_i (void)
+{
+    load_balance_main_t *lbm = &load_balance_main;
+    load_balance_t *lb;
+    index_t lbi;
+
+    pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
+    memset(lb, 0, sizeof(*lb));
+    lb->lb_map = INDEX_INVALID;
+
+    lbi = load_balance_get_index(lb);
+
+    vlib_validate_combined_counter(&(lbm->lbm_to_counters), lbi);
+    vlib_validate_combined_counter(&(lbm->lbm_via_counters), lbi);
+    vlib_zero_combined_counter(&(lbm->lbm_to_counters), lbi);
+    vlib_zero_combined_counter(&(lbm->lbm_via_counters), lbi);
+
+    return (lb);
+}
+
+/**
+ * @brief Append a description of a load-balance object to the vector s.
+ *
+ * @param lbi
+ * The index of the load-balance to format.
+ *
+ * @param flags
+ * Format flags; not consulted by this function at present.
+ *
+ * @param indent
+ * The base indentation applied to the map and bucket lines.
+ *
+ * @param s
+ * The vector to append to.
+ *
+ * @return the (possibly reallocated) vector.
+ */
+static u8*
+load_balance_format (index_t lbi,
+ load_balance_format_flags_t flags,
+ u32 indent,
+ u8 *s)
+{
+ vlib_counter_t to, via;
+ load_balance_t *lb;
+ dpo_id_t *buckets;
+ u32 i;
+
+ lb = load_balance_get(lbi);
+ vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
+ vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
+ buckets = load_balance_get_buckets(lb);
+
+ /* header line: type, index, bucket count, lock count and counters */
+ s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
+ s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+ s = format(s, "locks:%d ", lb->lb_locks);
+ s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
+ /* the via counters are only shown once they have counted something */
+ if (0 != via.packets)
+ {
+ s = format(s, " via:[%Ld:%Ld]",
+ via.packets, via.bytes);
+ }
+ s = format(s, "]");
+
+ /* the load-balance map, if one is in use */
+ if (INDEX_INVALID != lb->lb_map)
+ {
+ s = format(s, "\n%U%U",
+ format_white_space, indent+4,
+ format_load_balance_map, lb->lb_map, indent+4);
+ }
+ /* one line per bucket, showing the DPO the bucket is stacked on */
+ for (i = 0; i < lb->lb_n_buckets; i++)
+ {
+ s = format(s, "\n%U[%d] %U",
+ format_white_space, indent+2,
+ i,
+ format_dpo_id,
+ &buckets[i], indent+6);
+ }
+ return (s);
+}
+
+/**
+ * @brief Format a load-balance.
+ * Expects an index_t and a load_balance_format_flags_t in the argument
+ * list, in that order. No indentation is applied.
+ */
+u8*
+format_load_balance (u8 * s, va_list * args)
+{
+    load_balance_format_flags_t flags;
+    index_t lbi;
+
+    lbi = va_arg(args, index_t);
+    flags = va_arg(args, load_balance_format_flags_t);
+
+    s = load_balance_format(lbi, flags, 0, s);
+
+    return (s);
+}
+/**
+ * @brief Format function registered for the load-balance type.
+ * Expects an index_t and a u32 indent in the argument list, in that order.
+ * Always formats in detail.
+ */
+static u8*
+format_load_balance_dpo (u8 * s, va_list * args)
+{
+    index_t lbi;
+    u32 indent;
+
+    lbi = va_arg(args, index_t);
+    indent = va_arg(args, u32);
+
+    s = load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s);
+
+    return (s);
+}
+
+
+/**
+ * @brief Allocate and initialise a load-balance object.
+ *
+ * @param num_buckets
+ * The number of buckets the object starts with.
+ *
+ * @param lb_proto
+ * The data-path protocol of the load-balance.
+ *
+ * @param fhc
+ * The flow hash configuration stored in the object.
+ *
+ * @return the new load-balance object.
+ */
+static load_balance_t *
+load_balance_create_i (u32 num_buckets,
+                       dpo_proto_t lb_proto,
+                       flow_hash_config_t fhc)
+{
+    load_balance_t *lb = load_balance_alloc_i();
+
+    lb->lb_proto = lb_proto;
+    lb->lb_hash_config = fhc;
+    lb->lb_n_buckets = num_buckets;
+    lb->lb_n_buckets_minus_1 = num_buckets-1;
+
+    /* the out-of-line bucket vector is needed only if inline does not fit */
+    if (!LB_HAS_INLINE_BUCKETS(lb))
+    {
+        vec_validate_aligned(lb->lb_buckets,
+                             lb->lb_n_buckets - 1,
+                             CLIB_CACHE_LINE_BYTES);
+    }
+
+    LB_DBG(lb, "create");
+
+    return (lb);
+}
+
+/**
+ * @brief Create a load-balance object and return its index.
+ */
+index_t
+load_balance_create (u32 n_buckets,
+                     dpo_proto_t lb_proto,
+                     flow_hash_config_t fhc)
+{
+    load_balance_t *lb;
+
+    lb = load_balance_create_i(n_buckets, lb_proto, fhc);
+
+    return (load_balance_get_index(lb));
+}
+
+/**
+ * @brief Stack one bucket of the bucket array passed onto the next DPO.
+ * The load-balance's own protocol selects the child graph node.
+ * No range check is made on the bucket number.
+ */
+static inline void
+load_balance_set_bucket_i (load_balance_t *lb,
+ u32 bucket,
+ dpo_id_t *buckets,
+ const dpo_id_t *next)
+{
+ dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
+}
+
+/**
+ * @brief Stack one bucket of a load-balance onto the next DPO.
+ *
+ * @param lbi
+ * The index of the load-balance.
+ *
+ * @param bucket
+ * The bucket to set; asserted to be less than the number of buckets.
+ *
+ * @param next
+ * The DPO the bucket is stacked on.
+ */
+void
+load_balance_set_bucket (index_t lbi,
+                         u32 bucket,
+                         const dpo_id_t *next)
+{
+    load_balance_t *lb = load_balance_get(lbi);
+    dpo_id_t *buckets = load_balance_get_buckets(lb);
+
+    ASSERT(bucket < lb->lb_n_buckets);
+
+    load_balance_set_bucket_i(lb, bucket, buckets, next);
+}
+
+/**
+ * @brief Return non-zero if the DPO is a load-balance with exactly one
+ * bucket and that bucket is a drop. Zero in every other case, including
+ * when the DPO is not a load-balance.
+ */
+int
+load_balance_is_drop (const dpo_id_t *dpo)
+{
+    load_balance_t *lb;
+
+    if (DPO_LOAD_BALANCE == dpo->dpoi_type)
+    {
+        lb = load_balance_get(dpo->dpoi_index);
+
+        if (1 == lb->lb_n_buckets)
+        {
+            return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
+        }
+    }
+
+    return (0);
+}
+
+/**
+ * @brief Return the DPO in the given bucket of the load-balance with the
+ * index passed.
+ */
+const dpo_id_t *
+load_balance_get_bucket (index_t lbi,
+                         u32 bucket)
+{
+    return (load_balance_get_bucket_i(load_balance_get(lbi), bucket));
+}
+
+/**
+ * @brief Compare two paths by weight.
+ *
+ * @return negative, zero or positive when the weight of n1 is respectively
+ * less than, equal to or greater than that of n2.
+ */
+static int
+next_hop_sort_by_weight (load_balance_path_t * n1,
+                         load_balance_path_t * n2)
+{
+    int w1 = (int) n1->path_weight;
+    int w2 = (int) n2->path_weight;
+
+    return (w1 - w2);
+}
+
+/*
+ * Normalise the weights of a set of next-hops so that they sum to the
+ * number of adjacencies (buckets) in the block.
+ *
+ * raw_next_hops: the input vector; it is read, not written.
+ * normalized_next_hops: in - a vector to re-use (may point to NULL),
+ * out - the sorted next-hops with their normalised weights.
+ * sum_weight_in: out - the sum of the weights before normalisation
+ * (not written when the input vector is empty).
+ * multipath_next_hop_error_tolerance: the acceptable average error per
+ * adjacency. NOTE: this parameter shadows the file-scope constant of the
+ * same name.
+ *
+ * Returns number of adjacencies in block, or 0 if there are no next-hops.
+ */
+u32
+ip_multipath_normalize_next_hops (load_balance_path_t * raw_next_hops,
+ load_balance_path_t ** normalized_next_hops,
+ u32 *sum_weight_in,
+ f64 multipath_next_hop_error_tolerance)
+{
+ load_balance_path_t * nhs;
+ uword n_nhs, n_adj, n_adj_left, i, sum_weight;
+ f64 norm, error;
+
+ n_nhs = vec_len (raw_next_hops);
+ ASSERT (n_nhs > 0);
+ if (n_nhs == 0)
+ return 0;
+
+ /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
+ nhs = *normalized_next_hops;
+ vec_validate (nhs, 2*n_nhs - 1);
+
+ /* Fast path: 1 next hop in block. */
+ n_adj = n_nhs;
+ if (n_nhs == 1)
+ {
+ nhs[0] = raw_next_hops[0];
+ nhs[0].path_weight = 1;
+ _vec_len (nhs) = 1;
+ sum_weight = 1;
+ goto done;
+ }
+
+ else if (n_nhs == 2)
+ {
+ /* cmp is 1 when the first next hop is the lighter of the two */
+ int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
+
+ /*
+ * Fast sort. This places the heavier next hop first.
+ * NOTE(review): the qsort() used for 3 or more next hops sorts in
+ * ascending order with the same comparison function - confirm the
+ * difference in ordering is intended.
+ */
+ nhs[0] = raw_next_hops[cmp];
+ nhs[1] = raw_next_hops[cmp ^ 1];
+
+ /* Fast path: equal cost multipath with 2 next hops. */
+ if (nhs[0].path_weight == nhs[1].path_weight)
+ {
+ nhs[0].path_weight = nhs[1].path_weight = 1;
+ _vec_len (nhs) = 2;
+ sum_weight = 2;
+ goto done;
+ }
+ }
+ else
+ {
+ clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+ qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
+ }
+
+ /* Find total weight to normalize weights. */
+ sum_weight = 0;
+ for (i = 0; i < n_nhs; i++)
+ sum_weight += nhs[i].path_weight;
+
+ /* In the unlikely case that all weights are given as 0, set them all to 1. */
+ if (sum_weight == 0)
+ {
+ for (i = 0; i < n_nhs; i++)
+ nhs[i].path_weight = 1;
+ sum_weight = n_nhs;
+ }
+
+ /* Save copies of all next hop weights to avoid being overwritten in loop below. */
+ for (i = 0; i < n_nhs; i++)
+ nhs[n_nhs + i].path_weight = nhs[i].path_weight;
+
+ /* Try larger and larger power of 2 sized adjacency blocks until we
+ find one where traffic flows to within the error tolerance passed
+ of the specified weights. */
+ for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ {
+ error = 0;
+
+ norm = n_adj / ((f64) sum_weight);
+ n_adj_left = n_adj;
+ for (i = 0; i < n_nhs; i++)
+ {
+ f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
+ word n = flt_round_nearest (nf);
+
+ /* never hand out more adjacencies than remain in the block */
+ n = n > n_adj_left ? n_adj_left : n;
+ n_adj_left -= n;
+ error += fabs (nf - n);
+ nhs[i].path_weight = n;
+ }
+
+ /* any adjacencies left over after rounding go to the first next hop */
+ nhs[0].path_weight += n_adj_left;
+
+ /* Is the average error per adjacency within tolerance with this size adjacency block? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj)
+ {
+ /* i equals n_nhs here: drop the saved-weights half of the vector. */
+ _vec_len (nhs) = i;
+ break;
+ }
+ }
+
+done:
+ /* Save vector for next call. */
+ *normalized_next_hops = nhs;
+ *sum_weight_in = sum_weight;
+ return n_adj;
+}
+
+/**
+ * @brief Make sure there is at least one path to load-balance over.
+ * If the vector passed is empty, a single path of weight 1 that uses the
+ * drop DPO for the protocol passed is added to it.
+ *
+ * @return the vector of paths, which may have been (re)allocated.
+ */
+static load_balance_path_t *
+load_balance_multipath_next_hop_fixup (load_balance_path_t *nhs,
+                                       dpo_proto_t drop_proto)
+{
+    load_balance_path_t *nh;
+
+    if (0 != vec_len(nhs))
+    {
+        /* the client supplied paths, nothing to fix */
+        return (nhs);
+    }
+
+    /*
+     * we need something for the load-balance. so use the drop
+     */
+    vec_add2(nhs, nh, 1);
+
+    nh->path_weight = 1;
+    dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
+
+    return (nhs);
+}
+
+/*
+ * Fill in adjacencies in block based on corresponding
+ * next hop adjacencies.
+ *
+ * Each next-hop is written into as many consecutive buckets as its
+ * (normalised) weight. The counters are u32, the same width as n_buckets,
+ * so they cannot wrap before the range assert is reached.
+ */
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+                           load_balance_path_t *nhs,
+                           dpo_id_t *buckets,
+                           u32 n_buckets)
+{
+    load_balance_path_t * nh;
+    u32 ii, bucket;
+
+    bucket = 0;
+
+    /*
+     * the next-hops have normalised weights. that means their sum is the number
+     * of buckets we need to fill.
+     */
+    vec_foreach (nh, nhs)
+    {
+        for (ii = 0; ii < nh->path_weight; ii++)
+        {
+            ASSERT(bucket < n_buckets);
+            load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+        }
+    }
+}
+
+/**
+ * @brief Set the number of buckets of a load-balance, keeping the stored
+ * 'number of buckets minus one' value in step with it.
+ */
+static inline void
+load_balance_set_n_buckets (load_balance_t *lb,
+ u32 n_buckets)
+{
+ lb->lb_n_buckets = n_buckets;
+ lb->lb_n_buckets_minus_1 = n_buckets-1;
+}
+
+/**
+ * @brief Update the set of paths a load-balance object balances over.
+ *
+ * The paths are normalised into a number of buckets, and the buckets of the
+ * load-balance are rewritten in an order that keeps the object consistent
+ * for packets in flight: the bucket count seen by the data-path is never
+ * more than the number of valid buckets in the array it reads.
+ *
+ * @param dpo
+ * The load-balance DPO to update; asserted to be of type DPO_LOAD_BALANCE.
+ *
+ * @param raw_next_hops
+ * The vector of paths. If it is empty a single drop path is used. The DPO
+ * of each normalised path is reset before return.
+ *
+ * @param flags
+ * If LOAD_BALANCE_FLAG_USES_MAP is set a load-balance map is obtained for
+ * the new paths, otherwise the load-balance is left without a map.
+ */
+void
+load_balance_multipath_update (const dpo_id_t *dpo,
+                               load_balance_path_t * raw_next_hops,
+                               load_balance_flags_t flags)
+{
+    u32 sum_of_weights, n_buckets, ii;
+    load_balance_path_t * nh, * nhs;
+    index_t lbmi, old_lbmi;
+    load_balance_t *lb;
+    dpo_id_t *tmp_dpo;
+
+    nhs = NULL;
+
+    ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
+    lb = load_balance_get(dpo->dpoi_index);
+    raw_next_hops =
+        load_balance_multipath_next_hop_fixup(raw_next_hops,
+                                              lb->lb_proto);
+    n_buckets =
+        ip_multipath_normalize_next_hops(raw_next_hops,
+                                         &nhs,
+                                         &sum_of_weights,
+                                         multipath_next_hop_error_tolerance);
+
+    ASSERT (n_buckets >= vec_len (raw_next_hops));
+
+    /*
+     * Save the old load-balance map used, and get a new one if required.
+     */
+    old_lbmi = lb->lb_map;
+    if (flags & LOAD_BALANCE_FLAG_USES_MAP)
+    {
+        lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
+    }
+    else
+    {
+        lbmi = INDEX_INVALID;
+    }
+
+    if (0 == lb->lb_n_buckets)
+    {
+        /*
+         * first time initialisation. no packets inflight, so we can write
+         * at leisure.
+         */
+        load_balance_set_n_buckets(lb, n_buckets);
+
+        if (!LB_HAS_INLINE_BUCKETS(lb))
+            vec_validate_aligned(lb->lb_buckets,
+                                 lb->lb_n_buckets - 1,
+                                 CLIB_CACHE_LINE_BYTES);
+
+        load_balance_fill_buckets(lb, nhs,
+                                  load_balance_get_buckets(lb),
+                                  n_buckets);
+        lb->lb_map = lbmi;
+    }
+    else
+    {
+        /*
+         * This is a modification of an existing load-balance.
+         * We need to ensure that packets inflight see a consistent state, that
+         * is the number of reported buckets the LB has (read from
+         * lb_n_buckets_minus_1) is not more than it actually has. So if the
+         * number of buckets is increasing, we must update the bucket array first,
+         * then the reported number. vice-versa if the number of buckets goes down.
+         */
+        if (n_buckets == lb->lb_n_buckets)
+        {
+            /*
+             * no change in the number of buckets. we can simply fill what
+             * is new over what is old.
+             */
+            load_balance_fill_buckets(lb, nhs,
+                                      load_balance_get_buckets(lb),
+                                      n_buckets);
+            lb->lb_map = lbmi;
+        }
+        else if (n_buckets > lb->lb_n_buckets)
+        {
+            /*
+             * we have more buckets. the old load-balance map (if there is one)
+             * will remain valid, i.e. mapping to indices within range, so we
+             * update it last.
+             */
+            if (n_buckets > LB_NUM_INLINE_BUCKETS &&
+                lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * the new increased number of buckets is crossing the threshold
+                 * from the inline storage to out-line. Alloc the outline buckets
+                 * first, then fixup the number. then reset the inlines.
+                 */
+                ASSERT(NULL == lb->lb_buckets);
+                vec_validate_aligned(lb->lb_buckets,
+                                     n_buckets - 1,
+                                     CLIB_CACHE_LINE_BYTES);
+
+                load_balance_fill_buckets(lb, nhs,
+                                          lb->lb_buckets,
+                                          n_buckets);
+                CLIB_MEMORY_BARRIER();
+                load_balance_set_n_buckets(lb, n_buckets);
+
+                CLIB_MEMORY_BARRIER();
+
+                for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
+                {
+                    dpo_reset(&lb->lb_buckets_inline[ii]);
+                }
+            }
+            else if (n_buckets > LB_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * both the old and the new number of buckets are out-of-line.
+                 * The existing vector is only as long as it was last
+                 * validated to be, so it cannot simply be over-written with
+                 * more buckets. Growing it in place could move it from under
+                 * packets in flight, so instead:
+                 *  1 - fill a new vector of the right size,
+                 *  2 - switch to the new vector (the old, smaller, count is
+                 *      still in range for it),
+                 *  3 - fixup the number,
+                 *  4 - release the old vector.
+                 */
+                dpo_id_t *new_buckets, *old_buckets;
+
+                new_buckets = NULL;
+                old_buckets = lb->lb_buckets;
+
+                vec_validate_aligned(new_buckets,
+                                     n_buckets - 1,
+                                     CLIB_CACHE_LINE_BYTES);
+
+                load_balance_fill_buckets(lb, nhs,
+                                          new_buckets,
+                                          n_buckets);
+                CLIB_MEMORY_BARRIER();
+                lb->lb_buckets = new_buckets;
+                CLIB_MEMORY_BARRIER();
+                load_balance_set_n_buckets(lb, n_buckets);
+
+                vec_foreach(tmp_dpo, old_buckets)
+                {
+                    dpo_reset(tmp_dpo);
+                }
+                vec_free(old_buckets);
+            }
+            else
+            {
+                /*
+                 * both the old and the new number of buckets fit inline.
+                 * we can write the new on the old.
+                 */
+                load_balance_fill_buckets(lb, nhs,
+                                          load_balance_get_buckets(lb),
+                                          n_buckets);
+                CLIB_MEMORY_BARRIER();
+                load_balance_set_n_buckets(lb, n_buckets);
+            }
+
+            /*
+             * buckets fixed. ready for the MAP update.
+             */
+            lb->lb_map = lbmi;
+        }
+        else
+        {
+            /*
+             * bucket size shrinkage.
+             * Any map we have will be based on the old
+             * larger number of buckets, so will be translating to indices
+             * out of range. So the new MAP must be installed first.
+             */
+            lb->lb_map = lbmi;
+            CLIB_MEMORY_BARRIER();
+
+            if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
+                lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * the new decreased number of buckets is crossing the threshold
+                 * from out-line storage to inline:
+                 *   1 - Fill the inline buckets,
+                 *   2 - fixup the number (and this point the inline buckets are
+                 *       used).
+                 *   3 - free the outline buckets
+                 */
+                load_balance_fill_buckets(lb, nhs,
+                                          lb->lb_buckets_inline,
+                                          n_buckets);
+                CLIB_MEMORY_BARRIER();
+                load_balance_set_n_buckets(lb, n_buckets);
+                CLIB_MEMORY_BARRIER();
+
+                vec_foreach(tmp_dpo, lb->lb_buckets)
+                {
+                    dpo_reset(tmp_dpo);
+                }
+                vec_free(lb->lb_buckets);
+            }
+            else
+            {
+                /*
+                 * not crossing the threshold.
+                 *  1 - update the number to the smaller size
+                 *  2 - write the new buckets
+                 *  3 - reset those no longer used.
+                 */
+                dpo_id_t *buckets;
+                u32 old_n_buckets;
+
+                old_n_buckets = lb->lb_n_buckets;
+                buckets = load_balance_get_buckets(lb);
+
+                load_balance_set_n_buckets(lb, n_buckets);
+                CLIB_MEMORY_BARRIER();
+
+                load_balance_fill_buckets(lb, nhs,
+                                          buckets,
+                                          n_buckets);
+
+                /*
+                 * the buckets no longer in use are those from the new count
+                 * up to the old count.
+                 */
+                for (ii = n_buckets; ii < old_n_buckets; ii++)
+                {
+                    dpo_reset(&buckets[ii]);
+                }
+            }
+        }
+    }
+
+    /*
+     * the normalised paths were only scratch space for this update;
+     * release the DPOs they hold and then the vector itself.
+     */
+    vec_foreach (nh, nhs)
+    {
+        dpo_reset(&nh->path_dpo);
+    }
+    vec_free(nhs);
+
+    load_balance_map_unlock(old_lbmi);
+}
+
+/**
+ * @brief Lock function registered for the load-balance type; takes one
+ * more reference on the load-balance the DPO refers to.
+ */
+static void
+load_balance_lock (dpo_id_t *dpo)
+{
+    load_balance_t *lb = load_balance_get(dpo->dpoi_index);
+
+    lb->lb_locks += 1;
+}
+
+/**
+ * @brief Destroy a load-balance object: release each bucket's DPO, the
+ * out-of-line bucket vector (if used) and the reference held on the
+ * load-balance map, then return the object to the pool.
+ */
+static void
+load_balance_destroy (load_balance_t *lb)
+{
+    dpo_id_t *buckets;
+    u32 i;
+
+    buckets = load_balance_get_buckets(lb);
+
+    for (i = 0; i < lb->lb_n_buckets; i++)
+    {
+        dpo_reset(&buckets[i]);
+    }
+
+    LB_DBG(lb, "destroy");
+    if (!LB_HAS_INLINE_BUCKETS(lb))
+    {
+        vec_free(lb->lb_buckets);
+    }
+
+    /*
+     * release the map reference taken when the paths were last updated.
+     * lb_map may be INDEX_INVALID, as it may be on the update path too.
+     */
+    load_balance_map_unlock(lb->lb_map);
+
+    pool_put(load_balance_pool, lb);
+}
+
+/**
+ * @brief Unlock function registered for the load-balance type; drops one
+ * reference and destroys the load-balance when none remain.
+ */
+static void
+load_balance_unlock (dpo_id_t *dpo)
+{
+    load_balance_t *lb = load_balance_get(dpo->dpoi_index);
+
+    lb->lb_locks -= 1;
+
+    if (0 != lb->lb_locks)
+    {
+        /* still referenced */
+        return;
+    }
+
+    load_balance_destroy(lb);
+}
+
+/* The virtual function table registered for the load-balance type */
+static const dpo_vft_t lb_vft = {
+    .dv_format = format_load_balance_dpo,
+    .dv_lock = load_balance_lock,
+    .dv_unlock = load_balance_unlock,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
+ * object.
+ *
+ * These are the graph nodes for which a load-balance is the parent object
+ * in the DPO-graph. Each per-protocol list is NULL terminated.
+ *
+ * We do not list all the load-balance nodes, such as the *-lookup. instead
+ * we are relying on the correct use of the .sibling_of field when setting
+ * up these sibling nodes.
+ */
+static const char* const load_balance_ip4_nodes[] =
+{
+    "ip4-load-balance",
+    NULL,
+};
+static const char* const load_balance_ip6_nodes[] =
+{
+    "ip6-load-balance",
+    NULL,
+};
+static const char* const load_balance_mpls_nodes[] =
+{
+    "mpls-load-balance",
+    NULL,
+};
+static const char* const * const load_balance_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4] = load_balance_ip4_nodes,
+    [DPO_PROTO_IP6] = load_balance_ip6_nodes,
+    [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
+};
+
+/**
+ * @brief Register the load-balance type's virtual function table and its
+ * per-protocol graph nodes with the DPO infra, then initialise the
+ * load-balance map module.
+ */
+void
+load_balance_module_init (void)
+{
+ dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
+
+ load_balance_map_module_init();
+}
+
+/**
+ * @brief CLI handler for 'show load-balance [<index>]'.
+ *
+ * With an index, the one load-balance is shown in detail, provided the
+ * index refers to an object that is currently allocated. Without an index
+ * every load-balance in the pool is shown.
+ *
+ * @return always 0 (no error).
+ */
+static clib_error_t *
+load_balance_show (vlib_main_t * vm,
+                   unformat_input_t * input,
+                   vlib_cli_command_t * cmd)
+{
+    index_t lbi = INDEX_INVALID;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "%d", &lbi))
+            ;
+        else
+            break;
+    }
+
+    if (INDEX_INVALID != lbi)
+    {
+        /*
+         * the index comes straight from the user; do not format an object
+         * that is not in the pool.
+         */
+        if (pool_is_free_index(load_balance_pool, lbi))
+        {
+            vlib_cli_output (vm, "no such load-balance:%d", lbi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance, lbi,
+                             LOAD_BALANCE_FORMAT_DETAIL);
+        }
+    }
+    else
+    {
+        load_balance_t *lb;
+
+        pool_foreach(lb, load_balance_pool,
+        ({
+            vlib_cli_output (vm, "%U", format_load_balance,
+                             load_balance_get_index(lb),
+                             LOAD_BALANCE_FORMAT_NONE);
+        }));
+    }
+
+    return 0;
+}
+
+/* Registration of the 'show load-balance' CLI command */
+VLIB_CLI_COMMAND (load_balance_show_command, static) = {
+ .path = "show load-balance",
+ .short_help = "show load-balance [<index>]",
+ .function = load_balance_show,
+};
diff --git a/vnet/vnet/dpo/load_balance.h b/vnet/vnet/dpo/load_balance.h
new file mode 100644
index 00000000000..d630a2c2d75
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * \brief
+ * The load-balance object represents an ECMP choice. The buckets of a load
+ * balance object point to the sub-graph after the choice is made.
+ * The load-balance object is also the object type returned from a FIB table lookup.
+ * As such it needs to represent the case where there is only one choice. It may
+ * seem like overkill to use a load-balance object in this case, but the reason
+ * is for performance. If the load-balance object were not the result of the FIB
+ * lookup, then some other object would be. In the case where there was ECMP,
+ * this other object would need a load-balance as a parent and hence just add
+ * an unnecessary indirection.
+ *
+ * It is also the object in the DP that represents a via-fib-entry in a recursive
+ * route.
+ *
+ */
+
+#ifndef __LOAD_BALANCE_H__
+#define __LOAD_BALANCE_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_types.h>
+
+/**
+ * Load-balance main
+ */
+typedef struct load_balance_main_t_
+{
+ /*
+ * Combined counters indexed by load-balance index. The lookup DPO nodes
+ * increment these by one and by the buffer-chain length for each packet
+ * whose lookup result is that load-balance.
+ */
+ vlib_combined_counter_main_t lbm_to_counters;
+ /*
+ * NOTE(review): not referenced in the code visible here - presumably
+ * counts traffic forwarded via the load-balance; confirm.
+ */
+ vlib_combined_counter_main_t lbm_via_counters;
+} load_balance_main_t;
+
+extern load_balance_main_t load_balance_main;
+
+/**
+ * The number of buckets that a load-balance object can have and still
+ * fit in one cache-line
+ */
+#define LB_NUM_INLINE_BUCKETS 4
+
+/**
+ * @brief One path from an [EU]CMP set that the client wants to add to a
+ * load-balance object
+ */
+typedef struct load_balance_path_t_ {
+ /**
+ * ID of the Data-path object.
+ */
+ dpo_id_t path_dpo;
+
+ /**
+ * The index of the FIB path
+ */
+ fib_node_index_t path_index;
+
+ /**
+ * weight for the path.
+ */
+ u32 path_weight;
+} load_balance_path_t;
+
+/**
+ * The FIB DPO provides:
+ * - load-balancing over the next DPOs in the chain/graph
+ * - per-route counters
+ */
+typedef struct load_balance_t_ {
+ /**
+ * number of buckets in the load-balance. always a power of 2.
+ */
+ u16 lb_n_buckets;
+ /**
+ * number of buckets in the load-balance - 1. used in the switch path
+ * as part of the hash calculation.
+ */
+ u16 lb_n_buckets_minus_1;
+
+ /**
+ * The protocol of packets that traverse this LB.
+ * needed in combination with the flow hash config to determine how to hash.
+ * u8.
+ */
+ dpo_proto_t lb_proto;
+
+ /**
+ * The number of locks, which is approximately the number of users,
+ * of this load-balance.
+ * Load-balance objects of via-entries are heavily shared by recursives,
+ * so the lock count is a u32.
+ */
+ u32 lb_locks;
+
+ /**
+ * index of the load-balance map, INVALID if this LB does not use one
+ */
+ index_t lb_map;
+
+ /**
+ * the hash config to use when selecting a bucket. this is a u16
+ */
+ flow_hash_config_t lb_hash_config;
+
+ /**
+ * Vector of buckets containing the next DPOs, indexed by bucket number.
+ * Read instead of lb_buckets_inline when lb_n_buckets exceeds
+ * LB_NUM_INLINE_BUCKETS.
+ */
+ dpo_id_t *lb_buckets;
+
+ /**
+ * The rest of the cache line is used for buckets. In the common case
+ * where there are no more than LB_NUM_INLINE_BUCKETS buckets, the
+ * buckets are on the same cacheline and we save ourselves a pointer
+ * dereference in the data-path.
+ */
+ dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS];
+} load_balance_t;
+
+_Static_assert(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
+ "A load_balance object size exceeds one cachline");
+
+/**
+ * Flags controlling load-balance formatting/display
+ */
+typedef enum load_balance_format_flags_t_ {
+ LOAD_BALANCE_FORMAT_NONE,
+ LOAD_BALANCE_FORMAT_DETAIL = (1 << 0),
+} load_balance_format_flags_t;
+
+/**
+ * Flags controlling load-balance creation and modification
+ */
+typedef enum load_balance_flags_t_ {
+ LOAD_BALANCE_FLAG_NONE = 0,
+ LOAD_BALANCE_FLAG_USES_MAP = (1 << 0),
+} load_balance_flags_t;
+
+extern index_t load_balance_create(u32 num_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc);
+extern void load_balance_multipath_update(
+ const dpo_id_t *dpo,
+ load_balance_path_t * raw_next_hops,
+ load_balance_flags_t flags);
+
+extern void load_balance_set_bucket(index_t lbi,
+ u32 bucket,
+ const dpo_id_t *next);
+
+extern u8* format_load_balance(u8 * s, va_list * args);
+
+extern const dpo_id_t *load_balance_get_bucket(index_t lbi,
+ u32 bucket);
+extern int load_balance_is_drop(const dpo_id_t *dpo);
+
+extern f64 load_balance_get_multipath_tolerance(void);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_t *load_balance_pool;
+/*
+ * Return the load-balance object stored at index lbi of the global pool.
+ */
+static inline load_balance_t*
+load_balance_get (index_t lbi)
+{
+    load_balance_t *lb;
+
+    lb = pool_elt_at_index(load_balance_pool, lbi);
+
+    return (lb);
+}
+
+#define LB_HAS_INLINE_BUCKETS(_lb) \
+ ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+
+/*
+ * Return the DPO in the requested bucket of the load-balance. The bucket
+ * number must be less than lb_n_buckets (asserted). The inline array is read
+ * when the load-balance has inline buckets, the lb_buckets vector otherwise.
+ */
+static inline const dpo_id_t *
+load_balance_get_bucket_i (const load_balance_t *lb,
+                           u32 bucket)
+{
+    const dpo_id_t *buckets;
+
+    ASSERT(bucket < lb->lb_n_buckets);
+
+    buckets = (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)) ?
+               lb->lb_buckets_inline :
+               lb->lb_buckets);
+
+    return (&buckets[bucket]);
+}
+
+extern void load_balance_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/load_balance_map.c b/vnet/vnet/dpo/load_balance_map.c
new file mode 100644
index 00000000000..f08801f1ce7
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance_map.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ */
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * A hash-table of load-balance maps by path index.
+ * this provides the fast lookup of the LB map when a path goes down
+ */
+static uword *lb_maps_by_path_index;
+
+/**
+ * A hash-table of load-balance maps by set of paths.
+ * This provides the LB map sharing.
+ * LB maps do not necessarily use all the paths in the list, since
+ * the entry that is requesting the map, may not have an out-going
+ * label for each of the paths.
+ */
+static uword *load_balance_map_db;
+
+typedef enum load_balance_map_path_flags_t_
+{
+ LOAD_BALANCE_MAP_PATH_UP = (1 << 0),
+ LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
+} __attribute__ ((packed)) load_balance_map_path_flags_t;
+
+/**
+ * One path of a load-balance map.
+ */
+typedef struct load_balance_map_path_t_ {
+ /**
+ * Index of the path
+ */
+ fib_node_index_t lbmp_index;
+
+ /**
+ * Sibling Index in the list of all maps with this path index
+ */
+ fib_node_index_t lbmp_sibling;
+
+ /**
+ * the normalised weight of the path
+ */
+ u32 lbmp_weight;
+
+ /**
+ * The state of the path
+ */
+ load_balance_map_path_flags_t lbmp_flags;
+} load_balance_map_path_t;
+
+/**
+ * The global pool of LB maps
+ */
+load_balance_map_t *load_balance_map_pool;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) \
+ { \
+ clib_warning("lbm: FIXME" _fmt, \
+ ##_args); \
+ }
+#else
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)
+#endif
+
+/*
+ * Convert a map pointer into its index in the global pool of maps.
+ */
+static index_t
+load_balance_map_get_index (load_balance_map_t *lbm)
+{
+    index_t lbmi;
+
+    lbmi = lbm - load_balance_map_pool;
+
+    return (lbmi);
+}
+
+/**
+ * @brief Format a load-balance map: its index and bucket count, followed by
+ * two rows - the bucket indices and the value each bucket maps to.
+ *
+ * Arguments consumed from the va_list: the map's index (index_t) and the
+ * indentation (u32) applied to the two rows.
+ *
+ * NOTE(review): the other format functions in this change
+ * (format_load_balance, format_lookup_dpo) take 'va_list *args', whereas this
+ * one takes the va_list by value yet is invoked through "%U" in the same way.
+ * Confirm this is portable to all target ABIs. Changing it requires the
+ * prototype in load_balance_map.h to change in step.
+ */
+u8*
+format_load_balance_map (u8 *s, va_list ap)
+{
+ index_t lbmi = va_arg(ap, index_t);
+ u32 indent = va_arg(ap, u32);
+ load_balance_map_t *lbm;
+ u32 n_buckets, ii;
+
+ lbm = load_balance_map_get(lbmi);
+ n_buckets = vec_len(lbm->lbm_buckets);
+
+ s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
+ s = format(s, "\n%U index:", format_white_space, indent+2);
+ /* first row: the bucket numbers */
+ for (ii = 0; ii < n_buckets; ii++)
+ {
+ s = format(s, "%5d", ii);
+ }
+ s = format(s, "\n%U map:", format_white_space, indent+2);
+ /* second row: the bucket each of the above is mapped to */
+ for (ii = 0; ii < n_buckets; ii++)
+ {
+ s = format(s, "%5d", lbm->lbm_buckets[ii]);
+ }
+
+ return (s);
+}
+
+
+/*
+ * Hash a map over its paths: the running values are seeded with the number
+ * of paths and each path's index is then mixed in, in vector order.
+ * Path weights do not contribute.
+ */
+static uword
+load_balance_map_hash (load_balance_map_t *lbm)
+{
+    load_balance_map_path_t *path;
+    u32 mix_a, mix_b, mix_c;
+
+    mix_b = vec_len(lbm->lbm_paths);
+    mix_c = mix_b;
+
+    vec_foreach (path, lbm->lbm_paths)
+    {
+        mix_a = path->lbmp_index;
+        hash_mix32(mix_a, mix_b, mix_c);
+    }
+
+    return (mix_c);
+}
+
+/*
+ * Encode a pool index as a DB hash key: the index is doubled and the low
+ * bit set, which is the bit load_balance_map_db_hash_key_is_index() tests.
+ */
+always_inline uword
+load_balance_map_db_hash_key_from_index (uword index)
+{
+    uword key;
+
+    key = 2*index;
+    key += 1;
+
+    return (key);
+}
+
+/*
+ * Return the low bit of the key; non-zero means the key encodes a pool
+ * index rather than a pointer.
+ */
+always_inline uword
+load_balance_map_db_hash_key_is_index (uword key)
+{
+    const uword index_flag = 1;
+
+    return (key & index_flag);
+}
+
+/*
+ * Decode a key that encodes a pool index (asserted) back into that index.
+ */
+always_inline uword
+load_balance_map_db_hash_key_2_index (uword key)
+{
+    uword index;
+
+    ASSERT (load_balance_map_db_hash_key_is_index (key));
+
+    index = key / 2;
+
+    return (index);
+}
+
+/*
+ * Resolve a DB hash key to the map it refers to. A key is either an encoded
+ * pool index or the map's address.
+ */
+static load_balance_map_t*
+load_balance_map_db_get_from_hash_key (uword key)
+{
+    index_t lbm_index;
+
+    if (!load_balance_map_db_hash_key_is_index (key))
+    {
+        /* the key is the map's address */
+        return (uword_to_pointer (key, load_balance_map_t *));
+    }
+
+    lbm_index = load_balance_map_db_hash_key_2_index(key);
+
+    return (load_balance_map_get(lbm_index));
+}
+
+/*
+ * Key-sum callback for the map DB: the hash of the map the key refers to.
+ */
+static uword
+load_balance_map_db_hash_key_sum (hash_t * h,
+                                  uword key)
+{
+    return (load_balance_map_hash(
+                load_balance_map_db_get_from_hash_key(key)));
+}
+
+/**
+ * @brief Key-equal callback for the map DB.
+ *
+ * Two keys are equal when the maps they refer to have the same number of
+ * paths and the same path index at every position. The paths themselves are
+ * compared rather than their hashes, so two different path sets whose hashes
+ * collide are not mistaken for the same map.
+ *
+ * The comparison covers the same data load_balance_map_hash() mixes (path
+ * count and path indices, in order), so equal keys always hash equally.
+ * NOTE(review): path weights are part of neither the hash nor this
+ * comparison - confirm maps over the same paths with different weights are
+ * meant to be shared.
+ *
+ * @param h    the hash table (unused)
+ * @param key1 first key, an encoded pool index or a map pointer
+ * @param key2 second key, an encoded pool index or a map pointer
+ * @return non-zero if the keys refer to maps with identical path sets
+ */
+static uword
+load_balance_map_db_hash_key_equal (hash_t * h,
+                                    uword key1,
+                                    uword key2)
+{
+    load_balance_map_t *lbm1, *lbm2;
+    u32 ii;
+
+    lbm1 = load_balance_map_db_get_from_hash_key(key1);
+    lbm2 = load_balance_map_db_get_from_hash_key(key2);
+
+    if (vec_len(lbm1->lbm_paths) != vec_len(lbm2->lbm_paths))
+    {
+        return (0);
+    }
+
+    vec_foreach_index(ii, lbm1->lbm_paths)
+    {
+        if (lbm1->lbm_paths[ii].lbmp_index !=
+            lbm2->lbm_paths[ii].lbmp_index)
+        {
+            return (0);
+        }
+    }
+
+    return (1);
+}
+
+/*
+ * Look the map up in the DB, keyed by the map's address.
+ * Returns the value stored for a match, else FIB_NODE_INDEX_INVALID.
+ */
+static index_t
+load_balance_map_db_find (load_balance_map_t *lbm)
+{
+    uword *match;
+
+    match = hash_get(load_balance_map_db, lbm);
+
+    if (NULL == match)
+    {
+        return (FIB_NODE_INDEX_INVALID);
+    }
+
+    return match[0];
+}
+
+/**
+ * @brief Add a map to both databases: the DB keyed by the map's set of
+ * paths and, for each of the map's paths, the list of maps using that path.
+ *
+ * The map must not already be in the DB (asserted). A per-path list is
+ * created the first time a path index is seen. Each path's lbmp_sibling is
+ * set to the value returned by pushing the map's index onto that path's
+ * list; load_balance_map_db_remove() uses it to take the map off again.
+ */
+static void
+load_balance_map_db_insert (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ fib_node_list_t list;
+ uword *p;
+
+ ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));
+
+ /*
+ * insert into the DB based on the set of paths.
+ * the key is the encoded pool index, the value is the pool index.
+ */
+ hash_set (load_balance_map_db,
+ load_balance_map_db_hash_key_from_index(
+ load_balance_map_get_index(lbm)),
+ load_balance_map_get_index(lbm));
+
+ /*
+ * insert into each per-path list.
+ */
+ vec_foreach(lbmp, lbm->lbm_paths)
+ {
+ p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);
+
+ if (NULL == p)
+ {
+ list = fib_node_list_create();
+ hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list);
+ }
+ else
+ {
+ list = p[0];
+ }
+
+ lbmp->lbmp_sibling =
+ fib_node_list_push_front(list,
+ 0, FIB_NODE_TYPE_FIRST,
+ load_balance_map_get_index(lbm));
+ }
+
+ LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
+}
+
+/**
+ * @brief Remove a map from the DB keyed by path set, and from the list of
+ * maps kept for each of its paths. The map must be in the DB (asserted).
+ *
+ * NOTE(review): a per-path list, and its lb_maps_by_path_index entry, are
+ * left in place here even when this was the last map using the path -
+ * confirm whether they should be released.
+ */
+static void
+load_balance_map_db_remove (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ uword *p;
+
+ ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));
+
+ hash_unset(load_balance_map_db,
+ load_balance_map_db_hash_key_from_index(
+ load_balance_map_get_index(lbm)));
+
+ /*
+ * remove from each per-path list.
+ */
+ vec_foreach(lbmp, lbm->lbm_paths)
+ {
+ p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);
+
+ ASSERT(NULL != p);
+
+ fib_node_list_remove(p[0], lbmp->lbmp_sibling);
+ }
+
+ LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
+}
+
+/**
+ * @brief from the paths that are usable, fill the Map.
+ *
+ * Each path owns as many consecutive buckets as its weight. Buckets of a
+ * resolved path map to themselves. Buckets of an unresolved path are mapped,
+ * round-robin, onto the buckets of the resolved paths. If no path is
+ * resolved every bucket maps to itself.
+ *
+ * NOTE(review): this assumes the map has at least one bucket and that the
+ * path weights sum to no more than the number of buckets; a larger sum would
+ * write beyond the scratch vector and lbm_buckets. Confirm the callers
+ * guarantee both.
+ */
+static void
+load_balance_map_fill (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ u32 n_buckets, bucket, ii, jj;
+ u16 *tmp_buckets;
+
+ tmp_buckets = NULL;
+ n_buckets = vec_len(lbm->lbm_buckets);
+
+ /*
+ * run through the set of paths once, and build a vector of the
+ * indices that are usable. we do this in a scratch space, since we
+ * need to refer to it multiple times as we build the real buckets.
+ */
+ vec_validate(tmp_buckets, n_buckets-1);
+
+ /* bucket walks all the buckets, jj counts only the usable ones */
+ bucket = jj = 0;
+ vec_foreach (lbmp, lbm->lbm_paths)
+ {
+ if (fib_path_is_resolved(lbmp->lbmp_index))
+ {
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ tmp_buckets[jj++] = bucket++;
+ }
+ }
+ else
+ {
+ bucket += lbmp->lbmp_weight;
+ }
+ }
+ /* trim the scratch vector to the number of usable buckets found */
+ _vec_len(tmp_buckets) = jj;
+
+ /*
+ * If the number of temporaries written is as many as we need, implying
+ * all paths were up, then we can simply copy the scratch area over the
+ * actual buckets' memory
+ */
+ if (jj == n_buckets)
+ {
+ memcpy(lbm->lbm_buckets,
+ tmp_buckets,
+ sizeof(lbm->lbm_buckets[0]) * n_buckets);
+ }
+ else
+ {
+ /*
+ * one or more paths are down.
+ */
+ if (0 == vec_len(tmp_buckets))
+ {
+ /*
+ * if the scratch area is empty, then no paths are usable.
+ * they will all drop. so use them all, lest we account drops
+ * against only one.
+ */
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ lbm->lbm_buckets[bucket] = bucket;
+ }
+ }
+ else
+ {
+ /* jj now cycles through the scratch vector */
+ bucket = jj = 0;
+ vec_foreach (lbmp, lbm->lbm_paths)
+ {
+ if (fib_path_is_resolved(lbmp->lbmp_index))
+ {
+ /* path is usable, its buckets map to themselves */
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ lbm->lbm_buckets[bucket] = bucket;
+ bucket++;
+ }
+ }
+ else
+ {
+ /*
+ * path is unusable
+ * cycle through the scratch space selecting an index.
+ * this means we load balance, in the intended ratio,
+ * over the paths that are still usable.
+ */
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ lbm->lbm_buckets[bucket] = tmp_buckets[jj];
+ jj = (jj + 1) % vec_len(tmp_buckets);
+ bucket++;
+ }
+ }
+ }
+ }
+ }
+
+ vec_free(tmp_buckets);
+}
+
+/**
+ * @brief Take a map from the pool, zero it, and copy the index and weight of
+ * each of the given paths into its path vector. The map is not inserted in
+ * any database and has no buckets yet.
+ *
+ * NOTE(review): vec_len(paths)-1 wraps if paths is empty - presumably the
+ * callers always pass at least one path; confirm.
+ */
+static load_balance_map_t*
+load_balance_map_alloc (const load_balance_path_t *paths)
+{
+ load_balance_map_t *lbm;
+ u32 ii;
+
+ pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
+ memset(lbm, 0, sizeof(*lbm));
+
+ vec_validate(lbm->lbm_paths, vec_len(paths)-1);
+
+ vec_foreach_index(ii, paths)
+ {
+ lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index;
+ lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight;
+ }
+
+ return (lbm);
+}
+
+/**
+ * @brief Complete the construction of a newly allocated map: record the sum
+ * of weights, size the bucket vector to n_buckets, insert the map in the
+ * databases and fill the buckets from the current path state.
+ *
+ * @return the map passed in
+ */
+static load_balance_map_t *
+load_balance_map_init (load_balance_map_t *lbm,
+ u32 n_buckets,
+ u32 sum_of_weights)
+{
+ lbm->lbm_sum_of_norm_weights = sum_of_weights;
+ vec_validate(lbm->lbm_buckets, n_buckets-1);
+
+ load_balance_map_db_insert(lbm);
+
+ load_balance_map_fill(lbm);
+
+ return (lbm);
+}
+
+/**
+ * @brief Make/add a new or lock an existing Load-balance map
+ *
+ * A temporary map is built from the paths and used as the key to search the
+ * DB. If no existing map matches, the temporary becomes the new map.
+ * Otherwise the existing map is used and the temporary (its path vector and
+ * its pool element) is released, so it does not leak.
+ *
+ * @param n_buckets      number of buckets a newly created map is given
+ * @param sum_of_weights sum of the paths' weights, stored in a new map
+ * @param paths          vector of paths the map represents
+ * @return the index of the map, on which the caller now holds a lock
+ */
+index_t
+load_balance_map_add_or_lock (u32 n_buckets,
+                              u32 sum_of_weights,
+                              const load_balance_path_t *paths)
+{
+    load_balance_map_t *tmp, *lbm;
+    index_t lbmi;
+
+    tmp = load_balance_map_alloc(paths);
+
+    lbmi = load_balance_map_db_find(tmp);
+
+    if (INDEX_INVALID == lbmi)
+    {
+        lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
+    }
+    else
+    {
+        lbm = load_balance_map_get(lbmi);
+
+        /*
+         * the temporary was only needed as the search key. it was never
+         * inserted in the DBs, so freeing its paths and returning it to
+         * the pool is all the cleanup required.
+         */
+        vec_free(tmp->lbm_paths);
+        pool_put(load_balance_map_pool, tmp);
+    }
+
+    lbm->lbm_locks++;
+
+    return (load_balance_map_get_index(lbm));
+}
+
+/*
+ * Take one more lock on the map with the given index.
+ */
+void
+load_balance_map_lock (index_t lbmi)
+{
+    load_balance_map_get(lbmi)->lbm_locks++;
+}
+
+/*
+ * Release one lock on the map; an INDEX_INVALID index is ignored.
+ * When the last lock goes the map is removed from the databases, its
+ * vectors are freed and it is returned to the pool.
+ */
+void
+load_balance_map_unlock (index_t lbmi)
+{
+    load_balance_map_t *map;
+
+    if (INDEX_INVALID != lbmi)
+    {
+        map = load_balance_map_get(lbmi);
+
+        map->lbm_locks--;
+
+        if (0 == map->lbm_locks)
+        {
+            load_balance_map_db_remove(map);
+            vec_free(map->lbm_paths);
+            vec_free(map->lbm_buckets);
+            pool_put(load_balance_map_pool, map);
+        }
+    }
+}
+
+/*
+ * List-walk callback: re-fill the buckets of the map the list element
+ * refers to. Always returns 1.
+ */
+static int
+load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
+                                         void *ctx)
+{
+    load_balance_map_fill(load_balance_map_get(fptr->fnp_index));
+
+    return (1);
+}
+
+/**
+ * @brief the state of a path has changed (it has no doubt gone down).
+ * This is the trigger to perform a PIC edge cutover and update the maps
+ * to exclude this path. Nothing is done if no map uses the path.
+ */
+void
+load_balance_map_path_state_change (fib_node_index_t path_index)
+{
+    uword *list;
+
+    list = hash_get(lb_maps_by_path_index, path_index);
+
+    if (NULL != list)
+    {
+        /*
+         * re-stripe the buckets of each affected map
+         */
+        fib_node_list_walk(list[0],
+                           load_balance_map_path_state_change_walk,
+                           NULL);
+    }
+}
+
+/**
+ * @brief Module initialisation: create the two hash tables - the DB of maps
+ * keyed by their set of paths (using the key-sum and key-equal functions
+ * above) and the table of per-path lists of maps keyed by path index.
+ */
+void
+load_balance_map_module_init (void)
+{
+ load_balance_map_db =
+ hash_create2 (/* elts */ 0,
+ /* user */ 0,
+ /* value_bytes */ sizeof (index_t),
+ load_balance_map_db_hash_key_sum,
+ load_balance_map_db_hash_key_equal,
+ /* format pair/arg */
+ 0, 0);
+
+ lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
+}
+
+/**
+ * @brief CLI handler for "show load-balance-map [<index>]".
+ *
+ * With an index argument that one map is shown, provided the index refers
+ * to an in-use pool element; otherwise a "no such load-balance-map" message
+ * is printed. With no argument every map in the pool is shown.
+ *
+ * @param vm    vlib main, used for the CLI output
+ * @param input the unparsed remainder of the command line
+ * @param cmd   the CLI command being executed (unused)
+ * @return 0 (no error) always
+ */
+static clib_error_t *
+load_balance_map_show (vlib_main_t * vm,
+                       unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+    index_t lbmi = INDEX_INVALID;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "%d", &lbmi))
+            ;
+        else
+            break;
+    }
+
+    if (INDEX_INVALID != lbmi)
+    {
+        /*
+         * the index comes straight from the user, so do not hand an unused
+         * or out-of-range index to the formatter.
+         */
+        if (pool_is_free_index(load_balance_map_pool, lbmi))
+        {
+            vlib_cli_output (vm, "no such load-balance-map:%d", lbmi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
+        }
+    }
+    else
+    {
+        load_balance_map_t *lbm;
+
+        pool_foreach(lbm, load_balance_map_pool,
+        ({
+            vlib_cli_output (vm, "%U", format_load_balance_map,
+                             load_balance_map_get_index(lbm), 0);
+        }));
+    }
+
+    return 0;
+}
+
+VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
+ .path = "show load-balance-map",
+ .short_help = "show load-balance-map [<index>]",
+ .function = load_balance_map_show,
+};
diff --git a/vnet/vnet/dpo/load_balance_map.h b/vnet/vnet/dpo/load_balance_map.h
new file mode 100644
index 00000000000..f080e97ccad
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance_map.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ */
+
+#ifndef __LOAD_BALANCE_MAP_H__
+#define __LOAD_BALANCE_MAP_H__
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
+
+struct load_balance_map_path_t_;
+
+/**
+ * A load-balance map: a bucket-index to bucket-index translation, built over
+ * a set of paths and shared, by lock count, between its users.
+ */
+typedef struct load_balance_map_t_ {
+ /**
+ * The buckets of the map that provide the index to index translation.
+ * In the first cacheline.
+ */
+ u16 *lbm_buckets;
+
+ /**
+ * the vector of paths this MAP represents
+ */
+ struct load_balance_map_path_t_ *lbm_paths;
+
+ /**
+ * the sum of the normalised weights. cache for convenience
+ */
+ u32 lbm_sum_of_norm_weights;
+
+ /**
+ * Number of locks. Maps are shared by a large number of recursive fib_entry_ts
+ */
+ u32 lbm_locks;
+} load_balance_map_t;
+
+extern index_t load_balance_map_add_or_lock(u32 n_buckets,
+ u32 sum_of_weights,
+ const load_balance_path_t *norm_paths);
+
+extern void load_balance_map_lock(index_t lmbi);
+extern void load_balance_map_unlock(index_t lbmi);
+
+extern void load_balance_map_path_state_change(fib_node_index_t path_index);
+
+extern u8* format_load_balance_map(u8 *s, va_list ap);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_map_t *load_balance_map_pool;
+
+/*
+ * Return the map stored at index lbmi of the global pool of maps.
+ */
+static inline load_balance_map_t*
+load_balance_map_get (index_t lbmi)
+{
+    load_balance_map_t *lbm;
+
+    lbm = pool_elt_at_index(load_balance_map_pool, lbmi);
+
+    return (lbm);
+}
+
+
+extern void load_balance_map_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/lookup_dpo.c b/vnet/vnet/dpo/lookup_dpo.c
new file mode 100644
index 00000000000..0bfc0651a63
--- /dev/null
+++ b/vnet/vnet/dpo/lookup_dpo.c
@@ -0,0 +1,802 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+static const char *const lookup_input_names[] = LOOKUP_INPUTS;
+
+/**
+ * @brief Enumeration of the lookup subtypes
+ */
+typedef enum lookup_sub_type_t_
+{
+    LOOKUP_SUB_TYPE_SRC,
+    LOOKUP_SUB_TYPE_DST,
+    LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE,
+} lookup_sub_type_t;
+#define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1)
+
+/*
+ * Iterate _st over every lookup sub-type, starting from the first
+ * enumerator, LOOKUP_SUB_TYPE_SRC.
+ */
+#define FOR_EACH_LOOKUP_SUB_TYPE(_st)                                   \
+    for (_st = LOOKUP_SUB_TYPE_SRC; _st < LOOKUP_SUB_TYPE_NUM; _st++)
+
+/**
+ * @brief pool of all lookup DPOs
+ */
+lookup_dpo_t *lookup_dpo_pool;
+
+/**
+ * @brief An array of registered DPO type values for the sub-types
+ */
+static dpo_type_t lookup_dpo_sub_types[LOOKUP_SUB_TYPE_NUM];
+
+/*
+ * Take a cache-line aligned element from the pool of lookup DPOs.
+ * The element's fields are not set here.
+ */
+static lookup_dpo_t *
+lookup_dpo_alloc (void)
+{
+    lookup_dpo_t *new_lkd;
+
+    pool_get_aligned(lookup_dpo_pool, new_lkd, CLIB_CACHE_LINE_BYTES);
+
+    return (new_lkd);
+}
+
+/*
+ * Convert a lookup DPO pointer into its index in the pool.
+ */
+static index_t
+lookup_dpo_get_index (lookup_dpo_t *lkd)
+{
+    index_t lkdi;
+
+    lkdi = lkd - lookup_dpo_pool;
+
+    return (lkdi);
+}
+
+/**
+ * @brief Allocate and populate a lookup DPO, then set the caller's DPO ID to
+ * refer to it, using the registered sub-type that matches the input and
+ * table configuration.
+ *
+ * If no sub-type matches, the caller's DPO ID is reset and the newly
+ * allocated lookup DPO is returned to the pool rather than leaked.
+ * NOTE(review): any FIB table lock the caller took before calling is not
+ * released on that path - confirm who owns it.
+ *
+ * @param fib_index    index of the FIB stored in the lookup DPO
+ * @param proto        protocol of the lookup
+ * @param input        whether the source or destination address is looked up
+ * @param table_config whether the table comes from config or the interface
+ * @param dpo          [out] the DPO ID to set
+ */
+static void
+lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
+                          dpo_proto_t proto,
+                          lookup_input_t input,
+                          lookup_table_t table_config,
+                          dpo_id_t *dpo)
+{
+    lookup_dpo_t *lkd;
+    dpo_type_t type;
+
+    lkd = lookup_dpo_alloc();
+    lkd->lkd_fib_index = fib_index;
+    lkd->lkd_proto = proto;
+    lkd->lkd_input = input;
+    lkd->lkd_table = table_config;
+    /*
+     * start from a known lock count rather than whatever the pool memory
+     * held; lookup_dpo_unlock() frees the object when the count hits zero.
+     */
+    lkd->lkd_locks = 0;
+
+    /*
+     * use the input type to select the lookup sub-type
+     */
+    type = 0;
+
+    switch (input)
+    {
+    case LOOKUP_INPUT_SRC_ADDR:
+        type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC];
+        break;
+    case LOOKUP_INPUT_DST_ADDR:
+        switch (table_config)
+        {
+        case LOOKUP_TABLE_FROM_INPUT_INTERFACE:
+            type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE];
+            break;
+        case LOOKUP_TABLE_FROM_CONFIG:
+            type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST];
+            break;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (0 == type)
+    {
+        /* nothing will reference the new object, so give it back */
+        pool_put(lookup_dpo_pool, lkd);
+        dpo_reset(dpo);
+    }
+    else
+    {
+        dpo_set(dpo, type, proto, lookup_dpo_get_index(lkd));
+    }
+}
+
+/**
+ * @brief Create a lookup DPO from a FIB index. When the table comes from
+ * config, a lock is taken on that FIB table first; lookup_dpo_unlock()
+ * releases it when the lookup DPO's last lock goes.
+ */
+void
+lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ if (LOOKUP_TABLE_FROM_CONFIG == table_config)
+ {
+ fib_table_lock(fib_index, dpo_proto_to_fib(proto));
+ }
+ lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+}
+
+/**
+ * @brief Create a lookup DPO from a table ID. When the table comes from
+ * config, the FIB table with that ID is found, or created, and locked.
+ *
+ * NOTE(review): for any other table_config no FIB index is obtained and the
+ * ASSERT below fires in builds with assertions enabled - confirm whether
+ * this function is only meant to be called with LOOKUP_TABLE_FROM_CONFIG.
+ */
+void
+lookup_dpo_add_or_lock_w_table_id (u32 table_id,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ fib_node_index_t fib_index = FIB_NODE_INDEX_INVALID;
+
+ if (LOOKUP_TABLE_FROM_CONFIG == table_config)
+ {
+ fib_index =
+ fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+ table_id);
+ }
+
+ ASSERT(FIB_NODE_INDEX_INVALID != fib_index);
+ lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+}
+
+/*
+ * Format a lookup DPO. The one argument consumed is the DPO's index.
+ * The output names the input (from lookup_input_names) and then either the
+ * protocol of the interface's table or the name of the configured FIB table.
+ */
+u8*
+format_lookup_dpo (u8 *s, va_list *args)
+{
+    const char *input_name;
+    lookup_dpo_t *lkd;
+
+    lkd = lookup_dpo_get(va_arg (*args, index_t));
+    input_name = lookup_input_names[lkd->lkd_input];
+
+    if (LOOKUP_TABLE_FROM_INPUT_INTERFACE != lkd->lkd_table)
+    {
+        return (format(s, "%s lookup in %U",
+                       input_name,
+                       format_fib_table_name, lkd->lkd_fib_index,
+                       dpo_proto_to_fib(lkd->lkd_proto)));
+    }
+
+    return (format(s, "%s lookup in interface's %U table",
+                   input_name,
+                   format_dpo_proto, lkd->lkd_proto));
+}
+
+/*
+ * Take one more lock on the lookup DPO the ID refers to.
+ */
+static void
+lookup_dpo_lock (dpo_id_t *dpo)
+{
+    lookup_dpo_get(dpo->dpoi_index)->lkd_locks++;
+}
+
+/*
+ * Release one lock on the lookup DPO the ID refers to. When the last lock
+ * goes, the FIB table lock is released (config tables only) and the object
+ * is returned to the pool.
+ */
+static void
+lookup_dpo_unlock (dpo_id_t *dpo)
+{
+    lookup_dpo_t *lookup;
+
+    lookup = lookup_dpo_get(dpo->dpoi_index);
+
+    lookup->lkd_locks--;
+
+    if (0 != lookup->lkd_locks)
+    {
+        return;
+    }
+
+    if (LOOKUP_TABLE_FROM_CONFIG == lookup->lkd_table)
+    {
+        fib_table_unlock(lookup->lkd_fib_index,
+                         dpo_proto_to_fib(lookup->lkd_proto));
+    }
+    pool_put(lookup_dpo_pool, lookup);
+}
+
+/**
+ * @brief Look one IPv4 address up in the mtrie of the given FIB.
+ *
+ * Despite the name, lookup_dpo_ip4_inline() uses this for both source and
+ * destination addresses, and treats the result as a load-balance index.
+ *
+ * @param src_fib_index0 index of the FIB whose mtrie is searched
+ * @param addr0          the address to look up
+ * @param src_adj_index0 [out] the index held in the matching leaf, or in
+ *                       the mtrie's default leaf if nothing matched
+ */
+always_inline void
+ip4_src_fib_lookup_one (u32 src_fib_index0,
+                        const ip4_address_t * addr0,
+                        u32 * src_adj_index0)
+{
+    ip4_fib_mtrie_leaf_t leaf0;
+    ip4_fib_mtrie_t * mtrie0;
+
+    mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+
+    /* one lookup step per byte of the address, starting from the root */
+    leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+
+    /* Handle default route. */
+    leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+    src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+}
+
+/**
+ * @brief Look two IPv4 addresses up, each in the mtrie of its own FIB, with
+ * the lookup steps of the two interleaved.
+ *
+ * For each address the output is the index held in the matching leaf, or in
+ * that mtrie's default leaf if nothing matched.
+ * NOTE(review): not called from the code visible here - presumably intended
+ * for a dual-packet loop in the lookup nodes; confirm.
+ */
+always_inline void
+ip4_src_fib_lookup_two (u32 src_fib_index0,
+ u32 src_fib_index1,
+ const ip4_address_t * addr0,
+ const ip4_address_t * addr1,
+ u32 * src_adj_index0,
+ u32 * src_adj_index1)
+{
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+
+ mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ /* one lookup step per byte of each address */
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
+
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+ src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+}
+
+/**
+ * @brief Lookup trace data
+ */
+typedef struct lookup_trace_t_
+{
+ /*
+ * what was looked up. the IPv4 and IPv6 lookup nodes write addr.ip4 and
+ * addr.ip6 respectively; hdr is not written by the code visible here.
+ */
+ union {
+ ip46_address_t addr;
+ mpls_unicast_header_t hdr;
+ };
+ /* index of the FIB the lookup was performed in */
+ fib_node_index_t fib_index;
+ /* index of the load-balance that was the lookup's result */
+ index_t lbi;
+} lookup_trace_t;
+
+
+/**
+ * @brief Common node function of the IPv4 lookup DPO nodes.
+ *
+ * For each packet: the lookup DPO is fetched from the index in the buffer's
+ * TX adjacency slot, the FIB is chosen, the source or destination address of
+ * the IPv4 header is looked up in it, and the packet is sent to the next
+ * node of bucket 0 of the resulting load-balance, with that bucket's DPO
+ * index written to the buffer's TX adjacency slot.
+ *
+ * @param input_src_addr       non-zero to look up the source address, zero
+ *                             for the destination address
+ * @param table_from_interface non-zero to use the FIB of the packet's RX
+ *                             interface, zero to use the FIB in the DPO
+ * @return the number of packets in the frame
+ *
+ * NOTE(review): bucket 0 is always used; no flow hash is computed to choose
+ * between the buckets of a multi-bucket load-balance - confirm intended.
+ */
+always_inline uword
+lookup_dpo_ip4_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int input_src_addr,
+ int table_from_interface)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 cpu_index = os_get_cpu_number();
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* placeholder: there is no multi-packet loop, only the single loop below */
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0;
+ const ip4_address_t *input_addr;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip4_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ /* speculatively enqueue the buffer to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by ip4 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ ip4_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr = &ip0->src_address;
+ }
+ else
+ {
+ input_addr = &ip0->dst_address;
+ }
+
+ /* do lookup */
+ ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0);
+ lb0 = load_balance_get(lbi0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* count one packet, and its length, against the load-balance */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip4 = *input_addr;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/*
+ * Format one lookup trace record: the FIB index, the address looked up and
+ * the index of the resulting load-balance, at the current indentation.
+ * The vlib main and node arguments are consumed but not used.
+ */
+static u8 *
+format_lookup_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    lookup_trace_t * trace = va_arg (*args, lookup_trace_t *);
+    uword indent;
+
+    indent = format_get_indent (s);
+
+    return (format (s, "%U fib-index:%d addr:%U load-balance:%d",
+                    format_white_space, indent,
+                    trace->fib_index,
+                    format_ip46_address, &trace->addr, IP46_TYPE_ANY,
+                    trace->lbi));
+}
+
+/*
+ * Node function: IPv4 destination address lookup in the FIB held in the
+ * lookup DPO.
+ */
+always_inline uword
+lookup_ip4_dst (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip4_inline(vm, node, from_frame,
+                                  0 /* input_src_addr */,
+                                  0 /* table_from_interface */));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_node) = {
+ .function = lookup_ip4_dst,
+ .name = "lookup-ip4-dst",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_node, lookup_ip4_dst)
+
+/*
+ * Node function: IPv4 destination address lookup in the FIB of the packet's
+ * RX interface.
+ */
+always_inline uword
+lookup_ip4_dst_itf (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip4_inline(vm, node, from_frame,
+                                  0 /* input_src_addr */,
+                                  1 /* table_from_interface */));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_itf_node) = {
+ .function = lookup_ip4_dst_itf,
+ .name = "lookup-ip4-dst-itf",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_itf_node, lookup_ip4_dst_itf)
+
+/*
+ * Node function: IPv4 source address lookup in the FIB held in the lookup
+ * DPO.
+ */
+always_inline uword
+lookup_ip4_src (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip4_inline(vm, node, from_frame,
+                                  1 /* input_src_addr */,
+                                  0 /* table_from_interface */));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_src_node) = {
+ .function = lookup_ip4_src,
+ .name = "lookup-ip4-src",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip4-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_src_node, lookup_ip4_src)
+
+/**
+ * @brief Common node function of the IPv6 lookup DPO nodes.
+ *
+ * For each packet: the lookup DPO is fetched from the index in the buffer's
+ * TX adjacency slot, the source or destination address of the IPv6 header is
+ * looked up in the FIB held in that DPO, and the packet is sent to the next
+ * node of bucket 0 of the resulting load-balance, with that bucket's DPO
+ * index written to the buffer's TX adjacency slot.
+ *
+ * Unlike the IPv4 variant there is no option to take the FIB from the RX
+ * interface.
+ *
+ * @param input_src_addr non-zero to look up the source address, zero for the
+ *                       destination address
+ * @return the number of packets in the frame
+ */
+always_inline uword
+lookup_dpo_ip6_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int input_src_addr)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* placeholder: there is no multi-packet loop, only the single loop below */
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* { */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0;
+ const ip6_address_t *input_addr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip6_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ /* speculatively enqueue the buffer to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by ip6 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+ fib_index0 = lkd0->lkd_fib_index;
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr0 = &ip0->src_address;
+ }
+ else
+ {
+ input_addr0 = &ip0->dst_address;
+ }
+
+ /* do the lookup of whichever address was selected above */
+ lbi0 = ip6_fib_table_fwding_lookup(&ip6_main,
+ fib_index0,
+ input_addr0);
+ lb0 = load_balance_get(lbi0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* count one packet, and its length, against the load-balance */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip6 = *input_addr0;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/**
+ * Node function for "lookup-ip6-dst": runs the shared IPv6 worker with
+ * input_src_addr = 0, i.e. the destination address is looked up.
+ */
+always_inline uword
+lookup_ip6_dst (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    const int input_src_addr = 0;
+
+    return lookup_dpo_ip6_inline(vm, node, from_frame, input_src_addr);
+}
+
+/*
+ * Graph node "lookup-ip6-dst", a sibling of "ip6-lookup".
+ */
+VLIB_REGISTER_NODE (lookup_ip6_dst_node) = {
+    .name = "lookup-ip6-dst",
+    .function = lookup_ip6_dst,
+    .sibling_of = "ip6-lookup",
+    .vector_size = sizeof (u32),
+    .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_node, lookup_ip6_dst)
+
+/**
+ * Node function for "lookup-ip6-src": runs the shared IPv6 worker with
+ * input_src_addr = 1, i.e. the source address is looked up.
+ */
+always_inline uword
+lookup_ip6_src (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    const int input_src_addr = 1;
+
+    return lookup_dpo_ip6_inline(vm, node, from_frame, input_src_addr);
+}
+
+/*
+ * Graph node "lookup-ip6-src", a sibling of "ip6-lookup".
+ */
+VLIB_REGISTER_NODE (lookup_ip6_src_node) = {
+    .name = "lookup-ip6-src",
+    .function = lookup_ip6_src,
+    .sibling_of = "ip6-lookup",
+    .vector_size = sizeof (u32),
+    .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_src_node, lookup_ip6_src)
+
+/**
+ * @brief Shared worker for the MPLS lookup-DPO graph nodes.
+ *
+ * For each buffer in the frame: pick an MPLS FIB index (from the lookup DPO
+ * or from the buffer's RX interface), look up the packet's MPLS header in
+ * that FIB, and forward the buffer via bucket 0 of the resulting
+ * load-balance object.
+ *
+ * @param vm                   vlib main
+ * @param node                 this node's runtime
+ * @param from_frame           frame holding the buffer indices to process
+ * @param table_from_interface non-zero: use the FIB index returned by
+ *                             mpls_fib_table_get_index_for_sw_if_index() for
+ *                             the buffer's sw_if_index[VLIB_RX];
+ *                             zero: use the lookup DPO's lkd_fib_index
+ * @return the number of buffers in from_frame
+ */
+always_inline uword
+lookup_dpo_mpls_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int table_from_interface)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 cpu_index = os_get_cpu_number();
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /*
+ * Placeholder: a multi-buffer loop
+ * (n_left_from >= 4 && n_left_to_next >= 2) is not implemented;
+ * buffers are handled one at a time below.
+ */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0;
+ const mpls_unicast_header_t * hdr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ /* speculatively enqueue the buffer to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ hdr0 = vlib_buffer_get_current (b0);
+
+ /* the previous node left the lookup DPO's index in adj_index[VLIB_TX] */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ mpls_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /* do lookup */
+ lbi0 = mpls_fib_table_forwarding_lookup (fib_index0, hdr0);
+ lb0 = load_balance_get(lbi0);
+ /* bucket 0 is always used; no flow hash is computed here */
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ /* hand the chosen DPO's next node and index to the next node */
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* one packet plus its chain length against the load-balance counter */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->hdr = *hdr0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/**
+ * Trace formatter for the MPLS lookup nodes: prints the FIB index, the
+ * captured MPLS header (byte-swapped from network to host order before it
+ * is handed to format_mpls_header) and the load-balance index.
+ */
+static u8 *
+format_lookup_mpls_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    lookup_trace_t * trace = va_arg (*args, lookup_trace_t *);
+    uword indent = format_get_indent (s);
+    mpls_unicast_header_t host_hdr;
+
+    host_hdr.label_exp_s_ttl =
+        clib_net_to_host_u32(trace->hdr.label_exp_s_ttl);
+
+    return format (s, "%U fib-index:%d hdr:%U load-balance:%d",
+                   format_white_space, indent,
+                   trace->fib_index,
+                   format_mpls_header, host_hdr,
+                   trace->lbi);
+}
+
+/**
+ * Node function for "lookup-mpls-dst": runs the shared MPLS worker with
+ * table_from_interface = 0, i.e. the FIB index comes from the lookup DPO.
+ */
+always_inline uword
+lookup_mpls_dst (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_frame_t * from_frame)
+{
+    const int table_from_interface = 0;
+
+    return lookup_dpo_mpls_inline(vm, node, from_frame,
+                                  table_from_interface);
+}
+
+/*
+ * Graph node "lookup-mpls-dst", a sibling of "mpls-lookup".
+ */
+VLIB_REGISTER_NODE (lookup_mpls_dst_node) = {
+    .name = "lookup-mpls-dst",
+    .function = lookup_mpls_dst,
+    .format_trace = format_lookup_mpls_trace,
+    .vector_size = sizeof (u32),
+    .sibling_of = "mpls-lookup",
+    .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_node, lookup_mpls_dst)
+
+/**
+ * Node function for "lookup-mpls-dst-itf": runs the shared MPLS worker with
+ * table_from_interface = 1, i.e. the FIB index is derived from the buffer's
+ * RX interface.
+ */
+always_inline uword
+lookup_mpls_dst_itf (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * from_frame)
+{
+    const int table_from_interface = 1;
+
+    return lookup_dpo_mpls_inline(vm, node, from_frame,
+                                  table_from_interface);
+}
+
+/*
+ * Graph node "lookup-mpls-dst-itf", a sibling of "mpls-lookup".
+ */
+VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = {
+    .name = "lookup-mpls-dst-itf",
+    .function = lookup_mpls_dst_itf,
+    .format_trace = format_lookup_mpls_trace,
+    .vector_size = sizeof (u32),
+    .sibling_of = "mpls-lookup",
+    .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf)
+
+/*
+ * Virtual function table shared by every lookup DPO (sub-)type.
+ */
+static const dpo_vft_t lkd_vft = {
+    .dv_format = format_lookup_dpo,
+    .dv_lock = lookup_dpo_lock,
+    .dv_unlock = lookup_dpo_unlock,
+};
+
+/*
+ * Per-protocol, NULL-terminated graph node name lists for the
+ * source-address lookup sub-type. No MPLS entry exists.
+ */
+static const char* const lookup_src_ip4_nodes[] =
+{
+    "lookup-ip4-src",
+    NULL,
+};
+static const char* const lookup_src_ip6_nodes[] =
+{
+    "lookup-ip6-src",
+    NULL,
+};
+static const char* const * const lookup_src_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = lookup_src_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_src_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
+/*
+ * Per-protocol, NULL-terminated graph node name lists for the
+ * destination-address lookup sub-type.
+ */
+static const char* const lookup_dst_ip4_nodes[] =
+{
+    "lookup-ip4-dst",
+    NULL,
+};
+static const char* const lookup_dst_ip6_nodes[] =
+{
+    "lookup-ip6-dst",
+    NULL,
+};
+static const char* const lookup_dst_mpls_nodes[] =
+{
+    "lookup-mpls-dst",
+    NULL,
+};
+static const char* const * const lookup_dst_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = lookup_dst_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_dst_ip6_nodes,
+    [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes,
+};
+
+/*
+ * Per-protocol, NULL-terminated graph node name lists for the
+ * destination lookup whose table comes from the input interface.
+ *
+ * NOTE(review): no node named "lookup-ip6-dst-itf" is registered in the
+ * visible part of this file - confirm it exists elsewhere.
+ */
+static const char* const lookup_dst_from_interface_ip4_nodes[] =
+{
+    "lookup-ip4-dst-itf",
+    NULL,
+};
+static const char* const lookup_dst_from_interface_ip6_nodes[] =
+{
+    "lookup-ip6-dst-itf",
+    NULL,
+};
+static const char* const lookup_dst_from_interface_mpls_nodes[] =
+{
+    "lookup-mpls-dst-itf",
+    NULL,
+};
+static const char* const * const lookup_dst_from_interface_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = lookup_dst_from_interface_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_dst_from_interface_ip6_nodes,
+    [DPO_PROTO_MPLS] = lookup_dst_from_interface_mpls_nodes,
+};
+
+
+/**
+ * @brief Register the lookup DPO type and one DPO sub-type per lookup
+ * variant (source, destination, destination with table from interface).
+ */
+void
+lookup_dpo_module_init (void)
+{
+    dpo_register(DPO_LOOKUP, &lkd_vft, NULL);
+
+    /*
+     * There are various sorts of lookup; src or dst addr v4 /v6 etc.
+     * there isn't an object type for each (there is only the lookup_dpo_t),
+     * but, for performance reasons, there is a data plane function, and hence
+     * VLIB node for each. VLIB graph node construction is based on DPO types
+     * so we create sub-types.
+     */
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC] =
+        dpo_register_new_type(&lkd_vft, lookup_src_nodes);
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] =
+        dpo_register_new_type(&lkd_vft, lookup_dst_nodes);
+    /*
+     * The table-from-interface variant must use its own "-itf" node table;
+     * registering it with lookup_dst_nodes would send those packets to the
+     * plain destination nodes and leave the "-itf" nodes unused.
+     * NOTE(review): the table names "lookup-ip6-dst-itf", for which no node
+     * is registered in the visible part of this file - confirm.
+     */
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] =
+        dpo_register_new_type(&lkd_vft, lookup_dst_from_interface_nodes);
+}
diff --git a/vnet/vnet/dpo/lookup_dpo.h b/vnet/vnet/dpo/lookup_dpo.h
new file mode 100644
index 00000000000..ff283388868
--- /dev/null
+++ b/vnet/vnet/dpo/lookup_dpo.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOOKUP_DPO_H__
+#define __LOOKUP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * Switch to use the packet's source or destination address for lookup.
+ * The enum is declared packed.
+ */
+typedef enum lookup_input_t_ {
+ /* look up the packet's source address */
+ LOOKUP_INPUT_SRC_ADDR,
+ /* look up the packet's destination address */
+ LOOKUP_INPUT_DST_ADDR,
+} __attribute__ ((packed)) lookup_input_t;
+
+/**
+ * Array initialiser giving a display string for each lookup_input_t value,
+ * indexed by the enum value.
+ */
+#define LOOKUP_INPUTS { \
+ [LOOKUP_INPUT_SRC_ADDR] = "src-address", \
+ [LOOKUP_INPUT_DST_ADDR] = "dst-address", \
+}
+
+/**
+ * Switch to use the table of the packet's input interface or the
+ * configured table for the lookup. The enum is declared packed.
+ */
+typedef enum lookup_table_t_ {
+ /* take the table from the packet's input interface */
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ /* use the table supplied by configuration */
+ LOOKUP_TABLE_FROM_CONFIG,
+} __attribute__ ((packed)) lookup_table_t;
+
+/**
+ * Array initialiser giving a display string for each lookup_table_t value.
+ * The designators are the lookup_table_t enumerators, so the strings stay
+ * attached to the right value if either enum is ever reordered.
+ */
+#define LOOKUP_TABLES { \
+    [LOOKUP_TABLE_FROM_INPUT_INTERFACE] = "table-input-interface", \
+    [LOOKUP_TABLE_FROM_CONFIG] = "table-configured", \
+}
+
+/**
+ * A data-path object that triggers a further FIB lookup on the packet
+ */
+typedef struct lookup_dpo_t
+{
+ /**
+ * The FIB, or interface from which to get a FIB, in which to perform
+ * the next lookup;
+ */
+ fib_node_index_t lkd_fib_index;
+
+ /**
+ * The protocol of the FIB for the lookup, and hence
+ * the protocol of the packet
+ */
+ dpo_proto_t lkd_proto;
+
+ /**
+ * Switch to use src or dst address
+ */
+ lookup_input_t lkd_input;
+
+ /**
+ * Switch to use the table index passed, or the table of the input interface
+ */
+ lookup_table_t lkd_table;
+
+ /**
+ * Number of locks
+ */
+ u16 lkd_locks;
+} lookup_dpo_t;
+
+/*
+ * Construction API; the definitions are not part of this header.
+ * NOTE(review): both presumably fill in *dpo with a lookup DPO for the
+ * given protocol, input address selector and table selector; the first is
+ * keyed by FIB index, the second by table ID - confirm against lookup_dpo.c.
+ */
+extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table,
+ dpo_id_t *dpo);
+extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table,
+ dpo_id_t *dpo);
+
+/* format() callback for a lookup DPO */
+extern u8* format_lookup_dpo(u8 *s, va_list *args);
+
+/*
+ * The pool is exposed (an encapsulation violation) so the data-path can
+ * reach lookup DPOs quickly.
+ */
+extern lookup_dpo_t *lookup_dpo_pool;
+
+/**
+ * Return the lookup DPO stored at the given pool index.
+ */
+static inline lookup_dpo_t *
+lookup_dpo_get (index_t index)
+{
+    lookup_dpo_t *lkd = pool_elt_at_index(lookup_dpo_pool, index);
+
+    return lkd;
+}
+
+extern void lookup_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/mpls_label_dpo.c b/vnet/vnet/dpo/mpls_label_dpo.c
new file mode 100644
index 00000000000..0ec840ecfbd
--- /dev/null
+++ b/vnet/vnet/dpo/mpls_label_dpo.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS Label DPOs
+ */
+mpls_label_dpo_t *mpls_label_dpo_pool;
+
+/**
+ * Take a cache-line aligned element from the MPLS label DPO pool, zero it
+ * and reset its child DPO.
+ */
+static mpls_label_dpo_t *
+mpls_label_dpo_alloc (void)
+{
+    mpls_label_dpo_t *new_mld;
+
+    pool_get_aligned(mpls_label_dpo_pool, new_mld, CLIB_CACHE_LINE_BYTES);
+    memset(new_mld, 0, sizeof(*new_mld));
+    dpo_reset(&new_mld->mld_dpo);
+
+    return new_mld;
+}
+
+/**
+ * Convert a pool element pointer into its index in mpls_label_dpo_pool.
+ */
+static index_t
+mpls_label_dpo_get_index (mpls_label_dpo_t *mld)
+{
+    const mpls_label_dpo_t *pool_base = mpls_label_dpo_pool;
+
+    return (mld - pool_base);
+}
+
+/**
+ * @brief Create an MPLS label DPO stacked on a parent DPO.
+ *
+ * @param label label value to impose
+ * @param eos   end-of-stack bit
+ * @param ttl   TTL to write in the header
+ * @param exp   EXP bits to write in the header
+ * @param dpo   the DPO the new object is stacked on
+ * @return the pool index of the new MPLS label DPO
+ */
+index_t
+mpls_label_dpo_create (mpls_label_t label,
+                       mpls_eos_bit_t eos,
+                       u8 ttl,
+                       u8 exp,
+                       const dpo_id_t *dpo)
+{
+    mpls_label_dpo_t *new_mld = mpls_label_dpo_alloc();
+
+    /* assemble the header word in host byte order */
+    vnet_mpls_uc_set_label(&new_mld->mld_hdr.label_exp_s_ttl, label);
+    vnet_mpls_uc_set_ttl(&new_mld->mld_hdr.label_exp_s_ttl, ttl);
+    vnet_mpls_uc_set_exp(&new_mld->mld_hdr.label_exp_s_ttl, exp);
+    vnet_mpls_uc_set_s(&new_mld->mld_hdr.label_exp_s_ttl, eos);
+
+    /*
+     * store it in network byte order: the data-plane copies the header
+     * straight onto packets
+     */
+    new_mld->mld_hdr.label_exp_s_ttl =
+        clib_host_to_net_u32(new_mld->mld_hdr.label_exp_s_ttl);
+
+    dpo_stack(DPO_MPLS_LABEL, DPO_PROTO_MPLS, &new_mld->mld_dpo, dpo);
+
+    return mpls_label_dpo_get_index(new_mld);
+}
+
+/**
+ * format() callback for an MPLS label DPO: prints the object's index, its
+ * header (converted to host byte order) and, on the following line, the
+ * child DPO indented by two more columns.
+ */
+u8*
+format_mpls_label_dpo (u8 *s, va_list *args)
+{
+    index_t mldi = va_arg (*args, index_t);
+    u32 indent = va_arg (*args, u32);
+    mpls_label_dpo_t *mld = mpls_label_dpo_get(mldi);
+    mpls_unicast_header_t host_hdr;
+
+    host_hdr.label_exp_s_ttl =
+        clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl);
+
+    s = format(s, "mpls-label:[%d]:%U\n%U%U",
+               mldi,
+               format_mpls_header, host_hdr,
+               format_white_space, indent,
+               format_dpo_id, &mld->mld_dpo, indent+2);
+
+    return s;
+}
+
+/**
+ * Take one more lock on the MPLS label DPO referenced by dpo.
+ */
+static void
+mpls_label_dpo_lock (dpo_id_t *dpo)
+{
+    mpls_label_dpo_t *mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+    mld->mld_locks += 1;
+}
+
+/**
+ * Drop one lock on the MPLS label DPO referenced by dpo. When the count
+ * reaches zero the child DPO is reset and the object goes back to the pool.
+ */
+static void
+mpls_label_dpo_unlock (dpo_id_t *dpo)
+{
+    mpls_label_dpo_t *mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+    mld->mld_locks -= 1;
+
+    if (0 != mld->mld_locks)
+    {
+        return;
+    }
+
+    dpo_reset(&mld->mld_dpo);
+    pool_put(mpls_label_dpo_pool, mld);
+}
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label imposition
+ * node.
+ */
+typedef struct mpls_label_imposition_trace_t_
+{
+ /**
+ * The MPLS header imposed
+ */
+ mpls_unicast_header_t hdr;
+} mpls_label_imposition_trace_t;
+
+/**
+ * @brief Node function for "mpls-label-imposition".
+ *
+ * For each buffer: fetch the MPLS label DPO whose index is held in the
+ * buffer's ip.adj_index[VLIB_TX], prepend that DPO's pre-built MPLS header
+ * to the packet, and forward to the DPO's child (its next node and index).
+ *
+ * @param vm         vlib main
+ * @param node       this node's runtime
+ * @param from_frame frame holding the buffer indices to process
+ * @return the number of buffers in from_frame
+ */
+always_inline uword
+mpls_label_imposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ mpls_unicast_header_t *hdr0;
+ mpls_label_dpo_t *mld0;
+ vlib_buffer_t * b0;
+ u32 bi0, mldi0;
+ u32 next0;
+
+ /* speculatively enqueue the buffer to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* the previous node left the label DPO's index in adj_index[VLIB_TX] */
+ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ mld0 = mpls_label_dpo_get(mldi0);
+
+ /* Paint the MPLS header: move the buffer start back by one header */
+ vlib_buffer_advance(b0, -sizeof(*hdr0));
+ hdr0 = vlib_buffer_get_current(b0);
+
+ // FIXME.
+ // need to copy the TTL from the correct place.
+ // for IPvX imposition from the IP header
+ // so we need a dedicated ipx-to-mpls-label-imp-node
+ // for mpls switch and stack another solution is required.
+ *hdr0 = mld0->mld_hdr;
+
+ /* hand the child DPO's next node and index to the next node */
+ next0 = mld0->mld_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->hdr = *hdr0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/**
+ * Trace formatter for "mpls-label-imposition": prints the imposed MPLS
+ * header, byte-swapped from network to host order before formatting.
+ */
+static u8 *
+format_mpls_label_imposition_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    mpls_label_imposition_trace_t * trace =
+        va_arg (*args, mpls_label_imposition_trace_t *);
+    uword indent = format_get_indent (s);
+    mpls_unicast_header_t host_hdr;
+
+    host_hdr.label_exp_s_ttl =
+        clib_net_to_host_u32(trace->hdr.label_exp_s_ttl);
+
+    return format (s, "%Umpls-header:%U",
+                   format_white_space, indent,
+                   format_mpls_header, host_hdr);
+}
+
+/*
+ * Graph node "mpls-label-imposition". Its only statically declared next
+ * node is "error-drop".
+ */
+VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
+    .name = "mpls-label-imposition",
+    .function = mpls_label_imposition,
+    .format_trace = format_mpls_label_imposition_trace,
+    .vector_size = sizeof (u32),
+
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "error-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, mpls_label_imposition)
+
+/*
+ * Virtual function table for the MPLS label DPO type.
+ */
+static const dpo_vft_t mld_vft = {
+    .dv_format = format_mpls_label_dpo,
+    .dv_lock = mpls_label_dpo_lock,
+    .dv_unlock = mpls_label_dpo_unlock,
+};
+
+/*
+ * Per-protocol, NULL-terminated graph node name lists for the MPLS label
+ * DPO. Every protocol names the same "mpls-label-imposition" node.
+ */
+static const char* const mpls_label_imp_ip4_nodes[] =
+{
+    "mpls-label-imposition",
+    NULL,
+};
+static const char* const mpls_label_imp_ip6_nodes[] =
+{
+    "mpls-label-imposition",
+    NULL,
+};
+static const char* const mpls_label_imp_mpls_nodes[] =
+{
+    "mpls-label-imposition",
+    NULL,
+};
+static const char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = mpls_label_imp_ip4_nodes,
+    [DPO_PROTO_IP6]  = mpls_label_imp_ip6_nodes,
+    [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes,
+};
+
+
+/**
+ * Register the MPLS label DPO type with its virtual function table and
+ * per-protocol graph node lists.
+ */
+void
+mpls_label_dpo_module_init (void)
+{
+ dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes);
+}
diff --git a/vnet/vnet/dpo/mpls_label_dpo.h b/vnet/vnet/dpo/mpls_label_dpo.h
new file mode 100644
index 00000000000..47ee344933f
--- /dev/null
+++ b/vnet/vnet/dpo/mpls_label_dpo.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LABEL_DPO_H__
+#define __MPLS_LABEL_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of an MPLS label for imposition in the data-path
+ */
+typedef struct mpls_label_dpo_t
+{
+ /**
+ * The MPLS label header to impose
+ */
+ mpls_unicast_header_t mld_hdr;
+
+ /**
+ * Next DPO in the graph
+ */
+ dpo_id_t mld_dpo;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 mld_locks;
+} mpls_label_dpo_t;
+
+/**
+ * Create an MPLS label DPO carrying the given label, EOS bit, TTL and EXP
+ * values, stacked on the DPO passed in. Returns the new object's index.
+ */
+extern index_t mpls_label_dpo_create(mpls_label_t label,
+ mpls_eos_bit_t eos,
+ u8 ttl,
+ u8 exp,
+ const dpo_id_t *dpo);
+
+/* format() callback for an MPLS label DPO */
+extern u8* format_mpls_label_dpo(u8 *s, va_list *args);
+
+
+/*
+ * The pool is exposed (an encapsulation violation) so the data-path can
+ * reach MPLS label DPOs quickly.
+ */
+extern mpls_label_dpo_t *mpls_label_dpo_pool;
+
+/**
+ * Return the MPLS label DPO stored at the given pool index.
+ */
+static inline mpls_label_dpo_t *
+mpls_label_dpo_get (index_t index)
+{
+    mpls_label_dpo_t *mld = pool_elt_at_index(mpls_label_dpo_pool, index);
+
+    return mld;
+}
+
+extern void mpls_label_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/punt_dpo.c b/vnet/vnet/dpo/punt_dpo.c
new file mode 100644
index 00000000000..e27a8ff3018
--- /dev/null
+++ b/vnet/vnet/dpo/punt_dpo.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing punting the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t punt_dpos[DPO_PROTO_NUM];
+
+/**
+ * Return the punt DPO for a protocol. The per-protocol static entry is
+ * (re)set to type DPO_PUNT with index 1 on every call.
+ */
+const dpo_id_t *
+punt_dpo_get (dpo_proto_t proto)
+{
+    dpo_id_t *punt = &punt_dpos[proto];
+
+    dpo_set(punt, DPO_PUNT, proto, 1);
+
+    return punt;
+}
+
+/**
+ * Return non-zero when the DPO's type is DPO_PUNT.
+ */
+int
+dpo_is_punt (const dpo_id_t *dpo)
+{
+    return (DPO_PUNT == dpo->dpoi_type);
+}
+
+/**
+ * Lock callback for the punt DPO: intentionally does nothing.
+ */
+static void
+punt_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * not maintaining a lock count on the punt
+ * more trouble than it's worth.
+ * There always needs to be one around. no point in managing its lifetime
+ */
+}
+/**
+ * Unlock callback for the punt DPO: intentionally does nothing, matching
+ * the empty lock callback.
+ */
+static void
+punt_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+/**
+ * format() callback for the punt DPO.
+ *
+ * Consumes the (index, indent) argument pair and appends "dpo-punt".
+ * ap is a pointer to the va_list, so it must be dereferenced for va_arg().
+ */
+static u8*
+format_punt_dpo (u8 *s, va_list *ap)
+{
+    CLIB_UNUSED(index_t index) = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+
+    return (format(s, "dpo-punt"));
+}
+
+/*
+ * Virtual function table for the punt DPO type.
+ */
+static const dpo_vft_t punt_vft = {
+    .dv_format = format_punt_dpo,
+    .dv_lock = punt_dpo_lock,
+    .dv_unlock = punt_dpo_unlock,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a punt
+ * object.
+ *
+ * These are the graph nodes used when a punt object is the parent in the
+ * DPO graph. Each list is NULL-terminated.
+ */
+static const char* const punt_ip4_nodes[] =
+{
+    "ip4-punt",
+    NULL,
+};
+static const char* const punt_ip6_nodes[] =
+{
+    "ip6-punt",
+    NULL,
+};
+static const char* const punt_mpls_nodes[] =
+{
+    "mpls-punt",
+    NULL,
+};
+static const char* const * const punt_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = punt_ip4_nodes,
+    [DPO_PROTO_IP6]  = punt_ip6_nodes,
+    [DPO_PROTO_MPLS] = punt_mpls_nodes,
+};
+
+/**
+ * Register the punt DPO type with its virtual function table and
+ * per-protocol graph node lists.
+ */
+void
+punt_dpo_module_init (void)
+{
+ dpo_register(DPO_PUNT, &punt_vft, punt_nodes);
+}
diff --git a/vnet/vnet/dpo/punt_dpo.h b/vnet/vnet/dpo/punt_dpo.h
new file mode 100644
index 00000000000..370547c1596
--- /dev/null
+++ b/vnet/vnet/dpo/punt_dpo.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A DPO to punt packets to the Control-plane
+ */
+
+#ifndef __PUNT_DPO_H__
+#define __PUNT_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/* Non-zero when the DPO's type is DPO_PUNT */
+extern int dpo_is_punt(const dpo_id_t *dpo);
+
+/* The punt DPO for the given protocol */
+extern const dpo_id_t *punt_dpo_get(dpo_proto_t proto);
+
+/* Registers the punt DPO type */
+extern void punt_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/receive_dpo.c b/vnet/vnet/dpo/receive_dpo.c
new file mode 100644
index 00000000000..ee7d82b0980
--- /dev/null
+++ b/vnet/vnet/dpo/receive_dpo.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiving the packet, i.e. it's for-us
+ */
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/receive_dpo.h>
+
+/**
+ * @brief pool of all receive DPOs
+ */
+receive_dpo_t *receive_dpo_pool;
+
+/**
+ * Take a cache-line aligned element from the receive DPO pool and zero it.
+ */
+static receive_dpo_t *
+receive_dpo_alloc (void)
+{
+    receive_dpo_t *new_rd;
+
+    pool_get_aligned(receive_dpo_pool, new_rd, CLIB_CACHE_LINE_BYTES);
+    memset(new_rd, 0, sizeof(*new_rd));
+
+    return new_rd;
+}
+
+/**
+ * Resolve a DPO id to its receive DPO; asserts the id is of type
+ * DPO_RECEIVE.
+ */
+static receive_dpo_t *
+receive_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    receive_dpo_t *rd;
+
+    ASSERT(DPO_RECEIVE == dpo->dpoi_type);
+    rd = receive_dpo_get(dpo->dpoi_index);
+
+    return rd;
+}
+
+
+/*
+ * receive_dpo_add_or_lock
+ *
+ * Allocates a new receive DPO for sw_if_index, records nh_addr when one is
+ * given, and sets *dpo to reference it. A new object is allocated on every
+ * call; no existing object is searched for.
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The local adj is added to an interface's receive prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+void
+receive_dpo_add_or_lock (dpo_proto_t proto,
+                         u32 sw_if_index,
+                         const ip46_address_t *nh_addr,
+                         dpo_id_t *dpo)
+{
+    receive_dpo_t *new_rd = receive_dpo_alloc();
+
+    if (NULL != nh_addr)
+    {
+        new_rd->rd_addr = *nh_addr;
+    }
+    new_rd->rd_sw_if_index = sw_if_index;
+
+    dpo_set(dpo, DPO_RECEIVE, proto, (new_rd - receive_dpo_pool));
+}
+
+/**
+ * Take one more lock on the receive DPO referenced by dpo.
+ */
+static void
+receive_dpo_lock (dpo_id_t *dpo)
+{
+    receive_dpo_t *rd = receive_dpo_get_from_dpo(dpo);
+
+    rd->rd_locks += 1;
+}
+
+/**
+ * Drop one lock on the receive DPO referenced by dpo; the object goes back
+ * to the pool when the count reaches zero.
+ */
+static void
+receive_dpo_unlock (dpo_id_t *dpo)
+{
+    receive_dpo_t *rd = receive_dpo_get_from_dpo(dpo);
+
+    rd->rd_locks -= 1;
+
+    if (0 != rd->rd_locks)
+    {
+        return;
+    }
+
+    pool_put(receive_dpo_pool, rd);
+}
+
+/**
+ * format() callback for a receive DPO.
+ *
+ * Consumes the (index, indent) argument pair. When the object has an
+ * interface (rd_sw_if_index != ~0) the output is
+ * "dpo-receive: <address> on <interface>", otherwise just "dpo-receive".
+ * ap is a pointer to the va_list, so it must be dereferenced for va_arg();
+ * reading through the pointer itself yields a wrong pool index.
+ */
+static u8*
+format_receive_dpo (u8 *s, va_list *ap)
+{
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    receive_dpo_t *rd;
+
+    rd = receive_dpo_get(index);
+
+    if (~0 != rd->rd_sw_if_index)
+    {
+        return (format(s, "dpo-receive: %U on %U",
+                       format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
+                       format_vnet_sw_interface_name, vnm,
+                       vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+    }
+    else
+    {
+        return (format(s, "dpo-receive"));
+    }
+}
+
+/*
+ * Virtual function table for the receive DPO type.
+ */
+static const dpo_vft_t receive_vft = {
+    .dv_format = format_receive_dpo,
+    .dv_lock = receive_dpo_lock,
+    .dv_unlock = receive_dpo_unlock,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a receive
+ * object.
+ *
+ * These are the graph nodes used when a receive object is the parent in
+ * the DPO graph. Each list is NULL-terminated; there is no MPLS entry.
+ */
+static const char* const receive_ip4_nodes[] =
+{
+    "ip4-local",
+    NULL,
+};
+static const char* const receive_ip6_nodes[] =
+{
+    "ip6-local",
+    NULL,
+};
+
+static const char* const * const receive_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = receive_ip4_nodes,
+    [DPO_PROTO_IP6]  = receive_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * Register the receive DPO type with its virtual function table and
+ * per-protocol graph node lists.
+ */
+void
+receive_dpo_module_init (void)
+{
+ dpo_register(DPO_RECEIVE, &receive_vft, receive_nodes);
+}
diff --git a/vnet/vnet/dpo/receive_dpo.h b/vnet/vnet/dpo/receive_dpo.h
new file mode 100644
index 00000000000..2420fd7843c
--- /dev/null
+++ b/vnet/vnet/dpo/receive_dpo.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiving the packet, i.e. it's for-us
+ */
+
+#ifndef __RECEIVE_DPO_H__
+#define __RECEIVE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip6.h>
+
+/**
+ * A data-path object for packets that are to be received locally (for-us)
+ */
+typedef struct receive_dpo_t_
+{
+ /**
+ * The Software interface index on which traffic is received
+ */
+ u32 rd_sw_if_index;
+
+ /**
+ * The address on the receive interface. Packets are destined to this address
+ */
+ ip46_address_t rd_addr;
+
+ /**
+ * Number of locks.
+ */
+ u16 rd_locks;
+} receive_dpo_t;
+
+/**
+ * Create a receive DPO for the given interface and (optional, may be NULL)
+ * address, and set *dpo to reference it.
+ */
+extern void receive_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr,
+ dpo_id_t *dpo);
+
+/* Registers the receive DPO type */
+extern void receive_dpo_module_init(void);
+
+/**
+ * @brief pool of all receive DPOs
+ *
+ * Declaration only: the pool is defined once in receive_dpo.c. Without
+ * 'extern' every file including this header would carry its own
+ * definition of the variable.
+ */
+extern receive_dpo_t *receive_dpo_pool;
+
+/**
+ * Return the receive DPO stored at the given pool index.
+ */
+static inline receive_dpo_t *
+receive_dpo_get (index_t index)
+{
+    receive_dpo_t *rd = pool_elt_at_index(receive_dpo_pool, index);
+
+    return rd;
+}
+
+#endif
diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c
index 56df480ee6f..d08764a329f 100644
--- a/vnet/vnet/ethernet/arp.c
+++ b/vnet/vnet/ethernet/arp.c
@@ -21,6 +21,9 @@
#include <vnet/ethernet/arp_packet.h>
#include <vnet/l2/l2_input.h>
#include <vppinfra/mhash.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/mpls/mpls.h>
/**
* @file
@@ -36,24 +39,44 @@ void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
typedef struct
{
u32 sw_if_index;
- u32 fib_index;
ip4_address_t ip4_address;
-} ethernet_arp_ip4_key_t;
-typedef struct
-{
- ethernet_arp_ip4_key_t key;
u8 ethernet_address[6];
u16 flags;
-#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)
-#define ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN (2 << 0)
+#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)
+#define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1)
u64 cpu_time_last_updated;
-
- u32 *adjacencies;
+ adj_index_t adj_index[FIB_LINK_NUM];
} ethernet_arp_ip4_entry_t;
+/**
+ * @brief administrative and operational state flags on an interface
+ *
+ * Each flag occupies its own bit so it can be set, cleared and tested
+ * independently. A flag whose value is 0 can never be set or tested.
+ */
+typedef enum ethernet_arp_interface_flags_t_
+{
+  ETHERNET_ARP_INTERFACE_UP = (1 << 0),
+  ETHERNET_ARP_INTERFACE_MPLS_ENABLE = (1 << 1),
+} ethernet_arp_interface_flags_t;
+
+/**
+ * @brief Per-interface ARP configuration and state
+ */
+typedef struct ethernet_arp_interface_t_
+{
+ /**
+ * Hash table of ARP entries.
+ * Since this hash table is per-interface, the key is only the IPv4 address.
+ */
+ uword *arp_entries;
+
+ /**
+ * Flags for administrative and operational state
+ */
+ ethernet_arp_interface_flags_t flags;
+} ethernet_arp_interface_t;
+
typedef struct
{
u32 lo_addr;
@@ -87,18 +110,43 @@ typedef struct
ethernet_arp_ip4_entry_t *ip4_entry_pool;
- mhash_t ip4_entry_by_key;
-
/* ARP attack mitigation */
u32 arp_delete_rotor;
u32 limit_arp_cache_size;
+ /** Per interface state */
+ ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
+
/* Proxy arp vector */
ethernet_proxy_arp_t *proxy_arps;
} ethernet_arp_main_t;
static ethernet_arp_main_t ethernet_arp_main;
+
+/*
+ * Bit flags naming the ether-type(s) an ARP operation applies to;
+ * ARP_ETHER_TYPE_BOTH is the union of the IP4 and MPLS bits.
+ */
+typedef enum arp_ether_type_t_
+{
+ ARP_ETHER_TYPE_IP4 = (1 << 0),
+ ARP_ETHER_TYPE_MPLS = (1 << 1),
+} arp_ether_type_t;
+#define ARP_ETHER_TYPE_BOTH (ARP_ETHER_TYPE_MPLS | ARP_ETHER_TYPE_IP4)
+
+/*
+ * Argument block handed to set_ip4_over_ethernet_rpc_callback().
+ * NOTE(review): 'flags' presumably holds a combination of the
+ * ETHERNET_ARP_ARGS_* bits defined inside the struct - confirm against the
+ * callback, which is outside this excerpt.
+ */
+typedef struct
+{
+ u32 sw_if_index;
+ ethernet_arp_ip4_over_ethernet_address_t a;
+ int is_static;
+ int flags;
+#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
+#define ETHERNET_ARP_ARGS_FLUSH (1<<1)
+#define ETHERNET_ARP_ARGS_POPULATE (1<<2)
+ arp_ether_type_t ether_type;
+} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
+
+static void
+set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * a);
+
static u8 *
format_ethernet_arp_hardware_type (u8 * s, va_list * va)
{
@@ -229,27 +277,23 @@ format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
vnet_sw_interface_t *si;
- ip4_fib_t *fib;
u8 *flags = 0;
if (!e)
- return format (s, "%=12s%=6s%=16s%=6s%=20s%=24s", "Time", "FIB", "IP4",
+ return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
"Flags", "Ethernet", "Interface");
- fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX);
- si = vnet_get_sw_interface (vnm, e->key.sw_if_index);
-
- if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN)
- flags = format (flags, "G");
+ si = vnet_get_sw_interface (vnm, e->sw_if_index);
if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
flags = format (flags, "S");
- s = format (s, "%=12U%=6u%=16U%=6s%=20U%=24U",
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
+ flags = format (flags, "D");
+
+ s = format (s, "%=12U%=16U%=6s%=20U%=24U",
format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
- fib->table_id,
- format_ip4_address, &e->key.ip4_address,
+ format_ip4_address, &e->ip4_address,
flags ? (char *) flags : "",
format_ethernet_address, e->ethernet_address,
format_vnet_sw_interface_name, vnm, si);
@@ -294,207 +338,126 @@ format_arp_term_input_trace (u8 * s, va_list * va)
return s;
}
-clib_error_t *
-ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
- u32 sw_if_index, u32 flags)
+static void
+arp_mk_complete (ethernet_arp_interface_t * eai,
+ ethernet_arp_ip4_entry_t * e, arp_ether_type_t et)
{
- ethernet_arp_main_t *am = &ethernet_arp_main;
- ethernet_arp_ip4_entry_t *e;
- u32 i;
- u32 *to_add_del = 0;
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = e->ip4_address,
+ },
+ };
+ u32 fib_index;
- /* *INDENT-OFF* */
- pool_foreach (e, am->ip4_entry_pool, ({
- if (e->key.sw_if_index == sw_if_index)
- vec_add1 (to_add_del, e - am->ip4_entry_pool);
- }));
- /* *INDENT-ON* */
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
- for (i = 0; i < vec_len (to_add_del); i++)
+ if (et & ARP_ETHER_TYPE_IP4)
{
- ethernet_arp_ip4_over_ethernet_address_t arp_add;
- e = pool_elt_at_index (am->ip4_entry_pool, to_add_del[i]);
-
- clib_memcpy (&arp_add.ethernet, e->ethernet_address, 6);
- arp_add.ip4.as_u32 = e->key.ip4_address.as_u32;
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_IP4])
{
- vnet_arp_set_ip4_over_ethernet (vnm,
- e->key.sw_if_index,
- e->key.fib_index, &arp_add,
- e->flags &
- ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC);
+ e->adj_index[FIB_LINK_IP4] =
+ adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &pfx.fp_addr,
+ e->sw_if_index,
+ e->ethernet_address);
+ ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]);
+
+ fib_table_entry_update_one_path (fib_index,
+ &pfx,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ FIB_PROTOCOL_IP4,
+ &pfx.fp_addr,
+ e->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
}
- else if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) == 0)
+ else
{
- vnet_arp_unset_ip4_over_ethernet (vnm,
- e->key.sw_if_index,
- e->key.fib_index, &arp_add);
+ adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4],
+ e->ethernet_address);
+ }
+ }
+ if ((et & ARP_ETHER_TYPE_MPLS) &&
+ eai->flags & ETHERNET_ARP_INTERFACE_MPLS_ENABLE)
+ {
+ if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_MPLS])
+ {
+ e->adj_index[FIB_LINK_MPLS] =
+ adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
+ FIB_LINK_MPLS,
+ &pfx.fp_addr,
+ e->sw_if_index,
+ e->ethernet_address);
+ ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]);
+ }
+ else
+ {
+ adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS],
+ e->ethernet_address);
}
}
-
- vec_free (to_add_del);
- return 0;
-}
-
-VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
-
-static int
-vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 fib_index,
- void *a_arg, int is_static);
-
-static int
-vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 fib_index, void *a_arg);
-
-typedef struct
-{
- u32 sw_if_index;
- u32 fib_index;
- ethernet_arp_ip4_over_ethernet_address_t a;
- int is_static;
- int is_remove; /* set is_remove=1 to clear arp entry */
-} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
-
-static void set_ip4_over_ethernet_rpc_callback
- (vnet_arp_set_ip4_over_ethernet_rpc_args_t * a)
-{
- vnet_main_t *vm = vnet_get_main ();
- ASSERT (os_get_cpu_number () == 0);
-
- if (a->is_remove)
- vnet_arp_unset_ip4_over_ethernet_internal (vm,
- a->sw_if_index,
- a->fib_index, &(a->a));
- else
- vnet_arp_set_ip4_over_ethernet_internal (vm,
- a->sw_if_index,
- a->fib_index,
- &(a->a), a->is_static);
-}
-
-int
-vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 fib_index, void *a_arg, int is_static)
-{
- ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
- vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
-
- args.sw_if_index = sw_if_index;
- args.fib_index = fib_index;
- args.is_static = is_static;
- args.is_remove = 0;
- clib_memcpy (&args.a, a, sizeof (*a));
-
- vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
- (u8 *) & args, sizeof (args));
- return 0;
}
int
vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 fib_index,
- void *a_arg, int is_static)
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
{
- ethernet_arp_ip4_key_t k;
ethernet_arp_ip4_entry_t *e = 0;
ethernet_arp_main_t *am = &ethernet_arp_main;
- ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
vlib_main_t *vm = vlib_get_main ();
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
int make_new_arp_cache_entry = 1;
uword *p;
- ip4_add_del_route_args_t args;
- ip_adjacency_t adj, *existing_adj;
pending_resolution_t *pr, *mc;
+ ethernet_arp_interface_t *arp_int;
+ fib_link_t link;
+ int is_static = args->is_static;
+ u32 sw_if_index = args->sw_if_index;
- u32 next_index;
- u32 adj_index;
-
- fib_index = (fib_index != (u32) ~ 0)
- ? fib_index : im->fib_index_by_sw_if_index[sw_if_index];
+ vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
- k.sw_if_index = sw_if_index;
- k.ip4_address = a->ip4;
- k.fib_index = fib_index;
+ arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
- p = mhash_get (&am->ip4_entry_by_key, &k);
- if (p)
+ if (NULL != arp_int->arp_entries)
{
- e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
+ p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
+ if (p)
+ {
+ e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
- /* Refuse to over-write static arp. */
- if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
- return -2;
- make_new_arp_cache_entry = 0;
+ /* Refuse to over-write static arp. */
+ if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
+ return -2;
+ make_new_arp_cache_entry = 0;
+ }
}
- /* Note: always install the route. It might have been deleted */
- memset (&adj, 0, sizeof (adj));
- adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
- adj.n_adj = 1; /* otherwise signature compare fails */
+ if (make_new_arp_cache_entry)
+ {
+ pool_get (am->ip4_entry_pool, e);
- vnet_rewrite_for_sw_interface (vnm, VNET_L3_PACKET_TYPE_IP4, sw_if_index, ip4_rewrite_node.index, a->ethernet, /* destination address */
- &adj.rewrite_header,
- sizeof (adj.rewrite_data));
+ if (NULL == arp_int->arp_entries)
+ {
+ arp_int->arp_entries = hash_create (0, sizeof (u32));
+ if (mpls_sw_interface_is_enabled (sw_if_index))
+ arp_int->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
+ }
- /* result of this lookup should be next-hop adjacency */
- adj_index = ip4_fib_lookup_with_table (im, fib_index, &a->ip4, 0);
- existing_adj = ip_get_adjacency (lm, adj_index);
+ hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
- if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- existing_adj->arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
- {
- u32 *ai;
- u32 *adjs = vec_dup (e->adjacencies);
- /* Update all adj assigned to this arp entry */
- vec_foreach (ai, adjs)
+ e->sw_if_index = sw_if_index;
+ e->ip4_address = a->ip4;
+ FOR_EACH_FIB_LINK (link)
{
- int i;
- ip_adjacency_t *uadj = ip_get_adjacency (lm, *ai);
- for (i = 0; i < uadj->n_adj; i++)
- if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- uadj[i].arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
- ip_update_adjacency (lm, *ai + i, &adj);
+ e->adj_index[link] = ADJ_INDEX_INVALID;
}
- vec_free (adjs);
- }
- else
- {
- /* Check that new adjacency actually isn't exactly the same as
- * what is already there. If we over-write the adjacency with
- * exactly the same info, its technically a new adjacency with
- * new counters, but to user it appears as counters reset.
- */
- if (vnet_ip_adjacency_share_compare (&adj, existing_adj) == 0)
- {
- /* create new adj */
- args.table_index_or_table_id = fib_index;
- args.flags =
- IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD |
- IP4_ROUTE_FLAG_NEIGHBOR;
- args.dst_address = a->ip4;
- args.dst_address_length = 32;
- args.adj_index = ~0;
- args.add_adj = &adj;
- args.n_add_adj = 1;
- ip4_add_del_route (im, &args);
- }
- }
-
- if (make_new_arp_cache_entry)
- {
- pool_get (am->ip4_entry_pool, e);
- mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool,
- /* old value */ 0);
- e->key = k;
}
/* Update time stamp and ethernet address. */
@@ -503,11 +466,16 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
e->cpu_time_last_updated = clib_cpu_time_now ();
if (is_static)
e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
+ else
+ e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+
+ arp_mk_complete (arp_int, e, ARP_ETHER_TYPE_BOTH);
/* Customer(s) waiting for this address to be resolved? */
p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
if (p)
{
+ u32 next_index;
next_index = p[0];
while (next_index != (u32) ~ 0)
@@ -526,6 +494,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
if (p)
{
+ u32 next_index;
next_index = p[0];
while (next_index != (u32) ~ 0)
@@ -688,6 +657,7 @@ typedef enum
_ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
_ (missing_interface_address, "ARP missing interface address") \
_ (gratuitous_arp, "ARP probe or announcement dropped") \
+ _ (interface_no_table, "Interface is not mapped to an IP table") \
typedef enum
{
@@ -697,29 +667,6 @@ typedef enum
ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
-/* get first interface address */
-ip4_address_t *
-ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
- ip_interface_address_t ** result_ia)
-{
- ip_lookup_main_t *lm = &im->lookup_main;
- ip_interface_address_t *ia = 0;
- ip4_address_t *result = 0;
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address (lm, ia, sw_if_index,
- 1 /* honor unnumbered */ ,
- ({
- ip4_address_t * a =
- ip_interface_address_get_address (lm, ia);
- result = a; break;
- }));
- /* *INDENT-ON* */
-
- if (result_ia)
- *result_ia = result ? ia : 0;
- return result;
-}
static void
unset_random_arp_entry (void)
@@ -747,16 +694,14 @@ unset_random_arp_entry (void)
e = pool_elt_at_index (am->ip4_entry_pool, index);
clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
- delme.ip4.as_u32 = e->key.ip4_address.as_u32;
+ delme.ip4.as_u32 = e->ip4_address.as_u32;
- vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
- e->key.fib_index, &delme);
+ vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
}
static void
arp_unnumbered (vlib_buffer_t * p0,
- u32 pi0,
- ethernet_header_t * eth0, ip_interface_address_t * ifa0)
+ u32 pi0, ethernet_header_t * eth0, u32 sw_if_index)
{
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
@@ -777,7 +722,7 @@ arp_unnumbered (vlib_buffer_t * p0,
clib_memcpy (dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
/* Figure out which sw_if_index supplied the address */
- unnum_src_sw_if_index = ifa0->sw_if_index;
+ unnum_src_sw_if_index = sw_if_index;
/* Track down all users of the unnumbered source */
/* *INDENT-OFF* */
@@ -928,13 +873,14 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_hw_interface_t *hw_if0;
ethernet_arp_header_t *arp0;
ethernet_header_t *eth0;
- ip_interface_address_t *ifa0;
ip_adjacency_t *adj0;
- ip4_address_t *if_addr0;
- ip4_address_t proxy_src;
- u32 pi0, error0, next0, sw_if_index0;
- u8 is_request0, src_is_local0, dst_is_local0, is_unnum0;
+ ip4_address_t *if_addr0, proxy_src;
+ u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
+ u8 is_request0, dst_is_local0, is_unnum0;
ethernet_proxy_arp_t *pa;
+ fib_node_index_t dst_fei, src_fei;
+ fib_prefix_t pfx0;
+ fib_entry_flag_t src_flags, dst_flags;
pi0 = from[0];
to_next[0] = pi0;
@@ -942,6 +888,7 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
to_next += 1;
n_left_from -= 1;
n_left_to_next -= 1;
+ pa = 0;
p0 = vlib_get_buffer (vm, pi0);
arp0 = vlib_buffer_get_current (p0);
@@ -963,43 +910,56 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
if (error0)
- goto drop1;
+ goto drop2;
/* Check that IP address is local and matches incoming interface. */
- if_addr0 =
- ip4_interface_address_matching_destination (im4,
- &arp0->
- ip4_over_ethernet[1].
- ip4, sw_if_index0,
- &ifa0);
- if (!if_addr0)
+ fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+ if (~0 == fib_index0)
+ {
+ error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ goto drop2;
+
+ }
+ dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4,
+ 32);
+ dst_flags = fib_entry_get_flags (dst_fei);
+
+ conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
+
+ if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
{
error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
goto drop1;
}
/* Honor unnumbered interface, if any */
- is_unnum0 = sw_if_index0 != ifa0->sw_if_index;
+ is_unnum0 = sw_if_index0 != conn_sw_if_index0;
/* Source must also be local to subnet of matching interface address. */
- if (!ip4_destination_matches_interface
- (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0))
+ src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[0].ip4,
+ 32);
+ src_flags = fib_entry_get_flags (src_fei);
+
+ if (!((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
+ (FIB_ENTRY_FLAG_CONNECTED & src_flags)) ||
+ sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
{
error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
- goto drop1;
+ goto drop2;
}
/* Reject requests/replies with our local interface address. */
- src_is_local0 =
- if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32;
- if (src_is_local0)
+ if (FIB_ENTRY_FLAG_LOCAL & src_flags)
{
error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
- goto drop1;
+ goto drop2;
}
- dst_is_local0 =
- if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32;
+ dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
+ fib_entry_get_prefix (dst_fei, &pfx0);
+ if_addr0 = &pfx0.fp_addr.ip4;
/* Fill in ethernet header. */
eth0 = ethernet_buffer_get_header (p0);
@@ -1023,7 +983,6 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
unset_random_arp_entry ();
vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0,
- (u32) ~ 0 /* default fib */ ,
&arp0->ip4_over_ethernet[0],
0 /* is_static */ );
error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
@@ -1064,21 +1023,25 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
/* Figure out how much to rewind current data from adjacency. */
- if (ifa0)
+ /* get the adj from the destination's covering connected */
+ if (NULL == pa)
{
- adj0 = ip_get_adjacency (&ip4_main.lookup_main,
- ifa0->neighbor_probe_adj_index);
- if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
+ adj0 =
+ adj_get (fib_entry_get_adj_for_source
+ (ip4_fib_table_lookup
+ (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4, 31),
+ FIB_SOURCE_INTERFACE));
+ if (adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
{
error0 = ETHERNET_ARP_ERROR_missing_interface_address;
goto drop2;
}
if (is_unnum0)
- arp_unnumbered (p0, pi0, eth0, ifa0);
+ arp_unnumbered (p0, pi0, eth0, conn_sw_if_index0);
else
vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
}
-
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, pi0, next0);
@@ -1128,8 +1091,8 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* $$$ is the answer ever anything other than
* vlib_buffer_reset(..)?
*/
- ifa0 = 0;
if_addr0 = &proxy_src;
+ is_unnum0 = 0;
vlib_buffer_reset (p0);
n_proxy_arp_replies_sent++;
goto send_reply;
@@ -1192,10 +1155,9 @@ ip4_arp_entry_sort (void *a1, void *a2)
int cmp;
vnet_main_t *vnm = vnet_get_main ();
- cmp = vnet_sw_interface_compare
- (vnm, e1->key.sw_if_index, e2->key.sw_if_index);
+ cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
if (!cmp)
- cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address);
+ cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
return cmp;
}
@@ -1228,7 +1190,7 @@ show_ip4_arp (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
vec_foreach (e, es)
{
- if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index)
+ if (sw_if_index != ~0 && e->sw_if_index != sw_if_index)
continue;
vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
}
@@ -1346,91 +1308,196 @@ ip4_set_arp_limit (u32 arp_limit)
return 0;
}
+/**
+ * @brief Control Plane hook to remove an ARP entry
+ *
+ * Packs the request into RPC arguments flagged ETHERNET_ARP_ARGS_REMOVE and
+ * hands them to set_ip4_over_ethernet_rpc_callback through
+ * vl_api_rpc_call_main_thread.
+ *
+ * @param vnm Not referenced by this function.
+ * @param sw_if_index The interface the ARP entry belongs to.
+ * @param a_arg Treated as an ethernet_arp_ip4_over_ethernet_address_t; its
+ * contents are copied into the RPC arguments.
+ * @return Always 0.
+ */
+int
+vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.flags = ETHERNET_ARP_ARGS_REMOVE;
+ args.ether_type = ARP_ETHER_TYPE_IP4;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+/**
+ * @brief Internally generated event to flush the ARP cache on an
+ * interface state change event.
+ * A flush will remove dynamic ARP entries, and for statics remove the MAC
+ * address from the corresponding adjacencies.
+ *
+ * The request is flagged ETHERNET_ARP_ARGS_FLUSH and handed to
+ * set_ip4_over_ethernet_rpc_callback through vl_api_rpc_call_main_thread.
+ *
+ * @param vnm Not referenced by this function.
+ * @param sw_if_index The interface on which the ARP entry is flushed.
+ * @param et The ether type(s) to flush; stored in the RPC arguments.
+ * @param a_arg Treated as an ethernet_arp_ip4_over_ethernet_address_t; its
+ * contents are copied into the RPC arguments.
+ * @return Always 0.
+ */
+static int
+vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index,
+ arp_ether_type_t et, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.flags = ETHERNET_ARP_ARGS_FLUSH;
+ args.ether_type = et;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+/**
+ * @brief Internally generated event to populate the ARP cache on an
+ * interface state change event.
+ * For static entries this will re-source the adjacencies.
+ *
+ * The request is flagged ETHERNET_ARP_ARGS_POPULATE and handed to
+ * set_ip4_over_ethernet_rpc_callback through vl_api_rpc_call_main_thread.
+ *
+ * @param vnm Not referenced by this function.
+ * @param sw_if_index The interface on which the ARP entries are acted
+ * @param et The ether type of those ARP entries.
+ * @param a_arg Treated as an ethernet_arp_ip4_over_ethernet_address_t; its
+ * contents are copied into the RPC arguments.
+ * @return Always 0.
+ */
+static int
+vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index,
+ arp_ether_type_t et, void *a_arg)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.flags = ETHERNET_ARP_ARGS_POPULATE;
+ args.ether_type = et;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+/*
+ * arp_add_del_interface_address
+ *
+ * callback when an interface address is added or deleted
+ *
+ * Only deletions are acted on: every ARP entry on sw_if_index whose address
+ * is matched by address/address_length (ip4_destination_matches_route) is
+ * flushed for both ether types. Nothing is done when the interface has no
+ * slot in ethernet_arp_by_sw_if_index.
+ *
+ * opaque and if_address_index are not referenced.
+ */
static void
-arp_ip4_entry_del_adj (ethernet_arp_ip4_entry_t * e, u32 adj_index)
+arp_add_del_interface_address (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index, u32 is_del)
{
- int done = 0;
- int i;
+ /*
+ * Flush the ARP cache of all entries covered by the address
+ * that is being removed.
+ */
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
- while (!done)
+ /* valid indices are 0 .. vec_len - 1, so sw_if_index == vec_len is out of range too */
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
+ return;
+
+ if (is_del)
{
- vec_foreach_index (i, e->adjacencies)
- if (vec_elt (e->adjacencies, i) == adj_index)
+ ethernet_arp_interface_t *eai;
+ u32 i, *to_delete = 0;
+ hash_pair_t *pair;
+
+ eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+ hash_foreach_pair (pair, eai->arp_entries, (
+ {
+ e =
+ pool_elt_at_index
+ (am->ip4_entry_pool,
+ pair->value[0]);
+ if
+ (ip4_destination_matches_route
+ (im, &e->ip4_address,
+ address, address_length))
+ {
+ vec_add1 (to_delete,
+ e -
+ am->ip4_entry_pool);}
+ }
+ ));
+
+ for (i = 0; i < vec_len (to_delete); i++)
{
- vec_del1 (e->adjacencies, i);
- continue;
+ ethernet_arp_ip4_over_ethernet_address_t delme;
+ e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
+
+ clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
+ delme.ip4.as_u32 = e->ip4_address.as_u32;
+
+ vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
+ e->sw_if_index,
+ ARP_ETHER_TYPE_BOTH, &delme);
}
- done = 1;
+
+ vec_free (to_delete);
}
}
+/**
+ * @brief Callback for an MPLS enable/disable change on an interface.
+ *
+ * Records the new state in the per-interface ARP flags. When MPLS is being
+ * enabled, a populate request for ARP_ETHER_TYPE_MPLS is issued for every
+ * ARP entry on the interface; when it is being disabled the per-entry loop
+ * does nothing further. Interfaces with no slot in
+ * ethernet_arp_by_sw_if_index are ignored.
+ *
+ * @param sw_if_index The interface whose MPLS state changed.
+ * @param is_enable Non-zero when MPLS is being enabled.
+ */
static void
-arp_ip4_entry_add_adj (ethernet_arp_ip4_entry_t * e, u32 adj_index)
+ethernet_arp_sw_interface_mpls_state_change (u32 sw_if_index, u32 is_enable)
{
- int i;
- vec_foreach_index (i, e->adjacencies)
- if (vec_elt (e->adjacencies, i) == adj_index)
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+ u32 i, *to_update = 0;
+ hash_pair_t *pair;
+
+ /* valid indices are 0 .. vec_len - 1, so sw_if_index == vec_len is out of range too */
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
return;
- vec_add1 (e->adjacencies, adj_index);
-}
-static void
-arp_add_del_adj_cb (struct ip_lookup_main_t *lm,
- u32 adj_index, ip_adjacency_t * adj, u32 is_del)
-{
- ethernet_arp_main_t *am = &ethernet_arp_main;
- ip4_main_t *im = &ip4_main;
- ethernet_arp_ip4_key_t k;
- ethernet_arp_ip4_entry_t *e = 0;
- uword *p;
- u32 ai;
+ eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+ if (is_enable)
+ eai->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
+ else
+ eai->flags &= ~ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
+
+ hash_foreach_pair (pair, eai->arp_entries, (
+ {
+ vec_add1 (to_update,
+ pair->value[0]);
+ }
+ ));
- for (ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj; ai++)
+ for (i = 0; i < vec_len (to_update); i++)
{
- adj = ip_get_adjacency (lm, ai);
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP
- && adj->arp.next_hop.ip4.as_u32)
+ ethernet_arp_ip4_over_ethernet_address_t updateme;
+ e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]);
+
+ clib_memcpy (&updateme.ethernet, e->ethernet_address, 6);
+ updateme.ip4.as_u32 = e->ip4_address.as_u32;
+
+ if (is_enable)
{
- k.sw_if_index = adj->rewrite_header.sw_if_index;
- k.ip4_address.as_u32 = adj->arp.next_hop.ip4.as_u32;
- k.fib_index =
- im->fib_index_by_sw_if_index[adj->rewrite_header.sw_if_index];
- p = mhash_get (&am->ip4_entry_by_key, &k);
- if (p)
- e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
+ vnet_arp_populate_ip4_over_ethernet (vnet_get_main (),
+ e->sw_if_index,
+ ARP_ETHER_TYPE_MPLS,
+ &updateme);
}
else
continue;
- if (is_del)
- {
- if (!e)
- clib_warning ("Adjacency contains unknown ARP next hop %U (del)",
- format_ip46_address, &adj->arp.next_hop,
- IP46_TYPE_IP4);
- else
- arp_ip4_entry_del_adj (e, adj->heap_handle);
- }
- else /* add */
- {
- if (!e)
- clib_warning ("Adjacency contains unknown ARP next hop %U (add)",
- format_ip46_address, &adj->arp.next_hop,
- IP46_TYPE_IP4);
- else
- arp_ip4_entry_add_adj (e, adj->heap_handle);
- }
}
+ vec_free (to_update);
}
static clib_error_t *
ethernet_arp_init (vlib_main_t * vm)
{
ethernet_arp_main_t *am = &ethernet_arp_main;
- pg_node_t *pn;
- clib_error_t *error;
ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
+ clib_error_t *error;
+ pg_node_t *pn;
if ((error = vlib_call_init_function (vm, ethernet_init)))
return error;
@@ -1445,10 +1512,6 @@ ethernet_arp_init (vlib_main_t * vm)
foreach_ethernet_arp_opcode;
#undef _
- mhash_init (&am->ip4_entry_by_key,
- /* value size */ sizeof (uword),
- /* key size */ sizeof (ethernet_arp_ip4_key_t));
-
/* $$$ configurable */
am->limit_arp_cache_size = 50000;
@@ -1468,100 +1531,239 @@ ethernet_arp_init (vlib_main_t * vm)
#undef _
}
- ip_register_add_del_adjacency_callback (lm, arp_add_del_adj_cb);
+ ip4_add_del_interface_address_callback_t cb;
+ cb.function = arp_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im->add_del_interface_address_callbacks, cb);
+
+ vec_add1 (mpls_main.mpls_interface_state_change_callbacks,
+ ethernet_arp_sw_interface_mpls_state_change);
return 0;
}
VLIB_INIT_FUNCTION (ethernet_arp_init);
-int
-vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
- u32 sw_if_index, u32 fib_index, void *a_arg)
+static void
+arp_mk_incomplete (ethernet_arp_interface_t * eai,
+ ethernet_arp_ip4_entry_t * e, arp_ether_type_t et)
{
- ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
- vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = e->ip4_address,
+ },
+ };
+ u32 fib_index;
- args.sw_if_index = sw_if_index;
- args.fib_index = fib_index;
- args.is_remove = 1;
- clib_memcpy (&args.a, a, sizeof (*a));
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
- vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
- (u8 *) & args, sizeof (args));
- return 0;
+ if ((ARP_ETHER_TYPE_IP4 & et) &&
+ (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]))
+ {
+ /*
+ * revert the adj this ARP entry sourced to incomplete
+ */
+ adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], NULL);
+
+ /*
+ * remove the FIB entry the ARP entry sourced
+ */
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_ADJ);
+
+ /*
+ * Unlock the adj now that the ARP entry is no longer a source
+ */
+ adj_unlock (e->adj_index[FIB_LINK_IP4]);
+ e->adj_index[FIB_LINK_IP4] = ADJ_INDEX_INVALID;
+ }
+ if ((ARP_ETHER_TYPE_MPLS & et) &&
+ (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]))
+ {
+ /*
+ * revert the adj this ARP entry sourced to incomplete
+ * (unlike the IPv4 branch, no FIB entry is deleted here)
+ */
+ adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], NULL);
+
+ /*
+ * Unlock the adj now that the ARP entry is no longer a source
+ */
+ adj_unlock (e->adj_index[FIB_LINK_MPLS]);
+ e->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
+ }
+}
+
+static void
+arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+
+ hash_unset (eai->arp_entries, e->ip4_address.as_u32);
+ pool_put (am->ip4_entry_pool, e);
+}
+
+static ethernet_arp_ip4_entry_t *
+arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e = NULL;
+ uword *p;
+
+ if (NULL != eai->arp_entries)
+ {
+ p = hash_get (eai->arp_entries, addr->as_u32);
+ if (!p)
+ return (NULL);
+
+ e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
+ }
+
+ return (e);
}
+/**
+ * @brief Remove an ARP entry (static or dynamic) from an interface.
+ *
+ * Looks the entry up by args->a.ip4 in the per-interface table; when found,
+ * its adjacencies are made incomplete for both ether types and the entry is
+ * freed. A request for an interface that has no per-interface ARP state, or
+ * for an address with no entry, is a no-op.
+ *
+ * @param vnm Not referenced by this function.
+ * @param args RPC arguments; only sw_if_index and a.ip4 are read.
+ * @return Always 0.
+ */
static inline int
vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 fib_index, void *a_arg)
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
{
- ethernet_arp_ip4_entry_t *e;
ethernet_arp_main_t *am = &ethernet_arp_main;
- ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
- ethernet_arp_ip4_key_t k;
- uword *p;
- ip4_add_del_route_args_t args;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
- u32 adj_index;
- ip_adjacency_t *adj;
-
- k.sw_if_index = sw_if_index;
- k.ip4_address = a->ip4;
- k.fib_index = fib_index;
- p = mhash_get (&am->ip4_entry_by_key, &k);
- if (!p)
- return -1;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
- memset (&args, 0, sizeof (args));
+ /*
+ * The interface index comes from the caller; do not index past the end
+ * of the per-interface vector when no ARP state exists for it.
+ */
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
+ return 0;
+
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
- /*
- * Make sure that the route actually exists before we try to delete it,
- * and make sure that it's a rewrite adjacency.
- *
- * If we point 1-N unnumbered interfaces at a loopback interface and
- * shut down the loopback before shutting down 1-N unnumbered
- * interfaces, the ARP cache will still have an entry,
- * but the route will have disappeared.
- *
- * See also ip4_del_interface_routes (...)
- * -> ip4_delete_matching_routes (...).
- */
+ e = arp_entry_find (eai, &args->a.ip4);
+
+ if (NULL != e)
+ {
+ arp_mk_incomplete (eai, e, ARP_ETHER_TYPE_BOTH);
+ arp_entry_free (eai, e);
+ }
+
+ return 0;
+}
+
+static int
+vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
- adj_index = ip4_fib_lookup_with_table
- (im, fib_index, &a->ip4, 1 /* disable default route */ );
+ e = arp_entry_find (eai, &args->a.ip4);
- /* Miss adj? Forget it... */
- if (adj_index != lm->miss_adj_index)
+ if (NULL != e)
{
- adj = ip_get_adjacency (lm, adj_index);
+ arp_mk_incomplete (eai, e, args->ether_type);
+
/*
- * Stupid control-plane trick:
- * admin down an interface (removes arp routes from fib),
- * bring the interface back up (does not reinstall them)
- * then remove the arp cache entry (yuck). When that happens,
- * the adj we find here will be the interface subnet ARP adj.
+ * The difference between flush and unset, is that an unset
+ * means delete for static and dynamic entries. A flush
+ * means delete only for dynamic. Flushing is what the DP
+ * does in response to interface events. unset is only done
+ * by the control plane.
*/
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+ if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) &&
+ (args->ether_type & ARP_ETHER_TYPE_IP4))
{
- args.table_index_or_table_id = fib_index;
- args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL
- | IP4_ROUTE_FLAG_NEIGHBOR;
- args.dst_address = a->ip4;
- args.dst_address_length = 32;
- ip4_add_del_route (im, &args);
- ip4_maybe_remap_adjacencies (im, fib_index, args.flags);
+ arp_entry_free (eai, e);
}
}
+ return (0);
+}
+
+static int
+vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+ e = arp_entry_find (eai, &args->a.ip4);
+
+ if (NULL != e)
+ {
+ arp_mk_complete (eai, e, args->ether_type);
+ }
+ return (0);
+}
+
+static void
+set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * a)
+{
+ vnet_main_t *vm = vnet_get_main ();
+ ASSERT (os_get_cpu_number () == 0);
+
+ if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
+ vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
+ else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
+ vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
+ else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
+ vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
+ else
+ vnet_arp_set_ip4_over_ethernet_internal (vm, a);
+}
+
+/**
+ * @brief Invoked when the interface's admin state changes
+ */
+static clib_error_t *
+ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
+ u32 sw_if_index, u32 flags)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ u32 i, *to_delete = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (e, am->ip4_entry_pool,
+ ({
+ if (e->sw_if_index == sw_if_index)
+ {
+ vec_add1 (to_delete, e - am->ip4_entry_pool);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ ethernet_arp_ip4_over_ethernet_address_t delme;
+ e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
+
+ clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
+ delme.ip4.as_u32 = e->ip4_address.as_u32;
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ {
+ vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index,
+ ARP_ETHER_TYPE_BOTH, &delme);
+ }
+ else
+ {
+ vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index,
+ ARP_ETHER_TYPE_BOTH, &delme);
+ }
+
+ }
+ vec_free (to_delete);
+
- e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
- mhash_unset (&am->ip4_entry_by_key, &e->key, 0);
- pool_put (am->ip4_entry_pool, e);
return 0;
}
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
+
+
static void
increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
{
@@ -1586,6 +1788,24 @@ increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
}
int
+vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg, int is_static)
+{
+ ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
+
+ args.sw_if_index = sw_if_index;
+ args.is_static = is_static;
+ args.flags = 0;
+ args.ether_type = ARP_ETHER_TYPE_IP4;
+ clib_memcpy (&args.a, a, sizeof (*a));
+
+ vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+ (u8 *) & args, sizeof (args));
+ return 0;
+}
+
+int
vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
ip4_address_t * hi_addr, u32 fib_index, int is_del)
{
@@ -1660,57 +1880,6 @@ vnet_proxy_arp_fib_reset (u32 fib_id)
return 0;
}
-u32
-vnet_arp_glean_add (u32 fib_index, void *next_hop_arg)
-{
- ethernet_arp_main_t *am = &ethernet_arp_main;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
- ip4_address_t *next_hop = next_hop_arg;
- ip_adjacency_t add_adj, *adj;
- ip4_add_del_route_args_t args;
- ethernet_arp_ip4_entry_t *e;
- ethernet_arp_ip4_key_t k;
- u32 adj_index;
-
- adj_index = ip4_fib_lookup_with_table (im, fib_index, next_hop, 0);
- adj = ip_get_adjacency (lm, adj_index);
-
- if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP)
- return ~0;
-
- if (adj->arp.next_hop.ip4.as_u32 != 0)
- return adj_index;
-
- k.sw_if_index = adj->rewrite_header.sw_if_index;
- k.fib_index = fib_index;
- k.ip4_address.as_u32 = next_hop->as_u32;
-
- if (mhash_get (&am->ip4_entry_by_key, &k))
- return adj_index;
-
- pool_get (am->ip4_entry_pool, e);
- mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool,
- /* old value */ 0);
- e->key = k;
- e->cpu_time_last_updated = clib_cpu_time_now ();
- e->flags = ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN;
-
- memset (&args, 0, sizeof (args));
- clib_memcpy (&add_adj, adj, sizeof (add_adj));
- ip46_address_set_ip4 (&add_adj.arp.next_hop, next_hop); /* install neighbor /32 route */
- args.table_index_or_table_id = fib_index;
- args.flags =
- IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR;
- args.dst_address.as_u32 = next_hop->as_u32;
- args.dst_address_length = 32;
- args.adj_index = ~0;
- args.add_adj = &add_adj;
- args.n_add_adj = 1;
- ip4_add_del_route (im, &args);
- return ip4_fib_lookup_with_table (im, fib_index, next_hop, 0);
-}
-
static clib_error_t *
ip_arp_add_del_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1784,7 +1953,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm,
1 /* type */ , 0 /* data */ );
vnet_arp_set_ip4_over_ethernet
- (vnm, sw_if_index, fib_index, &addr, is_static);
+ (vnm, sw_if_index, &addr, is_static);
vlib_process_wait_for_event (vm);
event_type = vlib_process_get_events (vm, &event_data);
@@ -1793,8 +1962,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm,
clib_warning ("event type %d unexpected", event_type);
}
else
- vnet_arp_unset_ip4_over_ethernet
- (vnm, sw_if_index, fib_index, &addr);
+ vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
increment_ip4_and_mac_address (&addr);
}
diff --git a/vnet/vnet/ethernet/ethernet.h b/vnet/vnet/ethernet/ethernet.h
index 8a1369c1093..3b2ef875290 100644
--- a/vnet/vnet/ethernet/ethernet.h
+++ b/vnet/vnet/ethernet/ethernet.h
@@ -398,13 +398,11 @@ void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi,
int
vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 fib_index, void *a_arg, int is_static);
+ u32 sw_if_index, void *a_arg, int is_static);
int
vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
- u32 sw_if_index, u32 fib_index,
- void *a_arg);
+ u32 sw_if_index, void *a_arg);
int vnet_proxy_arp_fib_reset (u32 fib_id);
@@ -538,8 +536,6 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
uword type_opaque,
uword data, int is_add);
-u32 vnet_arp_glean_add (u32 fib_index, void *next_hop_arg);
-
extern vlib_node_registration_t ethernet_input_node;
#endif /* included_ethernet_h */
diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c
index 0b19b51d3be..f2e2ca0d7d8 100644
--- a/vnet/vnet/ethernet/interface.c
+++ b/vnet/vnet/ethernet/interface.c
@@ -42,6 +42,20 @@
#include <vnet/pg/pg.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/l2/l2_input.h>
+#include <vnet/srp/srp.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/devices/af_packet/af_packet.h>
+
+int
+vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
+{
+ // FIXME - use flags on the HW itf
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return (!(hw->hw_class_index == ethernet_hw_interface_class.index ||
+ hw->hw_class_index == af_packet_device_class.index ||
+ hw->hw_class_index == lisp_gpe_hw_class.index ||
+ hw->hw_class_index == srp_hw_interface_class.index));
+}
/**
* @file
diff --git a/vnet/vnet/fib/fib.c b/vnet/vnet/fib/fib.c
new file mode 100644
index 00000000000..413f93e893c
--- /dev/null
+++ b/vnet/vnet/fib/fib.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_path_list.h>
+
+static clib_error_t *
+fib_module_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, dpo_module_init)))
+ return (error);
+ if ((error = vlib_call_init_function (vm, adj_module_init)))
+ return (error);
+
+ fib_entry_module_init();
+ fib_entry_src_module_init();
+ fib_path_module_init();
+ fib_path_list_module_init();
+ fib_walk_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (fib_module_init);
diff --git a/vnet/vnet/fib/fib.h b/vnet/vnet/fib/fib.h
new file mode 100644
index 00000000000..7cf1d136935
--- /dev/null
+++ b/vnet/vnet/fib/fib.h
@@ -0,0 +1,652 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * \brief
+ * A IP v4/6 independent FIB.
+ *
+ * The main functions provided by the FIB are as follows;
+ *
+ * - source priorities
+ *
+ * A route can be added to the FIB by more than one entity or source. Sources
+ * include, but are not limited to, API, CLI, LISP, MAP, etc (for the full list
+ * see fib_entry.h). Each source provides the forwarding information (FI) that
+ * it has determined as required for that route. Since each source determines the
+ * FI using different best path and loop prevention algorithms, it is not
+ * correct for the FI of multiple sources to be combined. Instead the FIB must
+ * choose to use the FI from only one source. This choice is based on a static
+ * priority assignment. For example;
+ * IF a prefix is added as a result of interface configuration:
+ * set interface address 192.168.1.1/24 GigE0
+ * and then it is also added from the CLI
+ * ip route 192.168.1.1/32 via 2.2.2.2/32
+ * then the 'interface' source will prevail, and the route will remain as
+ * 'local'.
+ * The requirement of the FIB is to always install the FI from the winning
+ * source and thus to maintain the FI added by losing sources so it can be
+ * installed should the winning source be withdrawn.
+ *
+ * - adj-fib maintenance
+ *
+ * When ARP or ND discover a neighbour on a link an adjacency forms for the
+ * address of that neighbour. It is also required to insert, in the
+ * appropriate FIB table, corresponding to the VRF for the link, an entry for
+ * that neighbour. This entry is often referred to as an adj-fib. Adj-fibs
+ * have a dedicated source; 'ADJ'.
+ * The priority of the ADJ source is lower than most. This is so the following
+ * config;
+ * set interface address 192.168.1.1/32 GigE0
+ * ip arp 192.168.1.2 GigE0 dead.dead.dead
+ * ip route add 192.168.1.2 via 10.10.10.10 GigE1
+ * will forward traffic for 192.168.1.2 via GigE1. That is the route added
+ * by the control plane is favoured over the adjacency discovered by ARP.
+ * The control plane, with its associated authentication, is considered the
+ * authoritative source.
+ * To counter the nefarious addition of adj-fib, through the nefarious injection
+ * of adjacencies, the FIB is also required to ensure that only adj-fibs whose
+ * less specific covering prefix is connected are installed in forwarding. This
+ * requires the use of 'cover tracking', where a route maintains a dependency
+ * relationship with the route that is its less specific cover. When this cover
+ * changes (i.e. there is a new covering route) or the forwarding information
+ * of the cover changes, then the covered route is notified.
+ *
+ * Overlapping sub-nets are not supported, so no adj-fib has multiple paths.
+ * The control plane is expected to remove a prefix configured for an interface
+ * before the interface changes VRF.
+ * So while the following config is accepted:
+ * set interface address 192.168.1.1/32 GigE0
+ * ip arp 192.168.1.2 GigE0 dead.dead.dead
+ * set interface ip table GigE0 2
+ * it does not result in the desired behaviour.
+ *
+ * - attached export.
+ *
+ * Further to adj-fib maintenance above consider the following config:
+ * set interface address 192.168.1.1/24 GigE0
+ * ip route add table 2 192.168.1.0/24 GigE0
+ * Traffic destined for 192.168.1.2 in table 2 will generate an ARP request
+ * on GigE0. However, since GigE0 is in table 0, all adj-fibs will be added in
+ * FIB 0. Hence all hosts in the sub-net are unreachable from table 2. To resolve
+ * this, all adj-fib and local prefixes are exported (i.e. copied) from the
+ * 'export' table 0, to the 'import' table 2. There can be many import tables
+ * for a single export table.
+ *
+ * - recursive route resolution
+ *
+ * A recursive route is of the form:
+ * 1.1.1.1/32 via 10.10.10.10
+ * i.e. a route for which no egress interface is provided. In order to forward
+ * traffic to 1.1.1.1/32 the FIB must therefore first determine how to forward
+ * traffic to 10.10.10.10/32. This is recursive resolution.
+ * Recursive resolution, just like normal resolution, proceeds via a longest
+ * prefix match for the 'via-address' 10.10.10.10. Note it is only possible
+ * to add routes via an address (i.e. a /32 or /128) not via a shorter mask
+ * prefix. There is no use case for the latter.
+ * Since recursive resolution proceeds via a longest prefix match, the entry
+ * in the FIB that will resolve the recursive route, termed the via-entry, may
+ * change as other routes are added to the FIB. Consider the recursive
+ * route shown above, and this non-recursive route:
+ * 10.10.10.0/24 via 192.168.16.1 GigE0
+ * The entry for 10.10.10.0/24 is thus the resolving via-entry. If this entry is
+ * modified, to say;
+ * 10.10.10.0/24 via 192.16.1.3 GigE0
+ * Then packets for 1.1.1.1/32 must also be sent to the new next-hop.
+ * Now consider the addition of;
+ * 10.10.10.0/28 via 192.168.16.2 GigE0
+ * The more specific /28 is a better longest prefix match and thus becomes the
+ * via-entry. Removal of the /28 means the resolution will revert to the /24.
+ * Tracking the changes in recursive resolution is a requirement of
+ * the FIB. When the forwarding information of the via-entry changes a back-walk
+ * is used to update dependent recursive routes. When new routes are added to
+ * the table the cover tracking feature provides the necessary notifications to
+ * the via-entry routes.
+ * The adjacency constructed for 1.1.1.1/32 will be a recursive adjacency
+ * whose next adjacency will be contributed from the via-entry. Maintaining
+ * the validity of this recursive adjacency is a requirement of the FIB.
+ *
+ * - recursive loop avoidance
+ *
+ * Consider this set of routes:
+ * 1.1.1.1/32 via 2.2.2.2
+ * 2.2.2.2/32 via 3.3.3.3
+ * 3.3.3.3/32 via 1.1.1.1
+ * this is termed a recursion loop - all of the routes in the loop are
+ * unresolved in so far as they do not have a resolving adjacency, but each
+ * is resolved because the via-entry is known. It is important here to note
+ * the distinction between the control-plane objects and the data-plane objects
+ * (more details in the implementation section). The control plane objects must
+ * allow the loop to form (i.e. the graph becomes cyclic), however, the
+ * data-plane absolutely must not allow the loop to form, otherwise the packet
+ * would loop indefinitely and never egress the device - meltdown would follow.
+ * The control plane must allow the loop to form, because when the loop breaks,
+ * all members of the loop need to be updated. Forming the loop allows the
+ * dependencies to be correctly setup to allow this to happen.
+ * There is no limit to the depth of recursion supported by VPP so:
+ * 9.9.9.100/32 via 9.9.9.99
+ * 9.9.9.99/32 via 9.9.9.98
+ * 9.9.9.98/32 via 9.9.9.97
+ * ... turtles, turtles, turtles ...
+ * 9.9.9.1/32 via 10.10.10.10 Gig0
+ * is supported to as many layers of turtles as desired; however, when
+ * back-walking a graph (in this case from 9.9.9.1/32 up toward 9.9.9.100/32)
+ * a FIB needs to differentiate the case where the recursion is deep versus
+ * the case where the recursion is looped. A simple method, employed by VPP FIB,
+ * is to limit the number of steps. VPP FIB limit is 16. Typical BGP scenarios
+ * in the wild do not exceed 3 (BGP Inter-AS option C).
+ *
+ * - Fast Convergence
+ *
+ * After a network topology change, the 'convergence' time, is the time taken
+ * for the router to complete a transition to forward traffic using the new
+ * topology. The convergence time is therefore a summation of the time to;
+ * - detect the failure.
+ * - calculate the new 'best path' information
+ * - download the new best paths to the data-plane.
+ * - install those best paths in data-plane forwarding.
+ * The last two points are of relevance to VPP architecture. The download API is
+ * binary and batch, details are not discussed here. There is no HW component to
+ * programme, installation time is bounded by the memory allocation and table
+ * lookup and insert access times.
+ *
+ * 'Fast' convergence refers to a set of technologies that a FIB can employ to
+ * completely or partially restore forwarding whilst the convergence actions
+ * listed above are ongoing. Fast convergence technologies are further
+ * sub-divided into Prefix Independent Convergence (PIC) and Loop Free
+ * Alternate path Fast re-route (LFA-FRR or sometimes called IP-FRR) which
+ * affect recursive and non-recursive routes respectively.
+ *
+ * LFA-FRR
+ *
+ * Consider the network topology below:
+ *
+ * C
+ * / \
+ * X -- A --- B - Y
+ * | |
+ * D F
+ * \ /
+ * E
+ *
+ * all links are equal cost, traffic is passing from X to Y. the best path is
+ * X-A-B-Y. There are two alternative paths, one via C and one via E. An
+ * alternate path is considered to be loop free if no other router on that path
+ * would forward the traffic back to the sender. Consider router C, its best
+ * path to Y is via B, so if A were to send traffic destined to Y to C, then C
+ * would forward that traffic to B - this is a loop-free alternate path. In
+ * contrast consider router D. D's shortest path to Y is via A, so if A were to
+ * send traffic destined to Y via D, then D would send it back to A; this is
+ * not a loop-free alternate path. There are several points of note;
+ * - we are considering the pre-failure routing topology
+ * - any equal-cost multi-path between A and B is also a LFA path.
+ * - in order for A to calculate LFA paths it must be aware of the best-path
+ * to Y from the perspective of D. These calculations are thus limited to
+ * routing protocols that have a full view of the network topology, i.e.
+ * link-state DB protocols like OSPF or an SDN controller. LFA protected
+ * prefixes are thus non-recursive.
+ *
+ * LFA is specified as a 1 to 1 redundancy; a primary path has only one LFA
+ * (a.k.a. backup) path. To my knowledge this limitation is one of complexity
+ * in the calculation of and capacity planning using a 1-n redundancy.
+ *
+ * In the event that the link A-B fails, the alternate path via C can be used.
+ * In order to provide 'fast' failover in the event of a failure, the control
+ * plane will download both the primary and the backup path to the FIB. It is
+ * then a requirement of the FIB to perform the failover (a.k.a cutover) from
+ * the primary to the backup path as quickly as possible, and particularly
+ * without any other control-plane intervention. The expectation is cutover is
+ * less than 50 milli-seconds - a value allegedly from the VOIP QoS. Note that
+ * cutover time still includes the fault detection time, which in a virtualised
+ * environment could be the dominant factor. Failure detection can be either a
+ * link down, which will affect multiple paths on a multi-access interface, or
+ * via a specific path heartbeat (i.e. BFD).
+ * At this time VPP does not support LFA, that is it does not support the
+ * installation of a primary and backup path[s] for a route. However, it does
+ * support ECMP, and VPP FIB is designed to quickly remove failed paths from
+ * the ECMP set, however, it does not insert shared objects specific to the
+ * protected resource into the forwarding object graph, since this would incur
+ * a forwarding/performance cost. Failover time is thus route number dependent.
+ * Details are provided in the implementation section below.
+ *
+ * PIC
+ *
+ * PIC refers to the concept that the convergence time should be independent of
+ * the number of prefixes/routes that are affected by the failure. PIC is
+ * therefore most appropriate when considering networks with a large number of
+ * prefixes, i.e. BGP networks and thus recursive prefixes. There are several
+ * flavours of PIC covering different locations of protection and failure
+ * scenarios. An outline is given below, see the literature for more details:
+ *
+ * Y/16 - CE1 -- PE1---\
+ * | \ P1---\
+ * | \ PE3 -- CE3 - X/16
+ * | - P2---/
+ * Y/16 - CE2 -- PE2---/
+ *
+ * CE = customer edge, PE = provider edge. external-BGP runs between customer
+ * and provider, internal-BGP runs between provider and provider.
+ *
+ * 1) iBGP PIC-core: consider traffic from CE1 to X/16 via CE3. On PE1 there
+ * are routes;
+ * X/16 (and hundreds of thousands of others like it)
+ * via PE3
+ * and
+ * PE3/32 (its loopback address)
+ * via 10.0.0.1 Link0 (this is P1)
+ * via 10.1.1.1 Link1 (this is P2)
+ * the failure is the loss of link0 or link1
+ * As in all PIC scenarios, in order to provide prefix independent convergence
+ * it must be that the route for X/16 (and all other routes via PE3) do not
+ * need to be updated in the FIB. The FIB therefore needs to update a single
+ * object that is shared by all routes - once this shared object is updated,
+ * then all routes using it will be instantly updated to use the new forwarding
+ * information. In this case the shared object is the resolving route via PE3.
+ * Once the route via PE3 is updated via IGP (OSPF) convergence, then all
+ * recursive routes that resolve through it are also updated. VPP FIB
+ * implements this scenario via a recursive-adjacency. X/16 and its sibling
+ * routes share a recursive-adjacency that links to/points at/stacks on the
+ * normal adjacency contributed by the route for PE3. Once this shared
+ * recursive adj is re-linked then all routes are switched to using the new
+ * forwarding information. This is shown below;
+ *
+ * pre-failure;
+ * X/16 --> R-ADJ-1 --> ADJ-1-PE3 (multi-path via P1 and P2)
+ *
+ * post-failure:
+ * X/16 --> R-ADJ-1 --> ADJ-2-PE3 (single path via P1)
+ *
+ * note that R-ADJ-1 (the recursive adj) remains in the forwarding graph,
+ * therefore X/16 (and all its siblings) is not updated.
+ * X/16 and its siblings share the recursive adj since they share the same
+ * path-list. It is the path-list object that contributes the recursive-adj
+ * (see next section for more details)
+ *
+ *
+ * 2) iBGP PIC-edge; Traffic from CE3 to Y/16. On PE3 there are routes;
+ * Y/16 (and hundreds of thousands of others like it)
+ * via PE1
+ * via PE2
+ * and
+ * PE1/32 (PE1's loopback address)
+ * via 10.0.2.2 Link0 (this is P1)
+ * PE2/32 (PE2's loopback address)
+ * via 10.0.3.3 Link1 (this is P2)
+ *
+ * the failure is the loss of reachability to PE2. this could be either the
+ * loss of the link P2-PE2 or the loss of the node PE2. This is detected either
+ * by the withdrawal of the PE2's loopback route or by some form of failure
+ * detection (i.e. BFD).
+ * VPP FIB again provides PIC via the use of the shared recursive-adj. Y/16 and
+ * its siblings will again share a path-list for the list {PE1,PE2}, this
+ * path-list will contribute a multi-path-recursive-adj, i.e. a multi-path-adj
+ * with each choice therein being another adj;
+ *
+ * Y/16 -> RM-ADJ --> ADJ1 (for PE1)
+ * --> ADJ2 (for PE2)
+ *
+ * when the route for PE2 is withdrawn then the multi-path-recursive-adjacency
+ * is updated to be;
+ *
+ * Y/16 --> RM-ADJ --> ADJ1 (for PE1)
+ * --> ADJ1 (for PE1)
+ *
+ * that is both choices in the ECMP set are the same and thus all traffic is
+ * forwarded to PE1. Eventually the control plane will download a route update
+ * for Y/16 to be via PE1 only. At that time the situation will be:
+ *
+ * Y/16 -> R-ADJ --> ADJ1 (for PE1)
+ *
+ * In the scenario above we assumed that PE1 and PE2 are ECMP for Y/16. eBGP
+ * PIC core is also specified for the case where one PE is primary and the other
+ * backup - VPP FIB does not support that case at this time.
+ *
+ * 3) eBGP PIC Edge; Traffic from CE3 to Y/16. On PE1 there are routes;
+ * Y/16 (and hundreds of thousands of others like it)
+ * via CE1 (primary)
+ * via PE2 (backup)
+ * and
+ * CE1 (this is an adj-fib)
+ * via 11.0.0.1 Link0 (this is CE1) << this is an adj-fib
+ * PE2 (PE2's loopback address)
+ * via 10.0.5.5 Link1 (this is link PE1-PE2)
+ * the failure is the loss of link0 to CE1. The failure can be detected by FIB
+ * either as a link down event or by the control plane withdrawing the connected
+ * prefix on the link0 (say 10.0.5.4/30). The latter works because the resolving
+ * entry is an adj-fib, so removing the connected will withdraw the adj-fib, and
+ * hence the recursive path becomes unresolved. The former is faster,
+ * particularly in the case of Inter-AS option A where there are many VLAN
+ * sub-interfaces on the PE-CE link, one for each VRF, and so the control plane
+ * must remove the connected prefix for each sub-interface to trigger PIC in
+ * each VRF. Note though that total PIC cutover time will depend on VRF scale
+ * with either trigger.
+ * Primary and backup paths in this eBGP PIC-edge scenario are calculated by
+ * BGP. Each peer is configured to always advertise its best external path to
+ * its iBGP peers. Backup paths therefore send traffic from the PE back into the
+ * core to an alternate PE. A PE may have multiple external paths, i.e. multiple
+ * directly connected CEs, it may also have multiple backup PEs, however there
+ * is no correlation between the two, so unlike LFA-FRR, the redundancy model is
+ * N-M; N primary paths are backed-up by M backup paths - only when all primary
+ * paths fail, then the cutover is performed onto the M backup paths. Note that
+ * PE2 must be suitably configured to forward traffic on its external path that
+ * was received from PE1. VPP FIB does not support external-internal-BGP (eiBGP)
+ * load-balancing.
+ *
+ * As with LFA-FRR the use of primary and backup paths is not currently
+ * supported, however, the use of a recursive-multi-path-adj, and a suitably
+ * constrained hashing algorithm to choose from the primary or backup path sets,
+ * would again provide the necessary shared object and hence the prefix scale
+ * independent cutover.
+ *
+ * Astute readers will recognise that both of the eBGP PIC scenarios refer only
+ * to a BGP free core.
+ *
+ * Fast convergence implementation options come in two flavours:
+ * 1) Insert switches into the data-path. The switch represents the protected
+ * resource. If the switch is 'on' the primary path is taken, otherwise
+ * the backup path is taken. Testing the switch in the data-path comes with
+ * an associated performance cost. A given packet may encounter more than
+ * one protected resource as it is forwarded. This approach minimises
+ * cutover times as packets will be forwarded on the backup path as soon
+ * as the protected resource is detected to be down and the single switch
+ * is tripped. However, it comes at a performance cost, which increases
+ * with each shared resource a packet encounters in the data-path.
+ * This approach is thus best suited to LFA-FRR where the protected routes
+ * are non-recursive (i.e. encounter few shared resources) and the
+ * expectation on cutover times is more stringent (<50msecs).
+ * 2) Update shared objects. Identify objects in the data-path, that are
+ * required to be present whether or not fast convergence is required (i.e.
+ * adjacencies) that can be shared by multiple routes. Create a dependency
+ * between these objects and the protected resource. When the protected
+ * resource fails, each of the shared objects is updated in a way that all
+ * users of it see a consistent change. This approach incurs no performance
+ * penalty as the data-path structure is unchanged, however, the cutover
+ * times are longer as more work is required when the resource fails. This
+ * scheme is thus more appropriate to recursive prefixes (where the packet
+ * will encounter multiple protected resources) and to fast-convergence
+ * technologies where the cutover times are less stringent (i.e. PIC).
+ *
+ * Implementation:
+ * ---------------
+ *
+ * Due to the requirements outlined above, not all routes known to FIB
+ * (e.g. adj-fibs) are installed in forwarding. However, should circumstances
+ * change, those routes will need to be added. This adds the requirement that
+ * a FIB maintains two tables per-VRF, per-AF (where a 'table' is indexed by
+ * prefix); the forwarding and non-forwarding tables.
+ *
+ * For DP speed in VPP we want the lookup in the forwarding table to directly
+ * result in the ADJ. So the two tables; one contains all the routes (a
+ * lookup therein yields a fib_entry_t), the other contains only the forwarding
+ * routes (a lookup therein yields an ip_adjacency_t). The latter is used by the
+ * DP.
+ * This trades memory for forwarding performance. A good trade-off in VPP's
+ * expected operating environments.
+ *
+ * Note these tables are keyed only by the prefix (and since there are two
+ * per-VRF, implicitly by the VRF too). The key for an adjacency is the
+ * tuple:{next-hop, address (and its AF), interface, link/ether-type}.
+ * consider this curious, but allowed, config;
+ *
+ * set int ip addr 10.0.0.1/24 Gig0
+ * set ip arp Gig0 10.0.0.2 dead.dead.dead
+ * # a host in that sub-net is routed via a better next hop (say it avoids a
+ * # big L2 domain)
+ * ip route add 10.0.0.2 Gig1 192.168.1.1
+ * # this recursive should go via Gig1
+ * ip route add 1.1.1.1/32 via 10.0.0.2
+ * # this non-recursive should go via Gig0
+ * ip route add 2.2.2.2/32 via Gig0 10.0.0.2
+ *
+ * for the last route, the lookup for the path (via {Gig0, 10.0.0.2}) in the
+ * prefix table would not yield the correct result. To fix this we need a
+ * separate table for the adjacencies.
+ *
+ * - FIB data structures;
+ *
+ * fib_entry_t:
+ * - a representation of a route.
+ * - has a prefix.
+ * - it maintains an array of path-lists that have been contributed by the
+ * different sources
+ * - install an adjacency in the forwarding table contributed by the best
+ * source's path-list.
+ *
+ * fib_path_list_t:
+ * - a list of paths
+ * - path-lists may be shared between FIB entries. The path-lists are thus
+ * kept in a DB. The key is the combined description of the paths. We share
+ * path-lists when it will aid convergence to do so. Adding path-lists to
+ * this DB that are never shared, or are not shared by prefixes that are
+ * not subject to PIC, will increase the size of the DB unnecessarily and
+ * may lead to increased search times due to hash collisions.
+ * - the path-list contributes the appropriate adj for the entry in the
+ * forwarding table. The adj can be 'normal', multi-path or recursive,
+ * depending on the number of paths and their types.
+ * - since path-lists are shared there is only one instance of the multi-path
+ * adj that they [may] create. As such multi-path adjacencies do not need a
+ * separate DB.
+ * The path-list with recursive paths and the recursive adjacency that it
+ * contributes forms the backbone of the fast convergence architecture (as
+ * described previously).
+ *
+ * fib_path_t:
+ * - a description of how to forward the traffic (i.e. via {Gig1, K}).
+ * - the path describes the intent on how to forward. This differs from how
+ * the path resolves. I.e. it might not be resolved at all (since the
+ * interface is deleted or down).
+ * - paths have different types, most notably recursive or non-recursive.
+ * - a fib_path_t will contribute the appropriate adjacency object. It is from
+ * these contributions that the DP graph/chain for the route is built.
+ * - if the path is recursive and a recursion loop is detected, then the path
+ * will contribute the special DROP adjacency. This way, whilst the control
+ * plane graph is looped, the data-plane graph is not.
+ *
+ * we build a graph of these objects;
+ *
+ * fib_entry_t -> fib_path_list_t -> fib_path_t -> ...
+ *
+ * for recursive paths:
+ *
+ * fib_path_t -> fib_entry_t -> ....
+ *
+ * for non-recursive paths
+ *
+ * fib_path_t -> ip_adjacency_t -> interface
+ *
+ * These objects, which constitute the 'control plane' part of the FIB are used
+ * to represent the resolution of a route. As a whole this is referred to as the
+ * control plane graph. There is a separate DP graph to represent the forwarding
+ * of a packet. In the DP graph each object represents an action that is applied
+ * to a packet as it traverses the graph. For example, a lookup of an IP address
+ * in the forwarding table could result in the following graph:
+ *
+ * recursive-adj --> multi-path-adj --> interface_A
+ * --> interface_B
+ *
+ * A packet traversing this FIB DP graph would thus also traverse a VPP node
+ * graph of:
+ *
+ * ipX_recursive --> ipX_rewrite --> interface_A_tx --> etc
+ *
+ * The taxonomy of objects in a FIB graph is as follows, consider;
+ *
+ * A -->
+ * B --> D
+ * C -->
+ *
+ * Where A,B and C are (for example) routes that resolve through D.
+ * parent; D is the parent of A, B, and C.
+ * children: A, B, and C are children of D.
+ * sibling: A, B and C are siblings of one another.
+ *
+ * All shared objects in the FIB are reference counted. Users of these objects
+ * are thus expected to use the add_lock/unlock semantics (as one would
+ * normally use malloc/free).
+ *
+ * WALKS
+ *
+ * It is necessary to walk/traverse the graph forwards (entry to interface) to
+ * perform a collapse or build a recursive adj and backwards (interface
+ * to entry) to perform updates, i.e. when interface state changes or when
+ * recursive route resolution updates occur.
+ * A forward walk follows simply by navigating an object's parent pointer to
+ * access its parent object. For objects with multiple parents (e.g. a
+ * path-list), each parent is walked in turn.
+ * To support back-walks direct dependencies are maintained between objects,
+ * i.e. in the relationship, {A, B, C} --> D, then object D will maintain a list
+ * of 'pointers' to its children {A, B, C}. Bare C-language pointers are not
+ * allowed, so a pointer is described in terms of an object type (i.e. entry,
+ * path-list, etc) and index - this allows the object to be retrieved from the
+ * appropriate pool. A list is maintained to achieve fast convergence at scale.
+ * When there are millions of recursive prefixes, it is very inefficient to
+ * blindly walk the tables looking for entries that were affected by a given
+ * topology change. The lowest hanging fruit when optimising is to remove
+ * actions that are not required, so all back-walks only traverse objects that
+ * are directly affected by the change.
+ *
+ * PIC Core and fast-reroute rely on FIB reacting quickly to an interface
+ * state change to update the multi-path-adjacencies that use this interface.
+ * An example graph is shown below:
+ *
+ * E_a -->
+ * E_b --> PL_2 --> P_a --> Interface_A
+ * ... --> P_c -\
+ * E_k --> \
+ * Interface_K
+ * /
+ * E_l --> /
+ * E_m --> PL_1 --> P_d -/
+ * ... --> P_f --> Interface_F
+ * E_z -->
+ *
+ * E = fib_entry_t
+ * PL = fib_path_list_t
+ * P = fib_path_t
+ * The subscripts are arbitrary and serve only to distinguish object instances.
+ * This CP graph results in the following DP graph:
+ *
+ * M-ADJ-2 --> Interface_A
+ * \
+ * -> Interface_K
+ * /
+ * M-ADJ-1 --> Interface_F
+ *
+ * M-ADJ = multi-path-adjacency.
+ *
+ * When interface K goes down a back-walk is started over its dependants in the
+ * control plane graph. This back-walk will reach PL_1 and PL_2 and result in
+ * the calculation of new adjacencies that have interface K removed. The walk
+ * will continue to the entry objects and thus the forwarding table is updated
+ * for each prefix with the new adjacency. The DP graph then becomes:
+ *
+ * ADJ-3 --> Interface_A
+ *
+ * ADJ-4 --> Interface_F
+ *
+ * The eBGP PIC scenarios described above relied on the update of a path-list's
+ * recursive-adjacency to provide the shared point of cutover. This is shown
+ * below
+ *
+ * E_a -->
+ * E_b --> PL_2 --> P_a --> E_44 --> PL_a --> P_b --> Interface_A
+ * ... --> P_c -\
+ * E_k --> \
+ * \
+ * E_1 --> PL_k -> P_k --> Interface_K
+ * /
+ * E_l --> /
+ * E_m --> PL_1 --> P_d -/
+ * ... --> P_f --> E_55 --> PL_e --> P_e --> Interface_E
+ * E_z -->
+ *
+ * The failure scenario is the removal of entry E_1 and thus the paths P_c and
+ * P_d become unresolved. To achieve PIC the two shared recursive path-lists,
+ * PL_1 and PL_2 must be updated to remove E_1 from the recursive-multi-path-
+ * adjacencies that they contribute, before any entry E_a to E_z is updated.
+ * This means that as the update propagates backwards (right to left) in the
+ * graph it must do so breadth first not depth first. Note this approach leads
+ * to convergence times that are dependent on the number of path-lists and so
+ * the number of combinations of egress PEs - this is desirable as this
+ * scale is considerably lower than the number of prefixes.
+ *
+ * If we consider another section of the graph that is similar to the one
+ * shown above where there is another prefix E_2 in a similar position to E_1
+ * and so also has many dependent children. It is reasonable to expect that a
+ * particular network failure may simultaneously render E_1 and E_2 unreachable.
+ * This means that the update to withdraw E_2 is downloaded immediately after the
+ * update to withdraw E_1. It is a requirement on the FIB to not spend large
+ * amounts of time in a back-walk whilst processing the update for E_1, i.e. the
+ * back-walk must not reach as far as E_a and its siblings. Therefore, after the
+ * back-walk has traversed one generation (breadth first) to update all the
+ * path-lists it should be suspended/back-grounded and further updates allowed
+ * to be handled. Once the update queue is empty, the suspended walks can be
+ * resumed. Note that in the case that multiple updates affect the same entry
+ * (say E_1) then this will trigger multiple similar walks, these are merged,
+ * so each child is updated only once.
+ * In the presence of more layers of recursion PIC is still a desirable
+ * feature. Consider an extension to the diagram above, where more recursive
+ * routes (E_100 -> E_200) are added as children of E_a:
+ *
+ * E_100 -->
+ * E_101 --> PL_3 --> P_j-\
+ * ... \
+ * E_199 --> E_a -->
+ * E_b --> PL_2 --> P_a --> E_44 --> ...etc..
+ * ... --> P_c -\
+ * E_k \
+ * E_1 --> ...etc..
+ * /
+ * E_l --> /
+ * E_m --> PL_1 --> P_d -/
+ * ... --> P_e --> E_55 --> ...etc..
+ * E_z -->
+ *
+ * To achieve PIC for the routes E_100->E_199, PL_3 needs to be updated before
+ * E_b -> E_z, a breadth first traversal at each level would not achieve this.
+ * Instead the walk must proceed intelligently. Children on PL_2 are sorted so
+ * those Entry objects that themselves have children appear first in the list,
+ * those without later. When an entry object is walked that has children, a
+ * walk of its children is pushed to the front of the background queue. The back
+ * ground queue is a priority queue. As the breadth first traversal proceeds
+ * across the dependent entry object E_a to E_k, when the first entry that does
+ * not have children is reached (E_b), the walk is suspended and placed at the
+ * back of the queue. Following this prioritisation method shared path-list
+ * updates are performed before all non-resolving entry objects.
+ * The CPU/core/thread that handles the updates is the same thread that handles
+ * the back-walks. Handling updates has a higher priority than making walk
+ * progress, so a walk is required to be interruptible/suspendable when new
+ * updates are available.
+ * !!! TODO - this section describes how walks should be, not how they are !!!
+ *
+ * In the diagram above E_100 is an IP route, however, VPP has no restrictions
+ * on the type of object that can be a dependent of a FIB entry. Children of
+ * a FIB entry can be (and are) GRE & VXLAN tunnel endpoints, L2VPN LSPs etc.
+ * By including all object types into the graph and extending the back-walk, we
+ * can thus deliver fast convergence to technologies that overlay on an IP
+ * network.
+ *
+ * If having read all the above carefully you are still thinking; 'i don't need
+ * all this %&$* i have a route only I know about and I just need to jam it in',
+ * then fib_table_entry_special_add() is your only friend.
+ */
+
+#ifndef __FIB_H__
+#define __FIB_H__
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+
+#endif
diff --git a/vnet/vnet/fib/fib_attached_export.c b/vnet/vnet/fib/fib_attached_export.c
new file mode 100644
index 00000000000..afc953a4ac5
--- /dev/null
+++ b/vnet/vnet/fib/fib_attached_export.c
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+
+#include "fib_attached_export.h"
+#include "fib_entry_cover.h"
+#include "fib_entry_src.h"
+
+/**
+ * A description of the need to import routes from the export table.
+ * One of these trackers is allocated from fib_ae_import_pool for each
+ * entry that imports.
+ */
+typedef struct fib_ae_import_t_
+{
+ /**
+ * The entry in the export table that this importer
+ * is importing covered entries from
+ */
+ fib_node_index_t faei_export_entry;
+
+ /**
+ * The attached entry in the import table
+ */
+ fib_node_index_t faei_import_entry;
+ /**
+ * The sibling index on the cover; ~0 when the cover is not tracked
+ */
+ u32 faei_export_sibling;
+
+ /**
+ * The index of the exporter tracker. Not set if the
+ * export entry is not valid for export
+ */
+ fib_node_index_t faei_exporter;
+
+ /**
+ * A vector/list of imported entry indices
+ */
+ fib_node_index_t *faei_importeds;
+
+ /**
+ * The FIB index and prefix we are tracking
+ */
+ fib_node_index_t faei_export_fib;
+ fib_prefix_t faei_prefix;
+
+ /**
+ * The FIB index we are importing into
+ */
+ fib_node_index_t faei_import_fib;
+} fib_ae_import_t;
+
+/**
+ * A description of the need to export routes to one or more import tables.
+ * One of these trackers is allocated from fib_ae_export_pool for each
+ * entry that exports.
+ */
+typedef struct fib_ae_export_t_ {
+ /**
+ * The vector/list of import tracker indices
+ */
+ fib_node_index_t *faee_importers;
+
+ /**
+ * The connected entry this export is acting on behalf of
+ */
+ fib_node_index_t faee_ei;
+
+ /**
+ * Reference counting locks; one is taken per importer
+ */
+ u32 faee_locks;
+} fib_ae_export_t;
+
+/*
+ * memory pools for the importers and exporters. Trackers are referred
+ * to by their index in these pools.
+ */
+static fib_ae_import_t *fib_ae_import_pool;
+static fib_ae_export_t *fib_ae_export_pool;
+
+/**
+ * Find the export tracker bound to an entry, creating one if the entry
+ * does not have one yet, and take a lock on it.
+ *
+ * @param connected index of the entry the tracker acts on behalf of
+ * @return the tracker, with its lock count already incremented
+ */
+static fib_ae_export_t *
+fib_entry_ae_add_or_lock (fib_node_index_t connected)
+{
+    fib_entry_t *fib_entry = fib_entry_get(connected);
+    fib_ae_export_t *tracker;
+
+    if (FIB_NODE_INDEX_INVALID != fib_entry->fe_export)
+    {
+        /* the entry is already exporting; share the existing tracker */
+        tracker = pool_elt_at_index(fib_ae_export_pool, fib_entry->fe_export);
+    }
+    else
+    {
+        /* first importer; allocate a zeroed tracker and bind it to the entry */
+        pool_get(fib_ae_export_pool, tracker);
+        memset(tracker, 0, sizeof(*tracker));
+
+        tracker->faee_ei = connected;
+        fib_entry->fe_export = (tracker - fib_ae_export_pool);
+    }
+
+    tracker->faee_locks++;
+
+    return (tracker);
+}
+
+/**
+ * Withdraw one previously imported entry from an importer.
+ *
+ * If entry_index is on the importer's list, its prefix is removed from the
+ * import FIB (source FIB_SOURCE_AE), the lock held on the entry is released
+ * and the entry is dropped from the list. Otherwise nothing is done.
+ */
+static void
+fib_entry_import_remove (fib_ae_import_t *import,
+                         fib_node_index_t entry_index)
+{
+    fib_prefix_t pfx;
+    u32 pos;
+
+    /* locate the entry in the list of importeds */
+    pos = vec_search(import->faei_importeds, entry_index);
+
+    if (pos >= vec_len(import->faei_importeds))
+    {
+        /* never imported, so there is nothing to undo */
+        return;
+    }
+
+    fib_entry_get_prefix(entry_index, &pfx);
+
+    fib_table_entry_special_remove(import->faei_import_fib,
+                                   &pfx,
+                                   FIB_SOURCE_AE);
+
+    fib_entry_unlock(entry_index);
+    vec_del1(import->faei_importeds, pos);
+}
+
+/**
+ * Import one entry from the export table into the importer's FIB.
+ *
+ * Nothing is imported when the entry is already on the importer's list,
+ * when its prefix equals the importer's own prefix, or when it currently
+ * contributes no valid IP forwarding.
+ */
+static void
+fib_entry_import_add (fib_ae_import_t *import,
+                      fib_node_index_t entry_index)
+{
+    fib_node_index_t *imported;
+    const dpo_id_t *fwd;
+    fib_prefix_t pfx;
+
+    /*
+     * sourcing prefixes in the table is reference counted, so an
+     * exported entry must only be added once
+     */
+    vec_foreach(imported, import->faei_importeds)
+    {
+        if (entry_index == *imported)
+        {
+            return;
+        }
+    }
+
+    fib_entry_get_prefix(entry_index, &pfx);
+
+    /*
+     * entries with the same prefix as the import entry are not imported
+     */
+    if (0 == fib_prefix_cmp(&pfx, &import->faei_prefix))
+    {
+        return;
+    }
+
+    fwd = fib_entry_contribute_ip_forwarding(entry_index);
+
+    if (!dpo_id_is_valid(fwd))
+    {
+        /*
+         * the entry currently has no valid forwarding. when it
+         * does it will export itself
+         */
+        return;
+    }
+
+    /*
+     * first import of this entry: source it in the import FIB, lock it
+     * and remember it on the list of importeds
+     */
+    fib_table_entry_special_dpo_add(import->faei_import_fib,
+                                    &pfx,
+                                    FIB_SOURCE_AE,
+                                    FIB_ENTRY_FLAG_EXCLUSIVE,
+                                    load_balance_get_bucket(fwd->dpoi_index, 0));
+
+    fib_entry_lock(entry_index);
+    vec_add1(import->faei_importeds, entry_index);
+}
+
+/**
+ * Cover-walk callback: import the visited covered entry.
+ *
+ * @param cover   the entry whose covered entries are being walked (unused)
+ * @param covered index of the covered entry being visited
+ * @param ctx     the fib_ae_import_t doing the importing
+ * @return always 0
+ */
+static int
+fib_entry_covered_walk_import (fib_entry_t *cover,
+                               fib_node_index_t covered,
+                               void *ctx)
+{
+    fib_entry_import_add((fib_ae_import_t *) ctx, covered);
+
+    return (0);
+}
+
+/*
+ * fib_ae_export_import_add
+ *
+ * Bind an importer to an exporter, then walk the entries covered by the
+ * exporter's entry, offering each one to the importer.
+ */
+static void
+fib_ae_export_import_add (fib_ae_export_t *export,
+                          fib_ae_import_t *import)
+{
+    fib_entry_t *exporting_entry = fib_entry_get(export->faee_ei);
+
+    import->faei_exporter = (export - fib_ae_export_pool);
+
+    fib_entry_cover_walk(exporting_entry,
+                         fib_entry_covered_walk_import,
+                         import);
+}
+
+/**
+ * \brief Start importing, on behalf of fib_entry, the entries covered by
+ * the matching entry in the export FIB.
+ *
+ * An import tracker is allocated and linked to fib_entry via fe_import.
+ * If the export FIB holds an exact match for the entry's prefix and that
+ * match is attached, the tracker is added to the match's exporter and the
+ * entries it covers are imported. Otherwise only the best match is tracked,
+ * so the import can be re-evaluated when it changes.
+ *
+ * @param fib_entry  the entry in the import FIB
+ * @param export_fib index of the FIB to import from
+ */
+void
+fib_attached_export_import (fib_entry_t *fib_entry,
+                            fib_node_index_t export_fib)
+{
+    fib_ae_import_t *import;
+    fib_node_index_t fei;
+
+    /*
+     * remember the entry's index; the pointer is re-fetched from it once
+     * the import, which can add entries to the import FIB, is complete.
+     */
+    fei = fib_entry_get_index(fib_entry);
+
+    pool_get(fib_ae_import_pool, import);
+
+    /*
+     * the element may be recycled: start from a clean slate so that no
+     * stale vector of importeds or exporter index is inherited.
+     */
+    memset(import, 0, sizeof(*import));
+
+    import->faei_import_fib = fib_entry->fe_fib_index;
+    import->faei_export_fib = export_fib;
+    import->faei_prefix = fib_entry->fe_prefix;
+    import->faei_import_entry = fei;
+    import->faei_export_sibling = ~0;
+    /*
+     * not attached to an exporter until proven otherwise. purge relies on
+     * this to decide whether there is an exporter to detach from.
+     */
+    import->faei_exporter = FIB_NODE_INDEX_INVALID;
+
+    /*
+     * do an exact match in the export table
+     */
+    import->faei_export_entry =
+        fib_table_lookup_exact_match(import->faei_export_fib,
+                                     &import->faei_prefix);
+
+    if (FIB_NODE_INDEX_INVALID == import->faei_export_entry)
+    {
+        /*
+         * no exact matching entry in the export table. can't be good.
+         * track the next best thing
+         */
+        import->faei_export_entry =
+            fib_table_lookup(import->faei_export_fib,
+                             &import->faei_prefix);
+    }
+    else
+    {
+        /*
+         * found the entry in the export table. import the
+         * the prefixes that it covers.
+         * only if the prefix found in the export FIB really is
+         * attached do we want to import its covered
+         */
+        if (FIB_ENTRY_FLAG_ATTACHED &
+            fib_entry_get_flags_i(fib_entry_get(import->faei_export_entry)))
+        {
+            fib_ae_export_t *export;
+
+            export = fib_entry_ae_add_or_lock(import->faei_export_entry);
+            vec_add1(export->faee_importers, (import - fib_ae_import_pool));
+            fib_ae_export_import_add(export, import);
+        }
+    }
+
+    /*
+     * track the entry in the export table so we can update appropriately
+     * when it changes
+     */
+    import->faei_export_sibling =
+        fib_entry_cover_track(fib_entry_get(import->faei_export_entry),
+                              fei);
+
+    /*
+     * re-fetch the importing entry by index before writing to it
+     */
+    fib_entry = fib_entry_get(fei);
+    fib_entry->fe_import = (import - fib_ae_import_pool);
+}
+
+/**
+ * \brief All the imported entries need to be purged
+ *
+ * Undoes fib_attached_export_import for fib_entry: every imported prefix is
+ * removed from the import FIB, the export entry is no longer tracked, the
+ * import tracker is detached from its exporter (which is freed when its
+ * last lock goes) and the import tracker itself is freed.
+ * Does nothing if the entry has no import tracker.
+ */
+void
+fib_attached_export_purge (fib_entry_t *fib_entry)
+{
+    if (FIB_NODE_INDEX_INVALID != fib_entry->fe_import)
+    {
+        fib_node_index_t *import_index;
+        fib_entry_t *export_entry;
+        fib_ae_import_t *import;
+        fib_ae_export_t *export;
+        u32 index;
+
+        import = pool_elt_at_index(fib_ae_import_pool,
+                                   fib_entry->fe_import);
+
+        /*
+         * remove each imported entry
+         */
+        vec_foreach(import_index, import->faei_importeds)
+        {
+            fib_prefix_t prefix;
+
+            fib_entry_get_prefix(*import_index, &prefix);
+
+            fib_table_entry_delete(import->faei_import_fib,
+                                   &prefix,
+                                   FIB_SOURCE_AE);
+            fib_entry_unlock(*import_index);
+        }
+        vec_free(import->faei_importeds);
+
+        /*
+         * stop tracking the export entry
+         */
+        if (~0 != import->faei_export_sibling)
+        {
+            fib_entry_cover_untrack(fib_entry_get(import->faei_export_entry),
+                                    import->faei_export_sibling);
+        }
+        import->faei_export_sibling = ~0;
+
+        /*
+         * remove this import tracker from the export's list,
+         * if it is attached to one. It won't be in the case the tracked
+         * export entry is not an attached exact match.
+         */
+        if (FIB_NODE_INDEX_INVALID != import->faei_exporter)
+        {
+            export_entry = fib_entry_get(import->faei_export_entry);
+            ASSERT(FIB_NODE_INDEX_INVALID != export_entry->fe_export);
+            export = pool_elt_at_index(fib_ae_export_pool, export_entry->fe_export);
+
+            index = vec_search(export->faee_importers,
+                               (import - fib_ae_import_pool));
+
+            ASSERT(index < vec_len(export->faee_importers));
+            vec_del1(export->faee_importers, index);
+
+            /*
+             * free the exporter if there are no longer importers
+             */
+            if (0 == --export->faee_locks)
+            {
+                /*
+                 * release the (now unused) vector of importers too. the
+                 * element is zeroed when it is next allocated, so the
+                 * memory would otherwise be lost.
+                 */
+                vec_free(export->faee_importers);
+                pool_put(fib_ae_export_pool, export);
+                export_entry->fe_export = FIB_NODE_INDEX_INVALID;
+            }
+        }
+
+        /*
+         * free the import tracker
+         */
+        pool_put(fib_ae_import_pool, import);
+        fib_entry->fe_import = FIB_NODE_INDEX_INVALID;
+    }
+}
+
+/**
+ * \brief An entry has become covered by cover. If cover is exporting,
+ * offer the new covered entry to each of its importers.
+ */
+void
+fib_attached_export_covered_added (fib_entry_t *cover,
+                                   fib_node_index_t covered)
+{
+    fib_node_index_t *importer_index;
+    fib_ae_export_t *exporter;
+
+    if (FIB_NODE_INDEX_INVALID == cover->fe_export)
+    {
+        /* the covering prefix is not exporting to other tables */
+        return;
+    }
+
+    exporter = pool_elt_at_index(fib_ae_export_pool, cover->fe_export);
+
+    /*
+     * export the covered entry to each of the importers
+     */
+    vec_foreach(importer_index, exporter->faee_importers)
+    {
+        fib_entry_import_add(pool_elt_at_index(fib_ae_import_pool,
+                                               *importer_index),
+                             covered);
+    }
+}
+
+/**
+ * \brief An entry is no longer covered by cover. If cover is exporting,
+ * withdraw the covered entry from each of its importers.
+ */
+void
+fib_attached_export_covered_removed (fib_entry_t *cover,
+                                     fib_node_index_t covered)
+{
+    fib_node_index_t *importer_index;
+    fib_ae_export_t *exporter;
+
+    if (FIB_NODE_INDEX_INVALID == cover->fe_export)
+    {
+        /* the covering prefix is not exporting to other tables */
+        return;
+    }
+
+    exporter = pool_elt_at_index(fib_ae_export_pool, cover->fe_export);
+
+    /*
+     * remove the covered entry from each of the importers
+     */
+    vec_foreach(importer_index, exporter->faee_importers)
+    {
+        fib_entry_import_remove(pool_elt_at_index(fib_ae_import_pool,
+                                                  *importer_index),
+                                covered);
+    }
+}
+
+/**
+ * Re-evaluate an entry's import after the cover it tracks was modified.
+ * Entries that are not importing are left untouched.
+ */
+static void
+fib_attached_export_cover_modified_i (fib_entry_t *fib_entry)
+{
+    u32 saved_export_fib;
+
+    if (FIB_NODE_INDEX_INVALID == fib_entry->fe_import)
+    {
+        return;
+    }
+
+    /*
+     * save what is needed from the existing import tracker,
+     * since the purge frees it.
+     */
+    saved_export_fib = pool_elt_at_index(fib_ae_import_pool,
+                                         fib_entry->fe_import)->faei_export_fib;
+
+    /*
+     * keep it simple. purge anything that was previously imported.
+     * then re-evaluate the need to import.
+     */
+    fib_attached_export_purge(fib_entry);
+    fib_attached_export_import(fib_entry, saved_export_fib);
+}
+
+/**
+ * \brief If this entry is tracking a cover (in another table)
+ * then that cover has changed. re-evaluate import.
+ *
+ * Does nothing for an entry that has no import tracker; otherwise
+ * the existing import is purged and then re-created.
+ */
+void
+fib_attached_export_cover_change (fib_entry_t *fib_entry)
+{
+ fib_attached_export_cover_modified_i(fib_entry);
+}
+
+/**
+ * \brief If this entry is tracking a cover (in another table)
+ * then that cover has been updated. re-evaluate import.
+ *
+ * Handled identically to fib_attached_export_cover_change.
+ */
+void
+fib_attached_export_cover_update (fib_entry_t *fib_entry)
+{
+ fib_attached_export_cover_modified_i(fib_entry);
+}
+
+/**
+ * Append a description of an import tracker to a format string.
+ *
+ * @param import_index index of the tracker in the import pool;
+ *                     FIB_NODE_INDEX_INVALID appends nothing
+ * @param s            the string to append to
+ * @return the (possibly extended) string
+ */
+u8*
+fib_ae_import_format (fib_node_index_t import_index,
+                      u8* s)
+{
+    if (FIB_NODE_INDEX_INVALID != import_index)
+    {
+        fib_node_index_t *index;
+        fib_ae_import_t *import;
+
+        import = pool_elt_at_index(fib_ae_import_pool, import_index);
+
+        /*
+         * print the index we were given rather than a pointer difference,
+         * whose type is wider than the int that %d consumes.
+         */
+        s = format(s, "\n Attached-Import:%d:[", import_index);
+        s = format(s, "export-prefix:%U ", format_fib_prefix, &import->faei_prefix);
+        s = format(s, "export-entry:%d ", import->faei_export_entry);
+        s = format(s, "export-sibling:%d ", import->faei_export_sibling);
+        s = format(s, "exporter:%d ", import->faei_exporter);
+        s = format(s, "export-fib:%d ", import->faei_export_fib);
+
+        s = format(s, "import-entry:%d ", import->faei_import_entry);
+        s = format(s, "import-fib:%d ", import->faei_import_fib);
+
+        s = format(s, "importeds:[");
+        vec_foreach(index, import->faei_importeds)
+        {
+            s = format(s, "%d, ", *index);
+        }
+        s = format(s, "]]");
+    }
+
+    return (s);
+}
+
+/**
+ * Append a description of an export tracker to a format string.
+ *
+ * @param export_index index of the tracker in the export pool;
+ *                     FIB_NODE_INDEX_INVALID appends nothing
+ * @param s            the string to append to
+ * @return the (possibly extended) string
+ */
+u8*
+fib_ae_export_format (fib_node_index_t export_index, u8*s)
+{
+    if (FIB_NODE_INDEX_INVALID != export_index)
+    {
+        fib_node_index_t *index;
+        fib_ae_export_t *export;
+
+        export = pool_elt_at_index(fib_ae_export_pool, export_index);
+
+        /*
+         * print the index we were given rather than a pointer difference,
+         * whose type is wider than the int that %d consumes.
+         */
+        s = format(s, "\n Attached-Export:%d:[", export_index);
+        s = format(s, "export-entry:%d ", export->faee_ei);
+
+        s = format(s, "importers:[");
+        vec_foreach(index, export->faee_importers)
+        {
+            s = format(s, "%d, ", *index);
+        }
+        s = format(s, "]]");
+    }
+    return (s);
+}
diff --git a/vnet/vnet/fib/fib_attached_export.h b/vnet/vnet/fib/fib_attached_export.h
new file mode 100644
index 00000000000..ee68481187c
--- /dev/null
+++ b/vnet/vnet/fib/fib_attached_export.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * FIB attached export
+ *
+ * what's it all about?
+ * say one does this:
+ * set int ip table Gig0 2
+ * set int ip addr Gig0 10.0.0.1/24
+ * Gig0 is in table 2 with a connected address.
+ * Now we add a route matching said connected in a different table
+ * ip route add table 3 10.0.0.0/24 via Gig0
+ * How do we expect traffic in table 3 to be forwarded? Clearly out of
+ * Gig0. It's an attached route, hence we are saying that we can ARP for
+ * hosts in the attached subnet, and we can. But the replies to any ARP
+ * requests we send will be received on Gig0 and, since Gig0 is in table 2,
+ * the adj-fibs will be installed in table 2. So traffic in table 3 will never
+ * hit an adj-fib, will always hit the glean, and is thus effectively dropped.
+ * How do we fix this? Attached Export !! All more specific entries in table 2
+ * that track and are covered by the connected are automatically exported into
+ * table 3. Now table 3 also has adj-fibs (and the local) so traffic to hosts
+ * is restored.
+ */
+
+#ifndef __FIB_ATTACHED_EXPORT_H__
+#define __FIB_ATTACHED_EXPORT_H__
+
+#include <vnet/fib/fib_types.h>
+
+/*
+ * Begin importing, for fib_entry, the entries covered by the matching
+ * entry in the FIB with index export_fib.
+ */
+extern void fib_attached_export_import(fib_entry_t *fib_entry,
+ fib_node_index_t export_fib);
+
+/*
+ * Remove everything fib_entry has imported and free its import tracker.
+ */
+extern void fib_attached_export_purge(fib_entry_t *fib_entry);
+
+/*
+ * Notifications to an (exporting) cover that an entry it covers
+ * has been added or removed.
+ */
+extern void fib_attached_export_covered_added(fib_entry_t *cover,
+ fib_node_index_t covered);
+extern void fib_attached_export_covered_removed(fib_entry_t *cover,
+ fib_node_index_t covered);
+/*
+ * Notifications to an importing entry that the cover it tracks has
+ * changed or been updated; the import is re-evaluated.
+ */
+extern void fib_attached_export_cover_change(fib_entry_t *fib_entry);
+extern void fib_attached_export_cover_update(fib_entry_t *fib_entry);
+
+/*
+ * Append a description of the import/export tracker with the given index
+ * to the string s. An invalid index appends nothing.
+ */
+extern u8* fib_ae_import_format(fib_node_index_t import_index, u8*s);
+extern u8* fib_ae_export_format(fib_node_index_t export_index, u8*s);
+
+#endif
diff --git a/vnet/vnet/fib/fib_entry.c b/vnet/vnet/fib/fib_entry.c
new file mode 100644
index 00000000000..8b63f0dc974
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry.c
@@ -0,0 +1,1493 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/fib_attached_export.h>
+#include <vnet/fib/fib_path_ext.h>
+
+/*
+ * Arrays of strings/names for the FIB sources and the FIB entry
+ * attributes. Indexed by source and attribute respectively when
+ * formatting an entry.
+ */
+static const char *fib_source_names[] = FIB_SOURCES;
+static const char *fib_attribute_names[] = FIB_ENTRY_ATTRIBUTES;
+
+/*
+ * Pool for all fib_entries. An entry's index is its position in this pool.
+ */
+static fib_entry_t *fib_entry_pool;
+
+/**
+ * @brief Get the entry at the given index in the entry pool
+ */
+fib_entry_t *
+fib_entry_get (fib_node_index_t index)
+{
+ return (pool_elt_at_index(fib_entry_pool, index));
+}
+
+/**
+ * @brief Get the entry at the given index, viewed as a fib_node_t.
+ * Installed as the fnv_get function of the entry's vft.
+ */
+static fib_node_t *
+fib_entry_get_node (fib_node_index_t index)
+{
+ return ((fib_node_t*)fib_entry_get(index));
+}
+
+/**
+ * @brief Get an entry's index; its offset from the start of the entry pool
+ */
+fib_node_index_t
+fib_entry_get_index (const fib_entry_t * fib_entry)
+{
+ return (fib_entry - fib_entry_pool);
+}
+
+/**
+ * @brief Get the protocol of the entry's prefix
+ */
+static fib_protocol_t
+fib_entry_get_proto (const fib_entry_t * fib_entry)
+{
+ return (fib_entry->fe_prefix.fp_proto);
+}
+
+/**
+ * @brief Turn the chain type requested by the client into the one they
+ * really wanted
+ *
+ * Only a request for the MPLS EOS chain is altered; any other chain type
+ * is returned unchanged.
+ */
+static fib_forward_chain_type_t
+fib_entry_chain_type_fixup (const fib_entry_t *entry,
+                            fib_forward_chain_type_t fct)
+{
+    fib_forward_chain_type_t default_fct;
+
+    if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct)
+    {
+        return (fct);
+    }
+
+    /*
+     * The EOS chain is tricky: one cannot know the adjacency to link to
+     * without knowing what the packet's payload protocol will be once the
+     * label is popped.
+     */
+    default_fct = fib_entry_get_default_chain_type(entry);
+
+    if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != default_fct)
+    {
+        /*
+         * give them what this entry would be by default. i.e. if it's a v6
+         * entry, then its local label should be carrying v6 traffic.
+         * If it's a non-EOS label entry, then there are more labels and we
+         * want a non-eos chain.
+         */
+        return (default_fct);
+    }
+
+    /*
+     * the entry being asked is an eos-MPLS label entry; use the
+     * payload-protocol field that was stashed in the prefix for
+     * just this purpose
+     */
+    return (fib_proto_to_forw_chain_type(entry->fe_prefix.fp_payload_proto));
+}
+
+/**
+ * @brief Get the forwarding chain type an entry uses by default, derived
+ * from the protocol of its prefix. Protocols not handled explicitly yield
+ * the unicast IPv4 chain.
+ */
+fib_forward_chain_type_t
+fib_entry_get_default_chain_type (const fib_entry_t *fib_entry)
+{
+    if (FIB_PROTOCOL_IP6 == fib_entry->fe_prefix.fp_proto)
+    {
+        return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+    }
+
+    if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto)
+    {
+        if (MPLS_EOS != fib_entry->fe_prefix.fp_eos)
+        {
+            return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+        }
+        /*
+         * an eos-MPLS label entry: use the payload-protocol field
+         * that was stashed in the prefix for just this purpose
+         */
+        return (fib_proto_to_forw_chain_type(fib_entry->fe_prefix.fp_payload_proto));
+    }
+
+    /* FIB_PROTOCOL_IP4, and anything else */
+    return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+/**
+ * @brief Format function for a FIB entry.
+ *
+ * Consumes two arguments from args: the entry's fib_node_index_t, then an
+ * int level of detail.
+ *  - always: the prefix and the forwarding of the entry's default chain
+ *    (or UNRESOLVED if that chain has no valid DPO).
+ *  - level >= FIB_ENTRY_FORMAT_DETAIL: also the FIB index, entry index,
+ *    locks, each added source (with flags, path-list and path extensions),
+ *    the covered entries and the attached import/export trackers.
+ *  - level >= FIB_ENTRY_FORMAT_DETAIL2: every forwarding chain rather than
+ *    only the default one, and the entry's children.
+ */
+u8 *
+format_fib_entry (u8 * s, va_list * args)
+{
+    fib_forward_chain_type_t fct;
+    fib_entry_attribute_t attr;
+    fib_path_ext_t *path_ext;
+    fib_entry_t *fib_entry;
+    fib_entry_src_t *src;
+    fib_node_index_t fei;
+    fib_source_t source;
+    u32 n_covered;
+    int level;
+
+    fei = va_arg (*args, fib_node_index_t);
+    level = va_arg (*args, int);
+    fib_entry = fib_entry_get(fei);
+
+    s = format (s, "%U", format_fib_prefix, &fib_entry->fe_prefix);
+
+    if (level >= FIB_ENTRY_FORMAT_DETAIL)
+    {
+        s = format (s, " fib:%d", fib_entry->fe_fib_index);
+        s = format (s, " index:%d", fib_entry_get_index(fib_entry));
+        s = format (s, " locks:%d", fib_entry->fe_node.fn_locks);
+
+        /* one section per source that has been added to the entry */
+        FOR_EACH_SRC_ADDED(fib_entry, src, source,
+        ({
+            s = format (s, "\n src:%s ",
+                        fib_source_names[source]);
+            s = fib_entry_src_format(fib_entry, source, s);
+            s = format (s, " refs:%d ", src->fes_ref_count);
+            if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) {
+                s = format(s, "flags:");
+                FOR_EACH_FIB_ATTRIBUTE(attr) {
+                    if ((1<<attr) & src->fes_entry_flags) {
+                        s = format (s, "%s,", fib_attribute_names[attr]);
+                    }
+                }
+            }
+            s = format (s, "\n");
+            if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+            {
+                s = fib_path_list_format(src->fes_pl, s);
+            }
+            if (NULL != src->fes_path_exts)
+            {
+                s = format(s, " Extensions:");
+                vec_foreach(path_ext, src->fes_path_exts)
+                {
+                    s = format(s, "\n %U", format_fib_path_ext, path_ext);
+                }
+            }
+        }));
+
+        n_covered = fib_entry_cover_get_size(fib_entry);
+        if (n_covered > 0) {
+            s = format(s, "\n tracking %d covered: ", n_covered);
+            s = fib_entry_cover_list_format(fib_entry, s);
+        }
+        s = fib_ae_import_format(fib_entry->fe_import, s);
+        s = fib_ae_export_format(fib_entry->fe_export, s);
+
+        s = format (s, "\n forwarding: ");
+    }
+    else
+    {
+        s = format (s, "\n");
+    }
+
+    fct = fib_entry_get_default_chain_type(fib_entry);
+
+    if (!dpo_id_is_valid(&fib_entry->fe_lb[fct]))
+    {
+        /* nothing to forward with on the default chain */
+        s = format (s, " UNRESOLVED\n");
+        return (s);
+    }
+    else
+    {
+        if (level >= FIB_ENTRY_FORMAT_DETAIL2)
+        {
+
+            FOR_EACH_FIB_FORW_CHAIN(fct)
+            {
+                s = format(s, " %U-chain\n %U",
+                           format_fib_forw_chain_type, fct,
+                           format_dpo_id,
+                           &fib_entry->fe_lb[fct],
+                           2);
+                s = format(s, "\n");
+            }
+        }
+        else
+        {
+            s = format(s, " %U-chain\n %U",
+                       format_fib_forw_chain_type, fct,
+                       format_dpo_id,
+                       &fib_entry->fe_lb[fct],
+                       2);
+            s = format(s, "\n");
+        }
+    }
+
+    if (level >= FIB_ENTRY_FORMAT_DETAIL2)
+    {
+        s = format(s, "\nchildren:");
+        s = fib_node_children_format(fib_entry->fe_node.fn_children, s);
+    }
+
+    return (s);
+}
+
+/**
+ * @brief Convert a fib_node_t back to the fib_entry_t it was cast from.
+ * In debug images the node's type is asserted to be an entry.
+ */
+static fib_entry_t*
+fib_entry_from_fib_node (fib_node_t *node)
+{
+#if CLIB_DEBUG > 0
+ ASSERT(FIB_NODE_TYPE_ENTRY == node->fn_type);
+#endif
+ return ((fib_entry_t*)node);
+}
+
+/**
+ * @brief The last lock on the entry has gone; destroy it.
+ * Installed as the fnv_last_lock function of the entry's vft.
+ */
+static void
+fib_entry_last_lock_gone (fib_node_t *node)
+{
+ fib_forward_chain_type_t fct;
+ fib_entry_t *fib_entry;
+
+ fib_entry = fib_entry_from_fib_node(node);
+
+ /* reset the DPO held for each of the forwarding chain types */
+ FOR_EACH_FIB_FORW_CHAIN(fct)
+ {
+ dpo_reset(&fib_entry->fe_lb[fct]);
+ }
+
+ FIB_ENTRY_DBG(fib_entry, "last-lock");
+
+ /* tear down the node, then return the entry to the pool */
+ fib_node_deinit(&fib_entry->fe_node);
+ // FIXME -RR Backwalk
+ pool_put(fib_entry_pool, fib_entry);
+}
+
+/**
+ * @brief Get the entry's best source; the first element of its vector of
+ * sources, or NULL if the entry has no sources.
+ */
+static fib_entry_src_t*
+fib_entry_get_best_src_i (const fib_entry_t *fib_entry)
+{
+    if (0 == vec_len(fib_entry->fe_srcs))
+    {
+        return (NULL);
+    }
+
+    /*
+     * the enum of sources is deliberately arranged in priority order
+     */
+    return (vec_elt_at_index(fib_entry->fe_srcs, 0));
+}
+
+/**
+ * @brief Get the source of an entry-source object; FIB_SOURCE_MAX
+ * if there is no object.
+ */
+static fib_source_t
+fib_entry_src_get_source (const fib_entry_src_t *esrc)
+{
+    if (NULL == esrc)
+    {
+        return (FIB_SOURCE_MAX);
+    }
+
+    return (esrc->fes_src);
+}
+
+/**
+ * @brief Get the entry flags of an entry-source object; FIB_ENTRY_FLAG_NONE
+ * if there is no object.
+ */
+static fib_entry_flag_t
+fib_entry_src_get_flags (const fib_entry_src_t *esrc)
+{
+    if (NULL == esrc)
+    {
+        return (FIB_ENTRY_FLAG_NONE);
+    }
+
+    return (esrc->fes_entry_flags);
+}
+
+/**
+ * Get the flags of the entry identified by its index.
+ *
+ * @param fib_entry_index index of the entry
+ * @return the value of fib_entry_get_flags_i() for that entry
+ */
+fib_entry_flag_t
+fib_entry_get_flags (fib_node_index_t fib_entry_index)
+{
+    fib_entry_t *fib_entry;
+
+    fib_entry = fib_entry_get(fib_entry_index);
+
+    return (fib_entry_get_flags_i(fib_entry));
+}
+
+/*
+ * fib_entry_back_walk_notify
+ *
+ * A back walk has reached this entry.
+ *
+ * For the evaluate, adj-update and interface up/down/delete reasons the
+ * entry's best source is re-activated. Unless the walk carries the
+ * adj-update reason, it is then reclassified as an evaluate and
+ * propagated on with fib_walk_sync(). Always returns
+ * FIB_NODE_BACK_WALK_CONTINUE.
+ */
+static fib_node_back_walk_rc_t
+fib_entry_back_walk_notify (fib_node_t *node,
+                            fib_node_back_walk_ctx_t *ctx)
+{
+    fib_entry_t *fib_entry = fib_entry_from_fib_node(node);
+
+    if (ctx->fnbw_reason & (FIB_NODE_BW_REASON_FLAG_EVALUATE |
+                            FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE |
+                            FIB_NODE_BW_REASON_FLAG_INTERFACE_UP |
+                            FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN |
+                            FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE))
+    {
+        fib_entry_src_action_reactivate(fib_entry,
+                                        fib_entry_get_best_source(
+                                            fib_entry_get_index(fib_entry)));
+    }
+
+    /*
+     * ADJ updates (complete<->incomplete) do not need to propagate to
+     * recursive entries. The only reason they are needed as far back as
+     * here is that the adj and the incomplete adj are different DPO types,
+     * so the LBs need to re-stack.
+     */
+    if (!(FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason))
+    {
+        /*
+         * all other walk types can be reclassified to a re-evaluate to
+         * all recursive dependents. By reclassifying we ensure that should
+         * any of these walk types meet they can be merged.
+         */
+        ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+        /*
+         * propagate the backwalk further if we haven't already reached
+         * the maximum depth.
+         */
+        fib_walk_sync(FIB_NODE_TYPE_ENTRY,
+                      fib_entry_get_index(fib_entry),
+                      ctx);
+    }
+
+    return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The FIB entry's graph node virtual function table. It is registered
+ * for FIB_NODE_TYPE_ENTRY nodes by fib_entry_module_init().
+ */
+static const fib_node_vft_t fib_entry_vft = {
+ .fnv_get = fib_entry_get_node,
+ .fnv_last_lock = fib_entry_last_lock_gone,
+ .fnv_back_walk = fib_entry_back_walk_notify,
+};
+
+/*
+ * fib_entry_contribute_forwarding
+ *
+ * Get the forwarding information (DPO) contributed by the FIB entry for
+ * the requested chain type and copy it into the caller's DPO with
+ * dpo_copy().
+ *
+ * The chain type is first passed through fib_entry_chain_type_fixup(); if
+ * the entry holds no valid DPO for the resulting type one is built on
+ * demand from the entry's best source.
+ */
+void
+fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index,
+                                 fib_forward_chain_type_t type,
+                                 dpo_id_t *dpo)
+{
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+    dpo_id_t *lb;
+
+    /*
+     * these are not the droids you are looking for...
+     */
+    type = fib_entry_chain_type_fixup(fib_entry, type);
+    lb = &fib_entry->fe_lb[type];
+
+    if (!dpo_id_is_valid(lb))
+    {
+        /*
+         * on-demand create eos/non-eos.
+         * There is no on-demand delete: leaving unrequired [n]eos LBs
+         * around wastes memory, but cleaning them up on the right trigger
+         * would need more code, so they are left in place.
+         */
+        fib_entry_src_mk_lb(fib_entry,
+                            fib_entry_get_best_src_i(fib_entry),
+                            type,
+                            lb);
+    }
+
+    dpo_copy(dpo, lb);
+}
+
+/**
+ * Get a pointer to the DPO the entry holds for its default chain type.
+ *
+ * No copy is made and nothing is created on demand; the returned pointer
+ * refers to the slot inside the entry itself.
+ *
+ * @param fib_entry_index index of the entry
+ * @return address of the entry's fe_lb slot for its default chain type
+ */
+const dpo_id_t *
+fib_entry_contribute_ip_forwarding (fib_node_index_t fib_entry_index)
+{
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+    const dpo_id_t *lb;
+
+    lb = &fib_entry->fe_lb[fib_entry_get_default_chain_type(fib_entry)];
+
+    return (lb);
+}
+
+/**
+ * Get the adjacency found in bucket 0 of the entry's IP forwarding.
+ *
+ * @param fib_entry_index index of the entry
+ * @return the bucket's DPO index if dpo_is_adj() accepts that DPO,
+ *         ADJ_INDEX_INVALID otherwise
+ */
+adj_index_t
+fib_entry_get_adj (fib_node_index_t fib_entry_index)
+{
+    const dpo_id_t *lb_dpo, *bucket;
+
+    lb_dpo = fib_entry_contribute_ip_forwarding(fib_entry_index);
+    bucket = load_balance_get_bucket(lb_dpo->dpoi_index, 0);
+
+    return (dpo_is_adj(bucket) ? bucket->dpoi_index : ADJ_INDEX_INVALID);
+}
+
+/**
+ * Get the index stored in the entry's fe_parent field (the path-list
+ * the entry is linked to).
+ *
+ * @param fib_entry_index index of the entry
+ * @return the entry's fe_parent
+ */
+fib_node_index_t
+fib_entry_get_path_list (fib_node_index_t fib_entry_index)
+{
+    return (fib_entry_get(fib_entry_index)->fe_parent);
+}
+
+u32
+fib_entry_get_fib_table_id(fib_node_index_t fib_entry_index)
+{
+ /*
+ * NOTE(review): placeholder. The entry index is ignored and 0 is always
+ * returned; no table-ID lookup is performed here.
+ */
+
+ return (0);
+}
+
+/**
+ * Add a child to the entry's graph node.
+ *
+ * Thin wrapper around fib_node_child_add() with the parent type fixed to
+ * FIB_NODE_TYPE_ENTRY.
+ *
+ * @param fib_entry_index index of the parent entry
+ * @param child_type      graph node type of the child
+ * @param child_index     index of the child
+ * @return the value returned by fib_node_child_add(); it is the handle
+ *         fib_entry_child_remove() takes as its sibling_index
+ */
+u32
+fib_entry_child_add (fib_node_index_t fib_entry_index,
+                     fib_node_type_t child_type,
+                     fib_node_index_t child_index)
+{
+    return (fib_node_child_add(FIB_NODE_TYPE_ENTRY,
+                               fib_entry_index,
+                               child_type,
+                               child_index));
+}
+
+/**
+ * Remove a child from the entry's graph node.
+ *
+ * Thin wrapper around fib_node_child_remove() with the parent type fixed
+ * to FIB_NODE_TYPE_ENTRY.
+ *
+ * @param fib_entry_index index of the parent entry
+ * @param sibling_index   handle identifying the child to remove
+ */
+void
+fib_entry_child_remove (fib_node_index_t fib_entry_index,
+                        u32 sibling_index)
+{
+    fib_node_child_remove(FIB_NODE_TYPE_ENTRY, fib_entry_index, sibling_index);
+}
+
+/**
+ * Allocate a FIB entry from the pool and give it its initial state.
+ *
+ * The entry is zeroed, its graph node initialised, the FIB index and
+ * prefix recorded, the export/import/covered indices set to invalid and
+ * the DPO of every forwarding chain type reset.
+ *
+ * @param fib_index       index of the FIB the entry belongs to
+ * @param prefix          the entry's prefix; copied into the entry
+ * @param fib_entry_index [out] receives the pool index of the new entry
+ * @return the new entry
+ */
+static fib_entry_t *
+fib_entry_alloc (u32 fib_index,
+                 const fib_prefix_t *prefix,
+                 fib_node_index_t *fib_entry_index)
+{
+    fib_forward_chain_type_t fct;
+    fib_entry_t *fib_entry;
+
+    pool_get(fib_entry_pool, fib_entry);
+    memset(fib_entry, 0, sizeof(*fib_entry));
+
+    fib_node_init(&fib_entry->fe_node, FIB_NODE_TYPE_ENTRY);
+
+    fib_entry->fe_fib_index = fib_index;
+    fib_entry->fe_prefix = *prefix;
+
+    if (FIB_PROTOCOL_MPLS == prefix->fp_proto)
+    {
+        /*
+         * MPLS prefixes always get a fixed length of 21
+         * (presumably 20 label bits + 1 EOS bit - TODO confirm)
+         */
+        fib_entry->fe_prefix.fp_len = 21;
+        ASSERT(DPO_PROTO_NONE != fib_entry->fe_prefix.fp_payload_proto);
+    }
+
+    fib_entry->fe_covered = FIB_NODE_INDEX_INVALID;
+    fib_entry->fe_import = FIB_NODE_INDEX_INVALID;
+    fib_entry->fe_export = FIB_NODE_INDEX_INVALID;
+
+    FOR_EACH_FIB_FORW_CHAIN(fct)
+    {
+        dpo_reset(&fib_entry->fe_lb[fct]);
+    }
+
+    *fib_entry_index = fib_entry_get_index(fib_entry);
+
+    FIB_ENTRY_DBG(fib_entry, "alloc");
+
+    return (fib_entry);
+}
+
+/**
+ * React to a change in the entry's flags.
+ *
+ * Compares the entry's current flags against old_flags and, when the
+ * IMPORT flag has been gained or lost, starts or purges the attached
+ * export import respectively. The ATTACHED flag transition is detected
+ * but not yet acted upon (FIXME).
+ *
+ * @param fib_entry the entry whose flags may have changed
+ * @param source    unused here
+ * @param old_flags the entry's flags before the change
+ */
+static void
+fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
+                                    fib_source_t source,
+                                    fib_entry_flag_t old_flags)
+{
+    int was_import, is_import;
+    int was_attached, is_attached;
+
+    /*
+     * handle changes to attached export for import entries
+     */
+    is_import = (FIB_ENTRY_FLAG_IMPORT & fib_entry_get_flags_i(fib_entry));
+    was_import = (FIB_ENTRY_FLAG_IMPORT & old_flags);
+
+    if (is_import && !was_import)
+    {
+        /*
+         * the entry has gained the import flag.
+         *
+         * there is an assumption here that the entry resolves via only
+         * one interface and that it is the cross VRF interface.
+         */
+        u32 sw_if_index;
+
+        sw_if_index =
+            fib_path_list_get_resolving_interface(fib_entry->fe_parent);
+
+        fib_attached_export_import(fib_entry,
+                                   fib_table_get_index_for_sw_if_index(
+                                       fib_entry_get_proto(fib_entry),
+                                       sw_if_index));
+    }
+    else if (was_import && !is_import)
+    {
+        /*
+         * the entry has lost the import flag.
+         */
+        fib_attached_export_purge(fib_entry);
+    }
+    /*
+     * else
+     *   no change. nothing to do.
+     */
+
+    /*
+     * handle changes to attached export for export entries
+     */
+    is_attached = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(fib_entry));
+    was_attached = (FIB_ENTRY_FLAG_ATTACHED & old_flags);
+
+    if (is_attached && !was_attached)
+    {
+        /*
+         * transition to attached. time to export
+         */
+        // FIXME
+    }
+    // else FIXME
+}
+
+/**
+ * Actions to take once a source's forwarding has been installed: run the
+ * flag-change handling, then tell the source it is installed.
+ *
+ * @param fib_entry the entry that was installed
+ * @param source    the source that was installed
+ * @param old_flags the entry's flags before the install
+ */
+static void
+fib_entry_post_install_actions (fib_entry_t *fib_entry,
+                                fib_source_t source,
+                                fib_entry_flag_t old_flags)
+{
+    /* flag transitions first ... */
+    fib_entry_post_flag_update_actions(fib_entry, source, old_flags);
+
+    /* ... then notify the source */
+    fib_entry_src_action_installed(fib_entry, source);
+}
+
+/**
+ * Create a new FIB entry from a set of paths.
+ *
+ * @param fib_index index of the FIB the entry belongs to
+ * @param prefix    the entry's prefix
+ * @param source    the source creating the entry
+ * @param flags     the flags the source contributes
+ * @param paths     vector of paths; must not be empty (asserted)
+ * @return the index of the new entry
+ */
+fib_node_index_t
+fib_entry_create (u32 fib_index,
+                  const fib_prefix_t *prefix,
+                  fib_source_t source,
+                  fib_entry_flag_t flags,
+                  const fib_route_path_t *paths)
+{
+    fib_node_index_t fib_entry_index;
+    fib_entry_t *fib_entry;
+
+    ASSERT(0 < vec_len(paths));
+
+    fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index);
+
+    /*
+     * since this is a new entry create, we don't need to check for winning
+     * sources - there is only one. The source is added with the drop DPO
+     * of the entry's protocol, then given the paths.
+     */
+    fib_entry = fib_entry_src_action_add(
+        fib_entry, source, flags,
+        drop_dpo_get(fib_proto_to_dpo(fib_entry_get_proto(fib_entry))));
+
+    fib_entry_src_action_path_swap(fib_entry, source, flags, paths);
+
+    /*
+     * handle possible realloc's by refetching the pointer
+     */
+    fib_entry = fib_entry_get(fib_entry_index);
+
+    fib_entry_src_action_activate(fib_entry, source);
+    fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE);
+
+    return (fib_entry_index);
+}
+
+/**
+ * Create a new FIB entry whose source contributes a DPO directly rather
+ * than a set of paths.
+ *
+ * @param fib_index index of the FIB the entry belongs to
+ * @param prefix    the entry's prefix
+ * @param source    the source creating the entry
+ * @param flags     the flags the source contributes
+ * @param dpo       the DPO handed to the source when it is added
+ * @return the index of the new entry
+ */
+fib_node_index_t
+fib_entry_create_special (u32 fib_index,
+                          const fib_prefix_t *prefix,
+                          fib_source_t source,
+                          fib_entry_flag_t flags,
+                          const dpo_id_t *dpo)
+{
+    fib_node_index_t fib_entry_index;
+    fib_entry_t *fib_entry;
+
+    /*
+     * create and initialise the new entry
+     */
+    fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index);
+
+    /*
+     * add the source with its DPO, then make it the active one
+     */
+    fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo);
+    fib_entry_src_action_activate(fib_entry, source);
+
+    fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE);
+
+    return (fib_entry_index);
+}
+
+/**
+ * Actions to take after an existing entry's forwarding has been updated:
+ * back-walk the entry's children with an evaluate reason, notify covered
+ * prefixes, then run the post-install actions.
+ *
+ * @param fib_entry the entry that was updated
+ * @param source    the source now installed
+ * @param old_flags the entry's flags before the update
+ */
+static void
+fib_entry_post_update_actions (fib_entry_t *fib_entry,
+                               fib_source_t source,
+                               fib_entry_flag_t old_flags)
+{
+    fib_node_back_walk_ctx_t bw_ctx = {
+        .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+    };
+    fib_node_index_t fib_entry_index = fib_entry_get_index(fib_entry);
+
+    /*
+     * backwalk to children to inform them of the change to forwarding.
+     */
+    fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx);
+
+    /*
+     * then inform any covered prefixes
+     */
+    fib_entry_cover_update_notify(fib_entry);
+
+    fib_entry_post_install_actions(fib_entry, source, old_flags);
+}
+
+/**
+ * Add (or update) a special source, one that contributes a DPO directly,
+ * on an existing entry.
+ *
+ * The source is always recorded on the entry. Forwarding is re-installed
+ * only if the source is at least as good as the entry's best source
+ * before the add; a numerically lower source value is a higher priority.
+ *
+ * @param fib_entry_index index of the entry
+ * @param source          the source being added
+ * @param flags           the flags the source contributes
+ * @param dpo             the DPO the source contributes
+ */
+void
+fib_entry_special_add (fib_node_index_t fib_entry_index,
+                       fib_source_t source,
+                       fib_entry_flag_t flags,
+                       const dpo_id_t *dpo)
+{
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+    fib_entry_src_t *bsrc = fib_entry_get_best_src_i(fib_entry);
+    fib_source_t best_source = fib_entry_src_get_source(bsrc);
+    fib_entry_flag_t bflags = fib_entry_src_get_flags(bsrc);
+
+    fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo);
+
+    if (source > best_source)
+    {
+        /*
+         * the new source loses. nothing to do here.
+         * the data from the source is saved on the entry.
+         */
+        return;
+    }
+
+    /*
+     * Either the new source beats the previous best (source < best_source)
+     * or it is the previous best and its contribution has been updated.
+     * In both cases the previous best is deactivated and the new source
+     * activated; when the two are equal that is a deactivate/activate of
+     * the same source.
+     */
+    fib_entry_src_action_deactivate(fib_entry, best_source);
+    fib_entry_src_action_activate(fib_entry, source);
+
+    fib_entry_post_update_actions(fib_entry, source, bflags);
+}
+
+/**
+ * Add one path, contributed by the given source, to an existing entry.
+ *
+ * The path is always recorded against the source. Forwarding is
+ * re-installed only if the source is at least as good as the entry's
+ * best source before the add; a numerically lower source value is a
+ * higher priority.
+ *
+ * @param fib_entry_index index of the entry
+ * @param source          the source adding the path
+ * @param flags           the flags the source contributes
+ * @param rpath           vector holding exactly one path (asserted)
+ */
+void
+fib_entry_path_add (fib_node_index_t fib_entry_index,
+                    fib_source_t source,
+                    fib_entry_flag_t flags,
+                    const fib_route_path_t *rpath)
+{
+    fib_entry_src_t *bsrc;
+    fib_entry_t *fib_entry;
+    fib_entry_flag_t bflags;
+    fib_source_t best_source;
+
+    ASSERT(1 == vec_len(rpath));
+
+    fib_entry = fib_entry_get(fib_entry_index);
+    ASSERT(NULL != fib_entry);
+
+    /* remember who was best, and with what flags, before the add */
+    bsrc = fib_entry_get_best_src_i(fib_entry);
+    best_source = fib_entry_src_get_source(bsrc);
+    bflags = fib_entry_src_get_flags(bsrc);
+
+    fib_entry = fib_entry_src_action_path_add(fib_entry, source, flags, rpath);
+
+    if (source > best_source)
+    {
+        /*
+         * the new source loses. nothing to do here.
+         * the data from the source is saved in the path-list created
+         */
+        return;
+    }
+
+    /*
+     * Either the new source beats the previous best (source < best_source)
+     * or it is the previous best and its path-list has been updated.
+     * In both cases the previous best is deactivated and the new source
+     * activated; when the two are equal that is a deactivate/activate of
+     * the same source.
+     */
+    fib_entry_src_action_deactivate(fib_entry, best_source);
+    fib_entry_src_action_activate(fib_entry, source);
+
+    fib_entry_post_update_actions(fib_entry, source, bflags);
+}
+
+/*
+ * fib_entry_path_remove
+ *
+ * Remove a path, contributed by the given source, from the entry.
+ * rpath must be a vector holding exactly one path (asserted).
+ *
+ * Returns FIB_ENTRY_SRC_FLAG_NONE when the removal leaves the entry with
+ * no sources at all (its forwarding is then uninstalled), and
+ * FIB_ENTRY_SRC_FLAG_ADDED in every other case.
+ */
+fib_entry_src_flag_t
+fib_entry_path_remove (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const fib_route_path_t *rpath)
+{
+ fib_entry_src_flag_t sflag;
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ ASSERT(1 == vec_len(rpath));
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ ASSERT(NULL != fib_entry);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ sflag = fib_entry_src_action_path_remove(fib_entry, source, rpath);
+
+ /*
+ * compare the source the path was removed from against the source
+ * that was the entry's best before the removal. a numerically lower
+ * source value is a higher priority.
+ */
+ if (source < best_source )
+ {
+ /*
+ * Que! removing a path from a source that is better than the
+ * one this entry is using.
+ * NOTE(review): if ASSERT compiles to nothing in some images,
+ * execution continues to the post-update actions below - confirm
+ * that is intended.
+ */
+ ASSERT(0);
+ }
+ else if (source > best_source )
+ {
+ /*
+ * the source is not the best. nothing to do.
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else
+ {
+ /*
+ * removing a path from the path-list we were using.
+ */
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag))
+ {
+ /*
+ * the last path from the source was removed.
+ * fallback to lower source
+ */
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+
+ if (FIB_SOURCE_MAX == best_source) {
+ /*
+ * no more sources left. this entry is toast.
+ */
+ fib_entry_src_action_uninstall(fib_entry);
+ fib_entry_post_flag_update_actions(fib_entry, source, bflags);
+
+ return (FIB_ENTRY_SRC_FLAG_NONE);
+ }
+ else
+ {
+ fib_entry_src_action_activate(fib_entry, best_source);
+ source = best_source;
+ }
+ }
+ else
+ {
+ /*
+ * re-install the new forwarding information
+ */
+ fib_entry_src_action_deactivate(fib_entry, source);
+ fib_entry_src_action_activate(fib_entry, source);
+ }
+ }
+
+ fib_entry_post_update_actions(fib_entry, source, bflags);
+
+ /*
+ * still have sources
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+}
+
+/*
+ * fib_entry_special_remove
+ *
+ * Remove a special source from the entry.
+ *
+ * Returns FIB_ENTRY_SRC_FLAG_NONE when the removal leaves the entry with
+ * no sources at all (its forwarding is then uninstalled), and
+ * FIB_ENTRY_SRC_FLAG_ADDED in every other case.
+ */
+fib_entry_src_flag_t
+fib_entry_special_remove (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_src_flag_t sflag;
+ fib_source_t best_source;
+ fib_entry_flag_t bflags;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ ASSERT(NULL != fib_entry);
+
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ sflag = fib_entry_src_action_remove(fib_entry, source);
+
+ /*
+ * compare the removed source against the source that was the entry's
+ * best before the removal. a numerically lower source value is a
+ * higher priority.
+ */
+ if (source < best_source )
+ {
+ /*
+ * Que! removing a path from a source that is better than the
+ * one this entry is using. This can only mean it is a source
+ * this prefix does not have.
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else if (source > best_source ) {
+ /*
+ * the source is not the best. nothing to do.
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+ }
+ else
+ {
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag))
+ {
+ /*
+ * the source was removed. use the next best.
+ */
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+
+ if (FIB_SOURCE_MAX == best_source) {
+ /*
+ * no more sources left. this entry is toast.
+ */
+ fib_entry_src_action_uninstall(fib_entry);
+ fib_entry_post_flag_update_actions(fib_entry, source, bflags);
+
+ return (FIB_ENTRY_SRC_FLAG_NONE);
+ }
+ else
+ {
+ fib_entry_src_action_activate(fib_entry, best_source);
+ source = best_source;
+ }
+ }
+ else
+ {
+ /*
+ * the source is still present on the entry.
+ * re-install the new forwarding information
+ */
+ fib_entry_src_action_reactivate(fib_entry, source);
+ }
+ }
+
+ fib_entry_post_update_actions(fib_entry, source, bflags);
+
+ /*
+ * still have sources
+ */
+ return (FIB_ENTRY_SRC_FLAG_ADDED);
+}
+
+/**
+ * fib_entry_delete
+ *
+ * The source is withdrawing all the paths it provided. This is handled
+ * exactly as the removal of a special source.
+ *
+ * @return the result of fib_entry_special_remove() for the same arguments
+ */
+fib_entry_src_flag_t
+fib_entry_delete (fib_node_index_t fib_entry_index,
+                  fib_source_t source)
+{
+    fib_entry_src_flag_t sflag;
+
+    sflag = fib_entry_special_remove(fib_entry_index, source);
+
+    return (sflag);
+}
+
+/**
+ * fib_entry_update
+ *
+ * The source has provided a new set of paths that will replace the old.
+ *
+ * The swap is always applied to the source. Forwarding is re-installed
+ * only if the source is at least as good as the entry's best source
+ * before the swap; a numerically lower source value is a higher priority.
+ */
+void
+fib_entry_update (fib_node_index_t fib_entry_index,
+                  fib_source_t source,
+                  fib_entry_flag_t flags,
+                  const fib_route_path_t *paths)
+{
+    fib_entry_src_t *bsrc;
+    fib_entry_t *fib_entry;
+    fib_entry_flag_t bflags;
+    fib_source_t best_source;
+
+    fib_entry = fib_entry_get(fib_entry_index);
+    ASSERT(NULL != fib_entry);
+
+    /* remember who was best, and with what flags, before the swap */
+    bsrc = fib_entry_get_best_src_i(fib_entry);
+    best_source = fib_entry_src_get_source(bsrc);
+    bflags = fib_entry_src_get_flags(bsrc);
+
+    fib_entry_src_action_path_swap(fib_entry, source, flags, paths);
+
+    /*
+     * handle possible realloc's by refetching the pointer
+     */
+    fib_entry = fib_entry_get(fib_entry_index);
+
+    if (source > best_source)
+    {
+        /*
+         * the new source loses. nothing to do here.
+         * the data from the source is saved in the path-list created
+         */
+        return;
+    }
+
+    /*
+     * Either the new source beats the previous best (source < best_source)
+     * or it is the previous best and its path-list has been replaced.
+     * In both cases the previous best is deactivated and the new source
+     * activated; when the two are equal that is a deactivate/activate of
+     * the same source.
+     */
+    fib_entry_src_action_deactivate(fib_entry, best_source);
+    fib_entry_src_action_activate(fib_entry, source);
+
+    fib_entry_post_update_actions(fib_entry, source, bflags);
+}
+
+
+/*
+ * fib_entry_cover_changed
+ *
+ * this entry is tracking its cover and that cover has changed.
+ *
+ * Every added source is told of the change. Only the result reported by
+ * the first source visited (the best) decides whether the entry is
+ * re-installed or uninstalled, and whether a back-walk is started.
+ */
+void
+fib_entry_cover_changed (fib_node_index_t fib_entry_index)
+{
+    fib_entry_src_cover_res_t res = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+    fib_source_t best_source = FIB_SOURCE_FIRST;
+    fib_entry_flag_t bflags = FIB_ENTRY_FLAG_NONE;
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+    fib_entry_src_t *esrc;
+    fib_source_t source;
+    u32 n_visited = 0;
+
+    fib_attached_export_cover_change(fib_entry);
+
+    /*
+     * propagate the notification to each of the added sources
+     */
+    FOR_EACH_SRC_ADDED(fib_entry, esrc, source,
+    ({
+        fib_entry_src_cover_res_t src_res;
+
+        src_res = fib_entry_src_action_cover_change(fib_entry, source);
+
+        if (0 == n_visited)
+        {
+            /*
+             * only the best source gets to set the back walk flags
+             */
+            res = src_res;
+            bflags = fib_entry_src_get_flags(esrc);
+            best_source = fib_entry_src_get_source(esrc);
+        }
+        n_visited++;
+    }));
+
+    if (!res.install)
+    {
+        fib_entry_src_action_uninstall(fib_entry);
+    }
+    else
+    {
+        fib_entry_src_action_reactivate(fib_entry,
+                                        fib_entry_src_get_source(
+                                            fib_entry_get_best_src_i(fib_entry)));
+        fib_entry_post_install_actions(fib_entry, best_source, bflags);
+    }
+
+    if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason)
+    {
+        /*
+         * time for walkies fido.
+         */
+        fib_node_back_walk_ctx_t bw_ctx = {
+            .fnbw_reason = res.bw_reason,
+        };
+
+        fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx);
+    }
+}
+
+/*
+ * fib_entry_cover_updated
+ *
+ * this entry is tracking its cover and that cover has been updated
+ * (i.e. its forwarding information has changed).
+ *
+ * Every added source is told of the update. Only the result reported by
+ * the first source visited (the best) decides whether the entry is
+ * re-installed or uninstalled, and whether a back-walk is started.
+ */
+void
+fib_entry_cover_updated (fib_node_index_t fib_entry_index)
+{
+    fib_entry_src_cover_res_t res = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+    fib_source_t best_source = FIB_SOURCE_FIRST;
+    fib_entry_flag_t bflags = FIB_ENTRY_FLAG_NONE;
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+    fib_entry_src_t *esrc;
+    fib_source_t source;
+    u32 n_visited = 0;
+
+    fib_attached_export_cover_update(fib_entry);
+
+    /*
+     * propagate the notification to each of the added sources
+     */
+    FOR_EACH_SRC_ADDED(fib_entry, esrc, source,
+    ({
+        fib_entry_src_cover_res_t src_res;
+
+        src_res = fib_entry_src_action_cover_update(fib_entry, source);
+
+        if (0 == n_visited)
+        {
+            /*
+             * only the best source gets to set the back walk flags
+             */
+            res = src_res;
+            bflags = fib_entry_src_get_flags(esrc);
+            best_source = fib_entry_src_get_source(esrc);
+        }
+        n_visited++;
+    }));
+
+    if (!res.install)
+    {
+        fib_entry_src_action_uninstall(fib_entry);
+    }
+    else
+    {
+        fib_entry_src_action_reactivate(fib_entry,
+                                        fib_entry_src_get_source(
+                                            fib_entry_get_best_src_i(fib_entry)));
+        fib_entry_post_install_actions(fib_entry, best_source, bflags);
+    }
+
+    if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason)
+    {
+        /*
+         * time for walkies fido.
+         */
+        fib_node_back_walk_ctx_t bw_ctx = {
+            .fnbw_reason = res.bw_reason,
+        };
+
+        fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx);
+    }
+}
+
+/**
+ * Run recursive-loop detection through this entry.
+ *
+ * The entry's index is appended to the vector of visited entries and
+ * detection continues into the entry's path-list. If the path-list's
+ * looped state differs before and after detection, every forwarding
+ * chain the entry holds a valid DPO for is rebuilt in place.
+ *
+ * @param entry_index    index of the entry
+ * @param entry_indicies [in/out] vector of entry indices visited so far;
+ *                       the pointer is written back as the vector may move
+ * @return the path-list's loop-detect result, or 0 if the entry is not
+ *         linked to a path-list
+ */
+int
+fib_entry_recursive_loop_detect (fib_node_index_t entry_index,
+                                 fib_node_index_t **entry_indicies)
+{
+    fib_entry_t *fib_entry = fib_entry_get(entry_index);
+    fib_forward_chain_type_t fct;
+    fib_node_index_t *entries;
+    int was_looped, is_looped;
+
+    if (FIB_NODE_INDEX_INVALID == fib_entry->fe_parent)
+    {
+        /*
+         * the entry is currently not linked to a path-list. this happens
+         * when it is this entry that is re-linking path-lists and has thus
+         * broken the loop
+         */
+        return (0);
+    }
+
+    entries = *entry_indicies;
+    vec_add1(entries, entry_index);
+
+    was_looped = fib_path_list_is_looped(fib_entry->fe_parent);
+    is_looped = fib_path_list_recursive_loop_detect(fib_entry->fe_parent,
+                                                    &entries);
+
+    *entry_indicies = entries;
+
+    if (!!was_looped != !!is_looped)
+    {
+        /*
+         * re-evaluate all the entry's forwarding
+         * NOTE: this is an inplace modify
+         */
+        FOR_EACH_FIB_FORW_CHAIN(fct)
+        {
+            if (dpo_id_is_valid(&fib_entry->fe_lb[fct]))
+            {
+                fib_entry_src_mk_lb(fib_entry,
+                                    fib_entry_get_best_src_i(fib_entry),
+                                    fct,
+                                    &fib_entry->fe_lb[fct]);
+            }
+        }
+    }
+
+    return (is_looped);
+}
+
+/**
+ * Get the interface the entry resolves through, as reported by the
+ * path-list the entry is linked to (fe_parent).
+ *
+ * @param entry_index index of the entry
+ * @return the result of fib_path_list_get_resolving_interface()
+ */
+u32
+fib_entry_get_resolving_interface (fib_node_index_t entry_index)
+{
+    fib_entry_t *fib_entry = fib_entry_get(entry_index);
+    fib_node_index_t path_list_index = fib_entry->fe_parent;
+
+    return (fib_path_list_get_resolving_interface(path_list_index));
+}
+
+/**
+ * Get the best source of the entry identified by its index.
+ *
+ * @param entry_index index of the entry
+ * @return the best source, or FIB_SOURCE_MAX if the entry has no sources
+ */
+fib_source_t
+fib_entry_get_best_source (fib_node_index_t entry_index)
+{
+    fib_entry_t *fib_entry = fib_entry_get(entry_index);
+
+    return (fib_entry_src_get_source(fib_entry_get_best_src_i(fib_entry)));
+}
+
+/**
+ * Three-way compare of two IPv4 addresses in host byte order.
+ *
+ * IP addresses are unsigned ints while the return value needs to be
+ * signed, so a simple subtraction won't cut it. Equal addresses return 0
+ * so that the comparator is consistent (cmp(a,b) == -cmp(b,a)) and so
+ * that callers can fall back to a secondary key on a tie.
+ *
+ * @param a1 first address, in network byte order
+ * @param a2 second address, in network byte order
+ * @return 1 if a1 > a2, -1 if a1 < a2, 0 if they are equal
+ */
+static int
+fib_ip4_address_compare (ip4_address_t * a1,
+                         ip4_address_t * a2)
+{
+    u32 addr1 = clib_net_to_host_u32(a1->data_u32);
+    u32 addr2 = clib_net_to_host_u32(a2->data_u32);
+
+    return ((addr1 > addr2) - (addr1 < addr2));
+}
+
+/**
+ * Three-way compare of two IPv6 addresses, one 16-bit group at a time,
+ * each group converted to host byte order before comparison.
+ *
+ * @param a1 first address
+ * @param a2 second address
+ * @return the difference of the first pair of groups that differ,
+ *         0 if all groups are equal
+ */
+static int
+fib_ip6_address_compare (ip6_address_t * a1,
+                         ip6_address_t * a2)
+{
+    int group;
+
+    for (group = 0; group < ARRAY_LEN (a1->as_u16); group++)
+    {
+        int diff = (clib_net_to_host_u16 (a1->as_u16[group]) -
+                    clib_net_to_host_u16 (a2->as_u16[group]));
+
+        if (0 != diff)
+        {
+            return diff;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Compare the prefixes of two FIB entries.
+ *
+ * The comparison is chosen by the protocol of the first entry's prefix:
+ * address for IPv4/IPv6; label, then EOS bit, for MPLS. When that
+ * comparison reports equality the prefix lengths decide.
+ *
+ * @param fib_entry_index1 index of the first entry
+ * @param fib_entry_index2 index of the second entry
+ * @return <0, 0 or >0
+ */
+static int
+fib_entry_cmp (fib_node_index_t fib_entry_index1,
+               fib_node_index_t fib_entry_index2)
+{
+    fib_prefix_t *pfx1, *pfx2;
+    int cmp = 0;
+
+    pfx1 = &fib_entry_get(fib_entry_index1)->fe_prefix;
+    pfx2 = &fib_entry_get(fib_entry_index2)->fe_prefix;
+
+    switch (pfx1->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        cmp = fib_ip4_address_compare(&pfx1->fp_addr.ip4,
+                                      &pfx2->fp_addr.ip4);
+        break;
+    case FIB_PROTOCOL_IP6:
+        cmp = fib_ip6_address_compare(&pfx1->fp_addr.ip6,
+                                      &pfx2->fp_addr.ip6);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        cmp = (pfx1->fp_label - pfx2->fp_label);
+
+        if (0 == cmp)
+        {
+            cmp = (pfx1->fp_eos - pfx2->fp_eos);
+        }
+        break;
+    }
+
+    /* tie-break on the prefix length */
+    if (0 == cmp)
+    {
+        cmp = (pfx1->fp_len - pfx2->fp_len);
+    }
+
+    return (cmp);
+}
+
+/**
+ * Sort-callback adaptor for fib_entry_cmp().
+ *
+ * @param i1 pointer to the first fib_node_index_t
+ * @param i2 pointer to the second fib_node_index_t
+ * @return the result of fib_entry_cmp() on the two indices
+ */
+int
+fib_entry_cmp_for_sort (void *i1, void *i2)
+{
+    fib_node_index_t *lhs = i1;
+    fib_node_index_t *rhs = i2;
+
+    return (fib_entry_cmp(*lhs, *rhs));
+}
+
+/**
+ * Take a lock on the entry's graph node.
+ *
+ * @param fib_entry_index index of the entry to lock
+ */
+void
+fib_entry_lock (fib_node_index_t fib_entry_index)
+{
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+
+    fib_node_lock(&fib_entry->fe_node);
+}
+
+/**
+ * Release a lock on the entry's graph node.
+ *
+ * @param fib_entry_index index of the entry to unlock
+ */
+void
+fib_entry_unlock (fib_node_index_t fib_entry_index)
+{
+    fib_entry_t *fib_entry = fib_entry_get(fib_entry_index);
+
+    fib_node_unlock(&fib_entry->fe_node);
+}
+
+/**
+ * Module initialisation: register the FIB entry virtual function table
+ * for graph nodes of type FIB_NODE_TYPE_ENTRY.
+ */
+void
+fib_entry_module_init (void)
+{
+    fib_node_register_type(FIB_NODE_TYPE_ENTRY, &fib_entry_vft);
+}
+
+/**
+ * Copy the entry's prefix into caller-provided storage.
+ *
+ * @param fib_entry_index index of the entry
+ * @param pfx             [out] receives a copy of the entry's prefix
+ */
+void
+fib_entry_get_prefix (fib_node_index_t fib_entry_index,
+                      fib_prefix_t *pfx)
+{
+    *pfx = fib_entry_get(fib_entry_index)->fe_prefix;
+}
+
+/**
+ * Get the index of the FIB the entry was allocated for.
+ *
+ * @param fib_entry_index index of the entry
+ * @return the entry's fe_fib_index
+ */
+u32
+fib_entry_get_fib_index (fib_node_index_t fib_entry_index)
+{
+    return (fib_entry_get(fib_entry_index)->fe_fib_index);
+}
+
+/**
+ * Get the number of elements in the FIB entry pool, as reported by
+ * pool_elts().
+ */
+u32
+fib_entry_pool_size (void)
+{
+    u32 n_entries = pool_elts(fib_entry_pool);
+
+    return (n_entries);
+}
+
+/**
+ * CLI handler for "show fib entry".
+ *
+ * With a numeric argument the entry at that pool index is shown in
+ * detail, or reported as invalid if the index is free. Without an
+ * argument every entry in the pool is listed in brief.
+ *
+ * @return always NULL
+ */
+static clib_error_t *
+show_fib_entry_command (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+    fib_node_index_t fei;
+
+    if (!unformat (input, "%d", &fei))
+    {
+        /*
+         * show all
+         */
+        vlib_cli_output (vm, "FIB Entries:");
+        pool_foreach_index(fei, fib_entry_pool,
+        ({
+            vlib_cli_output (vm, "%d@%U",
+                             fei,
+                             format_fib_entry, fei,
+                             FIB_ENTRY_FORMAT_BRIEF);
+        }));
+
+        return (NULL);
+    }
+
+    /*
+     * show one in detail
+     */
+    if (pool_is_free_index(fib_entry_pool, fei))
+    {
+        vlib_cli_output (vm, "entry %d invalid", fei);
+    }
+    else
+    {
+        vlib_cli_output (vm, "%d@%U",
+                         fei,
+                         format_fib_entry, fei,
+                         FIB_ENTRY_FORMAT_DETAIL2);
+    }
+
+    return (NULL);
+}
+
+/*
+ * CLI registration for "show fib entry". The handler accepts an optional
+ * numeric entry index; the short help advertises it.
+ */
+VLIB_CLI_COMMAND (show_fib_entry, static) = {
+    .path = "show fib entry",
+    .function = show_fib_entry_command,
+    .short_help = "show fib entry [<index>]",
+};
diff --git a/vnet/vnet/fib/fib_entry.h b/vnet/vnet/fib/fib_entry.h
new file mode 100644
index 00000000000..ac22c170d55
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry.h
@@ -0,0 +1,514 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_H__
+#define __FIB_ENTRY_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * The different sources that can create a route.
+ * The sources are defined here in their relative priority order.
+ * The lower the value the higher the priority
+ */
+typedef enum fib_source_t_ {
+ /**
+ * Marker. Add new values after this one.
+ */
+ FIB_SOURCE_FIRST,
+ /**
+ * Special sources. These are for entries that are added to all
+ * FIBs by default, and should never be over-ridden (hence they
+ * are the highest priority)
+ */
+ FIB_SOURCE_SPECIAL = FIB_SOURCE_FIRST,
+ /**
+ * Classify. A route that links directly to a classify adj
+ */
+ FIB_SOURCE_CLASSIFY,
+ /**
+ * Route added as a result of interface configuration.
+ * This will also come from the API/CLI, but the distinction is
+ * that it is from configuration on an interface, not an 'ip route' command
+ */
+ FIB_SOURCE_INTERFACE,
+ /**
+ * A high priority source a plugin can use
+ */
+ FIB_SOURCE_PLUGIN_HI,
+ /**
+ * From the control plane API
+ */
+ FIB_SOURCE_API,
+ /**
+ * From the CLI.
+ */
+ FIB_SOURCE_CLI,
+ /**
+ * LISP
+ */
+ FIB_SOURCE_LISP,
+ /**
+ * SRv6
+ */
+ FIB_SOURCE_SR,
+ /**
+ * IPv[46] Mapping
+ */
+ FIB_SOURCE_MAP,
+ /**
+ * SIXRD
+ */
+ FIB_SOURCE_SIXRD,
+ /**
+ * DHCP
+ */
+ FIB_SOURCE_DHCP,
+ /**
+ * Adjacency source.
+ * routes created as a result of ARP/ND entries. This is lower priority
+ * than the API/CLI. This is on purpose. trust me.
+ */
+ FIB_SOURCE_ADJ,
+ /**
+ * MPLS label. The prefix has been assigned a local label. This source
+ * never provides forwarding information, instead it acts as a place-holder
+ * so the association of label to prefix can be maintained
+ */
+ FIB_SOURCE_MPLS,
+ /**
+ * Attached Export source.
+ * routes created as a result of attached export. routes thus sourced
+ * will be present in the export tables
+ */
+ FIB_SOURCE_AE,
+ /**
+ * Recursive resolution source.
+ * Used to install an entry that is the resolution target of another.
+ */
+ FIB_SOURCE_RR,
+ /**
+ * The default route source.
+ * The default route is always added to the FIB table (like the
+ * special sources) but we need to be able to over-ride it with
+ * 'ip route' sources when provided
+ */
+ FIB_SOURCE_DEFAULT_ROUTE,
+ /**
+ * Marker. add new entries before this one.
+ */
+ FIB_SOURCE_LAST = FIB_SOURCE_DEFAULT_ROUTE,
+} __attribute__ ((packed)) fib_source_t;
+
+_Static_assert (sizeof(fib_source_t) == 1,
+ "FIB too many sources");
+
+/**
+ * The maximum number of sources
+ */
+#define FIB_SOURCE_MAX (FIB_SOURCE_LAST+1)
+
+/**
+ * Human readable names for the fib_source_t values, indexed by source and
+ * listed in enum order. Every enumerator needs an entry here; a source
+ * with no entry leaves a NULL slot in any array initialised from this macro.
+ */
+#define FIB_SOURCES {                                   \
+    [FIB_SOURCE_SPECIAL] = "special",                   \
+    [FIB_SOURCE_CLASSIFY] = "classify",                 \
+    [FIB_SOURCE_INTERFACE] = "interface",               \
+    [FIB_SOURCE_PLUGIN_HI] = "plugin-hi",               \
+    [FIB_SOURCE_API] = "API",                           \
+    [FIB_SOURCE_CLI] = "CLI",                           \
+    [FIB_SOURCE_LISP] = "LISP",                         \
+    [FIB_SOURCE_SR] = "SR",                             \
+    [FIB_SOURCE_MAP] = "MAP",                           \
+    [FIB_SOURCE_SIXRD] = "SixRD",                       \
+    [FIB_SOURCE_DHCP] = "DHCP",                         \
+    [FIB_SOURCE_ADJ] = "adjacency",                     \
+    [FIB_SOURCE_MPLS] = "mpls",                         \
+    [FIB_SOURCE_AE] = "attached_export",                \
+    [FIB_SOURCE_RR] = "recursive-resolution",           \
+    [FIB_SOURCE_DEFAULT_ROUTE] = "default-route",       \
+}
+
+#define FOR_EACH_FIB_SOURCE(_item) \
+ for (_item = FIB_SOURCE_FIRST; _item < FIB_SOURCE_MAX; _item++)
+
+/**
+ * The different attributes a FIB entry can have.
+ * Each attribute is a bit position; the corresponding fib_entry_flag_t
+ * value is (1 << attribute).
+ */
+typedef enum fib_entry_attribute_t_ {
+ /**
+ * Marker. Add new values after this one.
+ */
+ FIB_ENTRY_ATTRIBUTE_FIRST,
+ /**
+ * Connected. The prefix is configured on an interface.
+ */
+ FIB_ENTRY_ATTRIBUTE_CONNECTED = FIB_ENTRY_ATTRIBUTE_FIRST,
+ /**
+ * Attached. The prefix is attached to an interface.
+ */
+ FIB_ENTRY_ATTRIBUTE_ATTACHED,
+ /**
+ * The route is an explicit drop.
+ */
+ FIB_ENTRY_ATTRIBUTE_DROP,
+ /**
+ * The route is exclusive. The client creating the route is
+ * providing an exclusive adjacency.
+ */
+ FIB_ENTRY_ATTRIBUTE_EXCLUSIVE,
+ /**
+ * The route is attached cross tables and thus imports covered
+ * prefixes from the other table.
+ */
+ FIB_ENTRY_ATTRIBUTE_IMPORT,
+ /**
+ * The prefix/address is local to this device
+ */
+ FIB_ENTRY_ATTRIBUTE_LOCAL,
+ /**
+ * Marker. add new entries before this one.
+ */
+ FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_LOCAL,
+} fib_entry_attribute_t;
+
+/**
+ * The maximum number of attributes
+ */
+#define FIB_ENTRY_ATTRIBUTE_MAX (FIB_ENTRY_ATTRIBUTE_LAST+1)
+
+#define FIB_ENTRY_ATTRIBUTES { \
+ [FIB_ENTRY_ATTRIBUTE_CONNECTED] = "connected", \
+ [FIB_ENTRY_ATTRIBUTE_ATTACHED] = "attached", \
+ [FIB_ENTRY_ATTRIBUTE_IMPORT] = "import", \
+ [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \
+ [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \
+ [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \
+}
+
+#define FOR_EACH_FIB_ATTRIBUTE(_item) \
+ for (_item = FIB_ENTRY_ATTRIBUTE_FIRST; \
+ _item < FIB_ENTRY_ATTRIBUTE_MAX; \
+ _item++)
+
+typedef enum fib_entry_flag_t_ {
+ FIB_ENTRY_FLAG_NONE = 0,
+ FIB_ENTRY_FLAG_CONNECTED = (1 << FIB_ENTRY_ATTRIBUTE_CONNECTED),
+ FIB_ENTRY_FLAG_ATTACHED = (1 << FIB_ENTRY_ATTRIBUTE_ATTACHED),
+ FIB_ENTRY_FLAG_DROP = (1 << FIB_ENTRY_ATTRIBUTE_DROP),
+ FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE),
+ FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL),
+ FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT),
+} fib_entry_flag_t;
+
+/**
+ * Flags for the source data
+ */
+typedef enum fib_entry_src_attribute_t_ {
+ /**
+ * Marker. Add new values after this one.
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_FIRST,
+ /**
+ * the source has been added to the entry
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_ADDED = FIB_ENTRY_SRC_ATTRIBUTE_FIRST,
+ /**
+ * the source is active/best
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE,
+ /**
+ * Marker. add new entries before this one.
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE,
+} fib_entry_src_attribute_t;
+
+#define FIB_ENTRY_SRC_ATTRIBUTE_MAX (FIB_ENTRY_SRC_ATTRIBUTE_LAST+1)
+
+#define FIB_ENTRY_SRC_ATTRIBUTES { \
+ [FIB_ENTRY_SRC_ATTRIBUTE_ADDED] = "added", \
+ [FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \
+}
+
+typedef enum fib_entry_src_flag_t_ {
+ FIB_ENTRY_SRC_FLAG_NONE = 0,
+ FIB_ENTRY_SRC_FLAG_ADDED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ADDED),
+ FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE),
+} __attribute__ ((packed)) fib_entry_src_flag_t;
+
+/*
+ * Keep the size of the flags field to 2 bytes, so it
+ * can be placed next to the 2 bytes reference count
+ */
+_Static_assert (sizeof(fib_entry_src_flag_t) <= 2,
+ "FIB entry flags field size too big");
+
+/**
+ * Information related to the source of a FIB entry
+ */
+typedef struct fib_entry_src_t_ {
+ /**
+ * The path-list created by the source
+ */
+ fib_node_index_t fes_pl;
+ /**
+ * Which source this info block is for
+ */
+ fib_source_t fes_src;
+ /**
+ * Flags on the source
+ */
+ fib_entry_src_flag_t fes_flags;
+ /**
+ * Flags the source contributes to the entry
+ */
+ fib_entry_flag_t fes_entry_flags;
+
+ /**
+ * 1 byte ref count. This is not the number of users of the Entry
+ * (which is itself not large, due to path-list sharing), but the number
+ * of times a given source has been added. Which is even fewer
+ */
+ u8 fes_ref_count;
+
+ /**
+ * A vector of path extensions
+ */
+ struct fib_path_ext_t_ *fes_path_exts;
+
+ /**
+ * Source specific info
+ */
+ union {
+ struct {
+ /**
+ * the index of the FIB entry that is the covering entry
+ */
+ fib_node_index_t fesr_cover;
+ /**
+ * This source's index in the cover's list
+ */
+ u32 fesr_sibling;
+ } rr;
+ struct {
+ /**
+ * the index of the FIB entry that is the covering entry
+ */
+ fib_node_index_t fesa_cover;
+ /**
+ * This source's index in the cover's list
+ */
+ u32 fesa_sibling;
+ } adj;
+ struct {
+ /**
+ * the index of the FIB entry that is the covering entry
+ */
+ fib_node_index_t fesi_cover;
+ /**
+ * This source's index in the cover's list
+ */
+ u32 fesi_sibling;
+ } interface;
+ struct {
+ /**
+ * This MPLS local label associated with the prefix.
+ */
+ mpls_label_t fesm_label;
+
+ /**
+ * the indices of the LFIB entries created
+ */
+ fib_node_index_t fesm_lfes[2];
+ } mpls;
+ struct {
+ /**
+ * The source FIB index.
+ */
+ fib_node_index_t fesl_fib_index;
+ } lisp;
+ };
+} fib_entry_src_t;
+
+/**
+ * An entry in a FIB table.
+ *
+ * This entry represents a route added to the FIB that is stored
+ * in one of the FIB tables.
+ */
+typedef struct fib_entry_t_ {
+ /**
+ * Base class. The entry's node representation in the graph.
+ */
+ fib_node_t fe_node;
+ /**
+ * The prefix of the route
+ */
+ fib_prefix_t fe_prefix;
+ /**
+ * The index of the FIB table this entry is in
+ */
+ u32 fe_fib_index;
+ /**
+ * The load-balance used for forwarding.
+ *
+ * We don't share the EOS and non-EOS even in case when they could be
+ * because:
+ * - complexity & reliability v. memory
+ * determining the conditions where sharing is possible is non-trivial.
+ * - separate LBs means we can get the EOS bit right in the MPLS label DPO
+ * and so save a few clock cycles in the DP imposition node since we can
+ * paint the header straight on without the need to check the packet
+ * type to derive the EOS bit value.
+ */
+ dpo_id_t fe_lb[FIB_FORW_CHAIN_NUM];
+ /**
+ * Vector of source infos.
+ * Most entries will only have 1 source. So we optimise for memory usage,
+ * which is preferable since we have many entries.
+ */
+ fib_entry_src_t *fe_srcs;
+ /**
+ * the path-list for which this entry is a child. This is also the path-list
+ * that is contributing forwarding for this entry.
+ */
+ fib_node_index_t fe_parent;
+ /**
+ * index of this entry in the parent's child list.
+ * This is set when this entry is added as a child, but can also
+ * be changed by the parent as it manages its list.
+ */
+ u32 fe_sibling;
+ /**
+ * Dependency list of covered entries.
+ * these are more specific entries that are interested in changes
+ * to their respective cover
+ */
+ fib_node_list_t fe_covered;
+ /**
+ * exporter
+ */
+ fib_node_index_t fe_export;
+ fib_node_index_t fe_import;
+} fib_entry_t;
+
+/**
+ * Iterate _item over every single-bit fib_entry_flag_t value, from the flag
+ * of the first attribute to the flag of the last. The flags are bit masks
+ * (1 << attribute), so the iteration shifts rather than increments.
+ */
+#define FOR_EACH_FIB_ENTRY_FLAG(_item)                          \
+    for ((_item) = (1 << FIB_ENTRY_ATTRIBUTE_FIRST);            \
+         (_item) <= (1 << FIB_ENTRY_ATTRIBUTE_LAST);            \
+         (_item) <<= 1)
+
+#define FIB_ENTRY_FORMAT_BRIEF (0x0)
+#define FIB_ENTRY_FORMAT_DETAIL (0x1)
+#define FIB_ENTRY_FORMAT_DETAIL2 (0x2)
+
+extern u8 *format_fib_entry (u8 * s, va_list * args);
+
+extern fib_node_index_t fib_entry_create_special(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+
+extern fib_node_index_t fib_entry_create (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths);
+extern void fib_entry_update (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths);
+
+extern void fib_entry_path_add(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *rpath);
+extern void fib_entry_special_add(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+extern fib_entry_src_flag_t fib_entry_special_remove(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern fib_entry_src_flag_t fib_entry_path_remove(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const fib_route_path_t *rpath);
+extern fib_entry_src_flag_t fib_entry_delete(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern void fib_entry_contribute_forwarding(
+ fib_node_index_t fib_entry_index,
+ fib_forward_chain_type_t type,
+ dpo_id_t *dpo);
+extern const dpo_id_t * fib_entry_contribute_ip_forwarding(
+ fib_node_index_t fib_entry_index);
+extern adj_index_t fib_entry_get_adj_for_source(
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
+extern const int fib_entry_get_dpo_for_source (
+ fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ dpo_id_t *dpo);
+
+extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index);
+
+extern int fib_entry_cmp_for_sort(void *i1, void *i2);
+
+extern void fib_entry_cover_changed(fib_node_index_t fib_entry);
+extern void fib_entry_cover_updated(fib_node_index_t fib_entry);
+extern int fib_entry_recursive_loop_detect(fib_node_index_t entry_index,
+ fib_node_index_t **entry_indicies);
+
+extern void fib_entry_lock(fib_node_index_t fib_entry_index);
+extern void fib_entry_unlock(fib_node_index_t fib_entry_index);
+
+extern u32 fib_entry_child_add(fib_node_index_t fib_entry_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+extern void fib_entry_child_remove(fib_node_index_t fib_entry_index,
+ u32 sibling_index);
+extern u32 fib_entry_get_resolving_interface(fib_node_index_t fib_entry_index);
+
+extern void fib_entry_get_prefix(fib_node_index_t fib_entry_index,
+ fib_prefix_t *pfx);
+extern u32 fib_entry_get_fib_index(fib_node_index_t fib_entry_index);
+extern void fib_entry_set_source_data(fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ const void *data);
+extern const void* fib_entry_get_source_data(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern fib_entry_flag_t fib_entry_get_flags(fib_node_index_t fib_entry_index);
+extern fib_source_t fib_entry_get_best_source(fib_node_index_t fib_entry_index);
+extern int fib_entry_is_sourced(fib_node_index_t fib_entry_index,
+ fib_source_t source);
+
+extern fib_node_index_t fib_entry_get_path_list(fib_node_index_t fib_entry_index);
+extern u32 fib_entry_get_fib_table_id(fib_node_index_t fib_entry_index);
+
+extern void fib_entry_module_init(void);
+
+/*
+ * unsafe... beware the raw pointer.
+ */
+extern fib_node_index_t fib_entry_get_index(const fib_entry_t * fib_entry);
+extern fib_entry_t * fib_entry_get(fib_node_index_t fib_entry_index);
+
+/*
+ * for testing purposes.
+ */
+extern u32 fib_entry_pool_size(void);
+
+#endif
diff --git a/vnet/vnet/fib/fib_entry_cover.c b/vnet/vnet/fib/fib_entry_cover.c
new file mode 100644
index 00000000000..06b5b918abc
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_cover.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/fib_node_list.h>
+
+/**
+ * Add 'covered' to the list of entries tracking 'cover'.
+ *
+ * The cover's list is created on first use. An entry must not track itself.
+ *
+ * @param cover   the covering entry
+ * @param covered index of the entry that wants to track the cover
+ * @return the value returned by fib_node_list_push_front for the new element
+ */
+u32
+fib_entry_cover_track (fib_entry_t* cover,
+                       fib_node_index_t covered)
+{
+    u32 sibling;
+
+    FIB_ENTRY_DBG(cover, "cover-track %d", covered);
+
+    ASSERT(fib_entry_get_index(cover) != covered);
+
+    /* lazily create the list of covered entries */
+    if (FIB_NODE_INDEX_INVALID == cover->fe_covered)
+    {
+        cover->fe_covered = fib_node_list_create();
+    }
+
+    sibling = fib_node_list_push_front(cover->fe_covered,
+                                       0, FIB_NODE_TYPE_ENTRY,
+                                       covered);
+
+    return (sibling);
+}
+
+/**
+ * Remove the element at 'tracked_index' from the cover's list of covered
+ * entries, destroying the list once it becomes empty. Does nothing when
+ * the cover has no list.
+ *
+ * @param cover         the covering entry
+ * @param tracked_index the list position to remove
+ */
+void
+fib_entry_cover_untrack (fib_entry_t* cover,
+                         u32 tracked_index)
+{
+    FIB_ENTRY_DBG(cover, "cover-untrack @ %d", tracked_index);
+
+    if (FIB_NODE_INDEX_INVALID != cover->fe_covered)
+    {
+        fib_node_list_remove(cover->fe_covered, tracked_index);
+
+        /* the last tracker has gone, the list is no longer needed */
+        if (0 == fib_node_list_get_size(cover->fe_covered))
+        {
+            fib_node_list_destroy(&cover->fe_covered);
+        }
+    }
+}
+
+/**
+ * Internal struct to hold user supplied parameters for the cover walk
+ */
+typedef struct fib_enty_cover_walk_ctx_t_ {
+ fib_entry_t *cover;
+ fib_entry_covered_walk_t walk;
+ void *ctx;
+} fib_enty_cover_walk_ctx_t;
+
+/**
+ * Node-list walk callback: unpack the walk context and invoke the user's
+ * callback with the cover, the visited node's index and the user's context.
+ * The user callback's return value is ignored.
+ *
+ * @return always 1
+ */
+static int
+fib_entry_cover_walk_node_ptr (fib_node_ptr_t *depend,
+                               void *args)
+{
+    fib_enty_cover_walk_ctx_t *walk_ctx = args;
+    fib_node_index_t covered = depend->fnp_index;
+
+    walk_ctx->walk(walk_ctx->cover, covered, walk_ctx->ctx);
+
+    /* continue */
+    return (1);
+}
+
+/**
+ * Invoke 'walk' for each entry in the cover's list of covered entries.
+ * Does nothing when the cover has no list.
+ *
+ * @param cover the covering entry
+ * @param walk  callback invoked per covered entry
+ * @param args  opaque context handed to the callback
+ */
+void
+fib_entry_cover_walk (fib_entry_t *cover,
+                      fib_entry_covered_walk_t walk,
+                      void *args)
+{
+    fib_enty_cover_walk_ctx_t ctx;
+
+    if (FIB_NODE_INDEX_INVALID == cover->fe_covered)
+    {
+        return;
+    }
+
+    ctx.cover = cover;
+    ctx.walk = walk;
+    ctx.ctx = args;
+
+    fib_node_list_walk(cover->fe_covered,
+                       fib_entry_cover_walk_node_ptr,
+                       &ctx);
+}
+
+/**
+ * Return the number of entries in the cover's list of covered entries,
+ * or 0 when the cover has no list.
+ */
+u32
+fib_entry_cover_get_size (fib_entry_t *cover)
+{
+    if (FIB_NODE_INDEX_INVALID == cover->fe_covered)
+    {
+        return (0);
+    }
+
+    return (fib_node_list_get_size(cover->fe_covered));
+}
+
+typedef struct fib_entry_cover_list_format_ctx_t_ {
+ u8 *s;
+} fib_entry_cover_list_format_ctx_t;
+
+/**
+ * Cover walk callback: append "<covered-index>, " to the string held in
+ * the format context.
+ *
+ * @return always 1
+ */
+static int
+fib_entry_covered_list_format_one (fib_entry_t *cover,
+                                   fib_node_index_t covered,
+                                   void *args)
+{
+    fib_entry_cover_list_format_ctx_t *fmt_ctx = args;
+    u8 *out = fmt_ctx->s;
+
+    out = format(out, "%d, ", covered);
+    fmt_ctx->s = out;
+
+    /* continue */
+    return (1);
+}
+
+/**
+ * Append the indices of all entries covered by 'fib_entry' to the string
+ * 's', each followed by ", ".
+ *
+ * @param fib_entry the covering entry
+ * @param s         the string to append to
+ * @return the string after all appends
+ */
+u8*
+fib_entry_cover_list_format (fib_entry_t *fib_entry,
+                             u8 *s)
+{
+    fib_entry_cover_list_format_ctx_t fmt_ctx;
+
+    fmt_ctx.s = s;
+
+    fib_entry_cover_walk(fib_entry,
+                         fib_entry_covered_list_format_one,
+                         &fmt_ctx);
+
+    return (fmt_ctx.s);
+}
+
+/**
+ * Cover walk callback used when the set of covering prefixes changes.
+ *
+ * The 3 entries involved here are:
+ *   cover     - the least specific. It will cover both the others
+ *   new_cover - the entry just inserted below the cover (carried in 'args')
+ *   covered   - the entry that was tracking the cover.
+ *
+ * The covered entry is notified via fib_entry_cover_changed() when either
+ * no new cover is given, or the new cover's prefix covers its prefix.
+ *
+ * @return always 1
+ */
+static int
+fib_entry_cover_change_one (fib_entry_t *cover,
+                            fib_node_index_t covered,
+                            void *args)
+{
+    fib_prefix_t pfx_covered, pfx_new_cover;
+    fib_node_index_t new_cover;
+
+    new_cover = pointer_to_uword(args);
+
+    if (FIB_NODE_INDEX_INVALID == new_cover)
+    {
+        /*
+         * nothing has been inserted, which implies the cover was removed.
+         * 'cover' is thus the new cover.
+         */
+        fib_entry_cover_changed(covered);
+        return (1);
+    }
+
+    if (new_cover == covered)
+    {
+        /* the inserted entry is the one being visited; nothing to do */
+        return (1);
+    }
+
+    /*
+     * determine if new_cover is a cover for covered.
+     */
+    fib_entry_get_prefix(covered, &pfx_covered);
+    fib_entry_get_prefix(new_cover, &pfx_new_cover);
+
+    if (fib_prefix_is_cover(&pfx_new_cover, &pfx_covered))
+    {
+        fib_entry_cover_changed(covered);
+    }
+
+    /* continue */
+    return (1);
+}
+
+/**
+ * Walk the entries covered by the entry at 'cover_index', running the
+ * cover-change check on each. The index 'covered' is carried to the
+ * callback packed into the walk's context pointer.
+ */
+void
+fib_entry_cover_change_notify (fib_node_index_t cover_index,
+                               fib_node_index_t covered)
+{
+    void *walk_arg = uword_to_pointer(covered, void*);
+
+    fib_entry_cover_walk(fib_entry_get(cover_index),
+                         fib_entry_cover_change_one,
+                         walk_arg);
+}
+
+/**
+ * Cover walk callback: tell the covered entry that its cover was updated.
+ * The 'cover' and 'args' parameters are not used.
+ *
+ * @return always 1
+ */
+static int
+fib_entry_cover_update_one (fib_entry_t *cover,
+                            fib_node_index_t covered,
+                            void *args)
+{
+    const int keep_walking = 1;
+
+    fib_entry_cover_updated(covered);
+
+    return (keep_walking);
+}
+
+/**
+ * Call fib_entry_cover_updated() on every entry covered by 'fib_entry'.
+ */
+void
+fib_entry_cover_update_notify (fib_entry_t *fib_entry)
+{
+    void *no_ctx = NULL;
+
+    fib_entry_cover_walk(fib_entry, fib_entry_cover_update_one, no_ctx);
+}
diff --git a/vnet/vnet/fib/fib_entry_cover.h b/vnet/vnet/fib/fib_entry_cover.h
new file mode 100644
index 00000000000..fbbbc211dc9
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_cover.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_COVER_H__
+#define __FIB_ENTRY_COVER_H__
+
+#include "fib_entry.h"
+
+/**
+ * callback function used when walking the covered entries
+ */
+typedef int (*fib_entry_covered_walk_t)(fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *ctx);
+
+extern u32 fib_entry_cover_track(fib_entry_t *cover,
+ fib_node_index_t covered);
+
+extern void fib_entry_cover_untrack(fib_entry_t *cover,
+ u32 tracked_index);
+
+extern void fib_entry_cover_walk(fib_entry_t *cover,
+ fib_entry_covered_walk_t walk,
+ void *ctx);
+
+extern void fib_entry_cover_change_notify(fib_node_index_t cover_index,
+ fib_node_index_t covered_index);
+extern void fib_entry_cover_update_notify(fib_entry_t *cover);
+
+extern u32 fib_entry_cover_get_size(fib_entry_t *cover);
+
+extern u8* fib_entry_cover_list_format(fib_entry_t *fib_entry,
+ u8 *s);
+
+#endif
diff --git a/vnet/vnet/fib/fib_entry_src.c b/vnet/vnet/fib/fib_entry_src.c
new file mode 100644
index 00000000000..f7d84e5ea34
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src.c
@@ -0,0 +1,1278 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include "fib_entry_src.h"
+#include "fib_table.h"
+#include "fib_path_ext.h"
+
+/*
+ * per-source type vft
+ */
+static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX];
+
+/**
+ * Return the protocol recorded in the entry's prefix.
+ */
+static fib_protocol_t
+fib_entry_get_proto (const fib_entry_t * fib_entry)
+{
+    const fib_prefix_t *pfx = &fib_entry->fe_prefix;
+
+    return (pfx->fp_proto);
+}
+
+/**
+ * Install the virtual function table for a source by copying *vft into
+ * the per-source table slot.
+ *
+ * @param source the source being registered; used unchecked as the index
+ * @param vft    the functions to copy
+ */
+void
+fib_entry_src_register (fib_source_t source,
+                        const fib_entry_src_vft_t *vft)
+{
+    fib_entry_src_vft_t *slot = &fib_entry_src_vft[source];
+
+    *slot = *vft;
+}
+
+/**
+ * Sort comparator for fib_entry_src_t elements: orders by ascending
+ * fes_src value.
+ *
+ * @return negative, zero or positive as v1's source is less than, equal to
+ *         or greater than v2's
+ */
+static int
+fib_entry_src_cmp_for_sort (void * v1,
+                            void * v2)
+{
+    const fib_entry_src_t *lhs = v1;
+    const fib_entry_src_t *rhs = v2;
+
+    return (lhs->fes_src - rhs->fes_src);
+}
+
+/**
+ * Add a new, initialised source info block for 'source' to the entry.
+ *
+ * The block starts with no path-list and no flags. The source's init
+ * function, if registered, runs on it before it is appended to the
+ * entry's vector of sources, which is then re-sorted by source value.
+ */
+void
+fib_entry_src_action_init (fib_entry_t *fib_entry,
+                           fib_source_t source)
+
+{
+    fib_entry_src_t new_src = {
+        .fes_src = source,
+        .fes_flags = FIB_ENTRY_SRC_FLAG_NONE,
+        .fes_pl = FIB_NODE_INDEX_INVALID,
+    };
+
+    if (NULL != fib_entry_src_vft[source].fesv_init)
+    {
+        fib_entry_src_vft[source].fesv_init(&new_src);
+    }
+
+    vec_add1(fib_entry->fe_srcs, new_src);
+
+    /* keep the vector ordered by source value */
+    vec_sort_with_function(fib_entry->fe_srcs,
+                           fib_entry_src_cmp_for_sort);
+}
+
+/**
+ * Find the info block for 'source' in the entry's vector of sources.
+ *
+ * @param fib_entry the entry to search
+ * @param source    the source to look for
+ * @param index     if non-NULL, receives the position of the match in the
+ *                  vector; left untouched when there is no match
+ * @return the matching block, or NULL if the entry does not have the source
+ */
+static fib_entry_src_t *
+fib_entry_src_find (const fib_entry_t *fib_entry,
+                    fib_source_t source,
+                    u32 *index)
+
+{
+    fib_entry_src_t *esrc;
+
+    vec_foreach(esrc, fib_entry->fe_srcs)
+    {
+        if (esrc->fes_src != source)
+        {
+            continue;
+        }
+
+        if (NULL != index)
+        {
+            /* position of the match within the vector */
+            *index = esrc - fib_entry->fe_srcs;
+        }
+        return (esrc);
+    }
+
+    return (NULL);
+}
+
+/**
+ * Test whether the entry with the given index has an info block for
+ * 'source'.
+ *
+ * @return 1 if the source is present on the entry, 0 otherwise
+ */
+int
+fib_entry_is_sourced (fib_node_index_t fib_entry_index,
+                      fib_source_t source)
+{
+    const fib_entry_t *entry = fib_entry_get(fib_entry_index);
+
+    return (fib_entry_src_find(entry, source, NULL) != NULL);
+}
+
+/**
+ * Find the info block for 'source' on the entry, creating it first when
+ * the entry does not yet have one.
+ *
+ * @param fib_entry the entry to search/extend
+ * @param source    the source to find or add
+ * @param index     if non-NULL, receives the position of the block in the
+ *                  entry's vector of sources
+ * @return the source's info block
+ */
+static fib_entry_src_t *
+fib_entry_src_find_or_create (fib_entry_t *fib_entry,
+                              fib_source_t source,
+                              u32 *index)
+{
+    if (NULL == fib_entry_src_find(fib_entry, source, NULL))
+    {
+        fib_entry_src_action_init(fib_entry, source);
+    }
+
+    /*
+     * Always look the block up again: adding a source appends to and
+     * re-sorts the vector, so both the pointer and the position are only
+     * known after the init. This lookup also fills in the caller's index.
+     */
+    return (fib_entry_src_find(fib_entry, source, index));
+}
+
+/**
+ * Remove the info block for 'source' from the entry.
+ *
+ * The source's deinit function, if registered, runs first; then the
+ * block's path extension vector is freed and the block is deleted from
+ * the entry's vector of sources. The source must be present on the entry.
+ */
+void
+fib_entry_src_action_deinit (fib_entry_t *fib_entry,
+                             fib_source_t source)
+
+{
+    u32 position = ~0;
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, &position);
+
+    ASSERT(NULL != esrc);
+
+    if (NULL != fib_entry_src_vft[source].fesv_deinit)
+    {
+        fib_entry_src_vft[source].fesv_deinit(esrc);
+    }
+
+    vec_free(esrc->fes_path_exts);
+    vec_del1(fib_entry->fe_srcs, position);
+}
+
+/**
+ * Run the source's cover-change function, if it registered one.
+ *
+ * @return the function's result; when the source has no such function,
+ *         a result with install set and no back-walk reason
+ */
+fib_entry_src_cover_res_t
+fib_entry_src_action_cover_change (fib_entry_t *fib_entry,
+                                   fib_source_t source)
+{
+    /* default when the source expresses no opinion */
+    fib_entry_src_cover_res_t res = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+
+    if (NULL != fib_entry_src_vft[source].fesv_cover_change)
+    {
+        res = fib_entry_src_vft[source].fesv_cover_change(
+                  fib_entry_src_find(fib_entry, source, NULL),
+                  fib_entry);
+    }
+
+    return (res);
+}
+
+/**
+ * Run the source's cover-update function, if it registered one.
+ *
+ * @return the function's result; when the source has no such function,
+ *         a result with install set and no back-walk reason
+ */
+fib_entry_src_cover_res_t
+fib_entry_src_action_cover_update (fib_entry_t *fib_entry,
+                                   fib_source_t source)
+{
+    /* default when the source expresses no opinion */
+    fib_entry_src_cover_res_t res = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+
+    if (NULL != fib_entry_src_vft[source].fesv_cover_update)
+    {
+        res = fib_entry_src_vft[source].fesv_cover_update(
+                  fib_entry_src_find(fib_entry, source, NULL),
+                  fib_entry);
+    }
+
+    return (res);
+}
+
+typedef struct fib_entry_src_collect_forwarding_ctx_t_
+{
+ load_balance_path_t * next_hops;
+ const fib_entry_t *fib_entry;
+ const fib_entry_src_t *esrc;
+ fib_forward_chain_type_t fct;
+ int is_recursive;
+} fib_entry_src_collect_forwarding_ctx_t;
+
+/**
+ * @brief Determine whether this FIB entry should use a load-balance MAP
+ * to support PIC edge fast convergence
+ *
+ * We'll use a LB map if the path-list has recursive paths.
+ * recursive paths implies BGP, and hence scale.
+ *
+ * @return LOAD_BALANCE_FLAG_USES_MAP when the walk context saw a recursive
+ *         path, LOAD_BALANCE_FLAG_NONE otherwise
+ */
+load_balance_flags_t
+fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx)
+{
+    return (ctx->is_recursive ?
+            LOAD_BALANCE_FLAG_USES_MAP :
+            LOAD_BALANCE_FLAG_NONE);
+}
+
+/**
+ * Test whether a label may be used as an out-going label: it must be a
+ * real label or one of the IPv4 explicit-null, IPv6 explicit-null or
+ * implicit-null labels.
+ *
+ * @return 1 if the label is acceptable, 0 otherwise
+ */
+static int
+fib_entry_src_valid_out_label (mpls_label_t label)
+{
+    if (MPLS_LABEL_IS_REAL(label))
+    {
+        return (1);
+    }
+    if (MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL == label)
+    {
+        return (1);
+    }
+    if (MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL == label)
+    {
+        return (1);
+    }
+    if (MPLS_IETF_IMPLICIT_NULL_LABEL == label)
+    {
+        return (1);
+    }
+
+    return (0);
+}
+
+/**
+ * Path-list walk callback: add the forwarding contribution of one path to
+ * the vector of next-hops being collected in the walk context.
+ *
+ * Unresolved paths contribute nothing. A path that has a path extension
+ * with a valid out-going label is stacked via that extension; otherwise
+ * the path contributes directly, subject to the chain type requested.
+ *
+ * @param pl_index   index of the path-list being walked (not used)
+ * @param path_index index of the path being visited
+ * @param arg        the fib_entry_src_collect_forwarding_ctx_t of the walk
+ * @return always non-zero
+ */
+static int
+fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
+                                  fib_node_index_t path_index,
+                                  void *arg)
+{
+    fib_entry_src_collect_forwarding_ctx_t *ctx;
+    fib_path_ext_t *path_ext, *matched_ext;
+
+    ctx = arg;
+
+    /*
+     * if the path is not resolved, don't include it.
+     */
+    if (!fib_path_is_resolved(path_index))
+    {
+        return (!0);
+    }
+
+    if (fib_path_is_recursive(path_index))
+    {
+        ctx->is_recursive = 1;
+    }
+
+    /*
+     * get the matching path-extension for the path being visited.
+     * The result is kept in a separate pointer: when the loop runs to
+     * completion the iterator points one past the last element and must
+     * not be dereferenced.
+     */
+    matched_ext = NULL;
+    vec_foreach(path_ext, ctx->esrc->fes_path_exts)
+    {
+        if (path_ext->fpe_path_index == path_index)
+        {
+            matched_ext = path_ext;
+            break;
+        }
+    }
+
+    if (NULL != matched_ext &&
+        fib_entry_src_valid_out_label(matched_ext->fpe_label))
+    {
+        /*
+         * found a matching extension. stack it to obtain the forwarding
+         * info for this path.
+         */
+        ctx->next_hops = fib_path_ext_stack(matched_ext,
+                                            ctx->fct,
+                                            ctx->next_hops);
+    }
+    else
+    {
+        load_balance_path_t *nh;
+
+        /*
+         * no extension => no out-going label for this path. that's OK
+         * in the case of an IP or EOS chain, but not for non-EOS
+         */
+        switch (ctx->fct)
+        {
+        case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+        case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+            /*
+             * EOS traffic with no label to stack, we need the IP Adj
+             */
+            vec_add2(ctx->next_hops, nh, 1);
+
+            nh->path_index = path_index;
+            nh->path_weight = fib_path_get_weight(path_index);
+            fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+
+            break;
+        case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+            /* only exclusive and deag paths contribute to a non-EOS chain */
+            if (fib_path_is_exclusive(path_index) ||
+                fib_path_is_deag(path_index))
+            {
+                vec_add2(ctx->next_hops, nh, 1);
+
+                nh->path_index = path_index;
+                nh->path_weight = fib_path_get_weight(path_index);
+                fib_path_contribute_forwarding(path_index,
+                                               FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+                                               &nh->path_dpo);
+            }
+            break;
+        case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+            ASSERT(0);
+            break;
+        }
+    }
+
+    return (!0);
+}
+
+/**
+ * Build or update the load-balance DPO for one forwarding chain type of
+ * an entry, from the paths of the given source.
+ *
+ * The source's path-list is walked to collect a vector of next-hops. For
+ * an EXCLUSIVE entry whose single next-hop is already a load-balance, that
+ * DPO is used directly. Otherwise a load-balance is created in *dpo_lb if
+ * it is not yet valid, and then updated with the collected next-hops.
+ *
+ * @param fib_entry the entry the load-balance is for
+ * @param esrc      the source whose path-list and extensions are used
+ * @param fct       the forwarding chain type to build
+ * @param dpo_lb    [in,out] the load-balance DPO to create or update
+ */
+void
+fib_entry_src_mk_lb (fib_entry_t *fib_entry,
+                     const fib_entry_src_t *esrc,
+                     fib_forward_chain_type_t fct,
+                     dpo_id_t *dpo_lb)
+{
+    dpo_proto_t lb_proto;
+
+    /*
+     * Collect the forwarding of each path, stacking the source's path
+     * extensions where present.
+     */
+    fib_entry_src_collect_forwarding_ctx_t ctx = {
+        .esrc = esrc,
+        .fib_entry = fib_entry,
+        .next_hops = NULL,
+        .is_recursive = 0,
+        .fct = fct,
+    };
+
+    lb_proto = fib_proto_to_dpo(fib_entry_get_proto(fib_entry));
+
+    fib_path_list_walk(esrc->fes_pl,
+                       fib_entry_src_collect_forwarding,
+                       &ctx);
+
+    if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_EXCLUSIVE)
+    {
+        /*
+         * the client provided the DPO that the entry should link to.
+         * all entries must link to a LB, so if it is an LB already
+         * then we can use it.
+         */
+        if ((1 == vec_len(ctx.next_hops)) &&
+            (DPO_LOAD_BALANCE == ctx.next_hops[0].path_dpo.dpoi_type))
+        {
+            dpo_copy(dpo_lb, &ctx.next_hops[0].path_dpo);
+            dpo_reset(&ctx.next_hops[0].path_dpo);
+            /*
+             * the collected vector is local to this function and is not
+             * handed to anyone on this path, so release it here.
+             */
+            vec_free(ctx.next_hops);
+            return;
+        }
+    }
+
+    if (!dpo_id_is_valid(dpo_lb))
+    {
+        /*
+         * first time create
+         */
+        flow_hash_config_t fhc;
+
+        fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index,
+                                             dpo_proto_to_fib(lb_proto));
+        dpo_set(dpo_lb,
+                DPO_LOAD_BALANCE,
+                lb_proto,
+                load_balance_create(0, lb_proto, fhc));
+    }
+
+    /*
+     * NOTE(review): ownership of ctx.next_hops after this call is not
+     * visible from here - confirm whether load_balance_multipath_update
+     * takes the vector or whether it must be freed by this function.
+     */
+    load_balance_multipath_update(dpo_lb,
+                                  ctx.next_hops,
+                                  fib_entry_calc_lb_flags(&ctx));
+}
+
+/**
+ * Install the forwarding chain for the given source into the forwarding
+ * tables.
+ *
+ * The load-balance for the entry's default chain type is (re)built from
+ * the source and written to the forwarding table. For IP entries, any
+ * MPLS chain load-balances that already exist are rebuilt as well.
+ *
+ * @param fib_entry the entry to install
+ * @param source    the source providing the forwarding
+ */
+void
+fib_entry_src_action_install (fib_entry_t *fib_entry,
+                              fib_source_t source)
+{
+    fib_forward_chain_type_t fct;
+    fib_entry_src_t *esrc;
+
+    fct = fib_entry_get_default_chain_type(fib_entry);
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    fib_entry_src_mk_lb(fib_entry, esrc, fct, &fib_entry->fe_lb[fct]);
+
+    /* log the DPO's index; the dpo_id_t itself cannot be printed with %d */
+    FIB_ENTRY_DBG(fib_entry, "install: %d",
+                  fib_entry->fe_lb[fct].dpoi_index);
+
+    /*
+     * insert the adj into the data-plane forwarding trie
+     */
+    fib_table_fwding_dpo_update(fib_entry->fe_fib_index,
+                                &fib_entry->fe_prefix,
+                                &fib_entry->fe_lb[fct]);
+
+    if (FIB_FORW_CHAIN_TYPE_UNICAST_IP4 == fct ||
+        FIB_FORW_CHAIN_TYPE_UNICAST_IP6 == fct)
+    {
+        for (fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+             fct <= FIB_FORW_CHAIN_TYPE_MPLS_EOS;
+             fct++)
+        {
+            /*
+             * if any of the other chain types are already created they will need
+             * updating too
+             */
+            if (dpo_id_is_valid(&fib_entry->fe_lb[fct]))
+            {
+                fib_entry_src_mk_lb(fib_entry,
+                                    esrc,
+                                    fct,
+                                    &fib_entry->fe_lb[fct]);
+            }
+        }
+    }
+}
+
+/**
+ * Uninstall the entry's forwarding from the forwarding tables.
+ *
+ * If the load-balance for the entry's default chain type is valid it is
+ * removed from the forwarding table and then reset. Otherwise nothing is
+ * done.
+ *
+ * @param fib_entry the entry to uninstall
+ */
+void
+fib_entry_src_action_uninstall (fib_entry_t *fib_entry)
+{
+    fib_forward_chain_type_t fct;
+
+    fct = fib_entry_get_default_chain_type(fib_entry);
+
+    /* log the index of the DPO that is about to be removed */
+    FIB_ENTRY_DBG(fib_entry, "uninstall: %d",
+                  fib_entry->fe_lb[fct].dpoi_index);
+
+    if (dpo_id_is_valid(&fib_entry->fe_lb[fct]))
+    {
+        fib_table_fwding_dpo_remove(
+            fib_entry->fe_fib_index,
+            &fib_entry->fe_prefix,
+            &fib_entry->fe_lb[fct]);
+
+        dpo_reset(&fib_entry->fe_lb[fct]);
+    }
+}
+
+/**
+ * Run recursive loop detection starting from the given path-list, using
+ * a scratch vector of visited entries that is freed before returning.
+ */
+static void
+fib_entry_recursive_loop_detect_i (fib_node_index_t path_list_index)
+{
+    fib_node_index_t *visited;
+
+    visited = NULL;
+    fib_path_list_recursive_loop_detect(path_list_index, &visited);
+    vec_free(visited);
+}
+
+/**
+ * Make 'source' the active source of the entry.
+ *
+ * The source must already be ADDED and must not already be ACTIVE. The
+ * source's activate function, if registered, decides whether the entry is
+ * installed. The entry is linked as a child of the source's path-list and
+ * loop detection is run on it before the entry is installed or uninstalled.
+ *
+ * NOTE(review): the result of fib_entry_src_find() is dereferenced without
+ * a NULL check - callers presumably guarantee the source exists; confirm.
+ */
+void
+fib_entry_src_action_activate (fib_entry_t *fib_entry,
+ fib_source_t source)
+
+{
+ int houston_we_are_go_for_install;
+ fib_entry_src_t *esrc;
+
+ esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+ ASSERT(!(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE));
+ ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED);
+
+ /* the flag is set before the source's activate function is invoked */
+ esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ACTIVE;
+
+ if (NULL != fib_entry_src_vft[source].fesv_activate)
+ {
+ houston_we_are_go_for_install =
+ fib_entry_src_vft[source].fesv_activate(esrc, fib_entry);
+ }
+ else
+ {
+ /*
+ * the source is not providing an activate function, we'll assume
+ * therefore it has no objection to installing the entry
+ */
+ houston_we_are_go_for_install = !0;
+ }
+
+ /*
+ * link to the path-list provided by the source, and go check
+ * if that forms any loops in the graph.
+ */
+ fib_entry->fe_parent = esrc->fes_pl;
+ fib_entry->fe_sibling =
+ fib_path_list_child_add(fib_entry->fe_parent,
+ FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry));
+
+ fib_entry_recursive_loop_detect_i(fib_entry->fe_parent);
+
+ FIB_ENTRY_DBG(fib_entry, "activate: %d",
+ fib_entry->fe_parent);
+
+ /* install only if the source did not object, otherwise remove forwarding */
+ if (0 != houston_we_are_go_for_install)
+ {
+ fib_entry_src_action_install(fib_entry, source);
+ }
+ else
+ {
+ fib_entry_src_action_uninstall(fib_entry);
+ }
+}
+
+/**
+ * Deactivate a source on the entry.
+ *
+ * Calls the source's deactivate hook (if any), clears the ACTIVE flag,
+ * detaches the entry from its parent path-list and re-runs loop detection
+ * on that path-list before giving up the child link.
+ */
+void
+fib_entry_src_action_deactivate (fib_entry_t *fib_entry,
+                                 fib_source_t source)
+{
+    fib_node_index_t old_parent;
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+    ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE);
+
+    if (NULL != fib_entry_src_vft[source].fesv_deactivate)
+    {
+        fib_entry_src_vft[source].fesv_deactivate(esrc, fib_entry);
+    }
+    esrc->fes_flags &= ~FIB_ENTRY_SRC_FLAG_ACTIVE;
+
+    FIB_ENTRY_DBG(fib_entry, "deactivate: %d", fib_entry->fe_parent);
+
+    /*
+     * forget the parent first, then check which loops the un-link clears
+     */
+    old_parent = fib_entry->fe_parent;
+    fib_entry->fe_parent = FIB_NODE_INDEX_INVALID;
+    fib_entry_recursive_loop_detect_i(old_parent);
+
+    /*
+     * removing the child unlocks the path-list, so old_parent may be
+     * invalid after this call.
+     */
+    fib_path_list_child_remove(old_parent, fib_entry->fe_sibling);
+    fib_entry->fe_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * Notify every source on the entry that provides a forwarding-update hook
+ * that the entry's forwarding has changed; 'source' is passed through to
+ * each hook.
+ */
+static void
+fib_entry_src_action_fwd_update (const fib_entry_t *fib_entry,
+                                 fib_source_t source)
+{
+    fib_entry_src_t *each;
+
+    vec_foreach(each, fib_entry->fe_srcs)
+    {
+        if (NULL == fib_entry_src_vft[each->fes_src].fesv_fwd_update)
+        {
+            continue;
+        }
+        fib_entry_src_vft[each->fes_src].fesv_fwd_update(each,
+                                                         fib_entry,
+                                                         source);
+    }
+}
+
+/**
+ * Re-activate an already active source.
+ *
+ * If the source now provides a different path-list from the one the entry
+ * is a child of, the entry is moved from the old path-list to the new one
+ * (with loop detection run on both). The entry is then (re)installed and
+ * all sources are told that forwarding was updated.
+ */
+void
+fib_entry_src_action_reactivate (fib_entry_t *fib_entry,
+                                 fib_source_t source)
+{
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE);
+
+    FIB_ENTRY_DBG(fib_entry, "reactivate: %d to %d",
+                  fib_entry->fe_parent,
+                  esrc->fes_pl);
+
+    if (fib_entry->fe_parent != esrc->fes_pl)
+    {
+        fib_node_index_t prev_pl;
+
+        /*
+         * detach from the old path-list
+         */
+        prev_pl = fib_entry->fe_parent;
+        fib_entry->fe_parent = FIB_NODE_INDEX_INVALID;
+
+        /*
+         * hold a temporary lock so the old path-list survives losing
+         * this entry as a child
+         */
+        fib_path_list_lock(prev_pl);
+
+        /*
+         * drop the child link, then see whether any loops were broken
+         */
+        fib_path_list_child_remove(prev_pl,
+                                   fib_entry->fe_sibling);
+        fib_entry_recursive_loop_detect_i(prev_pl);
+
+        /*
+         * attach to the source's current path-list and check whether
+         * that forms any loops in the graph
+         */
+        fib_entry->fe_parent = esrc->fes_pl;
+        fib_entry->fe_sibling =
+            fib_path_list_child_add(fib_entry->fe_parent,
+                                    FIB_NODE_TYPE_ENTRY,
+                                    fib_entry_get_index(fib_entry));
+        fib_entry_recursive_loop_detect_i(fib_entry->fe_parent);
+
+        /* release the temporary lock */
+        fib_path_list_unlock(prev_pl);
+    }
+
+    fib_entry_src_action_install(fib_entry, source);
+    fib_entry_src_action_fwd_update(fib_entry, source);
+}
+
+/**
+ * Tell the source it is now installed (via its optional 'installed' hook)
+ * and then broadcast a forwarding update to all sources on the entry.
+ */
+void
+fib_entry_src_action_installed (const fib_entry_t *fib_entry,
+                                fib_source_t source)
+{
+    fib_entry_src_t *installed_src;
+
+    installed_src = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL != fib_entry_src_vft[source].fesv_installed)
+    {
+        fib_entry_src_vft[source].fesv_installed(installed_src, fib_entry);
+    }
+
+    fib_entry_src_action_fwd_update(fib_entry, source);
+}
+
+/*
+ * fib_entry_src_action_add
+ *
+ * Add a reference from 'source' to the entry. Only the 0->1 reference
+ * transition performs the add; later calls just bump the count.
+ *
+ * Adding a source can result in a new fib_entry being created, which
+ * can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd. The entry is therefore re-fetched by
+ * index after the source's add hook has run.
+ * @return the (possibly relocated) original entry
+ */
+fib_entry_t *
+fib_entry_src_action_add (fib_entry_t *fib_entry,
+                          fib_source_t source,
+                          fib_entry_flag_t flags,
+                          const dpo_id_t *dpo)
+{
+    fib_node_index_t saved_index;
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find_or_create(fib_entry, source, NULL);
+
+    if (1 != ++esrc->fes_ref_count)
+    {
+        /*
+         * not the first reference: the source is already added
+         */
+        return (fib_entry);
+    }
+
+    esrc->fes_entry_flags = flags;
+
+    /*
+     * remember the index so the entry can be recovered after a realloc.
+     */
+    saved_index = fib_entry_get_index(fib_entry);
+
+    if (NULL != fib_entry_src_vft[source].fesv_add)
+    {
+        fib_entry_src_vft[source].fesv_add(esrc,
+                                           fib_entry,
+                                           flags,
+                                           fib_entry_get_proto(fib_entry),
+                                           dpo);
+    }
+
+    fib_entry = fib_entry_get(saved_index);
+
+    esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED;
+
+    fib_path_list_lock(esrc->fes_pl);
+
+    /*
+     * each added source holds one lock on the entry
+     */
+    fib_entry_lock(fib_entry_get_index(fib_entry));
+
+    return (fib_entry);
+}
+
+/**
+ * Drop one reference from 'source' on the entry.
+ *
+ * Only the 1->0 reference transition removes the source: it is
+ * deactivated if active, its remove hook is called, and the locks it held
+ * on its path-list and on the entry are released.
+ *
+ * @return FIB_ENTRY_SRC_FLAG_ACTIVE if the source is not present on the
+ *         entry; otherwise the source's flags as sampled right after the
+ *         reference count was decremented, with FIB_ENTRY_SRC_FLAG_ADDED
+ *         cleared when the source was actually removed.
+ */
+fib_entry_src_flag_t
+fib_entry_src_action_remove (fib_entry_t *fib_entry,
+                             fib_source_t source)
+{
+    fib_entry_src_flag_t result;
+    fib_node_index_t src_pl;
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == esrc)
+    {
+        return (FIB_ENTRY_SRC_FLAG_ACTIVE);
+    }
+
+    esrc->fes_ref_count--;
+    result = esrc->fes_flags;
+
+    if (0 != esrc->fes_ref_count)
+    {
+        /*
+         * other references remain; only the 1->0 transition removes
+         */
+        return (result);
+    }
+
+    if (esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE)
+    {
+        fib_entry_src_action_deactivate(fib_entry, source);
+    }
+
+    /* sample the path-list before the remove hook can change it */
+    src_pl = esrc->fes_pl;
+
+    if (NULL != fib_entry_src_vft[source].fesv_remove)
+    {
+        fib_entry_src_vft[source].fesv_remove(esrc);
+    }
+
+    fib_path_list_unlock(src_pl);
+    fib_entry_unlock(fib_entry_get_index(fib_entry));
+
+    result &= ~FIB_ENTRY_SRC_FLAG_ADDED;
+    fib_entry_src_action_deinit(fib_entry, source);
+
+    return (result);
+}
+
+/*
+ * fib_route_recurses_via_self
+ *
+ * Return non-zero when the path has a non-zero next-hop, names no
+ * interface (frp_sw_if_index is ~0) and that next-hop equals the
+ * prefix's own address.
+ */
+static inline int
+fib_route_recurses_via_self (const fib_prefix_t *prefix,
+                             const fib_route_path_t *rpath)
+{
+    if (ip46_address_is_zero(&rpath->frp_addr))
+    {
+        /* all zeros next hop */
+        return (0);
+    }
+    if (~0 != rpath->frp_sw_if_index)
+    {
+        /* an interface is given, so this is not a recursive path */
+        return (0);
+    }
+
+    /* recursive: does the next-hop match the route's own address? */
+    return (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr));
+}
+
+/*
+ * fib_route_attached_cross_table
+ *
+ * Return true if the route is attached via an interface that
+ * is not in the same table as the route, i.e. the path has:
+ *  - an all zeros next-hop
+ *  - a valid interface
+ *  - an interface whose table index differs from the entry's fib index
+ */
+static inline int
+fib_route_attached_cross_table (const fib_entry_t *fib_entry,
+                                const fib_route_path_t *rpath)
+{
+    if (!ip46_address_is_zero(&rpath->frp_addr))
+    {
+        return (0);
+    }
+    if (~0 == rpath->frp_sw_if_index)
+    {
+        return (0);
+    }
+    if (fib_entry->fe_fib_index ==
+        fib_table_get_index_for_sw_if_index(fib_entry_get_proto(fib_entry),
+                                            rpath->frp_sw_if_index))
+    {
+        return (0);
+    }
+    return (!0);
+}
+
+/*
+ * fib_path_is_attached
+ *
+ * Return true if the path is attached, i.e. it has an all zeros
+ * next-hop and a valid interface.
+ */
+static inline int
+fib_path_is_attached (const fib_route_path_t *rpath)
+{
+    if (!ip46_address_is_zero(&rpath->frp_addr))
+    {
+        /* has a next-hop address */
+        return (0);
+    }
+    if (~0 == rpath->frp_sw_if_index)
+    {
+        /* no interface */
+        return (0);
+    }
+    return (!0);
+}
+
+/**
+ * Translate entry flags into the path-list flags they imply.
+ * Only the DROP, LOCAL and EXCLUSIVE entry flags have a path-list
+ * counterpart; all other entry flags are ignored.
+ */
+fib_path_list_flags_t
+fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags)
+{
+    fib_path_list_flags_t pl_flags;
+
+    pl_flags = FIB_PATH_LIST_FLAG_NONE;
+
+    if (0 != (eflags & FIB_ENTRY_FLAG_EXCLUSIVE))
+    {
+        pl_flags |= FIB_PATH_LIST_FLAG_EXCLUSIVE;
+    }
+    if (0 != (eflags & FIB_ENTRY_FLAG_LOCAL))
+    {
+        pl_flags |= FIB_PATH_LIST_FLAG_LOCAL;
+    }
+    if (0 != (eflags & FIB_ENTRY_FLAG_DROP))
+    {
+        pl_flags |= FIB_PATH_LIST_FLAG_DROP;
+    }
+
+    return (pl_flags);
+}
+
+/**
+ * Adjust the path-list flags and the source's entry flags for one path.
+ *
+ *  - DROP is set in *pl_flags when the path recurses via the entry's own
+ *    prefix, and cleared otherwise.
+ *  - for the API and CLI sources only, ATTACHED follows whether the path
+ *    is attached.
+ *  - IMPORT follows whether the path is attached via an interface in a
+ *    different table.
+ */
+static void
+fib_entry_flags_update (const fib_entry_t *fib_entry,
+                        const fib_route_path_t *rpath,
+                        fib_path_list_flags_t *pl_flags,
+                        fib_entry_src_t *esrc)
+{
+    /*
+     * a recursive path for a prefix via itself is never allowed: force
+     * a drop path-list. The entry still gets some path-list, so the
+     * bad path can be removed again later by the source.
+     */
+    if (!fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath))
+    {
+        *pl_flags &= ~FIB_PATH_LIST_FLAG_DROP;
+    }
+    else
+    {
+        *pl_flags |= FIB_PATH_LIST_FLAG_DROP;
+    }
+
+    if ((FIB_SOURCE_API == esrc->fes_src) ||
+        (FIB_SOURCE_CLI == esrc->fes_src))
+    {
+        if (!fib_path_is_attached(rpath))
+        {
+            esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_ATTACHED;
+        }
+        else
+        {
+            esrc->fes_entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
+        }
+    }
+
+    if (!fib_route_attached_cross_table(fib_entry, rpath))
+    {
+        esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_IMPORT;
+    }
+    else
+    {
+        esrc->fes_entry_flags |= FIB_ENTRY_FLAG_IMPORT;
+    }
+}
+
+/*
+ * fib_entry_src_path_ext_append
+ *
+ * append a path extension to the source's list. Paths without a valid
+ * MPLS label get no extension.
+ */
+static void
+fib_entry_src_path_ext_append (fib_entry_src_t *esrc,
+                               const fib_route_path_t *rpath)
+{
+    fib_path_ext_t *new_ext;
+
+    if (MPLS_LABEL_INVALID == rpath->frp_label)
+    {
+        return;
+    }
+
+    /* grow the vector by one and initialise the new tail element */
+    vec_add2(esrc->fes_path_exts, new_ext, 1);
+    fib_path_ext_init(new_ext, esrc->fes_pl, rpath);
+}
+
+/*
+ * fib_entry_src_path_ext_insert
+ *
+ * insert, sorted, a path extension into the source's list.
+ * It's not strictly necessary to sort the path extensions, since each
+ * extension has the path index to which it resolves. However, by being
+ * sorted the load-balance produced has a deterministic order, not an order
+ * based on the sequence of extension additions. this is a considerable benefit.
+ *
+ * Paths without a valid MPLS label get no extension.
+ */
+static void
+fib_entry_src_path_ext_insert (fib_entry_src_t *esrc,
+                               const fib_route_path_t *rpath)
+{
+    if (0 == vec_len(esrc->fes_path_exts))
+    {
+        /*
+         * empty list: nothing to sort against, a plain append will do.
+         * (a void function must not 'return' an expression.)
+         */
+        fib_entry_src_path_ext_append(esrc, rpath);
+        return;
+    }
+
+    if (MPLS_LABEL_INVALID != rpath->frp_label)
+    {
+        fib_path_ext_t path_ext;
+        int i = 0;
+
+        fib_path_ext_init(&path_ext, esrc->fes_pl, rpath);
+
+        /* skip every existing extension that sorts before the new path */
+        while (i < vec_len(esrc->fes_path_exts) &&
+               (fib_path_ext_cmp(&esrc->fes_path_exts[i], rpath) < 0))
+        {
+            i++;
+        }
+
+        vec_insert_elts(esrc->fes_path_exts, &path_ext, 1, i);
+    }
+}
+
+/*
+ * fib_entry_src_action_path_add
+ *
+ * Add one path on behalf of a source; the source is first added to the
+ * entry (with a drop DPO) if it is not yet present.
+ * Adding a source can result in a new fib_entry being created, which
+ * can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd
+ * @return the entry
+ */
+fib_entry_t*
+fib_entry_src_action_path_add (fib_entry_t *fib_entry,
+                               fib_source_t source,
+                               fib_entry_flag_t flags,
+                               const fib_route_path_t *rpath)
+{
+    fib_node_index_t entry_index, prev_pl;
+    fib_path_list_flags_t pl_flags;
+    fib_path_ext_t *ext;
+    fib_entry_src_t *esrc;
+
+    /*
+     * remember the index so the entry can be recovered after a realloc.
+     */
+    entry_index = fib_entry_get_index(fib_entry);
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == esrc)
+    {
+        fib_entry = fib_entry_src_action_add(
+                        fib_entry,
+                        source,
+                        flags,
+                        drop_dpo_get(
+                            fib_proto_to_dpo(
+                                fib_entry_get_proto(fib_entry))));
+        esrc = fib_entry_src_find(fib_entry, source, NULL);
+    }
+
+    /*
+     * the path-list is about to be modified. If it is shared, and hence
+     * not modifiable, the source ends up with a different path-list, so
+     * this entry must move its lock from the old one to the new one.
+     */
+    prev_pl = esrc->fes_pl;
+
+    ASSERT(NULL != fib_entry_src_vft[source].fesv_path_add);
+
+    pl_flags =
+        fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry));
+    fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc);
+
+    fib_entry_src_vft[source].fesv_path_add(esrc, fib_entry, pl_flags, rpath);
+    fib_entry = fib_entry_get(entry_index);
+
+    /*
+     * re-resolve the existing path-extensions against the new path-list
+     */
+    vec_foreach(ext, esrc->fes_path_exts)
+    {
+        fib_path_ext_resolve(ext, esrc->fes_pl);
+    }
+
+    /*
+     * a labelled path additionally needs a path extension of its own
+     */
+    fib_entry_src_path_ext_insert(esrc, rpath);
+
+    fib_path_list_lock(esrc->fes_pl);
+    fib_path_list_unlock(prev_pl);
+
+    return (fib_entry);
+}
+
+/*
+ * fib_entry_src_action_path_swap
+ *
+ * The source is providing new paths to replace the old ones; the source
+ * is first added to the entry (with a drop DPO) if it is not yet present.
+ * Adding a source can result in a new fib_entry being created, which
+ * can in turn mean the pool is realloc'd and thus the entry passed as
+ * an argument is also realloc'd
+ * @return the entry
+ */
+fib_entry_t*
+fib_entry_src_action_path_swap (fib_entry_t *fib_entry,
+                                fib_source_t source,
+                                fib_entry_flag_t flags,
+                                const fib_route_path_t *rpaths)
+{
+    fib_node_index_t entry_index, prev_pl;
+    fib_path_list_flags_t pl_flags;
+    const fib_route_path_t *each_path;
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    /*
+     * remember the index so the entry can be recovered after a realloc.
+     */
+    entry_index = fib_entry_get_index(fib_entry);
+
+    if (NULL == esrc)
+    {
+        fib_entry = fib_entry_src_action_add(
+                        fib_entry,
+                        source,
+                        flags,
+                        drop_dpo_get(
+                            fib_proto_to_dpo(
+                                fib_entry_get_proto(fib_entry))));
+        esrc = fib_entry_src_find(fib_entry, source, NULL);
+    }
+
+    /*
+     * the swap yields a different path-list (new, or an existing shared
+     * one), so this entry must move its lock from the old to the new.
+     */
+    prev_pl = esrc->fes_pl;
+
+    ASSERT(NULL != fib_entry_src_vft[source].fesv_path_swap);
+
+    pl_flags =
+        fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry));
+
+    vec_foreach(each_path, rpaths)
+    {
+        fib_entry_flags_update(fib_entry, each_path, &pl_flags, esrc);
+    }
+
+    fib_entry_src_vft[source].fesv_path_swap(esrc, fib_entry, pl_flags, rpaths);
+
+    /*
+     * discard all old extensions and rebuild them from the new paths
+     */
+    vec_free(esrc->fes_path_exts);
+
+    vec_foreach(each_path, rpaths)
+    {
+        fib_entry_src_path_ext_append(esrc, each_path);
+    }
+
+    fib_entry = fib_entry_get(entry_index);
+
+    fib_path_list_lock(esrc->fes_pl);
+    fib_path_list_unlock(prev_pl);
+
+    return (fib_entry);
+}
+
+/**
+ * Remove one path on behalf of a source that is already added.
+ *
+ * The matching path extension (if any) is deleted and the remaining ones
+ * are re-resolved against the source's resulting path-list.
+ *
+ * @return FIB_ENTRY_SRC_FLAG_ADDED while the source still has a
+ *         path-list; FIB_ENTRY_SRC_FLAG_NONE once its last path is gone,
+ *         in which case the source has also been removed from the entry.
+ */
+fib_entry_src_flag_t
+fib_entry_src_action_path_remove (fib_entry_t *fib_entry,
+                                  fib_source_t source,
+                                  const fib_route_path_t *rpath)
+{
+    fib_path_list_flags_t pl_flags;
+    fib_node_index_t prev_pl;
+    fib_path_ext_t *ext;
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    ASSERT(NULL != esrc);
+    ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED);
+
+    /*
+     * the path-list is about to be modified. If it is shared, and hence
+     * not modifiable, the source ends up with a different path-list, so
+     * this entry must move its lock appropriately.
+     */
+    prev_pl = esrc->fes_pl;
+
+    ASSERT(NULL != fib_entry_src_vft[source].fesv_path_remove);
+
+    pl_flags =
+        fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry));
+    fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc);
+
+    fib_entry_src_vft[source].fesv_path_remove(esrc, pl_flags, rpath);
+
+    /*
+     * drop the extension that belongs to the removed path, if there is one
+     */
+    vec_foreach(ext, esrc->fes_path_exts)
+    {
+        if (0 == fib_path_ext_cmp(ext, rpath))
+        {
+            /*
+             * vec_delete shifts the following elements down by one,
+             * which keeps the vector sorted.
+             */
+            vec_delete(esrc->fes_path_exts, 1, (ext - esrc->fes_path_exts));
+            break;
+        }
+    }
+
+    /*
+     * re-resolve what is left against the new path-list
+     */
+    vec_foreach(ext, esrc->fes_path_exts)
+    {
+        fib_path_ext_resolve(ext, esrc->fes_pl);
+    }
+
+    /*
+     * release the old path-list; lock the new one if there is one
+     */
+    fib_path_list_unlock(prev_pl);
+
+    if (FIB_NODE_INDEX_INVALID == esrc->fes_pl)
+    {
+        /*
+         * no more paths left from this source
+         */
+        fib_entry_src_action_remove(fib_entry, source);
+        return (FIB_ENTRY_SRC_FLAG_NONE);
+    }
+
+    fib_path_list_lock(esrc->fes_pl);
+    return (FIB_ENTRY_SRC_FLAG_ADDED);
+}
+
+/**
+ * Append the source-specific description of 'source' to the string 's'.
+ * Sources without a format hook leave 's' unchanged.
+ *
+ * @return the (possibly extended) string
+ */
+u8*
+fib_entry_src_format (fib_entry_t *fib_entry,
+                      fib_source_t source,
+                      u8* s)
+{
+    fib_entry_src_t *esrc;
+
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == fib_entry_src_vft[source].fesv_format)
+    {
+        return (s);
+    }
+    return (fib_entry_src_vft[source].fesv_format(esrc, s));
+}
+
+/**
+ * Get the adjacency that the given source's path-list provides for the
+ * entry's default chain type.
+ *
+ * @return ADJ_INDEX_INVALID if the entry index is invalid, the source is
+ *         not present on the entry, or the source has no path-list.
+ */
+adj_index_t
+fib_entry_get_adj_for_source (fib_node_index_t fib_entry_index,
+                              fib_source_t source)
+{
+    fib_entry_t *fib_entry;
+    fib_entry_src_t *esrc;
+
+    if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+    {
+        return (ADJ_INDEX_INVALID);
+    }
+
+    fib_entry = fib_entry_get(fib_entry_index);
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == esrc)
+    {
+        return (ADJ_INDEX_INVALID);
+    }
+    if (FIB_NODE_INDEX_INVALID == esrc->fes_pl)
+    {
+        return (ADJ_INDEX_INVALID);
+    }
+
+    return (fib_path_list_get_adj(
+                esrc->fes_pl,
+                fib_entry_get_default_chain_type(fib_entry)));
+}
+
+/**
+ * Have the given source's path-list contribute forwarding, for the
+ * entry's default chain type, into 'dpo'.
+ *
+ * @return 0 if the entry index is invalid, the source is not present on
+ *         the entry, or the source has no path-list ('dpo' is left
+ *         untouched in these cases); otherwise whether the resulting
+ *         'dpo' is valid.
+ */
+const int
+fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index,
+                              fib_source_t source,
+                              dpo_id_t *dpo)
+{
+    fib_entry_t *fib_entry;
+    fib_entry_src_t *esrc;
+
+    if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+    {
+        return (0);
+    }
+
+    fib_entry = fib_entry_get(fib_entry_index);
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == esrc)
+    {
+        return (0);
+    }
+    if (FIB_NODE_INDEX_INVALID == esrc->fes_pl)
+    {
+        return (0);
+    }
+
+    fib_path_list_contribute_forwarding(
+        esrc->fes_pl,
+        fib_entry_get_default_chain_type(fib_entry),
+        dpo);
+
+    return (dpo_id_is_valid(dpo));
+}
+
+/**
+ * Get the entry's flags: those of the first source in the entry's source
+ * vector, or FIB_ENTRY_FLAG_NONE when the entry has no sources.
+ */
+fib_entry_flag_t
+fib_entry_get_flags_i (const fib_entry_t *fib_entry)
+{
+    fib_entry_src_t *best;
+
+    if (0 == vec_len(fib_entry->fe_srcs))
+    {
+        return (FIB_ENTRY_FLAG_NONE);
+    }
+
+    /*
+     * the vector of sources is deliberately arranged in priority order,
+     * so element 0 is the one that counts.
+     */
+    best = vec_elt_at_index(fib_entry->fe_srcs, 0);
+
+    return (best->fes_entry_flags);
+}
+
+/**
+ * Hand opaque, source-specific data to a source on the entry.
+ * Silently does nothing if the source is not present on the entry or
+ * does not provide a set-data hook.
+ */
+void
+fib_entry_set_source_data (fib_node_index_t fib_entry_index,
+                           fib_source_t source,
+                           const void *data)
+{
+    fib_entry_t *fib_entry;
+    fib_entry_src_t *esrc;
+
+    fib_entry = fib_entry_get(fib_entry_index);
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == esrc)
+    {
+        return;
+    }
+    if (NULL == fib_entry_src_vft[source].fesv_set_data)
+    {
+        return;
+    }
+
+    fib_entry_src_vft[source].fesv_set_data(esrc, fib_entry, data);
+}
+
+/**
+ * Fetch opaque, source-specific data from a source on the entry.
+ *
+ * @return NULL if the source is not present on the entry or does not
+ *         provide a get-data hook; otherwise whatever the hook returns.
+ */
+const void*
+fib_entry_get_source_data (fib_node_index_t fib_entry_index,
+                           fib_source_t source)
+{
+    fib_entry_t *fib_entry;
+    fib_entry_src_t *esrc;
+
+    fib_entry = fib_entry_get(fib_entry_index);
+    esrc = fib_entry_src_find(fib_entry, source, NULL);
+
+    if (NULL == esrc)
+    {
+        return (NULL);
+    }
+    if (NULL == fib_entry_src_vft[source].fesv_get_data)
+    {
+        return (NULL);
+    }
+
+    return (fib_entry_src_vft[source].fesv_get_data(esrc, fib_entry));
+}
+
+/**
+ * Module initialisation: invoke the per-source registration function of
+ * each source type listed below.
+ * NOTE(review): whether the call order matters cannot be told from here;
+ * keep it as is unless confirmed.
+ */
+void
+fib_entry_src_module_init (void)
+{
+ fib_entry_src_rr_register();
+ fib_entry_src_interface_register();
+ fib_entry_src_default_route_register();
+ fib_entry_src_special_register();
+ fib_entry_src_api_register();
+ fib_entry_src_adj_register();
+ fib_entry_src_mpls_register();
+ fib_entry_src_lisp_register();
+}
diff --git a/vnet/vnet/fib/fib_entry_src.h b/vnet/vnet/fib/fib_entry_src.h
new file mode 100644
index 00000000000..d70aabc9c00
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_ENTRY_SRC_H__
+#define __FIB_ENTRY_SRC_H__
+
+#include "fib_entry.h"
+#include "fib_path_list.h"
+#include "fib_internal.h"
+
+/**
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define FIB_ENTRY_DBG(_e, _fmt, _args...) \
+{ \
+ u8*__tmp = NULL; \
+ __tmp = format(__tmp, "e:[%d:%U", \
+ fib_entry_get_index(_e), \
+ format_ip46_address, \
+ &_e->fe_prefix.fp_addr, \
+ IP46_TYPE_ANY); \
+ __tmp = format(__tmp, "/%d]:", \
+ _e->fe_prefix.fp_len); \
+ __tmp = format(__tmp, _fmt, ##_args); \
+ clib_warning("%s", __tmp); \
+ vec_free(__tmp); \
+}
+#else
+#define FIB_ENTRY_DBG(_e, _fmt, _args...)
+#endif
+
+/**
+ * Source initialisation Function
+ */
+typedef void (*fib_entry_src_init_t)(fib_entry_src_t *src);
+
+/**
+ * Source deinitialisation Function
+ */
+typedef void (*fib_entry_src_deinit_t)(fib_entry_src_t *src);
+
+/**
+ * Source activation. Called when the source is the new best source on the entry.
+ * Return non-zero if the entry can now install, 0 otherwise
+ */
+typedef int (*fib_entry_src_activate_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Source Add.
+ * Called when the source is added to the entry
+ */
+typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto,
+ const dpo_id_t *dpo);
+
+/**
+ * Source Remove.
+ * Called when the source is removed from the entry.
+ */
+typedef void (*fib_entry_src_remove_t)(fib_entry_src_t *src);
+
+/**
+ * Result from a cover update/change
+ */
+typedef struct fib_entry_src_cover_res_t_ {
+ /** non-zero if the entry may install after the cover event, 0 otherwise */
+ u16 install;
+ /** reason flags accompanying the result - presumably for a subsequent
+ * back-walk; TODO confirm against the consumer of this struct */
+ fib_node_bw_reason_flag_t bw_reason;
+} fib_entry_src_cover_res_t;
+
+/**
+ * Cover changed. the source should re-evaluate its cover.
+ */
+typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_change_t)(
+ fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Cover updated. The cover the source has, has updated (i.e. its forwarding)
+ * the source may need to re-evaluate.
+ */
+typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_update_t)(
+ fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Forwarding updated. Notification that the forwarding information for the
+ * entry has been updated. This notification is sent to all sources, not just
+ * the active best.
+ */
+typedef void (*fib_entry_src_fwd_update_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_source_t best_source);
+
+/**
+ * Installed. Notification that the source is now installed as
+ * the entry's forwarding source.
+ */
+typedef void (*fib_entry_src_installed_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * format.
+ * Append a source-specific description of 'src' to the string 's' and
+ * return the resulting string.
+ */
+typedef u8* (*fib_entry_src_format_t)(fib_entry_src_t *src,
+ u8* s);
+
+/**
+ * Source path add
+ * the source is adding a new path
+ */
+typedef void (*fib_entry_src_path_add_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *path);
+
+/**
+ * Source path remove
+ * the source is removing a path
+ */
+typedef void (*fib_entry_src_path_remove_t)(fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *path);
+
+/**
+ * Source path replace/swap
+ * the source is providing a new set of paths
+ */
+typedef void (*fib_entry_src_path_swap_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *path);
+
+/**
+ * Set source specific opaque data
+ */
+typedef void (*fib_entry_src_set_data_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ const void *data);
+
+/**
+ * Get source specific opaque data
+ */
+typedef const void* (*fib_entry_src_get_data_t)(fib_entry_src_t *src,
+ const fib_entry_t *fib_entry);
+
+/**
+ * Virtual function table each FIB entry source will register.
+ *
+ * The source actions check most hooks against NULL before calling them,
+ * so a source may leave those unset. The three path hooks are asserted
+ * to be non-NULL by the corresponding path action, so a source whose
+ * paths are added, removed or swapped must provide the matching hook.
+ */
+typedef struct fib_entry_src_vft_t_ {
+ fib_entry_src_init_t fesv_init;
+ fib_entry_src_deinit_t fesv_deinit;
+ /* optional; when absent the entry is assumed free to install */
+ fib_entry_src_activate_t fesv_activate;
+ /* optional */
+ fib_entry_src_deactivate_t fesv_deactivate;
+ /* optional; called on the source's 0->1 reference transition */
+ fib_entry_src_add_t fesv_add;
+ /* optional; called on the source's 1->0 reference transition */
+ fib_entry_src_remove_t fesv_remove;
+ /* required by the path-swap action */
+ fib_entry_src_path_swap_t fesv_path_swap;
+ /* required by the path-add action */
+ fib_entry_src_path_add_t fesv_path_add;
+ /* required by the path-remove action */
+ fib_entry_src_path_remove_t fesv_path_remove;
+ fib_entry_src_cover_change_t fesv_cover_change;
+ fib_entry_src_cover_update_t fesv_cover_update;
+ /* optional; without it the source adds nothing to formatted output */
+ fib_entry_src_format_t fesv_format;
+ /* optional */
+ fib_entry_src_installed_t fesv_installed;
+ /* optional; invoked on every source of the entry, not just the best */
+ fib_entry_src_fwd_update_t fesv_fwd_update;
+ /* optional; without it get-source-data returns NULL */
+ fib_entry_src_get_data_t fesv_get_data;
+ /* optional; without it set-source-data does nothing */
+ fib_entry_src_set_data_t fesv_set_data;
+} fib_entry_src_vft_t;
+
+/**
+ * Iterate over the sources of an entry that carry the ADDED flag.
+ *
+ * @param _entry  the entry whose source vector is walked
+ * @param _src    iteration variable (fib_entry_src_t *); points at the
+ *                current source while 'action' runs
+ * @param _source lvalue that is set to the current source's type before
+ *                'action' runs
+ * @param action  statement(s) executed once per added source
+ *
+ * The macro arguments are parenthesised where they are used in
+ * expressions so that non-trivial argument expressions expand safely.
+ */
+#define FOR_EACH_SRC_ADDED(_entry, _src, _source, action)           \
+{                                                                   \
+    vec_foreach(_src, (_entry)->fe_srcs)                            \
+    {                                                               \
+        if ((_src)->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED) {         \
+            (_source) = (_src)->fes_src;                            \
+            do {                                                    \
+                action;                                             \
+            } while(0);                                             \
+        }                                                           \
+    }                                                               \
+}
+
+/* Append the source's description to 's'; returns the resulting string */
+extern u8* fib_entry_src_format(fib_entry_t *entry,
+ fib_source_t source,
+ u8* s);
+
+/* Register the virtual function table for a source type */
+extern void fib_entry_src_register(fib_source_t source,
+ const fib_entry_src_vft_t *vft);
+
+extern void fib_entry_src_action_init(fib_entry_t *entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_deinit(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern fib_entry_src_cover_res_t fib_entry_src_action_cover_change(
+ fib_entry_t *entry,
+ fib_source_t source);
+
+extern fib_entry_src_cover_res_t fib_entry_src_action_cover_update(
+ fib_entry_t *fib_entry,
+ fib_source_t source);
+
+/* The source must be added and not yet active */
+extern void fib_entry_src_action_activate(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+/* The source must be active */
+extern void fib_entry_src_action_deactivate(fib_entry_t *fib_entry,
+ fib_source_t source);
+/* The source must be active */
+extern void fib_entry_src_action_reactivate(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+/*
+ * The action_add, action_path_add and action_path_swap functions return
+ * the entry, which may have been relocated by a pool realloc; callers
+ * must use the returned pointer afterwards.
+ */
+extern fib_entry_t* fib_entry_src_action_add(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const dpo_id_t *dpo);
+
+extern fib_entry_src_flag_t fib_entry_src_action_remove(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_install(fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern void fib_entry_src_action_uninstall(fib_entry_t *fib_entry);
+
+extern fib_entry_t* fib_entry_src_action_path_add(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *path);
+
+extern fib_entry_t* fib_entry_src_action_path_swap(fib_entry_t *fib_entry,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *path);
+
+extern fib_entry_src_flag_t fib_entry_src_action_path_remove(fib_entry_t *fib_entry,
+ fib_source_t source,
+ const fib_route_path_t *path);
+
+extern void fib_entry_src_action_installed(const fib_entry_t *fib_entry,
+ fib_source_t source);
+
+extern fib_forward_chain_type_t fib_entry_get_default_chain_type(
+ const fib_entry_t *fib_entry);
+/* Flags of the first (highest priority) source, or NONE without sources */
+extern fib_entry_flag_t fib_entry_get_flags_i(const fib_entry_t *fib_entry);
+extern fib_path_list_flags_t fib_entry_src_flags_2_path_list_flags(
+ fib_entry_flag_t eflags);
+
+extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry,
+ const fib_entry_src_t *esrc,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo_lb);
+
+
+/*
+ * Per-source registration. declared here so we save a separate .h file for each
+ * NOTE(review): fib_entry_src_default_register is declared here but is not
+ * among the calls made by fib_entry_src_module_init - confirm it is defined
+ * and needed.
+ */
+extern void fib_entry_src_default_register(void);
+extern void fib_entry_src_rr_register(void);
+extern void fib_entry_src_interface_register(void);
+extern void fib_entry_src_default_route_register(void);
+extern void fib_entry_src_special_register(void);
+extern void fib_entry_src_api_register(void);
+extern void fib_entry_src_adj_register(void);
+extern void fib_entry_src_mpls_register(void);
+extern void fib_entry_src_lisp_register(void);
+
+extern void fib_entry_src_module_init(void);
+
+#endif
diff --git a/vnet/vnet/fib/fib_entry_src_adj.c b/vnet/vnet/fib/fib_entry_src_adj.c
new file mode 100644
index 00000000000..64f82a73e07
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_adj.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+#include "fib_table.h"
+#include "fib_entry_cover.h"
+#include "fib_attached_export.h"
+
+/**
+ * Source initialisation function: the adj source starts out with no
+ * cover and no sibling link on a cover.
+ */
+static void
+fib_entry_src_adj_init (fib_entry_src_t *src)
+{
+    src->adj.fesa_sibling = FIB_NODE_INDEX_INVALID;
+    src->adj.fesa_cover = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * Path swap for the adj source: create a path-list from the given paths
+ * and flags and make it the source's path-list.
+ */
+static void
+fib_entry_src_adj_path_swap (fib_entry_src_t *src,
+                             const fib_entry_t *entry,
+                             fib_path_list_flags_t pl_flags,
+                             const fib_route_path_t *paths)
+{
+    fib_node_index_t new_pl;
+
+    new_pl = fib_path_list_create(pl_flags, paths);
+    src->fes_pl = new_pl;
+}
+
+/**
+ * Source remove: forget the source's path-list index. No unlock is done
+ * here.
+ */
+static void
+fib_entry_src_adj_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+
+/*
+ * Source activate.
+ * Called when the source is the new best source on the entry.
+ *
+ * Finds the entry's covering prefix, starts tracking it, and reports
+ * whether the entry may install: non-zero only if the cover is attached.
+ */
+static int
+fib_entry_src_adj_activate (fib_entry_src_t *src,
+                            const fib_entry_t *fib_entry)
+{
+    fib_node_index_t self_index;
+    fib_entry_t *cover;
+
+    self_index = fib_entry_get_index(fib_entry);
+
+    /*
+     * look up the covering prefix and become a dependent of it.
+     * there should always be a cover, though it may be the default route.
+     */
+    src->adj.fesa_cover =
+        fib_table_get_less_specific(fib_entry->fe_fib_index,
+                                    &fib_entry->fe_prefix);
+
+    ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+    ASSERT(self_index != src->adj.fesa_cover);
+
+    cover = fib_entry_get(src->adj.fesa_cover);
+
+    ASSERT(cover != fib_entry);
+
+    src->adj.fesa_sibling = fib_entry_cover_track(cover, self_index);
+
+    /*
+     * if the cover is attached then this adj source entry can install,
+     * via the adj. otherwise install a drop.
+     * This prevents ARP/ND entries on interface X that do not belong
+     * on X's subnet from being added to the FIB. To do so would allow
+     * nefarious gratuitous ARP requests to attract traffic to the sender.
+     *
+     * and yes, this really does mean attached and not connected.
+     * this abomination;
+     *   ip route add 10.0.0.0/24 Eth0
+     * is attached. and we want adj-fibs to install on Eth0.
+     */
+    return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover));
+}
+
+/*
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry.
+ *
+ * Stops tracking the cover, tells the cover the entry no longer needs
+ * exporting, and forgets the cover.
+ */
+static void
+fib_entry_src_adj_deactivate (fib_entry_src_t *src,
+                              const fib_entry_t *fib_entry)
+{
+    fib_entry_t *cover_entry;
+
+    ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+
+    cover_entry = fib_entry_get(src->adj.fesa_cover);
+
+    /*
+     * remove the dependency on the covering entry
+     */
+    fib_entry_cover_untrack(cover_entry, src->adj.fesa_sibling);
+
+    /*
+     * tell the cover this entry no longer needs exporting
+     */
+    fib_attached_export_covered_removed(cover_entry,
+                                        fib_entry_get_index(fib_entry));
+
+    src->adj.fesa_cover = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * Format hook: append "cover:<index>" (the index of the source's covering
+ * entry) to 's' and return the resulting string.
+ */
+static u8*
+fib_entry_src_adj_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "cover:%d", src->adj.fesa_cover));
+}
+
+/**
+ * Installed hook: the adj source is now the entry's forwarding source,
+ * so notify the cover that this covered entry was added for export.
+ */
+static void
+fib_entry_src_adj_installed (fib_entry_src_t *src,
+                             const fib_entry_t *fib_entry)
+{
+    fib_entry_t *cover_entry;
+
+    ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+
+    cover_entry = fib_entry_get(src->adj.fesa_cover);
+
+    fib_attached_export_covered_added(cover_entry,
+                                      fib_entry_get_index(fib_entry));
+}
+
+/**
+ * Cover-change hook: drop the dependency on the old cover and establish
+ * one on the new cover by deactivating and re-activating the source.
+ *
+ * @return install is the activate result; bw_reason is EVALUATE when the
+ *         entry can install and NONE otherwise.
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_adj_cover_change (fib_entry_src_t *src,
+                                const fib_entry_t *fib_entry)
+{
+    fib_entry_src_cover_res_t result = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+
+    fib_entry_src_adj_deactivate(src, fib_entry);
+    result.install = fib_entry_src_adj_activate(src, fib_entry);
+
+    /*
+     * only an ADJ fib that can install asks for an evaluation
+     */
+    result.bw_reason = (result.install ?
+                        FIB_NODE_BW_REASON_FLAG_EVALUATE :
+                        FIB_NODE_BW_REASON_FLAG_NONE);
+
+    return (result);
+}
+
+/*
+ * fib_entry_src_adj_cover_update
+ *
+ * The cover has updated, i.e. its forwarding or flags have changed.
+ * Don't deactivate/activate here, since this prefix is updated during
+ * the cover's walk; just re-evaluate whether the entry may install,
+ * which is the case only while the cover is attached.
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_adj_cover_update (fib_entry_src_t *src,
+                                const fib_entry_t *fib_entry)
+{
+    fib_entry_src_cover_res_t result = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+    fib_entry_t *cover_entry;
+
+    ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover);
+
+    cover_entry = fib_entry_get(src->adj.fesa_cover);
+    result.install =
+        (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover_entry));
+
+    return (result);
+}
+
+/**
+ * Virtual function table for the adj source. Hooks not listed here are
+ * left NULL by the designated initialiser.
+ */
+static const fib_entry_src_vft_t adj_src_vft = {
+    .fesv_init = fib_entry_src_adj_init,
+    .fesv_path_swap = fib_entry_src_adj_path_swap,
+    .fesv_remove = fib_entry_src_adj_remove,
+    .fesv_activate = fib_entry_src_adj_activate,
+    .fesv_deactivate = fib_entry_src_adj_deactivate,
+    .fesv_format = fib_entry_src_adj_format,
+    .fesv_installed = fib_entry_src_adj_installed,
+    .fesv_cover_change = fib_entry_src_adj_cover_change,
+    .fesv_cover_update = fib_entry_src_adj_cover_update,
+};
+
+/**
+ * Register the adj source's virtual function table under FIB_SOURCE_ADJ.
+ */
+void
+fib_entry_src_adj_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_ADJ, &adj_src_vft);
+}
diff --git a/vnet/vnet/fib/fib_entry_src_api.c b/vnet/vnet/fib/fib_entry_src_api.c
new file mode 100644
index 00000000000..edc8a47bc17
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_api.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_api_init (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_api_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_api_path_swap (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+}
+
+static void
+fib_entry_src_api_path_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ if (FIB_NODE_INDEX_INVALID == src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths);
+ }
+ else
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_add(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+ }
+}
+
+static void
+fib_entry_src_api_path_remove (fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_remove(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+ }
+}
+
+static void
+fib_entry_src_api_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto,
+ const dpo_id_t *dpo)
+{
+ if (FIB_ENTRY_FLAG_NONE != flags)
+ {
+ src->fes_pl = fib_path_list_create_special(
+ proto,
+ fib_entry_src_flags_2_path_list_flags(flags),
+ dpo);
+ }
+}
+
+static void
+fib_entry_src_api_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+const static fib_entry_src_vft_t api_src_vft = {
+ .fesv_init = fib_entry_src_api_init,
+ .fesv_deinit = fib_entry_src_api_deinit,
+ .fesv_add = fib_entry_src_api_add,
+ .fesv_remove = fib_entry_src_api_remove,
+ .fesv_path_add = fib_entry_src_api_path_add,
+ .fesv_path_swap = fib_entry_src_api_path_swap,
+ .fesv_path_remove = fib_entry_src_api_path_remove,
+};
+
+void
+fib_entry_src_api_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_PLUGIN_HI, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_API, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_CLI, &api_src_vft);
+ fib_entry_src_register(FIB_SOURCE_DHCP, &api_src_vft);
+}
diff --git a/vnet/vnet/fib/fib_entry_src_default.c b/vnet/vnet/fib/fib_entry_src_default.c
new file mode 100644
index 00000000000..9846cf56e64
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_default.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_default_init (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_default_deinit (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Cover change notification.
+ *
+ * Empty, like the other hooks in this file, and not referenced by
+ * default_src_vft below.
+ * NOTE(review): the name lacks the "_default_" infix its siblings
+ * use - confirm the intended name before wiring it into the vft.
+ */
+static void
+fib_entry_src_cover_change (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_default_path_add (fib_entry_src_t *src,
+ fib_protocol_t proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight)
+{
+}
+
+static void
+fib_entry_src_default_path_remove (fib_entry_src_t *src,
+ fib_protocol_t proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight)
+{
+}
+
+
+/*
+ * Source activate.
+ * Called when the source is the new best source on the entry
+ */
+static void
+fib_entry_src_default_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+}
+
+/*
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_default_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+}
+
+static void
+fib_entry_src_default_add (fib_entry_src_t *src,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto)
+{
+}
+
+static void
+fib_entry_src_default_remove (fib_entry_src_t *src)
+{
+}
+
+const static fib_entry_src_vft_t default_src_vft = {
+ .fesv_init = fib_entry_src_default_init,
+ .fesv_deinit = fib_entry_src_default_deinit,
+ .fesv_add = fib_entry_src_default_add,
+ .fesv_remove = fib_entry_src_default_remove,
+ .fesv_path_add = fib_entry_src_default_path_add,
+ .fesv_path_remove = fib_entry_src_default_path_remove,
+ .fesv_activate = fib_entry_src_default_activate,
+ .fesv_deactivate = fib_entry_src_default_deactivate,
+};
+
+void
+fib_entry_src_default_register (void)
+{
+ fib_source_t source;
+
+ FOR_EACH_FIB_SOURCE(source) {
+ fib_entry_src_register(source, &default_src_vft);
+ }
+}
diff --git a/vnet/vnet/fib/fib_entry_src_default_route.c b/vnet/vnet/fib/fib_entry_src_default_route.c
new file mode 100644
index 00000000000..8615f72dc46
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_default_route.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_default_route_init (fib_entry_src_t *src)
+{
+ src->fes_flags = FIB_ENTRY_FLAG_NONE;
+}
+
+static void
+fib_entry_src_default_route_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_default_route_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl = fib_path_list_create_special(proto,
+ FIB_PATH_LIST_FLAG_DROP,
+ dpo);
+}
+
+const static fib_entry_src_vft_t interface_src_vft = {
+ .fesv_init = fib_entry_src_default_route_init,
+ .fesv_add = fib_entry_src_default_route_add,
+ .fesv_remove = fib_entry_src_default_route_remove,
+};
+
+void
+fib_entry_src_default_route_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_DEFAULT_ROUTE, &interface_src_vft);
+}
+
+
diff --git a/vnet/vnet/fib/fib_entry_src_interface.c b/vnet/vnet/fib/fib_entry_src_interface.c
new file mode 100644
index 00000000000..2fb61677568
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_interface.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+#include "fib_internal.h"
+#include "fib_table.h"
+#include "fib_entry_cover.h"
+#include "fib_attached_export.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_interface_init (fib_entry_src_t *src)
+{
+ src->interface.fesi_cover = FIB_NODE_INDEX_INVALID;
+ src->interface.fesi_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * Path-swap: build the path-list from paths and, for a non-local entry
+ * resolving via a glean adjacency, record the entry's address in it.
+ */
+static void
+fib_entry_src_interface_path_swap (fib_entry_src_t *src,
+                                   const fib_entry_t *entry,
+                                   fib_path_list_flags_t pl_flags,
+                                   const fib_route_path_t *paths)
+{
+    ip_adjacency_t *adj;
+
+    src->fes_pl = fib_path_list_create(pl_flags, paths);
+
+    /*
+     * this is a hack to get the entry's prefix into the glean adjacency
+     * so that it is available for fast retrieval in the switch path.
+     */
+    if (!(FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags))
+    {
+        adj = adj_get(fib_path_list_get_adj(
+            src->fes_pl, fib_entry_get_default_chain_type(entry)));
+
+        if (IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index)
+        {
+            /* the connected prefix links to a glean on a non-p2p interface */
+            adj->sub_type.glean.receive_addr = entry->fe_prefix.fp_addr;
+        }
+    }
+}
+
+/*
+ * Source activate.
+ * Called when the source is the new best source on the entry
+ */
+static int
+fib_entry_src_interface_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)
+ {
+ /*
+ * Track the covering attached/connected cover. This is so that
+ * during an attached export of the cover, this local prefix is
+ * also exported
+ */
+ src->interface.fesi_cover =
+ fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->interface.fesi_cover);
+
+ cover = fib_entry_get(src->interface.fesi_cover);
+
+ src->interface.fesi_sibling =
+ fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
+ }
+
+ return (!0);
+}
+
+
+/*
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_interface_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * remove the dependency on the covering entry, if one is tracked
+ */
+ if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover)
+ {
+ cover = fib_entry_get(src->interface.fesi_cover);
+
+ fib_entry_cover_untrack(cover, src->interface.fesi_sibling);
+
+ src->interface.fesi_cover = FIB_NODE_INDEX_INVALID;
+ }
+}
+
+/**
+ * Cover-change hook: re-track the cover if it is no longer our less-specific.
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_interface_cover_change (fib_entry_src_t *src,
+                                      const fib_entry_t *fib_entry)
+{
+    fib_entry_src_cover_res_t res = {
+        .install = !0,
+        .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+    };
+    if (FIB_NODE_INDEX_INVALID == src->interface.fesi_cover)
+    {
+        /* not tracking the cover. surprised we got poked? */
+        return (res);
+    }
+
+    /*
+     * called when a more specific entry is inserted beneath this entry's
+     * cover; that entry need not cover this one, so re-check the cover.
+     */
+    if (src->interface.fesi_cover !=
+        fib_table_get_less_specific(fib_entry->fe_fib_index,
+                                    &fib_entry->fe_prefix))
+    {
+        fib_entry_src_interface_deactivate(src, fib_entry);
+        fib_entry_src_interface_activate(src, fib_entry);
+    }
+    return (res);
+}
+
+static void
+fib_entry_src_interface_installed (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ /*
+ * The interface source now rules! poke our cover to get exported
+ */
+ fib_entry_t *cover;
+
+ if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover)
+ {
+ cover = fib_entry_get(src->interface.fesi_cover);
+
+ fib_attached_export_covered_added(cover,
+ fib_entry_get_index(fib_entry));
+ }
+}
+
+static u8*
+fib_entry_src_interface_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "cover:%d", src->interface.fesi_cover));
+}
+
+const static fib_entry_src_vft_t interface_src_vft = {
+ .fesv_init = fib_entry_src_interface_init,
+ .fesv_path_swap = fib_entry_src_interface_path_swap,
+ .fesv_activate = fib_entry_src_interface_activate,
+ .fesv_deactivate = fib_entry_src_interface_deactivate,
+ .fesv_format = fib_entry_src_interface_format,
+ .fesv_installed = fib_entry_src_interface_installed,
+ .fesv_cover_change = fib_entry_src_interface_cover_change,
+ /*
+ * not concerned about updates to the cover. the cover will
+ * decide to export or not
+ */
+};
+
+void
+fib_entry_src_interface_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_INTERFACE, &interface_src_vft);
+}
diff --git a/vnet/vnet/fib/fib_entry_src_lisp.c b/vnet/vnet/fib/fib_entry_src_lisp.c
new file mode 100644
index 00000000000..116c492994b
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_lisp.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+#include "fib_path_list.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_lisp_init (fib_entry_src_t *src)
+{
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_lisp_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_lisp_path_swap (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+}
+
+static void
+fib_entry_src_lisp_path_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ if (FIB_NODE_INDEX_INVALID == src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths);
+ }
+ else
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_add(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+ }
+}
+
+static void
+fib_entry_src_lisp_path_remove (fib_entry_src_t *src,
+ fib_path_list_flags_t pl_flags,
+ const fib_route_path_t *paths)
+{
+ if (FIB_NODE_INDEX_INVALID != src->fes_pl)
+ {
+ src->fes_pl =
+ fib_path_list_copy_and_path_remove(src->fes_pl,
+ (FIB_PATH_LIST_FLAG_SHARED | pl_flags),
+ paths);
+ }
+}
+
+/* Add: build a special path-list from the (converted) entry flags, if any. */
+static void
+fib_entry_src_lisp_add (fib_entry_src_t *src, const fib_entry_t *entry,
+                        fib_entry_flag_t flags, fib_protocol_t proto,
+                        const dpo_id_t *dpo)
+{
+    if (FIB_ENTRY_FLAG_NONE != flags)
+    {
+        src->fes_pl = fib_path_list_create_special(
+            proto, fib_entry_src_flags_2_path_list_flags(flags), dpo);
+    }
+}
+
+static void
+fib_entry_src_lisp_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_lisp_set_data (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ const void *data)
+{
+ src->lisp.fesl_fib_index = *(u32*)data;
+}
+
+static const void*
+fib_entry_src_lisp_get_data (fib_entry_src_t *src,
+ const fib_entry_t *entry)
+{
+ return (&(src->lisp.fesl_fib_index));
+}
+
+const static fib_entry_src_vft_t api_src_vft = {
+ .fesv_init = fib_entry_src_lisp_init,
+ .fesv_deinit = fib_entry_src_lisp_deinit,
+ .fesv_add = fib_entry_src_lisp_add,
+ .fesv_remove = fib_entry_src_lisp_remove,
+ .fesv_path_add = fib_entry_src_lisp_path_add,
+ .fesv_path_swap = fib_entry_src_lisp_path_swap,
+ .fesv_path_remove = fib_entry_src_lisp_path_remove,
+ .fesv_set_data = fib_entry_src_lisp_set_data,
+ .fesv_get_data = fib_entry_src_lisp_get_data,
+};
+
+void
+fib_entry_src_lisp_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_LISP, &api_src_vft);
+}
diff --git a/vnet/vnet/fib/fib_entry_src_mpls.c b/vnet/vnet/fib/fib_entry_src_mpls.c
new file mode 100644
index 00000000000..5145c10977f
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_mpls.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/fib/mpls_fib.h>
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_mpls_init (fib_entry_src_t *src)
+{
+ mpls_eos_bit_t eos;
+
+ src->fes_flags = FIB_ENTRY_FLAG_NONE;
+ src->mpls.fesm_label = MPLS_LABEL_INVALID;
+
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ src->mpls.fesm_lfes[eos] = FIB_NODE_INDEX_INVALID;
+ }
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_mpls_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_mpls_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+ src->mpls.fesm_label = MPLS_LABEL_INVALID;
+}
+
+static void
+fib_entry_src_mpls_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl =
+ fib_path_list_create_special(proto,
+ FIB_PATH_LIST_FLAG_DROP,
+ drop_dpo_get(fib_proto_to_dpo(proto)));
+}
+
+static void
+fib_entry_src_mpls_fwd_update (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ fib_source_t best_source)
+{
+ dpo_id_t dpo = DPO_NULL;
+ mpls_eos_bit_t eos;
+
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ fib_entry_contribute_forwarding(fib_entry_get_index(fib_entry),
+ (eos ?
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS :
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS),
+ &dpo);
+
+ fib_table_entry_special_dpo_update(src->mpls.fesm_lfes[eos],
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ }
+ dpo_reset(&dpo);
+}
+
+static void
+fib_entry_src_mpls_set_data (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ const void *data)
+{
+ dpo_proto_t payload_proto;
+ fib_node_index_t fei;
+ mpls_label_t label;
+ mpls_eos_bit_t eos;
+
+ /*
+ * post MPLS table alloc and the possible re-alloc of fib entries
+ * the entry pointer will no longer be valid. so save its index
+ */
+ payload_proto = entry->fe_prefix.fp_proto;
+ fei = fib_entry_get_index(entry);
+ label = *(mpls_label_t*)data;
+
+ if (MPLS_LABEL_INVALID == label)
+ {
+ /*
+ * removing the local label
+ */
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ fib_table_entry_delete_index(src->mpls.fesm_lfes[eos],
+ FIB_SOURCE_SPECIAL);
+ }
+ fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS);
+ src->mpls.fesm_label = label;
+ }
+ else
+ {
+ fib_prefix_t prefix = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = label,
+ };
+ fib_node_index_t fib_index;
+ dpo_id_t dpo = DPO_NULL;
+
+ /*
+ * adding a new local label. make sure the MPLS fib exists.
+ */
+ if (MPLS_LABEL_INVALID == src->mpls.fesm_label)
+ {
+ fib_index =
+ fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID);
+ }
+ else
+ {
+ fib_index = mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID);
+ }
+
+ src->mpls.fesm_label = label;
+
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ prefix.fp_eos = eos;
+ prefix.fp_payload_proto = fib_proto_to_dpo(payload_proto);
+
+ fib_entry_contribute_forwarding(fei,
+ (eos ?
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS :
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS),
+ &dpo);
+ src->mpls.fesm_lfes[eos] =
+ fib_table_entry_special_dpo_add(fib_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo);
+ }
+ }
+}
+
+static const void *
+fib_entry_src_mpls_get_data (fib_entry_src_t *src,
+ const fib_entry_t *entry)
+{
+ return (&(src->mpls.fesm_label));
+}
+
+static u8*
+fib_entry_src_mpls_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "MPLS local-label:%d", src->mpls.fesm_label));
+}
+
+const static fib_entry_src_vft_t mpls_src_vft = {
+ .fesv_init = fib_entry_src_mpls_init,
+ .fesv_deinit = fib_entry_src_mpls_deinit,
+ .fesv_add = fib_entry_src_mpls_add,
+ .fesv_remove = fib_entry_src_mpls_remove,
+ .fesv_format = fib_entry_src_mpls_format,
+ .fesv_fwd_update = fib_entry_src_mpls_fwd_update,
+ .fesv_set_data = fib_entry_src_mpls_set_data,
+ .fesv_get_data = fib_entry_src_mpls_get_data,
+};
+
+void
+fib_entry_src_mpls_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_MPLS, &mpls_src_vft);
+}
+
+
diff --git a/vnet/vnet/fib/fib_entry_src_rr.c b/vnet/vnet/fib/fib_entry_src_rr.c
new file mode 100644
index 00000000000..f6b89603165
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_rr.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/adj/adj.h>
+
+#include "fib_entry_src.h"
+#include "fib_entry_cover.h"
+#include "fib_entry.h"
+#include "fib_table.h"
+
+/*
+ * fib_entry_src_rr_resolve_via_connected
+ *
+ * Resolve via a connected cover.
+ */
+static void
+fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry,
+ const fib_entry_t *cover)
+{
+ const fib_route_path_t path = {
+ .frp_proto = fib_entry->fe_prefix.fp_proto,
+ .frp_addr = fib_entry->fe_prefix.fp_addr,
+ .frp_sw_if_index = fib_entry_get_resolving_interface(
+ fib_entry_get_index(cover)),
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ };
+ fib_route_path_t *paths = NULL;
+ vec_add1(paths, path);
+
+ /*
+ * since the cover is connected, the address this entry corresponds
+ * to is a peer (ARP-able for) on the interface to which the cover is
+ * connected. The fact we resolve via the cover, just means this RR
+ * source is the first SRC to use said peer. The ARP source will be along
+ * shortly to over-rule this RR source.
+ */
+ src->fes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NONE, paths);
+ src->fes_entry_flags = fib_entry_get_flags(fib_entry_get_index(cover));
+
+ vec_free(paths);
+}
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_rr_init (fib_entry_src_t *src)
+{
+ src->rr.fesr_cover = FIB_NODE_INDEX_INVALID;
+ src->rr.fesr_sibling = FIB_NODE_INDEX_INVALID;
+}
+
+/*
+ * Source activation. Called when the source is the new best source on the entry
+ */
+static int
+fib_entry_src_rr_activate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * find the covering prefix. become a dependent thereof.
+ * there should always be a cover, though it may be the default route.
+ */
+ src->rr.fesr_cover = fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->rr.fesr_cover);
+
+ cover = fib_entry_get(src->rr.fesr_cover);
+
+ src->rr.fesr_sibling =
+ fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
+
+ /*
+ * if the cover is attached then install an attached-host path
+ * (like an adj-fib). Otherwise inherit the forwarding from the cover
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
+ {
+ fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover);
+ }
+ else
+ {
+ src->fes_pl = cover->fe_parent;
+ }
+ fib_path_list_lock(src->fes_pl);
+
+ /*
+ * return go for install
+ */
+ return (!0);
+}
+
+/**
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_rr_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * remove the dependency on the covering entry
+ */
+ ASSERT(FIB_NODE_INDEX_INVALID != src->rr.fesr_cover);
+ cover = fib_entry_get(src->rr.fesr_cover);
+
+ fib_entry_cover_untrack(cover, src->rr.fesr_sibling);
+
+ src->rr.fesr_cover = FIB_NODE_INDEX_INVALID;
+
+ fib_path_list_unlock(src->fes_pl);
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+ src->fes_entry_flags = FIB_ENTRY_FLAG_NONE;
+}
+
+static fib_entry_src_cover_res_t
+fib_entry_src_rr_cover_change (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+
+ if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover)
+ {
+ /*
+ * the source may be added, but it is not active
+ * if it is not tracking the cover.
+ */
+ return (res);
+ }
+
+ /*
+ * this function is called when this entry's cover has a more specific
+ * entry inserted beneath it. That does not necessarily mean that this
+ * entry is covered by the new prefix. check that
+ */
+ if (src->rr.fesr_cover != fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix))
+ {
+ fib_entry_src_rr_deactivate(src, fib_entry);
+ fib_entry_src_rr_activate(src, fib_entry);
+
+ /*
+ * dependent children need to re-resolve to the new forwarding info
+ */
+ res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+ }
+ return (res);
+}
+
+/*
+ * fib_entry_src_rr_cover_update
+ *
+ * This entry's cover has updated its forwarding info. This entry
+ * will need to re-inherit.
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_rr_cover_update (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ fib_node_index_t old_path_list;
+ fib_entry_t *cover;
+
+ if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover)
+ {
+ /*
+ * the source may be added, but it is not active
+ * if it is not tracking the cover.
+ */
+ return (res);
+ }
+
+ cover = fib_entry_get(src->rr.fesr_cover);
+ old_path_list = src->fes_pl;
+
+ /*
+ * if the cover is attached then install an attached-host path
+ * (like an adj-fib). Otherwise inherit the forwarding from the cover
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
+ {
+ fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover);
+ }
+ else
+ {
+ src->fes_pl = cover->fe_parent;
+ }
+ fib_path_list_lock(src->fes_pl);
+ fib_path_list_unlock(old_path_list);
+
+ /*
+ * dependent children need to re-resolve to the new forwarding info
+ */
+ res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+ return (res);
+}
+
+static u8*
+fib_entry_src_rr_format (fib_entry_src_t *src,
+ u8* s)
+{
+ return (format(s, "cover:%d", src->rr.fesr_cover));
+}
+
+const static fib_entry_src_vft_t rr_src_vft = {
+ .fesv_init = fib_entry_src_rr_init,
+ .fesv_activate = fib_entry_src_rr_activate,
+ .fesv_deactivate = fib_entry_src_rr_deactivate,
+ .fesv_cover_change = fib_entry_src_rr_cover_change,
+ .fesv_cover_update = fib_entry_src_rr_cover_update,
+ .fesv_format = fib_entry_src_rr_format,
+};
+
+void
+fib_entry_src_rr_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_RR, &rr_src_vft);
+}
diff --git a/vnet/vnet/fib/fib_entry_src_special.c b/vnet/vnet/fib/fib_entry_src_special.c
new file mode 100644
index 00000000000..f73e280f1c8
--- /dev/null
+++ b/vnet/vnet/fib/fib_entry_src_special.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fib_entry.h"
+#include "fib_entry_src.h"
+
+/**
+ * Source initialisation Function
+ */
+static void
+fib_entry_src_special_init (fib_entry_src_t *src)
+{
+ src->fes_flags = FIB_ENTRY_FLAG_NONE;
+}
+
+/**
+ * Source deinitialisation Function
+ */
+static void
+fib_entry_src_special_deinit (fib_entry_src_t *src)
+{
+}
+
+static void
+fib_entry_src_special_remove (fib_entry_src_t *src)
+{
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+}
+
+static void
+fib_entry_src_special_add (fib_entry_src_t *src,
+ const fib_entry_t *entry,
+ fib_entry_flag_t flags,
+ fib_protocol_t proto,
+ const dpo_id_t *dpo)
+{
+ src->fes_pl =
+ fib_path_list_create_special(proto,
+ fib_entry_src_flags_2_path_list_flags(flags),
+ dpo);
+}
+
+const static fib_entry_src_vft_t special_src_vft = {
+ .fesv_init = fib_entry_src_special_init,
+ .fesv_deinit = fib_entry_src_special_deinit,
+ .fesv_add = fib_entry_src_special_add,
+ .fesv_remove = fib_entry_src_special_remove,
+};
+
+void
+fib_entry_src_special_register (void)
+{
+ fib_entry_src_register(FIB_SOURCE_SPECIAL, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_MAP, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_SIXRD, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_CLASSIFY, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_SR, &special_src_vft);
+ fib_entry_src_register(FIB_SOURCE_AE, &special_src_vft);
+}
diff --git a/vnet/vnet/fib/fib_internal.h b/vnet/vnet/fib/fib_internal.h
new file mode 100644
index 00000000000..26b349eee5e
--- /dev/null
+++ b/vnet/vnet/fib/fib_internal.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_INTERNAL_H__
+#define __FIB_INTERNAL_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * Big train switch; FIB debugs on or off
+ */
+#undef FIB_DEBUG
+
+extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+ fib_prefix_t *prf);
+
+extern int fib_route_path_cmp(const fib_route_path_t *rpath1,
+ const fib_route_path_t *rpath2);
+
+/**
+ * @brief
+ * Add or update an entry in the FIB's forwarding table.
+ * This is called from the fib_entry code. It is not meant to be used
+ * by the client/source.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add/update
+ *
+ * @param dpo
+ * The data-path object to use for forwarding
+ */
+extern void fib_table_fwding_dpo_update(u32 fib_index,
+ const fib_prefix_t *prefix,
+ const dpo_id_t *dpo);
+/**
+ * @brief
+ * Remove an entry from the FIB's forwarding table.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to remove
+ *
+ * @param dpo
+ * The data-path object associated with the entry being removed
+ * (presumably the one previously installed for the prefix - confirm
+ * against the implementation)
+ */
+extern void fib_table_fwding_dpo_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ const dpo_id_t *dpo);
+
+
+#endif
diff --git a/vnet/vnet/fib/fib_node.c b/vnet/vnet/fib/fib_node.c
new file mode 100644
index 00000000000..8ac67d2ef92
--- /dev/null
+++ b/vnet/vnet/fib/fib_node.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_node_list.h>
+
+/*
+ * The per-type vector of virtual function tables
+ */
+static fib_node_vft_t *fn_vfts;
+
+/**
+ * The last registered new type
+ */
+static fib_node_type_t last_new_type = FIB_NODE_TYPE_LAST;
+
+/*
+ * the node type names
+ */
+static const char *fn_type_names[] = FIB_NODE_TYPES;
+
+/**
+ * Return a printable name for a FIB node type.
+ *
+ * Types below FIB_NODE_TYPE_LAST are looked up in the static name table.
+ * Any other type has no entry in that table; for those "fixme" is returned
+ * when the type has a registered format function and "unknown" otherwise.
+ *
+ * @param type
+ *  The node type to name
+ *
+ * @return a constant string; never to be freed by the caller.
+ */
+const char*
+fib_node_type_get_name (fib_node_type_t type)
+{
+    if (type < FIB_NODE_TYPE_LAST)
+    {
+        return (fn_type_names[type]);
+    }
+
+    /*
+     * Only consult the VFT vector when the type lies within it; a type
+     * that was never registered must not index past the end of the vector.
+     */
+    if (type < vec_len(fn_vfts) &&
+        NULL != fn_vfts[type].fnv_format)
+    {
+        return ("fixme");
+    }
+
+    return ("unknown");
+}
+
+/**
+ * fib_node_register_type
+ *
+ * Register the function table for a given type. The table is copied into
+ * the per-type vector, which is grown as needed to hold the type's slot.
+ */
+void
+fib_node_register_type (fib_node_type_t type,
+ const fib_node_vft_t *vft)
+{
+ /*
+ * assert that only one registration is made per-node type.
+ * The check applies only if the vector already has a slot for the type.
+ */
+ if (vec_len(fn_vfts) > type)
+ ASSERT(NULL == fn_vfts[type].fnv_get);
+
+ /*
+ * Assert that we are getting each of the required functions
+ */
+ ASSERT(NULL != vft->fnv_get);
+ ASSERT(NULL != vft->fnv_last_lock);
+
+ vec_validate(fn_vfts, type);
+ fn_vfts[type] = *vft;
+}
+
+/**
+ * Allocate the next unused node type value and register the given
+ * function table against it.
+ *
+ * @return the newly allocated type
+ */
+fib_node_type_t
+fib_node_register_new_type (const fib_node_vft_t *vft)
+{
+    /*
+     * dynamic types are handed out sequentially after the last one issued
+     */
+    last_new_type++;
+
+    fib_node_register_type(last_new_type, vft);
+
+    return (last_new_type);
+}
+
+/**
+ * Format a node pointer as "{type-name:index}" and append it to s.
+ *
+ * The name is obtained from fib_node_type_get_name() rather than by
+ * indexing the static name table directly, so that types without an
+ * entry in that table do not read beyond its end.
+ */
+static u8*
+fib_node_format (fib_node_ptr_t *fnp, u8*s)
+{
+    return (format(s, "{%s:%d}",
+                   fib_node_type_get_name(fnp->fnp_type),
+                   fnp->fnp_index));
+}
+
+/**
+ * Add a child to a parent node.
+ * The parent is locked and the child is pushed onto the front of the
+ * parent's child list, which is created on first use.
+ *
+ * @return the index of the sibling (the child's element) in the child list
+ */
+u32
+fib_node_child_add (fib_node_type_t parent_type,
+                    fib_node_index_t parent_index,
+                    fib_node_type_t type,
+                    fib_node_index_t index)
+{
+    fib_node_t *parent = fn_vfts[parent_type].fnv_get(parent_index);
+
+    /*
+     * the lock taken here is released in fib_node_child_remove
+     */
+    fib_node_lock(parent);
+
+    /*
+     * the child list is only created once there is a child to put in it
+     */
+    if (FIB_NODE_INDEX_INVALID == parent->fn_children)
+    {
+        parent->fn_children = fib_node_list_create();
+    }
+
+    return (fib_node_list_push_front(parent->fn_children, 0, type, index));
+}
+
+/**
+ * Remove a child from a parent node.
+ * The sibling is removed from the parent's child list, the list is
+ * destroyed if it becomes empty, and the parent is then unlocked.
+ */
+void
+fib_node_child_remove (fib_node_type_t parent_type,
+                       fib_node_index_t parent_index,
+                       fib_node_index_t sibling_index)
+{
+    fib_node_t *parent = fn_vfts[parent_type].fnv_get(parent_index);
+
+    fib_node_list_remove(parent->fn_children, sibling_index);
+
+    if (fib_node_list_get_size(parent->fn_children) == 0)
+    {
+        fib_node_list_destroy(&parent->fn_children);
+    }
+
+    /*
+     * unlock last; this may invoke the parent's last-lock-gone function
+     */
+    fib_node_unlock(parent);
+}
+
+
+/**
+ * Back-walk a single node: fetch the node identified by the pointer
+ * using its type's get function, then invoke that type's back-walk
+ * function on it.
+ *
+ * @return the return code from the type's back-walk function
+ */
+fib_node_back_walk_rc_t
+fib_node_back_walk_one (fib_node_ptr_t *ptr,
+                        fib_node_back_walk_ctx_t *ctx)
+{
+    const fib_node_type_t type = ptr->fnp_type;
+    fib_node_t *node = fn_vfts[type].fnv_get(ptr->fnp_index);
+
+    return (fn_vfts[type].fnv_back_walk(node, ctx));
+}
+
+/**
+ * List-walk callback: append the formatted child pointer to the string
+ * whose address is passed in arg (a u8**). Always returns 1.
+ */
+static int
+fib_node_ptr_format_one_child (fib_node_ptr_t *ptr,
+ void *arg)
+{
+ u8 **s = (u8**) arg;
+
+ *s = fib_node_format(ptr, *s);
+
+ return (1);
+}
+
+/**
+ * Append a formatted representation of every node in the list to s.
+ *
+ * @return the (possibly reallocated) string
+ */
+u8*
+fib_node_children_format (fib_node_list_t list,
+ u8 *s)
+{
+ /* the walk callback updates s through the pointer passed as context */
+ fib_node_list_walk(list, fib_node_ptr_format_one_child, (void*)&s);
+
+ return (s);
+}
+
+/**
+ * Initialise the base node: no locks, no child list, and the VFT
+ * pointer set to the slot registered for the given type.
+ */
+void
+fib_node_init (fib_node_t *node,
+ fib_node_type_t type)
+{
+#if CLIB_DEBUG > 0
+ /**
+ * The node's type. make sure we are dynamic/down casting correctly
+ */
+ node->fn_type = type;
+#endif
+ node->fn_locks = 0;
+ /* NOTE(review): this stores an address inside the fn_vfts vector */
+ node->fn_vft = &fn_vfts[type];
+ node->fn_children = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * De-initialise the base node by destroying its child list (a no-op if
+ * the node has no list).
+ */
+void
+fib_node_deinit (fib_node_t *node)
+{
+ fib_node_list_destroy(&node->fn_children);
+}
+
+/**
+ * Take a lock (a reference) on the node.
+ */
+void
+fib_node_lock (fib_node_t *node)
+{
+ node->fn_locks++;
+}
+
+/**
+ * Release a lock on the node. When the lock count reaches zero the
+ * node type's last-lock-gone function is invoked.
+ */
+void
+fib_node_unlock (fib_node_t *node)
+{
+    if (0 == --node->fn_locks)
+    {
+        node->fn_vft->fnv_last_lock(node);
+    }
+}
diff --git a/vnet/vnet/fib/fib_node.h b/vnet/vnet/fib/fib_node.h
new file mode 100644
index 00000000000..a05b6f1b61a
--- /dev/null
+++ b/vnet/vnet/fib/fib_node.h
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_NODE_H__
+#define __FIB_NODE_H__
+
+#include <vnet/fib/fib_types.h>
+
+/**
+ * The types of nodes in a FIB graph
+ */
+typedef enum fib_node_type_t_ {
+ /**
+ * Marker. New types after this one.
+ */
+ FIB_NODE_TYPE_FIRST = 0,
+ /**
+ * See the respective fib_*.h files for descriptions of these objects.
+ */
+ FIB_NODE_TYPE_WALK,
+ FIB_NODE_TYPE_ENTRY,
+ FIB_NODE_TYPE_PATH_LIST,
+ FIB_NODE_TYPE_PATH,
+ FIB_NODE_TYPE_ADJ,
+ FIB_NODE_TYPE_MPLS_ENTRY,
+ FIB_NODE_TYPE_LISP_GPE_TUNNEL,
+ FIB_NODE_TYPE_LISP_ADJ,
+ FIB_NODE_TYPE_MPLS_GRE_TUNNEL,
+ FIB_NODE_TYPE_GRE_TUNNEL,
+ /**
+ * Marker. New types before this one. leave the test last.
+ */
+ FIB_NODE_TYPE_TEST,
+ FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_TEST,
+} fib_node_type_t;
+
+#define FIB_NODE_TYPE_MAX (FIB_NODE_TYPE_LAST + 1)
+
+#define FIB_NODE_TYPES { \
+ [FIB_NODE_TYPE_ENTRY] = "entry", \
+ [FIB_NODE_TYPE_WALK] = "walk", \
+ [FIB_NODE_TYPE_PATH_LIST] = "path-list", \
+ [FIB_NODE_TYPE_PATH] = "path", \
+ [FIB_NODE_TYPE_MPLS_ENTRY] = "mpls-entry", \
+ [FIB_NODE_TYPE_ADJ] = "adj", \
+ [FIB_NODE_TYPE_LISP_GPE_TUNNEL] = "lisp-gpe-tunnel", \
+ [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \
+ [FIB_NODE_TYPE_MPLS_GRE_TUNNEL] = "mpls-gre-tunnel", \
+ [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \
+}
+
+/**
+ * Reasons for backwalking the FIB object graph
+ */
+typedef enum fib_node_back_walk_reason_t_ {
+    /**
+     * Marker. Add new ones after.
+     */
+    FIB_NODE_BW_REASON_FIRST = 0,
+    /**
+     * Walk to re-resolve the child.
+     * Used when the parent is no longer a valid resolution target
+     */
+    FIB_NODE_BW_REASON_RESOLVE = FIB_NODE_BW_REASON_FIRST,
+    /**
+     * Walk to re-evaluate the forwarding contributed by the parent.
+     * Used when a parent's forwarding changes and the child needs to
+     * incorporate this change in its forwarding.
+     */
+    FIB_NODE_BW_REASON_EVALUATE,
+    /**
+     * A resolving interface has come up
+     */
+    FIB_NODE_BW_REASON_INTERFACE_UP,
+    /**
+     * A resolving interface has gone down
+     */
+    FIB_NODE_BW_REASON_INTERFACE_DOWN,
+    /**
+     * A resolving interface has been deleted.
+     */
+    FIB_NODE_BW_REASON_INTERFACE_DELETE,
+    /**
+     * Walk to re-collapse the multipath adjs when the rewrite of
+     * a unipath adjacency changes
+     */
+    FIB_NODE_BW_REASON_ADJ_UPDATE,
+    /**
+     * Marker. Add new reasons before this one and update it so that it
+     * always aliases the final real reason.
+     */
+    FIB_NODE_BW_REASON_LAST = FIB_NODE_BW_REASON_ADJ_UPDATE,
+} fib_node_back_walk_reason_t;
+
+/**
+ * Initialiser for an array of back-walk reason names, indexed by
+ * fib_node_back_walk_reason_t.
+ */
+#define FIB_NODE_BW_REASONS {                               \
+    [FIB_NODE_BW_REASON_RESOLVE] = "resolve",               \
+    [FIB_NODE_BW_REASON_EVALUATE] = "evaluate",             \
+    [FIB_NODE_BW_REASON_INTERFACE_UP] = "if-up",            \
+    [FIB_NODE_BW_REASON_INTERFACE_DOWN] = "if-down",        \
+    [FIB_NODE_BW_REASON_INTERFACE_DELETE] = "if-delete",    \
+    [FIB_NODE_BW_REASON_ADJ_UPDATE] = "adj-update",         \
+}
+
+/**
+ * Flags enum constructed from the reasons: one bit per
+ * fib_node_back_walk_reason_t value. The enum is packed.
+ */
+typedef enum fib_node_bw_reason_flag_t_ {
+ FIB_NODE_BW_REASON_FLAG_NONE = 0,
+ FIB_NODE_BW_REASON_FLAG_RESOLVE = (1 << FIB_NODE_BW_REASON_RESOLVE),
+ FIB_NODE_BW_REASON_FLAG_EVALUATE = (1 << FIB_NODE_BW_REASON_EVALUATE),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP = (1 << FIB_NODE_BW_REASON_INTERFACE_UP),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN = (1 << FIB_NODE_BW_REASON_INTERFACE_DOWN),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE = (1 << FIB_NODE_BW_REASON_INTERFACE_DELETE),
+ FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE = (1 << FIB_NODE_BW_REASON_ADJ_UPDATE),
+} __attribute__ ((packed)) fib_node_bw_reason_flag_t;
+
+_Static_assert(sizeof(fib_node_bw_reason_flag_t) < 2,
+ "BW Reason enum < 2 byte. Consequences for cover_upd_res_t");
+
+/**
+ * Forward declarations
+ */
+struct fib_node_t_;
+
+/**
+ * A representation of one pointer to another node.
+ * To fully qualify a node, one must know its type and its index so it
+ * can be retrieved from the appropriate pool. Direct pointers to nodes
+ * are forbidden, since all nodes are allocated from pools, which are vectors,
+ * and thus subject to realloc at any time.
+ */
+typedef struct fib_node_ptr_t_ {
+ /**
+ * node type
+ */
+ fib_node_type_t fnp_type;
+ /**
+ * node's index
+ */
+ fib_node_index_t fnp_index;
+} fib_node_ptr_t;
+
+/**
+ * @brief A list of FIB nodes.
+ */
+typedef u32 fib_node_list_t;
+
+/**
+ * Context passed between objects during a back walk.
+ */
+typedef struct fib_node_back_walk_ctx_t_ {
+ /**
+ * The reason/trigger for the backwalk
+ */
+ fib_node_bw_reason_flag_t fnbw_reason;
+
+ /**
+ * the number of levels the walk has already traversed.
+ * this value is maintained by the walk infra, to limit the depth of
+ * a walk so it does not run indefinitely in the presence of a
+ * loop/cycle in the graph.
+ */
+ u32 fnbw_depth;
+} fib_node_back_walk_ctx_t;
+
+/**
+ * We consider a depth of 32 to be sufficient to cover all sane
+ * network topologies. Anything more is then an indication that
+ * there is a loop/cycle in the FIB graph.
+ * Note that all object types contribute to 1 to the depth.
+ */
+#define FIB_NODE_GRAPH_MAX_DEPTH ((u32)32)
+
+/**
+ * A callback function for walking a node dependency list
+ */
+typedef int (*fib_node_ptr_walk_t)(fib_node_ptr_t *depend,
+ void *ctx);
+
+/**
+ * A list of dependent nodes.
+ * This is currently implemented as a hash_table of fib_node_ptr_t
+ */
+typedef fib_node_ptr_t fib_node_ptr_list_t;
+
+/**
+ * Return code from a back walk function
+ */
+typedef enum fib_node_back_walk_rc_t_ {
+ FIB_NODE_BACK_WALK_MERGE,
+ FIB_NODE_BACK_WALK_CONTINUE,
+} fib_node_back_walk_rc_t;
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+typedef fib_node_back_walk_rc_t (*fib_node_back_walk_t)(
+ struct fib_node_t_ *node,
+ fib_node_back_walk_ctx_t *ctx);
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+typedef struct fib_node_t_* (*fib_node_get_t)(fib_node_index_t index);
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+typedef void (*fib_node_last_lock_gone_t)(struct fib_node_t_ *node);
+
+/**
+ * A FIB graph node's virtual function table
+ */
+typedef struct fib_node_vft_t_ {
+ /* get a node from its index */
+ fib_node_get_t fnv_get;
+ /* inform the node that its last lock has gone */
+ fib_node_last_lock_gone_t fnv_last_lock;
+ /* backwalk the node */
+ fib_node_back_walk_t fnv_back_walk;
+ /* format function for the node type */
+ format_function_t *fnv_format;
+} fib_node_vft_t;
+
+/**
+ * A node in the FIB graph
+ *
+ * Objects in the FIB form a graph.
+ */
+typedef struct fib_node_t_ {
+#if CLIB_DEBUG > 0
+ /**
+ * The node's type. make sure we are dynamic/down casting correctly
+ */
+ fib_node_type_t fn_type;
+#endif
+ /**
+ * The node's VFT.
+ * we could store the type here instead, and lookup the VFT using that. But
+ * I like this better,
+ */
+ const fib_node_vft_t *fn_vft;
+
+ /**
+ * The list of nodes that depend upon/use/share this node.
+ * This is a node-list handle, not a vector.
+ */
+ fib_node_list_t fn_children;
+
+ /**
+ * Number of dependents on this node. This number includes the number
+ * of children
+ */
+ u32 fn_locks;
+} fib_node_t;
+
+/**
+ * @brief
+ * Register the function table for a given type
+ *
+ * @param ft
+ * FIB node type
+ *
+ * @param vft
+ * virtual function table
+ */
+extern void fib_node_register_type (fib_node_type_t ft,
+ const fib_node_vft_t *vft);
+
+/**
+ * @brief
+ * Create a new FIB node type and Register the function table for it.
+ *
+ * @param vft
+ * virtual function table
+ *
+ * @return new FIB node type
+ */
+extern fib_node_type_t fib_node_register_new_type (const fib_node_vft_t *vft);
+
+extern void fib_node_init(fib_node_t *node,
+ fib_node_type_t ft);
+extern void fib_node_deinit(fib_node_t *node);
+
+extern void fib_node_lock(fib_node_t *node);
+extern void fib_node_unlock(fib_node_t *node);
+
+extern u32 fib_node_child_add(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index);
+extern void fib_node_child_remove(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_index_t sibling_index);
+
+extern fib_node_back_walk_rc_t fib_node_back_walk_one(fib_node_ptr_t *ptr,
+ fib_node_back_walk_ctx_t *ctx);
+
+extern u8* fib_node_children_format(fib_node_list_t list,
+ u8 *s);
+
+extern const char* fib_node_type_get_name(fib_node_type_t type);
+
+/**
+ * Return non-zero if the index is not the FIB_NODE_INDEX_INVALID sentinel.
+ */
+static inline int
+fib_node_index_is_valid (fib_node_index_t ni)
+{
+ return (FIB_NODE_INDEX_INVALID != ni);
+}
+
+#endif
+
diff --git a/vnet/vnet/fib/fib_node_list.c b/vnet/vnet/fib/fib_node_list.c
new file mode 100644
index 00000000000..1d2e75ecec2
--- /dev/null
+++ b/vnet/vnet/fib/fib_node_list.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief a heterogeneous (w.r.t. FIB node type) list of FIB nodes.
+ * Since we cannot use C pointers, due to memory reallocs, the next/prev
+ * are described as an index to an element. Each element contains a pointer
+ * (key:{type, index}) to a FIB node.
+ */
+
+#include <vnet/fib/fib_node_list.h>
+
+/**
+ * @brief An element in the list
+ */
+typedef struct fib_node_list_elt_t_
+{
+ /**
+ * An opaque identifier set by the FIB node owning this element
+ * that will allow the owner to identify which element it is.
+ */
+ int fnle_owner_id;
+
+ /**
+ * The index of the list this element is in
+ */
+ fib_node_list_t fnle_list;
+
+ /**
+ * The owner of this element
+ */
+ fib_node_ptr_t fnle_owner;
+
+ /**
+ * The next element in the list, as an index into the element pool;
+ * FIB_NODE_INDEX_INVALID if there is none.
+ */
+ u32 fnle_next;
+
+ /**
+ * The previous element in the list, as an index into the element pool;
+ * FIB_NODE_INDEX_INVALID if there is none.
+ */
+ u32 fnle_prev;
+} fib_node_list_elt_t;
+
+/**
+ * @brief A list of FIB nodes
+ */
+typedef struct fib_node_list_head_t_
+{
+ /**
+ * The head element
+ */
+ u32 fnlh_head;
+
+ /**
+ * Number of elements in the list
+ */
+ u32 fnlh_n_elts;
+} fib_node_list_head_t;
+
+/**
+ * Pools of list elements and heads
+ */
+static fib_node_list_elt_t *fib_node_list_elt_pool;
+static fib_node_list_head_t *fib_node_list_head_pool;
+
+/**
+ * @brief Convert an element pointer to its index in the element pool.
+ */
+static index_t
+fib_node_list_elt_get_index (fib_node_list_elt_t *elt)
+{
+ return (elt - fib_node_list_elt_pool);
+}
+
+/**
+ * @brief Get the element at the given index in the element pool.
+ */
+static fib_node_list_elt_t *
+fib_node_list_elt_get (index_t fi)
+{
+ return (pool_elt_at_index(fib_node_list_elt_pool, fi));
+}
+
+/**
+ * @brief Convert a list-head pointer to its index in the head pool.
+ */
+static index_t
+fib_node_list_head_get_index (fib_node_list_head_t *head)
+{
+ return (head - fib_node_list_head_pool);
+}
+/**
+ * @brief Get the list head at the given index in the head pool.
+ */
+static fib_node_list_head_t *
+fib_node_list_head_get (fib_node_list_t fi)
+{
+ return (pool_elt_at_index(fib_node_list_head_pool, fi));
+}
+
+/**
+ * @brief Allocate an element from the pool and fill in its owner, its
+ * owner-id and the list it belongs to. The element is returned unlinked:
+ * both its next and prev are invalid.
+ */
+static fib_node_list_elt_t *
+fib_node_list_elt_create (fib_node_list_head_t *head,
+                          int id,
+                          fib_node_type_t type,
+                          fib_node_index_t index)
+{
+    fib_node_list_elt_t *elt;
+
+    pool_get(fib_node_list_elt_pool, elt);
+
+    /*
+     * not yet linked to any neighbour
+     */
+    elt->fnle_prev = FIB_NODE_INDEX_INVALID;
+    elt->fnle_next = FIB_NODE_INDEX_INVALID;
+
+    /*
+     * who owns the element, and which list it lives in
+     */
+    elt->fnle_owner.fnp_index = index;
+    elt->fnle_owner.fnp_type = type;
+    elt->fnle_owner_id = id;
+    elt->fnle_list = fib_node_list_head_get_index(head);
+
+    return (elt);
+}
+
+/**
+ * @brief Initialise a list head to the empty state: no elements and an
+ * invalid head index.
+ */
+static void
+fib_node_list_head_init (fib_node_list_head_t *head)
+{
+ head->fnlh_n_elts = 0;
+ head->fnlh_head = FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * @brief Create a new, empty node list. The list head is allocated from
+ * the list-head pool.
+ *
+ * @return the index of the new list head in the head pool
+ */
+fib_node_list_t
+fib_node_list_create (void)
+{
+ fib_node_list_head_t *head;
+
+ pool_get(fib_node_list_head_pool, head);
+
+ fib_node_list_head_init(head);
+
+ return (fib_node_list_head_get_index(head));
+}
+
+/**
+ * @brief Destroy a list and invalidate the caller's handle.
+ * Does nothing if the handle is already invalid. The list is asserted
+ * to be empty before its head is returned to the pool.
+ */
+void
+fib_node_list_destroy (fib_node_list_t *list)
+{
+    if (FIB_NODE_INDEX_INVALID != *list)
+    {
+        fib_node_list_head_t *head = fib_node_list_head_get(*list);
+
+        ASSERT(0 == head->fnlh_n_elts);
+
+        pool_put(fib_node_list_head_pool, head);
+        *list = FIB_NODE_INDEX_INVALID;
+    }
+}
+
+
+/**
+ * @brief Insert an element at the front of the list.
+ *
+ * @return the index of the new element, i.e. the 'sibling' index used
+ * to refer to it in the other list functions.
+ */
+u32
+fib_node_list_push_front (fib_node_list_t list,
+                          int owner_id,
+                          fib_node_type_t type,
+                          fib_node_index_t index)
+{
+    fib_node_list_head_t *head;
+    fib_node_list_elt_t *elt;
+    u32 new_index;
+
+    head = fib_node_list_head_get(list);
+    elt = fib_node_list_elt_create(head, owner_id, type, index);
+    new_index = fib_node_list_elt_get_index(elt);
+
+    /*
+     * the new element goes in before the current head, if there is one
+     */
+    elt->fnle_prev = FIB_NODE_INDEX_INVALID;
+    elt->fnle_next = head->fnlh_head;
+
+    if (FIB_NODE_INDEX_INVALID != head->fnlh_head)
+    {
+        fib_node_list_elt_get(head->fnlh_head)->fnle_prev = new_index;
+    }
+
+    head->fnlh_head = new_index;
+    head->fnlh_n_elts++;
+
+    return (new_index);
+}
+
+/**
+ * @brief Not implemented. Asserts if called and returns
+ * FIB_NODE_INDEX_INVALID.
+ */
+u32
+fib_node_list_push_back (fib_node_list_t list,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index)
+{
+ ASSERT(0);
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+/**
+ * @brief Unlink an element from its list by joining its neighbours
+ * together. If the element has no previous element it must be the head,
+ * and the head is moved on to the next element.
+ * The element's own next/prev, the list's element count and the element's
+ * pool slot are left untouched.
+ */
+static void
+fib_node_list_extract (fib_node_list_head_t *head,
+ fib_node_list_elt_t *elt)
+{
+ fib_node_list_elt_t *next, *prev;
+
+ if (FIB_NODE_INDEX_INVALID != elt->fnle_next)
+ {
+ next = fib_node_list_elt_get(elt->fnle_next);
+ next->fnle_prev = elt->fnle_prev;
+ }
+
+ if (FIB_NODE_INDEX_INVALID != elt->fnle_prev)
+ {
+ prev = fib_node_list_elt_get(elt->fnle_prev);
+ prev->fnle_next = elt->fnle_next;
+ }
+ else
+ {
+ ASSERT (fib_node_list_elt_get_index(elt) == head->fnlh_head);
+ head->fnlh_head = elt->fnle_next;
+ }
+}
+
+/**
+ * @brief Link an element into the list immediately after prev.
+ * The head argument is not used; the list's element count is not changed.
+ */
+static void
+fib_node_list_insert_after (fib_node_list_head_t *head,
+ fib_node_list_elt_t *prev,
+ fib_node_list_elt_t *elt)
+{
+ fib_node_list_elt_t *next;
+
+ elt->fnle_next = prev->fnle_next;
+ if (FIB_NODE_INDEX_INVALID != prev->fnle_next)
+ {
+ next = fib_node_list_elt_get(prev->fnle_next);
+ next->fnle_prev = fib_node_list_elt_get_index(elt);
+ }
+ prev->fnle_next = fib_node_list_elt_get_index(elt);
+ elt->fnle_prev = fib_node_list_elt_get_index(prev);
+}
+
+/**
+ * @brief Remove the element identified by sibling from the list: unlink
+ * it, decrement the list's element count and return the element to the
+ * pool.
+ */
+void
+fib_node_list_remove (fib_node_list_t list,
+ u32 sibling)
+{
+ fib_node_list_head_t *head;
+ fib_node_list_elt_t *elt;
+
+ head = fib_node_list_head_get(list);
+ elt = fib_node_list_elt_get(sibling);
+
+ fib_node_list_extract(head, elt);
+
+ head->fnlh_n_elts--;
+ pool_put(fib_node_list_elt_pool, elt);
+}
+
+/**
+ * @brief Remove an element given only its sibling index; the list it
+ * belongs to is read from the element itself.
+ */
+void
+fib_node_list_elt_remove (u32 sibling)
+{
+ fib_node_list_elt_t *elt;
+
+ elt = fib_node_list_elt_get(sibling);
+
+ fib_node_list_remove(elt->fnle_list, sibling);
+}
+
+/**
+ * @brief Advance the sibling one step (toward the tail) in the list, by
+ * swapping its position with that of the element that follows it.
+ *
+ * @return 0 if the sibling is already at the end of the list, 1 otherwise.
+ */
+int
+fib_node_list_advance (u32 sibling)
+{
+    fib_node_list_elt_t *elt, *next;
+    fib_node_list_head_t *head;
+
+    elt = fib_node_list_elt_get(sibling);
+    head = fib_node_list_head_get(elt->fnle_list);
+
+    if (FIB_NODE_INDEX_INVALID == elt->fnle_next)
+    {
+        /*
+         * at the end of the list; nowhere to go
+         */
+        return (0);
+    }
+
+    /*
+     * take the element out and re-insert it after its old successor
+     */
+    next = fib_node_list_elt_get(elt->fnle_next);
+
+    fib_node_list_extract(head, elt);
+    fib_node_list_insert_after(head, next, elt);
+
+    return (1);
+}
+
+/**
+ * @brief Get the owner of the element that follows the sibling.
+ *
+ * @return 1 and the owner in ptr if there is a following element;
+ * otherwise 0, with only ptr's index set (to FIB_NODE_INDEX_INVALID).
+ */
+int
+fib_node_list_elt_get_next (u32 sibling,
+                            fib_node_ptr_t *ptr)
+{
+    fib_node_list_elt_t *elt = fib_node_list_elt_get(sibling);
+
+    if (FIB_NODE_INDEX_INVALID == elt->fnle_next)
+    {
+        ptr->fnp_index = FIB_NODE_INDEX_INVALID;
+        return (0);
+    }
+
+    *ptr = fib_node_list_elt_get(elt->fnle_next)->fnle_owner;
+
+    return (1);
+}
+
+/**
+ * @brief Return the number of elements in the list; an invalid list
+ * handle is treated as an empty list.
+ */
+u32
+fib_node_list_get_size (fib_node_list_t list)
+{
+    return ((FIB_NODE_INDEX_INVALID == list) ?
+            0 :
+            fib_node_list_head_get(list)->fnlh_n_elts);
+}
+
+/**
+ * @brief Get the owner of the element at the front of the list.
+ *
+ * @return 1 and the owner in ptr if the list is non-empty; otherwise 0,
+ * with only ptr's index set (to FIB_NODE_INDEX_INVALID).
+ */
+int
+fib_node_list_get_front (fib_node_list_t list,
+                         fib_node_ptr_t *ptr)
+{
+    if (0 != fib_node_list_get_size(list))
+    {
+        fib_node_list_head_t *head = fib_node_list_head_get(list);
+        fib_node_list_elt_t *elt = fib_node_list_elt_get(head->fnlh_head);
+
+        *ptr = elt->fnle_owner;
+
+        return (1);
+    }
+
+    ptr->fnp_index = FIB_NODE_INDEX_INVALID;
+
+    return (0);
+}
+
+/**
+ * @brief Walk the list of nodes, from the head, invoking fn on the owner
+ * of each element. This must be safe w.r.t. the removal of nodes during
+ * the walk, so the next element is read before the callback is made.
+ * The callback's return value is not inspected. An invalid list handle
+ * results in no callbacks.
+ */
+void
+fib_node_list_walk (fib_node_list_t list,
+                    fib_node_list_walk_cb_t fn,
+                    void *args)
+{
+    fib_node_list_elt_t *elt;
+    u32 sibling, next;
+
+    if (FIB_NODE_INDEX_INVALID == list)
+    {
+        return;
+    }
+
+    for (sibling = fib_node_list_head_get(list)->fnlh_head;
+         FIB_NODE_INDEX_INVALID != sibling;
+         sibling = next)
+    {
+        elt = fib_node_list_elt_get(sibling);
+        next = elt->fnle_next;
+
+        fn(&elt->fnle_owner, args);
+    }
+}
diff --git a/vnet/vnet/fib/fib_node_list.h b/vnet/vnet/fib/fib_node_list.h
new file mode 100644
index 00000000000..afee3c6152c
--- /dev/null
+++ b/vnet/vnet/fib/fib_node_list.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief a heterogeneous (w.r.t. FIB node type) list of FIB nodes.
+ * Since we cannot use C pointers, due to memory reallocs, the next/prev
+ * are described as an index to an element. Each element contains a pointer
+ * (key:{type, index}) to a FIB node.
+ */
+
+#ifndef __FIB_NODE_LIST_H__
+#define __FIB_NODE_LIST_H__
+
+#include <vnet/fib/fib_node.h>
+
+extern fib_node_list_t fib_node_list_create(void);
+extern void fib_node_list_destroy(fib_node_list_t *list);
+
+extern u32 fib_node_list_push_front(fib_node_list_t head,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index);
+extern u32 fib_node_list_push_back(fib_node_list_t head,
+ int owner_id,
+ fib_node_type_t type,
+ fib_node_index_t index);
+extern void fib_node_list_remove(fib_node_list_t head,
+ u32 sibling);
+extern void fib_node_list_elt_remove(u32 sibling);
+
+extern int fib_node_list_advance(u32 sibling);
+
+extern int fib_node_list_get_front(fib_node_list_t head,
+ fib_node_ptr_t *ptr);
+
+extern int fib_node_list_elt_get_next(u32 elt,
+ fib_node_ptr_t *ptr);
+
+extern u32 fib_node_list_get_size(fib_node_list_t head);
+
+/**
+ * @brief Callback function invoked during a list walk
+ */
+typedef int (*fib_node_list_walk_cb_t)(fib_node_ptr_t *owner,
+ void *args);
+
+extern void fib_node_list_walk(fib_node_list_t head,
+ fib_node_list_walk_cb_t fn,
+ void *args);
+#endif
diff --git a/vnet/vnet/fib/fib_path.c b/vnet/vnet/fib/fib_path.c
new file mode 100644
index 00000000000..d2e5e319afd
--- /dev/null
+++ b/vnet/vnet/fib/fib_path.c
@@ -0,0 +1,1744 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/lookup_dpo.h>
+
+#include <vnet/adj/adj.h>
+
+#include "fib_path.h"
+#include "fib_node.h"
+#include "fib_table.h"
+#include "fib_entry.h"
+#include "fib_path_list.h"
+#include "fib_internal.h"
+
+/**
+ * Enumeration of path types.
+ * The type selects which member of the per-type union in fib_path_t is
+ * valid. The values are also used as indices into the FIB_PATH_TYPES
+ * name table, so each new type needs a name there too.
+ */
+typedef enum fib_path_type_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_PATH_TYPE_FIRST = 0,
+ /**
+ * Attached-nexthop. An interface and a nexthop are known.
+ */
+ FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
+ /**
+ * attached. Only the interface is known.
+ */
+ FIB_PATH_TYPE_ATTACHED,
+ /**
+ * recursive. Only the next-hop is known.
+ */
+ FIB_PATH_TYPE_RECURSIVE,
+ /**
+ * special. nothing is known. so we drop.
+ */
+ FIB_PATH_TYPE_SPECIAL,
+ /**
+ * exclusive. user provided adj.
+ */
+ FIB_PATH_TYPE_EXCLUSIVE,
+ /**
+ * deag. Link to a lookup adj in the next table
+ */
+ FIB_PATH_TYPE_DEAG,
+ /**
+ * receive. it's for-us.
+ */
+ FIB_PATH_TYPE_RECEIVE,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
+} __attribute__ ((packed)) fib_path_type_t;
+
+/**
+ * The maximum number of path_types
+ */
+#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)
+
+/**
+ * Initialiser for an array of path type names, indexed by fib_path_type_t
+ */
+#define FIB_PATH_TYPES { \
+ [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop", \
+ [FIB_PATH_TYPE_ATTACHED] = "attached", \
+ [FIB_PATH_TYPE_RECURSIVE] = "recursive", \
+ [FIB_PATH_TYPE_SPECIAL] = "special", \
+ [FIB_PATH_TYPE_EXCLUSIVE] = "exclusive", \
+ [FIB_PATH_TYPE_DEAG] = "deag", \
+ [FIB_PATH_TYPE_RECEIVE] = "receive", \
+}
+
+/**
+ * Loop header that iterates _item over every path type, FIRST to LAST
+ * inclusive. The caller supplies the loop body.
+ */
+#define FOR_EACH_FIB_PATH_TYPE(_item) \
+ for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
+
+/**
+ * Enumeration of path operational (i.e. derived) attributes.
+ * Each attribute is a bit position; the corresponding bit masks are the
+ * fib_path_oper_flags_t values.
+ */
+typedef enum fib_path_oper_attribute_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
+ /**
+ * The path forms part of a recursive loop.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
+ /**
+ * The path is resolved
+ */
+ FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
+ /**
+ * The path has become a permanent drop.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_DROP,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
+} __attribute__ ((packed)) fib_path_oper_attribute_t;
+
+/**
+ * The maximum number of path operational attributes
+ */
+#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)
+
+/**
+ * Initialiser for an array of operational attribute names, indexed by
+ * fib_path_oper_attribute_t
+ */
+#define FIB_PATH_OPER_ATTRIBUTES { \
+ [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop", \
+ [FIB_PATH_OPER_ATTRIBUTE_RESOLVED] = "resolved", \
+ [FIB_PATH_OPER_ATTRIBUTE_DROP] = "drop", \
+}
+
+/**
+ * Loop header that iterates _item over every operational attribute,
+ * FIRST to LAST inclusive. The caller supplies the loop body.
+ */
+#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
+ for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
+ _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
+ _item++)
+
+/**
+ * Path flags from the attributes.
+ * Each flag is the bit mask (1 << attribute) for the matching
+ * fib_path_oper_attribute_t value; NONE means no bits are set.
+ */
+typedef enum fib_path_oper_flags_t_ {
+ FIB_PATH_OPER_FLAG_NONE = 0,
+ FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
+ FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
+ FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
+} __attribute__ ((packed)) fib_path_oper_flags_t;
+
+/**
+ * A FIB path.
+ *
+ * The members between the path_hash_start and path_hash_end markers are
+ * the path's configuration; fib_path_hash() hashes that memory region
+ * byte-for-byte. The members after path_hash_end are derived during
+ * resolution.
+ */
+typedef struct fib_path_t_ {
+ /**
+ * A path is a node in the FIB graph.
+ * This must remain the first member: the code casts between
+ * fib_path_t* and fib_node_t*.
+ */
+ fib_node_t fp_node;
+
+ /**
+ * The index of the path-list to which this path belongs
+ */
+ u32 fp_pl_index;
+
+ /**
+ * This marks the start of the memory area used to hash
+ * the path
+ */
+ STRUCT_MARK(path_hash_start);
+
+ /**
+ * Configuration Flags
+ */
+ fib_path_cfg_flags_t fp_cfg_flags;
+
+ /**
+ * The type of the path. This is the selector for the union
+ */
+ fib_path_type_t fp_type;
+
+ /**
+ * The protocol of the next-hop, i.e. the address family of the
+ * next-hop's address. We can't derive this from the address itself
+ * since the address can be all zeros
+ */
+ fib_protocol_t fp_nh_proto;
+
+ /**
+ * UCMP [unnormalised] weight
+ */
+ u32 fp_weight;
+
+ /**
+ * per-type union of the data required to resolve the path
+ */
+ union {
+ struct {
+ /**
+ * The next-hop
+ */
+ ip46_address_t fp_nh;
+ /**
+ * The interface
+ */
+ u32 fp_interface;
+ } attached_next_hop;
+ struct {
+ /**
+ * The interface
+ */
+ u32 fp_interface;
+ } attached;
+ struct {
+ /**
+ * The next-hop
+ */
+ ip46_address_t fp_nh;
+ /**
+ * The FIB table index in which to find the next-hop.
+ * This needs to be fixed. We should lookup the adjacencies in
+ * a separate table of adjacencies, rather than from the FIB.
+ * Two reasons I can think of:
+ * - consider:
+ * int ip addr Gig0 10.0.0.1/24
+ * ip route 10.0.0.2/32 via Gig1 192.168.1.2
+ * ip route 1.1.1.1/32 via Gig0 10.0.0.2
+ * this is perfectly valid.
+ * Packets addressed to 10.0.0.2 should be sent via Gig1.
+ * Packets addressed to 1.1.1.1 should be sent via Gig0.
+ * when we perform the adj resolution from the FIB for the path
+ * "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
+ * and so we will pick up the adj via Gig1 - which was not what the
+ * operator wanted.
+ * - we can only return link-type IPv4 and so not the link-type MPLS.
+ * more on this in a later commit.
+ *
+ * The table ID should only belong to a recursive path and indicate
+ * which FIB should be used to resolve the next-hop.
+ */
+ fib_node_index_t fp_tbl_id;
+ } recursive;
+ struct {
+ /**
+ * The FIB index in which to perform the next lookup
+ */
+ fib_node_index_t fp_tbl_id;
+ } deag;
+ struct {
+ } special;
+ struct {
+ /**
+ * The user provided 'exclusive' DPO
+ */
+ dpo_id_t fp_ex_dpo;
+ } exclusive;
+ struct {
+ /**
+ * The interface on which the local address is configured
+ */
+ u32 fp_interface;
+ /**
+ * The address of the path, as supplied in the route-path at
+ * creation (fib_path_create stores rpath->frp_addr here).
+ * NOTE(review): the original comment called this "The next-hop";
+ * for a receive path it is presumably the local address - confirm.
+ */
+ ip46_address_t fp_addr;
+ } receive;
+ };
+ STRUCT_MARK(path_hash_end);
+
+ /**
+ * Members in this last section represent information that is
+ * derived during resolution. It should not be copied to new paths
+ * nor compared.
+ */
+
+ /**
+ * Operational Flags
+ */
+ fib_path_oper_flags_t fp_oper_flags;
+
+ /**
+ * the resolving via fib. not part of the union, since it is not part
+ * of the path's hash.
+ */
+ fib_node_index_t fp_via_fib;
+
+ /**
+ * The Data-path objects through which this path resolves for IP.
+ */
+ dpo_id_t fp_dpo;
+
+ /**
+ * the index of this path in the parent's child list.
+ */
+ u32 fp_sibling;
+} fib_path_t;
+
+/*
+ * Array of strings/names for the path types and attributes.
+ * The type and oper-attribute tables are built with designated
+ * initialisers and so are indexed directly by the enum value.
+ * FIB_PATH_CFG_ATTRIBUTES is defined outside this file; format_fib_path
+ * indexes its table by fib_path_cfg_attribute_t.
+ */
+static const char *fib_path_type_names[] = FIB_PATH_TYPES;
+static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
+static const char *fib_path_cfg_attribute_names[] = FIB_PATH_CFG_ATTRIBUTES;
+
+/*
+ * The memory pool from which we allocate all the paths.
+ * A path's index is its offset in this pool (see fib_path_get_index), so
+ * indices, not pointers, are what the rest of the file stores.
+ */
+static fib_path_t *fib_path_pool;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define FIB_PATH_DBG(_p, _fmt, _args...) \
+{ \
+ u8 *_tmp = NULL; \
+ _tmp = fib_path_format(fib_path_get_index(_p), _tmp); \
+ clib_warning("path:[%d:%s]:" _fmt, \
+ fib_path_get_index(_p), _tmp, \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define FIB_PATH_DBG(_p, _fmt, _args...)
+#endif
+
+/*
+ * fib_path_get
+ *
+ * Return the path object stored at the given index of the path pool.
+ */
+static fib_path_t *
+fib_path_get (fib_node_index_t index)
+{
+    fib_path_t *path;
+
+    path = pool_elt_at_index(fib_path_pool, index);
+
+    return (path);
+}
+
+/*
+ * fib_path_get_index
+ *
+ * Return the index of a path, i.e. its element offset from the start of
+ * the path pool.
+ */
+static fib_node_index_t
+fib_path_get_index (fib_path_t *path)
+{
+    fib_node_index_t path_index;
+
+    path_index = (path - fib_path_pool);
+
+    return (path_index);
+}
+
+/*
+ * fib_path_get_node
+ *
+ * The 'get' virtual function for the path node type (see fib_path_vft):
+ * return the path at the given index viewed as a generic FIB node.
+ */
+static fib_node_t *
+fib_path_get_node (fib_node_index_t index)
+{
+    fib_path_t *path;
+
+    path = fib_path_get(index);
+
+    return ((fib_node_t*)path);
+}
+
+/*
+ * fib_path_from_fib_node
+ *
+ * Down-cast a generic FIB node to the path that embeds it. In debug
+ * images the node's type is checked first.
+ */
+static fib_path_t*
+fib_path_from_fib_node (fib_node_t *node)
+{
+    fib_path_t *path;
+
+#if CLIB_DEBUG > 0
+    ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
+#endif
+    path = (fib_path_t*)node;
+
+    return (path);
+}
+
+/*
+ * format_fib_path
+ *
+ * format() callback (for use with %U) that renders a path.
+ * The va_list must carry one argument: the fib_path_t* to render.
+ * Returns the vector s with the description appended.
+ *
+ * The output is a summary line (index, path-list index, protocol, weight,
+ * type, then any oper/cfg flags that are set) followed by the per-type
+ * details.
+ */
+u8 *
+format_fib_path (u8 * s, va_list * args)
+{
+    fib_path_t *path = va_arg (*args, fib_path_t *);
+    vnet_main_t * vnm = vnet_get_main();
+    fib_path_oper_attribute_t oattr;
+    fib_path_cfg_attribute_t cattr;
+
+    s = format (s, " index:%d ", fib_path_get_index(path));
+    s = format (s, "pl-index:%d ", path->fp_pl_index);
+    s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
+    s = format (s, "weight=%d ", path->fp_weight);
+    s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
+    if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
+        s = format(s, " oper-flags:");
+        FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
+            if ((1<<oattr) & path->fp_oper_flags) {
+                s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
+            }
+        }
+    }
+    if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
+        s = format(s, " cfg-flags:");
+        FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
+            if ((1<<cattr) & path->fp_cfg_flags) {
+                s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
+            }
+        }
+    }
+    s = format(s, "\n ");
+
+    switch (path->fp_type)
+    {
+    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+        s = format (s, "%U", format_ip46_address,
+                    &path->attached_next_hop.fp_nh,
+                    IP46_TYPE_ANY);
+        if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
+        {
+            /*
+             * a path is marked drop when its interface is deleted, so
+             * only the raw index is printed rather than a name lookup.
+             */
+            s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
+        }
+        else
+        {
+            s = format (s, " %U",
+                        format_vnet_sw_interface_name,
+                        vnm,
+                        vnet_get_sw_interface(
+                            vnm,
+                            path->attached_next_hop.fp_interface));
+            if (vnet_sw_interface_is_p2p(vnm,
+                                         path->attached_next_hop.fp_interface))
+            {
+                s = format (s, " (p2p)");
+            }
+        }
+        if (!dpo_id_is_valid(&path->fp_dpo))
+        {
+            s = format(s, "\n unresolved");
+        }
+        else
+        {
+            s = format(s, "\n %U",
+                       format_dpo_id,
+                       &path->fp_dpo, 13);
+        }
+        break;
+    case FIB_PATH_TYPE_ATTACHED:
+        if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
+        {
+            /*
+             * read the 'attached' member of the union; the interface is
+             * at a different offset in 'attached_next_hop'.
+             */
+            s = format (s, " if_index:%d", path->attached.fp_interface);
+        }
+        else
+        {
+            s = format (s, " %U",
+                        format_vnet_sw_interface_name,
+                        vnm,
+                        vnet_get_sw_interface(
+                            vnm,
+                            path->attached.fp_interface));
+        }
+        break;
+    case FIB_PATH_TYPE_RECURSIVE:
+        s = format (s, "via %U",
+                    format_ip46_address,
+                    &path->recursive.fp_nh,
+                    IP46_TYPE_ANY);
+        s = format (s, " in fib:%d", path->recursive.fp_tbl_id);
+        s = format (s, " via-fib:%d", path->fp_via_fib);
+        s = format (s, " via-dpo:[%U:%d]",
+                    format_dpo_type, path->fp_dpo.dpoi_type,
+                    path->fp_dpo.dpoi_index);
+
+        break;
+    case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_SPECIAL:
+    case FIB_PATH_TYPE_DEAG:
+    case FIB_PATH_TYPE_EXCLUSIVE:
+        /*
+         * no per-type configuration is printed for these; show the DPO
+         * if there is one.
+         */
+        if (dpo_id_is_valid(&path->fp_dpo))
+        {
+            s = format(s, "%U", format_dpo_id,
+                       &path->fp_dpo, 2);
+        }
+        break;
+    }
+    return (s);
+}
+
+/*
+ * fib_path_format
+ *
+ * Append the description of the path with index pi to the vector s and
+ * return the (possibly reallocated) vector.
+ */
+u8 *
+fib_path_format (fib_node_index_t pi, u8 *s)
+{
+    fib_path_t *path = fib_path_get(pi);
+
+    ASSERT(NULL != path);
+
+    s = format (s, "%U", format_fib_path, path);
+
+    return (s);
+}
+
+/*
+ * fib_path_adj_format
+ *
+ * Append a description of the DPO the path resolves through, or the
+ * text " unresolved" when the path has no valid DPO, to the vector s.
+ * The indent argument is accepted but not used.
+ */
+u8 *
+fib_path_adj_format (fib_node_index_t pi,
+                     u32 indent,
+                     u8 *s)
+{
+    fib_path_t *path = fib_path_get(pi);
+
+    ASSERT(NULL != path);
+
+    if (dpo_id_is_valid(&path->fp_dpo))
+    {
+        return (format(s, "%U", format_dpo_id,
+                       &path->fp_dpo, 2));
+    }
+
+    return (format(s, " unresolved"));
+}
+
+/*
+ * fib_path_last_lock_gone
+ *
+ * The 'last lock' virtual function for the path node type (see
+ * fib_path_vft).
+ * We don't share paths, we share path lists, so paths are not expected
+ * to be locked/unlocked. Being called here is therefore unexpected and
+ * the function asserts rather than silently doing nothing.
+ */
+static void
+fib_path_last_lock_gone (fib_node_t *node)
+{
+ ASSERT(0);
+}
+
+/*
+ * fib_path_attached_next_hop_get_adj
+ *
+ * Return the index of the neighbour adjacency, of the given link type,
+ * for an attached-next-hop path. The adjacency is obtained with
+ * adj_nbr_add_or_lock.
+ */
+static const adj_index_t
+fib_path_attached_next_hop_get_adj (fib_path_t *path,
+                                    fib_link_t link)
+{
+    const ip46_address_t *nh_addr;
+
+    /*
+     * if the interface is p2p then the adj for the specific
+     * neighbour on that link will never exist. on p2p links
+     * the subnet address (the attached route) links to the
+     * auto-adj, which is keyed by the zero address; we want that adj
+     * here too. Otherwise use the path's own next-hop.
+     */
+    nh_addr = (vnet_sw_interface_is_p2p(vnet_get_main(),
+                                        path->attached_next_hop.fp_interface) ?
+               &zero_addr :
+               &path->attached_next_hop.fp_nh);
+
+    return (adj_nbr_add_or_lock(path->fp_nh_proto,
+                                link,
+                                nh_addr,
+                                path->attached_next_hop.fp_interface));
+}
+
+/*
+ * fib_path_attached_next_hop_set
+ *
+ * Resolve an attached-next-hop path: point the path's DPO at the
+ * adjacency described by its interface and next-hop, and register the
+ * path as a child of that adjacency.
+ */
+static void
+fib_path_attached_next_hop_set (fib_path_t *path)
+{
+    adj_index_t ai;
+
+    /*
+     * a path through an interface that is not admin up is not resolved,
+     * but it is still linked to the adjacency below.
+     */
+    if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
+                                       path->attached_next_hop.fp_interface))
+    {
+        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+    }
+
+    ai = fib_path_attached_next_hop_get_adj(
+             path,
+             fib_proto_to_link(path->fp_nh_proto));
+
+    dpo_set(&path->fp_dpo,
+            DPO_ADJACENCY,
+            fib_proto_to_dpo(path->fp_nh_proto),
+            ai);
+
+    /*
+     * become a child of the adjacency so we receive updates
+     * when its rewrite changes
+     */
+    path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
+                                     FIB_NODE_TYPE_PATH,
+                                     fib_path_get_index(path));
+}
+
+/*
+ * fib_path_recursive_adj_update
+ *
+ * Create or update the DPO that a recursive path contributes.
+ *
+ *  path - the recursive path; its RESOLVED oper flag is updated
+ *  fct  - the forwarding chain type requested from the via-entry
+ *  dpo  - [out] receives a copy of the DPO to forward through
+ *
+ * The forwarding is taken from the via-entry, unless the path is looped or
+ * one of its resolution constraints (resolve-via-host,
+ * resolve-via-attached) is not met, in which case the drop DPO is
+ * contributed instead and the path is marked as not resolved.
+ */
+static void
+fib_path_recursive_adj_update (fib_path_t *path,
+                               fib_forward_chain_type_t fct,
+                               dpo_id_t *dpo)
+{
+    dpo_id_t via_dpo = DPO_NULL;
+
+    /*
+     * get the DPO to resolve through from the via-entry
+     */
+    fib_entry_contribute_forwarding(path->fp_via_fib,
+                                    fct,
+                                    &via_dpo);
+
+
+    /*
+     * hope for the best - clear if restrictions apply.
+     */
+    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+
+    /*
+     * Validate any recursion constraints and over-ride the via
+     * adj if not met
+     */
+    if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
+    {
+        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+        dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+    }
+    else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
+    {
+        /*
+         * the via FIB must be a host route.
+         * note the via FIB just added will always be a host route
+         * since it is an RR source added host route. So what we need to
+         * check is whether the route has other sources. If it does then
+         * some other source has added it as a host route. If it doesn't
+         * then it was added only here and inherits forwarding from a cover.
+         * the cover is not a host route.
+         * The RR source is the lowest priority source, so we check if it
+         * is the best. if it is there are no other sources.
+         */
+        if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
+        {
+            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+            dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+
+            /*
+             * PIC edge trigger. let the load-balance maps know
+             */
+            load_balance_map_path_state_change(fib_path_get_index(path));
+        }
+    }
+    else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
+    {
+        /*
+         * RR source entries inherit the flags from the cover, so
+         * we can check the via directly
+         */
+        if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
+        {
+            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+            dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+
+            /*
+             * PIC edge trigger. let the load-balance maps know
+             */
+            load_balance_map_path_state_change(fib_path_get_index(path));
+        }
+    }
+
+    /*
+     * update the path's contributed DPO
+     */
+    dpo_copy(dpo, &via_dpo);
+
+    /*
+     * %U consumes a format function followed by that function's
+     * arguments; format_dpo_id takes the DPO and an indent.
+     */
+    FIB_PATH_DBG(path, "recursive update: %U",
+                 format_dpo_id,
+                 &path->fp_dpo, 2);
+
+    /*
+     * release the temporary reference held by the local copy
+     */
+    dpo_reset(&via_dpo);
+}
+
+/*
+ * fib_path_is_permanent_drop
+ *
+ * Return 1 if the path must drop regardless of its other attributes,
+ * i.e. it was either configured as a drop or has operationally become
+ * one. Return 0 otherwise.
+ */
+static int
+fib_path_is_permanent_drop (fib_path_t *path)
+{
+    if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP)
+    {
+        return (1);
+    }
+    if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
+    {
+        return (1);
+    }
+
+    return (0);
+}
+
+/*
+ * fib_path_unresolve
+ *
+ * Remove our dependency on the resolution target: undo the child
+ * registration and references made when the path was resolved, reset the
+ * path's DPO and clear the RESOLVED flag.
+ * A path that is a permanent drop is left untouched.
+ */
+static void
+fib_path_unresolve (fib_path_t *path)
+{
+ /*
+ * the forced drop path does not need unresolving
+ */
+ if (fib_path_is_permanent_drop(path))
+ {
+ return;
+ }
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_RECURSIVE:
+ /*
+ * only if the path is currently tracking a via-entry: stop being
+ * its child, then withdraw the RR sourced entry for the next-hop
+ * from the table.
+ */
+ if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
+ {
+ fib_prefix_t pfx;
+
+ fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx);
+ fib_entry_child_remove(path->fp_via_fib,
+ path->fp_sibling);
+ fib_table_entry_special_remove(path->recursive.fp_tbl_id,
+ &pfx,
+ FIB_SOURCE_RR);
+ path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ case FIB_PATH_TYPE_ATTACHED:
+ /*
+ * stop being a child of the adjacency and drop one lock on it.
+ * NOTE(review): presumably this unlock pairs with the lock taken by
+ * the adj_*_add_or_lock call made at resolve time - confirm.
+ */
+ adj_child_remove(path->fp_dpo.dpoi_index,
+ path->fp_sibling);
+ adj_unlock(path->fp_dpo.dpoi_index);
+ break;
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ dpo_reset(&path->exclusive.fp_ex_dpo);
+ break;
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_DEAG:
+ /*
+ * these hold only the path's DPO, which is reset below.
+ */
+ break;
+ }
+
+ /*
+ * release the DPO we were holding and mark the path as
+ * no longer resolved.
+ */
+ dpo_reset(&path->fp_dpo);
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+
+ return;
+}
+
+/*
+ * fib_path_proto_to_chain_type
+ *
+ * Map a next-hop protocol to the forwarding chain type requested from a
+ * via-entry: unicast IPv4/IPv6 for the IP protocols and non-EOS for MPLS.
+ * Any other value maps to unicast IPv4.
+ */
+static fib_forward_chain_type_t
+fib_path_proto_to_chain_type (fib_protocol_t proto)
+{
+    fib_forward_chain_type_t fct;
+
+    fct = FIB_FORW_CHAIN_TYPE_UNICAST_IP4;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        break;
+    case FIB_PROTOCOL_IP6:
+        fct = FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+        break;
+    }
+
+    return (fct);
+}
+
+/*
+ * fib_path_back_walk_notify
+ *
+ * A back walk has reached this path. This is the 'back walk' virtual
+ * function for the path node type (see fib_path_vft).
+ *
+ * The path updates its own state according to its type and the reason(s)
+ * for the walk carried in ctx->fnbw_reason, then propagates the walk to
+ * the path-list that owns it. Always returns FIB_NODE_BACK_WALK_CONTINUE.
+ */
+static fib_node_back_walk_rc_t
+fib_path_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_path_t *path;
+
+ path = fib_path_from_fib_node(node);
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_RECURSIVE:
+ if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
+ {
+ /*
+ * modify the recursive adjacency to use the new forwarding
+ * of the via-fib.
+ * this update is visible to packets in flight in the DP.
+ */
+ fib_path_recursive_adj_update(
+ path,
+ fib_path_proto_to_chain_type(path->fp_nh_proto),
+ &path->fp_dpo);
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ /*
+ * FIXME: this comment needs revisiting.
+ * ADJ_UPDATE backwalks pass silently through here and up to
+ * the path-list when the multipath adj collapse occurs.
+ * The reason we do this is that the assumption is that VPP
+ * runs in an environment where the Control-Plane is remote
+ * and hence reacts slowly to link up down. In order to remove
+ * this down link from the ECMP set quickly, we back-walk.
+ * VPP also has dedicated CPUs, so we are not stealing resources
+ * from the CP to do so.
+ */
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
+ {
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ /*
+ * The interface this path resolves through has been deleted.
+ * This will leave the path in a permanent drop state. The route
+ * needs to be removed and readded (and hence the path-list deleted)
+ * before it can forward again.
+ * The unresolve must come first: once the DROP flag is set
+ * fib_path_unresolve() returns without doing anything.
+ */
+ fib_path_unresolve(path);
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
+ {
+ /*
+ * restack the DPO to pick up the correct DPO sub-type
+ */
+ adj_index_t ai;
+
+ ai = fib_path_attached_next_hop_get_adj(
+ path,
+ fib_proto_to_link(path->fp_nh_proto));
+
+ dpo_set(&path->fp_dpo, DPO_ADJACENCY,
+ fib_proto_to_dpo(path->fp_nh_proto),
+ ai);
+ /*
+ * NOTE(review): presumably this releases the extra lock taken by
+ * the get_adj (add_or_lock) call above, the DPO holding its own
+ * reference - confirm.
+ */
+ adj_unlock(ai);
+ }
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ /*
+ * FIXME; this could schedule a lower priority walk, since attached
+ * routes are not usually in ECMP configurations so the backwalk to
+ * the FIB entry does not need to be high priority
+ */
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
+ {
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ /*
+ * as for attached-next-hop: unresolve, then become a
+ * permanent drop.
+ */
+ fib_path_unresolve(path);
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
+ }
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ /*
+ * FIXME When VRF delete is allowed this will need a poke.
+ * Until then deag is handled like the parent-less types below.
+ */
+ case FIB_PATH_TYPE_SPECIAL:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ /*
+ * these path types have no parents. so to be
+ * walked from one is unexpected.
+ */
+ ASSERT(0);
+ break;
+ }
+
+ /*
+ * propagate the backwalk further to the path-list
+ */
+ fib_path_list_back_walk(path->fp_pl_index, ctx);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The FIB path's graph node virtual function table:
+ *  - fnv_get       maps a path index to its fib_node_t
+ *  - fnv_last_lock is not expected to be invoked for paths (it asserts)
+ *  - fnv_back_walk handles a back walk that reaches a path
+ */
+static const fib_node_vft_t fib_path_vft = {
+ .fnv_get = fib_path_get_node,
+ .fnv_last_lock = fib_path_last_lock_gone,
+ .fnv_back_walk = fib_path_back_walk_notify,
+};
+
+/*
+ * fib_path_route_flags_to_cfg_flags
+ *
+ * Translate the resolution constraints carried in a route-path's flags
+ * into the equivalent path configuration flags.
+ * Returns FIB_PATH_CFG_FLAG_NONE when the route-path carries neither the
+ * resolve-via-host nor the resolve-via-attached constraint.
+ */
+static fib_path_cfg_flags_t
+fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
+{
+    /*
+     * start from the empty flag set; this is a set of flags, not an
+     * attribute (bit position) enumerator.
+     */
+    fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
+
+    if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
+    {
+        cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
+    }
+    if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
+    {
+        cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
+    }
+
+    return (cfg_flags);
+}
+
+/*
+ * fib_path_create
+ *
+ * Allocate a path from the pool and initialise it from a route-path
+ * description.
+ *
+ *  pl_index - the path-list that will own the path
+ *  nh_proto - the protocol of the path's next-hop
+ *  flags    - configuration flags for the path
+ *  rpath    - the interface/address/table/weight the client supplied
+ *
+ * Returns the index of the new path. The path is not resolved here.
+ */
+fib_node_index_t
+fib_path_create (fib_node_index_t pl_index,
+                 fib_protocol_t nh_proto,
+                 fib_path_cfg_flags_t flags,
+                 const fib_route_path_t *rpath)
+{
+    int has_interface, nh_is_zero;
+    fib_path_t *path;
+
+    pool_get(fib_path_pool, path);
+    memset(path, 0, sizeof(*path));
+
+    fib_node_init(&path->fp_node,
+                  FIB_NODE_TYPE_PATH);
+
+    dpo_reset(&path->fp_dpo);
+    path->fp_pl_index = pl_index;
+    path->fp_nh_proto = nh_proto;
+    path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+    path->fp_weight = rpath->frp_weight;
+    path->fp_cfg_flags = (flags | fib_path_route_flags_to_cfg_flags(rpath));
+
+    /*
+     * deduce the path's type from the parameters and save what is needed:
+     *   interface + local flag      => receive
+     *   interface, zero address     => attached
+     *   interface, non-zero address => attached-next-hop
+     *   no interface, address       => recursive
+     *   no interface, no address    => special if no table is given,
+     *                                  deag otherwise
+     */
+    has_interface = (~0 != rpath->frp_sw_if_index);
+    nh_is_zero = ip46_address_is_zero(&rpath->frp_addr);
+
+    if (has_interface && (flags & FIB_PATH_CFG_FLAG_LOCAL))
+    {
+        path->fp_type = FIB_PATH_TYPE_RECEIVE;
+        path->receive.fp_interface = rpath->frp_sw_if_index;
+        path->receive.fp_addr = rpath->frp_addr;
+    }
+    else if (has_interface && nh_is_zero)
+    {
+        path->fp_type = FIB_PATH_TYPE_ATTACHED;
+        path->attached.fp_interface = rpath->frp_sw_if_index;
+    }
+    else if (has_interface)
+    {
+        path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
+        path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
+        path->attached_next_hop.fp_nh = rpath->frp_addr;
+    }
+    else if (!nh_is_zero)
+    {
+        path->fp_type = FIB_PATH_TYPE_RECURSIVE;
+        path->recursive.fp_nh = rpath->frp_addr;
+        path->recursive.fp_tbl_id = rpath->frp_fib_index;
+    }
+    else if (~0 == rpath->frp_fib_index)
+    {
+        path->fp_type = FIB_PATH_TYPE_SPECIAL;
+    }
+    else
+    {
+        path->fp_type = FIB_PATH_TYPE_DEAG;
+        path->deag.fp_tbl_id = rpath->frp_fib_index;
+    }
+
+    FIB_PATH_DBG(path, "create");
+
+    return (fib_path_get_index(path));
+}
+
+/*
+ * fib_path_create_special
+ *
+ * Allocate and initialise a path that is not described by a route-path.
+ * The path's type is chosen from the configuration flags:
+ *   DROP flag  => special
+ *   LOCAL flag => receive, with no interface
+ *   otherwise  => exclusive, resolving via a copy of the DPO provided
+ *
+ *  pl_index - the path-list that will own the path
+ *  nh_proto - the protocol of the path's next-hop
+ *  flags    - configuration flags for the path
+ *  dpo      - the client's DPO. Only used, and then it must not be NULL,
+ *             for an exclusive path
+ *
+ * Returns the index of the new path. The path's weight is 1.
+ */
+fib_node_index_t
+fib_path_create_special (fib_node_index_t pl_index,
+                         fib_protocol_t nh_proto,
+                         fib_path_cfg_flags_t flags,
+                         const dpo_id_t *dpo)
+{
+    fib_path_t *path;
+
+    pool_get(fib_path_pool, path);
+    memset(path, 0, sizeof(*path));
+
+    fib_node_init(&path->fp_node,
+                  FIB_NODE_TYPE_PATH);
+    dpo_reset(&path->fp_dpo);
+
+    path->fp_pl_index = pl_index;
+    path->fp_weight = 1;
+    path->fp_nh_proto = nh_proto;
+    path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+    path->fp_cfg_flags = flags;
+
+    if (FIB_PATH_CFG_FLAG_DROP & flags)
+    {
+        path->fp_type = FIB_PATH_TYPE_SPECIAL;
+    }
+    else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
+    {
+        path->fp_type = FIB_PATH_TYPE_RECEIVE;
+        /*
+         * write the union member that matches the type just selected.
+         */
+        path->receive.fp_interface = FIB_NODE_INDEX_INVALID;
+    }
+    else
+    {
+        path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
+        ASSERT(NULL != dpo);
+        dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
+    }
+
+    return (fib_path_get_index(path));
+}
+
+/*
+ * fib_path_copy
+ *
+ * Copy a path. return index of new path.
+ *
+ * The new path takes the configuration of the path at path_index, belongs
+ * to the path-list path_list_index and starts out with its derived state
+ * (node, oper flags, via-fib and DPO) reset.
+ */
+fib_node_index_t
+fib_path_copy (fib_node_index_t path_index,
+ fib_node_index_t path_list_index)
+{
+ fib_path_t *path, *orig_path;
+
+ pool_get(fib_path_pool, path);
+
+ /*
+ * fetch the original only after the allocation above.
+ * NOTE(review): presumably because pool_get may move the pool's
+ * memory, which would invalidate an earlier pointer - confirm.
+ */
+ orig_path = fib_path_get(path_index);
+ ASSERT(NULL != orig_path);
+
+ /*
+ * a raw copy of the whole object, so the region covered by
+ * fib_path_hash() is byte-for-byte the same as the original's.
+ */
+ memcpy(path, orig_path, sizeof(*path));
+
+ FIB_PATH_DBG(path, "create-copy:%d", path_index);
+
+ /*
+ * reset the dynamic section
+ */
+ fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
+ path->fp_oper_flags = FIB_PATH_OPER_FLAG_NONE;
+ path->fp_pl_index = path_list_index;
+ path->fp_via_fib = FIB_NODE_INDEX_INVALID;
+ /*
+ * zero the DPO that came across in the memcpy before resetting it.
+ * NOTE(review): presumably so that the reset does not release a
+ * reference that belongs to the original path - confirm.
+ */
+ memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
+ dpo_reset(&path->fp_dpo);
+
+ return (fib_path_get_index(path));
+}
+
+/*
+ * fib_path_destroy
+ *
+ * Destroy a path that is no longer required: remove its dependency on
+ * whatever it resolves through, then return it to the pool.
+ */
+void
+fib_path_destroy (fib_node_index_t path_index)
+{
+    fib_path_t *path = fib_path_get(path_index);
+
+    ASSERT(NULL != path);
+    FIB_PATH_DBG(path, "destroy");
+
+    /*
+     * drop the links to the resolving objects before the memory is freed
+     */
+    fib_path_unresolve(path);
+
+    fib_node_deinit(&path->fp_node);
+    pool_put(fib_path_pool, path);
+}
+
+/*
+ * fib_path_hash
+ *
+ * Compute a hash of the path's configuration, i.e. of the memory that
+ * lies between the path_hash_start and path_hash_end markers of
+ * fib_path_t.
+ */
+uword
+fib_path_hash (fib_node_index_t path_index)
+{
+    fib_path_t *path;
+    uword n_bytes;
+    void *start;
+
+    path = fib_path_get(path_index);
+
+    start = STRUCT_MARK_PTR(path, path_hash_start);
+    n_bytes = (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
+               STRUCT_OFFSET_OF(fib_path_t, path_hash_start));
+
+    return (hash_memory(start, n_bytes, 0));
+}
+
+/*
+ * fib_path_cmp_i
+ *
+ * Compare two paths for equivalence.
+ * Returns 0 if the paths are equivalent and non-zero otherwise.
+ *
+ * Paths of a different type, or of a different next-hop protocol, are
+ * never equivalent. Paths that differ only in weight are equivalent.
+ * Special, receive and exclusive paths of the same protocol always
+ * compare equal.
+ */
+static int
+fib_path_cmp_i (const fib_path_t *path1,
+                const fib_path_t *path2)
+{
+    int res;
+
+    res = 1;
+
+    /*
+     * paths of different types and protocol are not equal.
+     * different weights only are the same path.
+     */
+    if (path1->fp_type != path2->fp_type)
+    {
+        /*
+         * the per-type compare below must not run in this case; it
+         * would read path2 through the wrong member of the union and
+         * overwrite this result.
+         */
+        res = (path1->fp_type - path2->fp_type);
+    }
+    else if (path1->fp_nh_proto != path2->fp_nh_proto)
+    {
+        res = (path1->fp_nh_proto - path2->fp_nh_proto);
+    }
+    else
+    {
+        /*
+         * both paths are of the same type.
+         * consider each type and its attributes in turn.
+         */
+        switch (path1->fp_type)
+        {
+        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+            res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
+                                   &path2->attached_next_hop.fp_nh);
+            if (0 == res) {
+                res = vnet_sw_interface_compare(
+                          vnet_get_main(),
+                          path1->attached_next_hop.fp_interface,
+                          path2->attached_next_hop.fp_interface);
+            }
+            break;
+        case FIB_PATH_TYPE_ATTACHED:
+            res = vnet_sw_interface_compare(
+                      vnet_get_main(),
+                      path1->attached.fp_interface,
+                      path2->attached.fp_interface);
+            break;
+        case FIB_PATH_TYPE_RECURSIVE:
+            res = ip46_address_cmp(&path1->recursive.fp_nh,
+                                   &path2->recursive.fp_nh);
+
+            if (0 == res)
+            {
+                res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
+            }
+            break;
+        case FIB_PATH_TYPE_DEAG:
+            res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
+            break;
+        case FIB_PATH_TYPE_SPECIAL:
+        case FIB_PATH_TYPE_RECEIVE:
+        case FIB_PATH_TYPE_EXCLUSIVE:
+            res = 0;
+            break;
+        }
+    }
+    return (res);
+}
+
+/*
+ * fib_path_cmp_for_sort
+ *
+ * Comparison function used when sorting a vector of path indices.
+ * v1 and v2 each point to a path index. As usual 0 means equal.
+ */
+int
+fib_path_cmp_for_sort (void * v1,
+                       void * v2)
+{
+    fib_node_index_t *lhs, *rhs;
+
+    lhs = v1;
+    rhs = v2;
+
+    return (fib_path_cmp_i(fib_path_get(*lhs),
+                           fib_path_get(*rhs)));
+}
+
+/*
+ * fib_path_cmp
+ *
+ * Compare the paths with indices pi1 and pi2 for equivalence.
+ * Returns 0 if they are equivalent.
+ */
+int
+fib_path_cmp (fib_node_index_t pi1,
+              fib_node_index_t pi2)
+{
+    fib_path_t *lhs = fib_path_get(pi1);
+    fib_path_t *rhs = fib_path_get(pi2);
+
+    return (fib_path_cmp_i(lhs, rhs));
+}
+
+/*
+ * fib_path_cmp_w_route_path
+ *
+ * Compare a path against a route-path description.
+ * Returns 0 if the route-path describes this path.
+ *
+ * The weights are compared first. Then, according to the type of the
+ * path, its address, interface and/or table index are compared against
+ * those in the route-path. Special, receive and exclusive paths match
+ * any route-path of the same weight.
+ */
+int
+fib_path_cmp_w_route_path (fib_node_index_t path_index,
+                           const fib_route_path_t *rpath)
+{
+    fib_path_t *path;
+    int res;
+
+    path = fib_path_get(path_index);
+
+    if (path->fp_weight != rpath->frp_weight)
+    {
+        return (path->fp_weight - rpath->frp_weight);
+    }
+
+    /*
+     * the weights match. consider each path type and its attributes
+     * in turn.
+     */
+    res = 1;
+
+    switch (path->fp_type)
+    {
+    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+        res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
+                               &rpath->frp_addr);
+        if (0 != res)
+        {
+            break;
+        }
+        res = vnet_sw_interface_compare(
+                  vnet_get_main(),
+                  path->attached_next_hop.fp_interface,
+                  rpath->frp_sw_if_index);
+        break;
+    case FIB_PATH_TYPE_ATTACHED:
+        res = vnet_sw_interface_compare(
+                  vnet_get_main(),
+                  path->attached.fp_interface,
+                  rpath->frp_sw_if_index);
+        break;
+    case FIB_PATH_TYPE_RECURSIVE:
+        res = ip46_address_cmp(&path->recursive.fp_nh,
+                               &rpath->frp_addr);
+        if (0 != res)
+        {
+            break;
+        }
+        res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
+        break;
+    case FIB_PATH_TYPE_DEAG:
+        res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
+        break;
+    case FIB_PATH_TYPE_SPECIAL:
+    case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_EXCLUSIVE:
+        res = 0;
+        break;
+    }
+
+    return (res);
+}
+
+/*
+ * fib_path_recursive_loop_detect
+ *
+ * A forward walk of the FIB object graph to detect a cycle/loop. This
+ * walk is initiated when an entry is linking to a new path list or from an
+ * old one. The entry vector passed contains all the FIB entries that are
+ * children of this path (it is all the entries encountered on the walk so
+ * far). If this vector contains the entry this path resolves via, then a
+ * loop is about to form.
+ * The loop must be allowed to form, since we need the dependencies in place
+ * so that we can track when the loop breaks.
+ * However, we MUST not produce a loop in the forwarding graph (else packets
+ * would loop around the switch path until the loop breaks), so we mark
+ * recursive paths as looped so that they do not contribute forwarding
+ * information.
+ * By marking the path as looped, an entry such as;
+ *    X/Y
+ *     via a.a.a.a (looped)
+ *     via b.b.b.b (not looped)
+ * can still forward using the info provided by b.b.b.b only
+ *
+ * Returns 0 for a permanent drop path, otherwise the result of
+ * fib_path_is_looped() for the path once the walk has been made.
+ */
+int
+fib_path_recursive_loop_detect (fib_node_index_t path_index,
+                                fib_node_index_t **entry_indicies)
+{
+    fib_path_t *path;
+
+    path = fib_path_get(path_index);
+
+    /*
+     * the forced drop path is never looped, cos it is never resolved.
+     */
+    if (fib_path_is_permanent_drop(path))
+    {
+        return (0);
+    }
+
+    /*
+     * only recursive paths can be part of a loop; all the other path
+     * types are the leaves of the graph.
+     */
+    if (FIB_PATH_TYPE_RECURSIVE == path->fp_type)
+    {
+        fib_node_index_t *walked, *visited;
+        int cycle;
+
+        walked = *entry_indicies;
+        cycle = 0;
+
+        vec_foreach(visited, walked)
+        {
+            /*
+             * if the entry that is about to link to this path-list (or
+             * one of this path-list's children) is the same entry that
+             * this recursive path resolves through, this is a cycle.
+             * abort the walk.
+             */
+            if (path->fp_via_fib == *visited)
+            {
+                cycle = 1;
+                break;
+            }
+        }
+
+        if (cycle)
+        {
+            FIB_PATH_DBG(path, "recursive loop formed");
+            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
+
+            dpo_copy(&path->fp_dpo,
+                     drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+        }
+        else if (fib_entry_recursive_loop_detect(path->fp_via_fib,
+                                                 entry_indicies))
+        {
+            /*
+             * no loop at this path, but one was found further along the
+             * forward walk of the graph.
+             */
+            FIB_PATH_DBG(path, "recursive loop formed");
+            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
+        }
+        else
+        {
+            FIB_PATH_DBG(path, "recursive loop cleared");
+            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
+        }
+    }
+
+    return (fib_path_is_looped(path_index));
+}
+
+/**
+ * Resolve a path: populate path->fp_dpo with the DPO appropriate to the
+ * path's type and update the path's RESOLVED operational flag.
+ *
+ * @param path_index index of the path to resolve.
+ * @return the value of fib_path_is_resolved() after the attempt.
+ */
+int
+fib_path_resolve (fib_node_index_t path_index)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ /*
+ * hope for the best.
+ * the flag is cleared again below in the cases that fail to resolve.
+ */
+ path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
+
+ /*
+ * the forced drop path resolves via the drop adj
+ * the RESOLVED flag is cleared, so this returns 'not resolved'.
+ */
+ if (fib_path_is_permanent_drop(path))
+ {
+ dpo_copy(&path->fp_dpo,
+ drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ return (fib_path_is_resolved(path_index));
+ }
+
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ fib_path_attached_next_hop_set(path);
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ /*
+ * path->attached.fp_interface
+ * an admin-down interface leaves the path unresolved, but the
+ * adjacency is still created and linked to below.
+ */
+ if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
+ path->attached.fp_interface))
+ {
+ path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+ }
+ if (vnet_sw_interface_is_p2p(vnet_get_main(),
+ path->attached.fp_interface))
+ {
+ /*
+ * point-2-point interfaces do not require a glean, since
+ * there is nothing to ARP. Install a rewrite/nbr adj instead
+ */
+ dpo_set(&path->fp_dpo,
+ DPO_ADJACENCY,
+ fib_proto_to_dpo(path->fp_nh_proto),
+ adj_nbr_add_or_lock(
+ path->fp_nh_proto,
+ fib_proto_to_link(path->fp_nh_proto),
+ &zero_addr,
+ path->attached.fp_interface));
+ }
+ else
+ {
+ dpo_set(&path->fp_dpo,
+ DPO_ADJACENCY_GLEAN,
+ fib_proto_to_dpo(path->fp_nh_proto),
+ adj_glean_add_or_lock(path->fp_nh_proto,
+ path->attached.fp_interface,
+ NULL));
+ }
+ /*
+ * become a child of the adjacency so we receive updates
+ * when the interface state changes
+ */
+ path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
+ FIB_NODE_TYPE_PATH,
+ fib_path_get_index(path));
+
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ {
+ /*
+ * Create a RR source entry in the table for the address
+ * that this path recurses through.
+ * This resolve action is recursive, hence we may create
+ * more paths in the process. more creates mean maybe realloc
+ * of this path.
+ */
+ fib_node_index_t fei;
+ fib_prefix_t pfx;
+
+ ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);
+
+ fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx);
+
+ fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
+ &pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * re-fetch the path by index; the pointer held before the add
+ * may be stale if the path pool was realloc'd.
+ */
+ path = fib_path_get(path_index);
+ path->fp_via_fib = fei;
+
+ /*
+ * become a dependent child of the entry so the path is
+ * informed when the forwarding for the entry changes.
+ */
+ path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
+ FIB_NODE_TYPE_PATH,
+ fib_path_get_index(path));
+
+ /*
+ * create and configure the IP DPO
+ */
+ fib_path_recursive_adj_update(
+ path,
+ fib_path_proto_to_chain_type(path->fp_nh_proto),
+ &path->fp_dpo);
+
+ break;
+ }
+ case FIB_PATH_TYPE_SPECIAL:
+ /*
+ * Resolve via the drop
+ */
+ dpo_copy(&path->fp_dpo,
+ drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ /*
+ * Resolve via a lookup DPO.
+ * FIXME. control plane should add routes with a table ID
+ */
+ lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
+ fib_proto_to_dpo(path->fp_nh_proto),
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ &path->fp_dpo);
+ break;
+ case FIB_PATH_TYPE_RECEIVE:
+ /*
+ * Resolve via a receive DPO.
+ */
+ receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
+ path->receive.fp_interface,
+ &path->receive.fp_addr,
+ &path->fp_dpo);
+ break;
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ /*
+ * Resolve via the user provided DPO
+ */
+ dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
+ break;
+ }
+
+ return (fib_path_is_resolved(path_index));
+}
+
+/**
+ * Find the interface a path resolves through.
+ *
+ * Attached, attached-next-hop and receive paths report the interface they
+ * were configured with; a recursive path asks the entry it resolves via.
+ * All other path types have no interface and yield ~0.
+ */
+u32
+fib_path_get_resolving_interface (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+    u32 sw_if_index = ~0;
+
+    switch (p->fp_type)
+    {
+    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+        sw_if_index = p->attached_next_hop.fp_interface;
+        break;
+    case FIB_PATH_TYPE_ATTACHED:
+        sw_if_index = p->attached.fp_interface;
+        break;
+    case FIB_PATH_TYPE_RECEIVE:
+        sw_if_index = p->receive.fp_interface;
+        break;
+    case FIB_PATH_TYPE_RECURSIVE:
+        sw_if_index = fib_entry_get_resolving_interface(p->fp_via_fib);
+        break;
+    case FIB_PATH_TYPE_SPECIAL:
+    case FIB_PATH_TYPE_DEAG:
+    case FIB_PATH_TYPE_EXCLUSIVE:
+        /* no interface to report */
+        break;
+    }
+
+    return (sw_if_index);
+}
+
+/**
+ * Return the adjacency index held in the path's DPO.
+ *
+ * Asserts that the DPO is an adjacency; where the assert is compiled out
+ * a non-adjacency DPO yields ADJ_INDEX_INVALID.
+ */
+adj_index_t
+fib_path_get_adj (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+
+    ASSERT(dpo_is_adj(&p->fp_dpo));
+
+    if (!dpo_is_adj(&p->fp_dpo))
+    {
+        return (ADJ_INDEX_INVALID);
+    }
+
+    return (p->fp_dpo.dpoi_index);
+}
+
+/**
+ * Return the weight stored on the path (fp_weight).
+ */
+int
+fib_path_get_weight (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+
+    ASSERT(p);
+
+    return (p->fp_weight);
+}
+
+/**
+ * Fill in the DPO a path contributes for a requested forwarding chain type.
+ *
+ * If the requested chain type is the one native to the path's next-hop
+ * protocol, the DPO built at resolve time (path->fp_dpo) is copied out.
+ * Otherwise a DPO is produced per path type.
+ *
+ * @param path_index index of the contributing path.
+ * @param fct the forwarding chain type requested; MPLS EOS is asserted
+ * against on entry.
+ * @param dpo [out] the DPO to populate.
+ */
+void
+fib_path_contribute_forwarding (fib_node_index_t path_index,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo)
+{
+ fib_path_t *path;
+
+ path = fib_path_get(path_index);
+
+ ASSERT(path);
+ ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);
+
+ FIB_PATH_DBG(path, "contribute");
+
+ /*
+ * The DPO stored in the path was created when the path was resolved.
+ * This then represents the path's 'native' protocol; IP.
+ * For all others will need to go find something else.
+ */
+ if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
+ {
+ dpo_copy(dpo, &path->fp_dpo);
+ }
+ else {
+ switch (path->fp_type)
+ {
+ case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ {
+ adj_index_t ai;
+
+ /*
+ * get an adj whose link type matches the requested chain type.
+ */
+ ai = fib_path_attached_next_hop_get_adj(
+ path,
+ fib_forw_chain_type_to_link_type(fct));
+ dpo_set(dpo, DPO_ADJACENCY,
+ fib_forw_chain_type_to_dpo_proto(fct), ai);
+ /*
+ * NOTE(review): presumably dpo_set() took its own lock on the
+ * adj, so the reference returned by the get is released here
+ * - confirm.
+ */
+ adj_unlock(ai);
+
+ break;
+ }
+ }
+ break;
+ case FIB_PATH_TYPE_RECURSIVE:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ /*
+ * Assume that EOS and IP forwarding is the same.
+ * revisit for ieBGP
+ */
+ dpo_copy(dpo, &path->fp_dpo);
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ fib_path_recursive_adj_update(path, fct, dpo);
+ break;
+ }
+ break;
+ case FIB_PATH_TYPE_DEAG:
+ switch (fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
+ DPO_PROTO_MPLS,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ dpo);
+ break;
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ dpo_copy(dpo, &path->fp_dpo);
+ break;
+ }
+ break;
+ case FIB_PATH_TYPE_EXCLUSIVE:
+ dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
+ break;
+ case FIB_PATH_TYPE_ATTACHED:
+ case FIB_PATH_TYPE_RECEIVE:
+ case FIB_PATH_TYPE_SPECIAL:
+ /*
+ * these path types are not expected to be asked for a chain type
+ * other than their native one.
+ */
+ ASSERT(0);
+ break;
+ }
+
+ }
+}
+
+/**
+ * Append this path to a vector of load-balance paths, if it is resolved.
+ *
+ * An unresolved path adds nothing. The (possibly re-allocated) vector is
+ * returned, so the caller must use the return value from here on.
+ * The fct argument is not consulted.
+ */
+load_balance_path_t *
+fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
+                                       fib_forward_chain_type_t fct,
+                                       load_balance_path_t *hash_key)
+{
+    load_balance_path_t *slot;
+    fib_path_t *p;
+
+    p = fib_path_get(path_index);
+
+    ASSERT(p);
+
+    if (!fib_path_is_resolved(path_index))
+    {
+        return (hash_key);
+    }
+
+    vec_add2(hash_key, slot, 1);
+
+    slot->path_index = path_index;
+    slot->path_weight = p->fp_weight;
+    dpo_copy(&slot->path_dpo, &p->fp_dpo);
+
+    return (hash_key);
+}
+
+/**
+ * Non-zero if the path's type is FIB_PATH_TYPE_RECURSIVE.
+ */
+int
+fib_path_is_recursive (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+
+    return (p->fp_type == FIB_PATH_TYPE_RECURSIVE);
+}
+
+/**
+ * Non-zero if the path's type is FIB_PATH_TYPE_EXCLUSIVE.
+ */
+int
+fib_path_is_exclusive (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+
+    return (p->fp_type == FIB_PATH_TYPE_EXCLUSIVE);
+}
+
+/**
+ * Non-zero if the path's type is FIB_PATH_TYPE_DEAG.
+ */
+int
+fib_path_is_deag (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+
+    return (p->fp_type == FIB_PATH_TYPE_DEAG);
+}
+
+/**
+ * A path is resolved (returns 1) only when all of the following hold:
+ * its DPO is valid, its RESOLVED operational flag is set, it is not
+ * looped and it is not a forced drop. Otherwise returns 0.
+ */
+int
+fib_path_is_resolved (fib_node_index_t path_index)
+{
+    fib_path_t *p = fib_path_get(path_index);
+
+    if (!dpo_id_is_valid(&p->fp_dpo))
+    {
+        return (0);
+    }
+    if (!(p->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
+    {
+        return (0);
+    }
+    if (fib_path_is_looped(path_index))
+    {
+        return (0);
+    }
+    if (fib_path_is_permanent_drop(p))
+    {
+        return (0);
+    }
+
+    return (1);
+}
+
+/**
+ * Non-zero if the path carries the RECURSIVE_LOOP operational flag.
+ * The value returned is the masked flag, not a normalised 0/1.
+ */
+int
+fib_path_is_looped (fib_node_index_t path_index)
+{
+    return (fib_path_get(path_index)->fp_oper_flags &
+            FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
+}
+
+/**
+ * Module initialisation: register the path node type, together with its
+ * virtual function table, with the FIB node infrastructure.
+ */
+void
+fib_path_module_init (void)
+{
+    fib_node_register_type(FIB_NODE_TYPE_PATH,
+                           &fib_path_vft);
+}
+
+/**
+ * CLI handler for "show fib paths".
+ *
+ * Prints a banner and then every path in the path pool, one per output
+ * call, using format_fib_path. The input and cmd arguments are not used.
+ *
+ * @return NULL; no error is ever reported.
+ */
+static clib_error_t *
+show_fib_path_command (vlib_main_t * vm,
+                       unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+    fib_path_t *path;
+
+    /*
+     * this command walks the pool of paths, not path-lists; the banner
+     * says so.
+     */
+    vlib_cli_output (vm, "FIB Paths");
+    pool_foreach(path, fib_path_pool,
+    ({
+        vlib_cli_output (vm, "%U", format_fib_path, path);
+    }));
+
+    return (NULL);
+}
+
+/*
+ * Register the "show fib paths" CLI command and bind it to its handler.
+ */
+VLIB_CLI_COMMAND (show_fib_path, static) = {
+    .path = "show fib paths",
+    .short_help = "show fib paths",
+    .function = show_fib_path_command,
+};
diff --git a/vnet/vnet/fib/fib_path.h b/vnet/vnet/fib/fib_path.h
new file mode 100644
index 00000000000..16ca358c04a
--- /dev/null
+++ b/vnet/vnet/fib/fib_path.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Given a route of the form;
+ * q.r.s.t/Y
+ * via <interface> <next-hop>
+ *
+ * The prefix is: q.r.s.t/Y
+ * the path is: 'via <interface> <next-hop>'
+ *
+ * The path is the description of where to send the traffic, and the
+ * prefix is a description of which traffic to send.
+ * It is the aim of the FIB to resolve the path, i.e. to find the corresponding
+ * adjacency to match the path's description.
+ */
+
+#ifndef __FIB_PATH_H__
+#define __FIB_PATH_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/load_balance.h>
+
+#include <vnet/fib/fib_types.h>
+#include <vnet/adj/adj_types.h>
+
+/**
+ * Enumeration of path configuration attributes.
+ * Each attribute is a bit position; the corresponding flag values are
+ * built from these as (1 << attribute).
+ */
+typedef enum fib_path_cfg_attribute_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_FIRST = 0,
+ /**
+ * The path is forced to a drop, whatever the next-hop info says.
+ * something somewhere knows better...
+ */
+ FIB_PATH_CFG_ATTRIBUTE_DROP = FIB_PATH_CFG_ATTRIBUTE_FIRST,
+ /**
+ * The path uses an adj that is exclusive. I.e. it is known only by
+ * the source of the route.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE,
+ /**
+ * Recursion constraint via host
+ */
+ FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST,
+ /**
+ * Recursion constraint via attached
+ */
+ FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED,
+ /**
+ * The path is a for-us path
+ */
+ FIB_PATH_CFG_ATTRIBUTE_LOCAL,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_PATH_CFG_ATTRIBUTE_LAST = FIB_PATH_CFG_ATTRIBUTE_LOCAL,
+} __attribute__ ((packed)) fib_path_cfg_attribute_t;
+
+/**
+ * The maximum number of path attributes
+ */
+#define FIB_PATH_CFG_ATTRIBUTE_MAX (FIB_PATH_CFG_ATTRIBUTE_LAST + 1)
+
+/**
+ * Initialiser for an array of attribute names, indexed by attribute.
+ * Keep in step with fib_path_cfg_attribute_t.
+ */
+#define FIB_PATH_CFG_ATTRIBUTES { \
+ [FIB_PATH_CFG_ATTRIBUTE_DROP] = "drop", \
+ [FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE] = "exclusive", \
+ [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST] = "resolve-host", \
+ [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \
+ [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local", \
+}
+
+/**
+ * Loop header that iterates _item over every attribute, from FIRST to
+ * LAST inclusive.
+ */
+#define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \
+ for (_item = FIB_PATH_CFG_ATTRIBUTE_FIRST; \
+ _item <= FIB_PATH_CFG_ATTRIBUTE_LAST; \
+ _item++)
+
+/**
+ * Path config flags from the attributes.
+ * Each flag is the single bit (1 << attribute) of the matching
+ * fib_path_cfg_attribute_t value, so flags may be OR'd together.
+ */
+typedef enum fib_path_cfg_flags_t_ {
+ FIB_PATH_CFG_FLAG_NONE = 0,
+ FIB_PATH_CFG_FLAG_DROP = (1 << FIB_PATH_CFG_ATTRIBUTE_DROP),
+ FIB_PATH_CFG_FLAG_EXCLUSIVE = (1 << FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE),
+ FIB_PATH_CFG_FLAG_RESOLVE_HOST = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST),
+ FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED),
+ FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL),
+} __attribute__ ((packed)) fib_path_cfg_flags_t;
+
+
+/*
+ * Formatting
+ */
+extern u8 *fib_path_format(fib_node_index_t pi, u8 *s);
+extern u8 *fib_path_adj_format(fib_node_index_t pi,
+                               u32 indent,
+                               u8 *s);
+
+extern u8 * format_fib_path(u8 * s, va_list * args);
+
+/*
+ * Creation, comparison, copy and destruction
+ */
+extern fib_node_index_t fib_path_create(fib_node_index_t pl_index,
+                                        fib_protocol_t nh_proto,
+                                        fib_path_cfg_flags_t flags,
+                                        const fib_route_path_t *path);
+extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index,
+                                                fib_protocol_t nh_proto,
+                                                fib_path_cfg_flags_t flags,
+                                                const dpo_id_t *dpo);
+
+extern int fib_path_cmp(fib_node_index_t path_index1,
+                        fib_node_index_t path_index2);
+extern int fib_path_cmp_for_sort(void * a1, void * a2);
+extern int fib_path_cmp_w_route_path(fib_node_index_t path_index,
+                                     const fib_route_path_t *rpath);
+extern fib_node_index_t fib_path_copy(fib_node_index_t path_index,
+                                      fib_node_index_t path_list_index);
+
+/*
+ * Resolution and state queries
+ */
+extern int fib_path_resolve(fib_node_index_t path_index);
+extern int fib_path_is_resolved(fib_node_index_t path_index);
+extern int fib_path_is_recursive(fib_node_index_t path_index);
+extern int fib_path_is_exclusive(fib_node_index_t path_index);
+extern int fib_path_is_deag(fib_node_index_t path_index);
+extern int fib_path_is_looped(fib_node_index_t path_index);
+extern void fib_path_destroy(fib_node_index_t path_index);
+extern uword fib_path_hash(fib_node_index_t path_index);
+
+/*
+ * Forwarding contribution
+ */
+extern load_balance_path_t * fib_path_append_nh_for_multipath_hash(
+    fib_node_index_t path_index,
+    fib_forward_chain_type_t fct,
+    load_balance_path_t *hash_key);
+extern void fib_path_contribute_forwarding(fib_node_index_t path_index,
+                                           fib_forward_chain_type_t type,
+                                           dpo_id_t *dpo);
+extern adj_index_t fib_path_get_adj(fib_node_index_t path_index);
+extern int fib_path_recursive_loop_detect(fib_node_index_t path_index,
+                                          fib_node_index_t **entry_indicies);
+/* the argument is a path index, as in the definition */
+extern u32 fib_path_get_resolving_interface(fib_node_index_t path_index);
+extern int fib_path_get_weight(fib_node_index_t path_index);
+
+/*
+ * Module initialisation
+ */
+extern void fib_path_module_init(void);
+
+#endif
diff --git a/vnet/vnet/fib/fib_path_ext.c b/vnet/vnet/fib/fib_path_ext.c
new file mode 100644
index 00000000000..f40c749e981
--- /dev/null
+++ b/vnet/vnet/fib/fib_path_ext.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include "fib_path_ext.h"
+#include "fib_path.h"
+#include "fib_path_list.h"
+#include "fib_internal.h"
+
+/**
+ * Format function for a path extension.
+ * Takes a fib_path_ext_t pointer from the va_list and appends the index
+ * of the extended path and the extension's MPLS label to s.
+ */
+u8 *
+format_fib_path_ext (u8 * s, va_list * args)
+{
+    fib_path_ext_t *ext = va_arg (*args, fib_path_ext_t *);
+
+    return (format(s, "path:%d label:%U",
+                   ext->fpe_path_index,
+                   format_mpls_unicast_label,
+                   ext->fpe_path.frp_label));
+}
+
+/**
+ * Compare the route-path description held by an extension with rpath.
+ * Returns whatever fib_route_path_cmp() returns for the pair.
+ */
+int
+fib_path_ext_cmp (fib_path_ext_t *path_ext,
+                  const fib_route_path_t *rpath)
+{
+    fib_route_path_t *described = &path_ext->fpe_path;
+
+    return (fib_route_path_cmp(described, rpath));
+}
+
+/**
+ * Path-list walk callback; ctx is the fib_path_ext_t being resolved.
+ * When the walked path matches the extension's description the path's
+ * index is saved in the extension and 0 is returned; otherwise 1 is
+ * returned so the walk carries on.
+ */
+static int
+fib_path_ext_match (fib_node_index_t pl_index,
+                    fib_node_index_t path_index,
+                    void *ctx)
+{
+    fib_path_ext_t *ext = ctx;
+
+    if (fib_path_cmp_w_route_path(path_index, &ext->fpe_path))
+    {
+        /* not this one. keep going */
+        return (1);
+    }
+
+    ext->fpe_path_index = path_index;
+
+    return (0);
+}
+
+/**
+ * (Re)bind an extension to the path it extends.
+ * The saved path index is first invalidated, then the path-list is
+ * walked with fib_path_ext_match; if no path matches, the index stays
+ * FIB_NODE_INDEX_INVALID.
+ */
+void
+fib_path_ext_resolve (fib_path_ext_t *path_ext,
+                      fib_node_index_t path_list_index)
+{
+    path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID;
+
+    fib_path_list_walk(path_list_index, fib_path_ext_match, path_ext);
+}
+
+/**
+ * Initialise an extension from a route-path description and bind it to
+ * the matching path, if any, on the given path-list.
+ */
+void
+fib_path_ext_init (fib_path_ext_t *path_ext,
+                   fib_node_index_t path_list_index,
+                   const fib_route_path_t *rpath)
+{
+    /* take a copy of the description; no path is bound yet */
+    path_ext->fpe_path = *rpath;
+    path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID;
+
+    /* go find the path this extends */
+    fib_path_ext_resolve(path_ext, path_list_index);
+}
+
+/**
+ * Append a load-balance path for this extension to the vector nhs.
+ *
+ * The DPO contributed by the extended path is fetched and, unless the
+ * extension's label is implicit-null, an MPLS label DPO is stacked on it.
+ * Nothing is appended when the extended path is unresolved, when the
+ * chain type is not one handled below, or when the path contributes a drop.
+ *
+ * @param path_ext the extension to stack.
+ * @param parent_fct the forwarding chain type being built.
+ * @param nhs the vector of load-balance paths to append to.
+ * @return the (possibly re-allocated) vector; callers must use the return
+ * value.
+ */
+load_balance_path_t *
+fib_path_ext_stack (fib_path_ext_t *path_ext,
+ fib_forward_chain_type_t parent_fct,
+ load_balance_path_t *nhs)
+{
+ fib_forward_chain_type_t child_fct;
+ load_balance_path_t *nh;
+
+ if (!fib_path_is_resolved(path_ext->fpe_path_index))
+ return (nhs);
+
+ /*
+ * Since we are stacking this path-extension, it must have a valid out
+ * label. From the chain type request by the child, determine what
+ * chain type we will request from the parent.
+ */
+ switch (parent_fct)
+ {
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ ASSERT(0); /* fall through to the IP handling when asserts are compiled out */
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ if (MPLS_IETF_IMPLICIT_NULL_LABEL == path_ext->fpe_label)
+ {
+ /*
+ * implicit-null label for the eos or IP chain, need to pick up
+ * the IP adj
+ */
+ child_fct = parent_fct;
+ }
+ else
+ {
+ /*
+ * we have a label to stack. packets will thus be labelled when
+ * they encounter the child, ergo, non-eos.
+ */
+ child_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+ }
+ break;
+ case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ child_fct = parent_fct;
+ break;
+ default:
+ return (nhs);
+ break;
+ }
+
+ dpo_id_t via_dpo = DPO_NULL;
+
+ /*
+ * The next object in the graph after the imposition of the label
+ * will be the DPO contributed by the path through which the packets
+ * are to be sent. We stack the MPLS Label DPO on this path DPO
+ */
+ fib_path_contribute_forwarding(path_ext->fpe_path_index,
+ child_fct,
+ &via_dpo);
+
+ if (dpo_is_drop(&via_dpo) ||
+ load_balance_is_drop(&via_dpo))
+ {
+ /*
+ * don't stack a path extension on a drop. doing so will create
+ * a LB bucket entry on drop, and we will lose a percentage of traffic.
+ */
+ }
+ else
+ {
+ vec_add2(nhs, nh, 1);
+ nh->path_weight = fib_path_get_weight(path_ext->fpe_path_index);
+ nh->path_index = path_ext->fpe_path_index;
+ dpo_copy(&nh->path_dpo, &via_dpo);
+
+ /*
+ * The label is stackable for this chain type
+ * construct the mpls header that will be imposed in the data-path.
+ * the new label DPO is created on top of the copy of the path's DPO
+ * and then replaces it in the load-balance path.
+ */
+ if (MPLS_IETF_IMPLICIT_NULL_LABEL != path_ext->fpe_label)
+ {
+ dpo_set(&nh->path_dpo,
+ DPO_MPLS_LABEL,
+ DPO_PROTO_MPLS,
+ mpls_label_dpo_create(path_ext->fpe_label,
+ (parent_fct == FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS ?
+ MPLS_NON_EOS :
+ MPLS_EOS),
+ 255, 0,
+ &nh->path_dpo));
+ }
+ }
+ /*
+ * release the local reference taken by the contribute above
+ */
+ dpo_reset(&via_dpo);
+
+ return (nhs);
+}
diff --git a/vnet/vnet/fib/fib_path_ext.h b/vnet/vnet/fib/fib_path_ext.h
new file mode 100644
index 00000000000..949b1e2b64f
--- /dev/null
+++ b/vnet/vnet/fib/fib_path_ext.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_PATH_EXT_H__
+#define __FIB_PATH_EXT_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_types.h>
+
+/**
+ * A path extension is a per-entry addition to the forwarding information
+ * when packets are sent for that entry over that path.
+ *
+ * For example:
+ * ip route add 1.1.1.1/32 via 10.10.10.10 mpls-label 100
+ *
+ * The out-going MPLS label value 100 is a path-extension. It is a value specific
+ * to the entry 1.1.1.1/32 and valid only when packets are sent via 10.10.10.10.
+ */
+typedef struct fib_path_ext_t_
+{
+ /**
+ * A description of the path that is being extended.
+ * This description is used to match this extension with the [changing]
+ * instance of a fib_path_t that is extended.
+ * fpe_label is shorthand for the label carried in this description.
+ */
+ fib_route_path_t fpe_path;
+#define fpe_label fpe_path.frp_label
+
+ /**
+ * The index of the path. This is the global index, not the path's
+ * position in the path-list.
+ * FIB_NODE_INDEX_INVALID while no matching path has been found.
+ */
+ fib_node_index_t fpe_path_index;
+} fib_path_ext_t;
+
+
+/* format function; expects a fib_path_ext_t pointer in the va_list */
+extern u8 * format_fib_path_ext(u8 * s, va_list * args);
+
+/* copy rpath into the extension and bind it to the matching path in the list */
+extern void fib_path_ext_init(fib_path_ext_t *path_ext,
+ fib_node_index_t path_list_index,
+ const fib_route_path_t *rpath);
+
+/* compare the extension's path description with rpath */
+extern int fib_path_ext_cmp(fib_path_ext_t *path_ext,
+ const fib_route_path_t *rpath);
+
+/* re-find the path in the path-list that this extension applies to */
+extern void fib_path_ext_resolve(fib_path_ext_t *path_ext,
+ fib_node_index_t path_list_index);
+
+/* append the extension's load-balance path to nhs; returns the updated vector */
+extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext,
+ fib_forward_chain_type_t fct,
+ load_balance_path_t *nhs);
+
+#endif
+
diff --git a/vnet/vnet/fib/fib_path_list.c b/vnet/vnet/fib/fib_path_list.c
new file mode 100644
index 00000000000..1df73968614
--- /dev/null
+++ b/vnet/vnet/fib/fib_path_list.c
@@ -0,0 +1,1100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/mhash.h>
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/fib/fib_walk.h>
+
+/**
+ * FIB path-list
+ * A representation of the list/set of paths through which a prefix is reachable
+ */
+typedef struct fib_path_list_t_ {
+ /**
+ * A path-list is a node in the FIB graph.
+ * This must remain the first member; the node pointer is cast to and
+ * from the path-list pointer.
+ */
+ fib_node_t fpl_node;
+
+ /**
+ * Flags on the path-list
+ */
+ fib_path_list_flags_t fpl_flags;
+
+ /**
+ * The next-hop protocol for the paths in this path list.
+ * Note that fixing the proto here means we don't support a mix of
+ * v4 and v6 paths. ho hum.
+ */
+ fib_protocol_t fpl_nh_proto;
+
+ /**
+ * Vector of path indices for all configured paths.
+ * For shareable path-lists this list MUST not change.
+ */
+ fib_node_index_t *fpl_paths;
+} fib_path_list_t;
+
+/*
+ * Array of strings/names for the path-list attributes,
+ * indexed by attribute.
+ */
+static const char *fib_path_list_attr_names[] = FIB_PATH_LIST_ATTRIBUTES;
+
+/*
+ * The memory pool from which we allocate all the path-lists
+ */
+static fib_path_list_t * fib_path_list_pool;
+
+/*
+ * The data-base of shared path-lists.
+ * Entries are inserted under a key derived from the path-list's pool
+ * index and store that index as the value.
+ */
+static uword *fib_path_list_db;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...) \
+{ \
+ u8 *_tmp = 0; \
+ _tmp = fib_path_list_format( \
+ fib_path_list_get_index(_pl), _tmp); \
+ clib_warning("pl:[%d:%p:%p:%s]:" _fmt, \
+ fib_path_list_get_index(_pl), \
+ _pl, _pl->fpl_paths, _tmp, \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...)
+#endif
+
+/*
+ * Map a path-list index to the path-list object in the pool.
+ */
+static fib_path_list_t *
+fib_path_list_get (fib_node_index_t index)
+{
+    fib_path_list_t *pl;
+
+    pl = pool_elt_at_index(fib_path_list_pool, index);
+
+    return (pl);
+}
+
+/*
+ * Graph-node accessor: hand back the path-list at the given index viewed
+ * as a fib_node_t.
+ */
+static fib_node_t *
+fib_path_list_get_node (fib_node_index_t index)
+{
+    fib_path_list_t *pl = fib_path_list_get(index);
+
+    return ((fib_node_t*)pl);
+}
+
+/*
+ * Convert a graph node back into the path-list that embeds it.
+ * In debug images the node's type is checked first.
+ */
+static fib_path_list_t*
+fib_path_list_from_fib_node (fib_node_t *node)
+{
+    fib_path_list_t *pl;
+
+#if CLIB_DEBUG > 0
+    ASSERT(FIB_NODE_TYPE_PATH_LIST == node->fn_type);
+#endif
+    pl = (fib_path_list_t*)node;
+
+    return (pl);
+}
+
+/*
+ * The index of a path-list is its offset from the base of the pool.
+ */
+static fib_node_index_t
+fib_path_list_get_index (fib_path_list_t *path_list)
+{
+    fib_node_index_t pli = path_list - fib_path_list_pool;
+
+    return (pli);
+}
+
+/*
+ * Format function for a path-list; expects a fib_path_list_t pointer in
+ * the va_list. Emits the index, lock count and next-hop protocol, then
+ * the names of any flags that are set, then each path followed by a
+ * newline.
+ */
+static u8 *
+format_fib_path_list (u8 * s, va_list * args)
+{
+    fib_path_list_attribute_t attr;
+    fib_node_index_t *pi;
+    fib_path_list_t *pl;
+
+    pl = va_arg (*args, fib_path_list_t *);
+
+    s = format (s, " index:%u locks:%u proto:%U",
+                fib_path_list_get_index(pl),
+                pl->fpl_node.fn_locks,
+                format_fib_protocol, pl->fpl_nh_proto);
+
+    if (FIB_PATH_LIST_FLAG_NONE != pl->fpl_flags)
+    {
+        s = format (s, " flags:");
+        FOR_EACH_PATH_LIST_ATTRIBUTE(attr)
+        {
+            if (pl->fpl_flags & (1<<attr))
+            {
+                s = format (s, "%s,", fib_path_list_attr_names[attr]);
+            }
+        }
+    }
+
+    vec_foreach (pi, pl->fpl_paths)
+    {
+        s = fib_path_format(*pi, s);
+        s = format(s, "\n");
+    }
+
+    return (s);
+}
+
+/*
+ * Append to s the adjacency formatting of every path in the path-list,
+ * each at the given indent.
+ */
+u8 *
+fib_path_list_adjs_format (fib_node_index_t path_list_index,
+                           u32 indent,
+                           u8 * s)
+{
+    fib_path_list_t *pl = fib_path_list_get(path_list_index);
+    u32 n;
+
+    for (n = 0; n < vec_len(pl->fpl_paths); n++)
+    {
+        s = fib_path_adj_format(pl->fpl_paths[n], indent, s);
+    }
+
+    return (s);
+}
+
+
+/*
+ * Append the formatted representation of the path-list at the given
+ * index to s.
+ */
+u8 *
+fib_path_list_format (fib_node_index_t path_list_index,
+                      u8 * s)
+{
+    fib_path_list_t *pl = fib_path_list_get(path_list_index);
+
+    s = format(s, "%U", format_fib_path_list, pl);
+
+    return (s);
+}
+
+/*
+ * Compute a hash over a path-list: both accumulators are seeded with the
+ * number of paths, then the hash of each path is mixed in, in vector order.
+ */
+static uword
+fib_path_list_hash (fib_path_list_t *path_list)
+{
+    uword prev_acc, acc, one_path;
+    fib_node_index_t *pi;
+
+    ASSERT(path_list);
+
+    prev_acc = vec_len(path_list->fpl_paths);
+    acc = prev_acc;
+
+    vec_foreach (pi, path_list->fpl_paths)
+    {
+        one_path = fib_path_hash(*pi);
+#if uword_bits == 64
+        hash_mix64(one_path, prev_acc, acc);
+#else
+        hash_mix32(one_path, prev_acc, acc);
+#endif
+    }
+
+    return (acc);
+}
+
+/*
+ * Encode a pool index as a DB key: the index shifted up one bit with the
+ * low bit set, i.e. always an odd value.
+ */
+always_inline uword
+fib_path_list_db_hash_key_from_index (uword index)
+{
+    return ((index << 1) | 1);
+}
+
+/*
+ * A key with its low bit set encodes a pool index; returns that bit.
+ */
+always_inline uword
+fib_path_list_db_hash_key_is_index (uword key)
+{
+    uword low_bit = key & 1;
+
+    return low_bit;
+}
+
+/*
+ * Decode the pool index from an index-type key by dropping the low bit.
+ */
+always_inline uword
+fib_path_list_db_hash_key_2_index (uword key)
+{
+    ASSERT (fib_path_list_db_hash_key_is_index (key));
+
+    return (key >> 1);
+}
+
+/*
+ * Turn a DB key into a path-list. A key is either a path-list pointer or
+ * an encoded pool index; the low bit tells them apart.
+ */
+static fib_path_list_t*
+fib_path_list_db_get_from_hash_key (uword key)
+{
+    fib_node_index_t pli;
+
+    if (!fib_path_list_db_hash_key_is_index (key))
+    {
+        /* the key is the path-list's address */
+        return (uword_to_pointer (key, fib_path_list_t *));
+    }
+
+    pli = fib_path_list_db_hash_key_2_index(key);
+
+    return (fib_path_list_get(pli));
+}
+
+/*
+ * Hash-table callback: the hash of a key is the hash of the path-list it
+ * refers to.
+ */
+static uword
+fib_path_list_db_hash_key_sum (hash_t * h,
+                               uword key)
+{
+    return (fib_path_list_hash(fib_path_list_db_get_from_hash_key(key)));
+}
+
+/*
+ * Hash-table callback: two keys are treated as equal when the path-lists
+ * they refer to produce the same hash value.
+ */
+static uword
+fib_path_list_db_hash_key_equal (hash_t * h,
+                                 uword key1,
+                                 uword key2)
+{
+    uword hash1, hash2;
+
+    hash1 = fib_path_list_hash(fib_path_list_db_get_from_hash_key(key1));
+    hash2 = fib_path_list_hash(fib_path_list_db_get_from_hash_key(key2));
+
+    return (hash1 == hash2);
+}
+
+/*
+ * Look a path-list up in the shared DB, keyed by the path-list's address.
+ * Returns the stored index, or FIB_NODE_INDEX_INVALID when there is no
+ * entry.
+ */
+static fib_node_index_t
+fib_path_list_db_find (fib_path_list_t *path_list)
+{
+    uword *found;
+
+    found = hash_get(fib_path_list_db, path_list);
+
+    if (NULL == found)
+    {
+        return (FIB_NODE_INDEX_INVALID);
+    }
+
+    return found[0];
+}
+
+/*
+ * Add a path-list to the shared DB. The path-list must not already be
+ * present. The entry is keyed on the encoded index and stores the index.
+ */
+static void
+fib_path_list_db_insert (fib_node_index_t path_list_index)
+{
+    fib_path_list_t *pl = fib_path_list_get(path_list_index);
+
+    ASSERT(FIB_NODE_INDEX_INVALID == fib_path_list_db_find(pl));
+
+    hash_set (fib_path_list_db,
+              fib_path_list_db_hash_key_from_index(path_list_index),
+              path_list_index);
+
+    FIB_PATH_LIST_DBG(pl, "DB-inserted");
+}
+
+/*
+ * Remove a path-list from the shared DB. The path-list must be present.
+ */
+static void
+fib_path_list_db_remove (fib_node_index_t path_list_index)
+{
+    fib_path_list_t *pl = fib_path_list_get(path_list_index);
+
+    ASSERT(FIB_NODE_INDEX_INVALID != fib_path_list_db_find(pl));
+
+    hash_unset(fib_path_list_db,
+               fib_path_list_db_hash_key_from_index(path_list_index));
+
+    FIB_PATH_LIST_DBG(pl, "DB-removed");
+}
+
+/*
+ * Tear a path-list down: destroy each of its paths, free the vector of
+ * path indices, de-initialise the graph node and return the object to
+ * the pool.
+ */
+static void
+fib_path_list_destroy (fib_path_list_t *path_list)
+{
+    fib_node_index_t *pi;
+
+    FIB_PATH_LIST_DBG(path_list, "destroy");
+
+    vec_foreach (pi, path_list->fpl_paths)
+    {
+        fib_path_destroy(*pi);
+    }
+    vec_free(path_list->fpl_paths);
+
+    fib_node_deinit(&path_list->fpl_node);
+
+    pool_put(fib_path_list_pool, path_list);
+}
+
+/*
+ * Graph-node callback invoked when the last lock on the path-list is
+ * released. A shared path-list is first taken out of the DB; the
+ * path-list is then destroyed.
+ */
+static void
+fib_path_list_last_lock_gone (fib_node_t *node)
+{
+    fib_path_list_t *pl = fib_path_list_from_fib_node(node);
+
+    FIB_PATH_LIST_DBG(pl, "last-lock");
+
+    if (FIB_PATH_LIST_FLAG_SHARED & pl->fpl_flags)
+    {
+        fib_path_list_db_remove(fib_path_list_get_index(pl));
+    }
+
+    fib_path_list_destroy(pl);
+}
+
+/*
+ * fib_path_list_mk_lb
+ *
+ * Build the load-balance this path-list contributes to its children's
+ * forwarding: collect a load-balance path from each resolved path and
+ * update the load-balance in dpo with the result.
+ */
+static void
+fib_path_list_mk_lb (fib_path_list_t *path_list,
+                     fib_forward_chain_type_t type,
+                     dpo_id_t *dpo)
+{
+    load_balance_path_t *lb_paths = NULL;
+    fib_node_index_t *pi;
+
+    /*
+     * Only resolved paths add to the set; the append is a no-op for
+     * the others.
+     */
+    vec_foreach (pi, path_list->fpl_paths)
+    {
+        lb_paths = fib_path_append_nh_for_multipath_hash(*pi,
+                                                         type,
+                                                         lb_paths);
+    }
+
+    /*
+     * Path-list load-balances, which if used, would be shared and hence
+     * never need a load-balance map.
+     */
+    load_balance_multipath_update(dpo, lb_paths, LOAD_BALANCE_FLAG_NONE);
+
+    FIB_PATH_LIST_DBG(path_list, "mk lb: %d", dpo->dpoi_index);
+
+    vec_free(lb_paths);
+}
+
+/*
+ * fib_path_list_back_walk
+ *
+ * Called from one of this path-list's paths to propagate
+ * a back walk to the path-list's children.
+ */
+void
+fib_path_list_back_walk (fib_node_index_t path_list_index,
+                         fib_node_back_walk_ctx_t *ctx)
+{
+    fib_path_list_t *pl = fib_path_list_get(path_list_index);
+
+    if (fib_node_list_get_size(pl->fpl_node.fn_children) > 32)
+    {
+        /*
+         * many children. hand the walk to the async walker, at low
+         * priority.
+         */
+        fib_walk_async(FIB_NODE_TYPE_PATH_LIST,
+                       path_list_index,
+                       FIB_WALK_PRIORITY_LOW,
+                       ctx);
+    }
+    else
+    {
+        /*
+         * 32 children or fewer. walk them here and now.
+         */
+        fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
+    }
+}
+
+/*
+ * fib_path_list_back_walk_notify
+ *
+ * A back walk has reached this path-list through the graph-node
+ * interface. This is not expected to happen, hence the assert.
+ */
+static fib_node_back_walk_rc_t
+fib_path_list_back_walk_notify (fib_node_t *node,
+                                fib_node_back_walk_ctx_t *ctx)
+{
+    /*
+     * A path-list is not a direct child of any other node type.
+     * Paths, whose membership of a list does not change, remember the
+     * list they belong to and call the back-walk function directly.
+     */
+    ASSERT(0);
+
+    return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The virtual function table through which the FIB graph operates on
+ * path-list nodes.
+ */
+static const fib_node_vft_t fib_path_list_vft = {
+    .fnv_back_walk = fib_path_list_back_walk_notify,
+    .fnv_last_lock = fib_path_list_last_lock_gone,
+    .fnv_get = fib_path_list_get_node,
+};
+
+/*
+ * Allocate a zeroed path-list from the pool and initialise its graph
+ * node. If path_list_index is non-NULL the new path-list's index is
+ * written through it.
+ */
+static fib_path_list_t *
+fib_path_list_alloc (fib_node_index_t *path_list_index)
+{
+    fib_path_list_t *pl;
+
+    pool_get(fib_path_list_pool, pl);
+    memset(pl, 0, sizeof(*pl));
+
+    fib_node_init(&pl->fpl_node, FIB_NODE_TYPE_PATH_LIST);
+
+    if (path_list_index != NULL)
+    {
+        *path_list_index = fib_path_list_get_index(pl);
+    }
+
+    FIB_PATH_LIST_DBG(pl, "alloc");
+
+    return (pl);
+}
+
+/*
+ * fib_path_list_resolve
+ *
+ * Resolve every path of the path-list. Returns the (possibly moved)
+ * path-list pointer, re-fetched from the pool by index.
+ */
+static fib_path_list_t *
+fib_path_list_resolve (fib_path_list_t *path_list)
+{
+    fib_node_index_t *snapshot, self_index;
+    u32 n;
+
+    ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_RESOLVED));
+
+    /*
+     * resolution is recursive: it can create further path-lists, which
+     * can realloc the pool and so move this path-list. remember the
+     * index and iterate over a private copy of the path vector.
+     * this runs once per path-list, so the copy is cheap enough.
+     */
+    self_index = fib_path_list_get_index(path_list);
+    snapshot = vec_dup(path_list->fpl_paths);
+
+    for (n = 0; n < vec_len(snapshot); n++)
+    {
+        fib_path_resolve(snapshot[n]);
+    }
+    vec_free(snapshot);
+
+    path_list = fib_path_list_get(self_index);
+
+    FIB_PATH_LIST_DBG(path_list, "resovled");
+
+    return (path_list);
+}
+
+/*
+ * fib_path_list_get_resolving_interface
+ *
+ * Return the resolving interface of the first path that reports one
+ * (i.e. a value other than ~0), or ~0 if no path does.
+ */
+u32
+fib_path_list_get_resolving_interface (fib_node_index_t path_list_index)
+{
+    fib_node_index_t *pi;
+    fib_path_list_t *pl;
+    u32 sw_if_index = ~0;
+
+    pl = fib_path_list_get(path_list_index);
+
+    vec_foreach (pi, pl->fpl_paths)
+    {
+        sw_if_index = fib_path_get_resolving_interface(*pi);
+        if (~0 != sw_if_index)
+        {
+            break;
+        }
+    }
+
+    return (sw_if_index);
+}
+
+/*
+ * fib_path_list_is_looped
+ *
+ * Non-zero if the path-list carries FIB_PATH_LIST_FLAG_LOOPED.
+ */
+int
+fib_path_list_is_looped (fib_node_index_t path_list_index)
+{
+    return (fib_path_list_get(path_list_index)->fpl_flags &
+            FIB_PATH_LIST_FLAG_LOOPED);
+}
+
+/*
+ * fib_path_list_flags_2_path_flags
+ *
+ * Translate the LOCAL, DROP and EXCLUSIVE path-list flags into the
+ * equivalent path config flags. Other path-list flags are ignored.
+ */
+static fib_path_cfg_flags_t
+fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
+{
+    fib_path_cfg_flags_t path_flags = FIB_PATH_CFG_FLAG_NONE;
+
+    path_flags |= ((plf & FIB_PATH_LIST_FLAG_LOCAL) ?
+                   FIB_PATH_CFG_FLAG_LOCAL :
+                   FIB_PATH_CFG_FLAG_NONE);
+    path_flags |= ((plf & FIB_PATH_LIST_FLAG_DROP) ?
+                   FIB_PATH_CFG_FLAG_DROP :
+                   FIB_PATH_CFG_FLAG_NONE);
+    path_flags |= ((plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) ?
+                   FIB_PATH_CFG_FLAG_EXCLUSIVE :
+                   FIB_PATH_CFG_FLAG_NONE);
+
+    return (path_flags);
+}
+
+/*
+ * fib_path_list_flags_fixup
+ *
+ * Drop and exclusive path-lists are never shared: clear the SHARED
+ * flag whenever either of those flags is present.
+ */
+static fib_path_list_flags_t
+fib_path_list_flags_fixup (fib_path_list_flags_t flags)
+{
+    if (flags & (FIB_PATH_LIST_FLAG_DROP | FIB_PATH_LIST_FLAG_EXCLUSIVE))
+    {
+        flags &= ~FIB_PATH_LIST_FLAG_SHARED;
+    }
+
+    return (flags);
+}
+
+/*
+ * fib_path_list_create
+ *
+ * Create a path-list with one path per element of the rpaths vector.
+ *
+ * @param flags  path-list flags; SHARED is cleared for drop/exclusive
+ *               lists by fib_path_list_flags_fixup().
+ * @param rpaths vector of route-paths; rpaths[0] is read, so the vector
+ *               must hold at least one element.
+ * @return the index of the path-list to use: either the one created
+ *         here, or, for shared lists, a matching one already in the DB.
+ */
+fib_node_index_t
+fib_path_list_create (fib_path_list_flags_t flags,
+                      const fib_route_path_t *rpaths)
+{
+    fib_node_index_t path_list_index, old_path_list_index;
+    fib_path_list_t *path_list;
+    int i;
+
+    flags = fib_path_list_flags_fixup(flags);
+    path_list = fib_path_list_alloc(&path_list_index);
+    path_list->fpl_flags = flags;
+    /*
+     * we'll assume for now all paths are the same next-hop protocol
+     */
+    path_list->fpl_nh_proto = rpaths[0].frp_proto;
+
+    vec_foreach_index(i, rpaths)
+    {
+        vec_add1(path_list->fpl_paths,
+                 fib_path_create(path_list_index,
+                                 path_list->fpl_nh_proto,
+                                 fib_path_list_flags_2_path_flags(flags),
+                                 &rpaths[i]));
+    }
+
+    /*
+     * we sort the paths since the key for the path-list is
+     * the description of the paths it contains. The paths need to
+     * be sorted else this description will differ, and a list built
+     * here would not match the same set of paths built by
+     * fib_path_list_copy_and_path_add/remove, which both sort.
+     */
+    vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort);
+
+    if (!(flags & FIB_PATH_LIST_FLAG_SHARED))
+    {
+        /*
+         * no shared path list requested. resolve and use the one
+         * just created.
+         */
+        fib_path_list_resolve(path_list);
+
+        return (path_list_index);
+    }
+
+    /*
+     * A shared path list is requested: check for a matching path-list
+     * in the DB. If we find one then we can return the existing one and
+     * destroy the new one just created.
+     */
+    old_path_list_index = fib_path_list_db_find(path_list);
+
+    if (FIB_NODE_INDEX_INVALID != old_path_list_index)
+    {
+        fib_path_list_destroy(path_list);
+
+        path_list_index = old_path_list_index;
+    }
+    else
+    {
+        /*
+         * if there was not a matching path-list, then this
+         * new one will need inserting into the DB and resolving.
+         */
+        fib_path_list_db_insert(path_list_index);
+        fib_path_list_resolve(path_list);
+    }
+
+    return (path_list_index);
+}
+
+/*
+ * fib_path_list_create_special
+ *
+ * Create a path-list holding a single special path built from the
+ * DPO provided. The list is resolved but never inserted in the DB.
+ */
+fib_node_index_t
+fib_path_list_create_special (fib_protocol_t nh_proto,
+                              fib_path_list_flags_t flags,
+                              const dpo_id_t *dpo)
+{
+    fib_node_index_t new_index;
+    fib_path_list_t *pl;
+
+    pl = fib_path_list_alloc(&new_index);
+    pl->fpl_flags = flags;
+    pl->fpl_nh_proto = nh_proto;
+
+    vec_add1(pl->fpl_paths,
+             fib_path_create_special(new_index,
+                                     pl->fpl_nh_proto,
+                                     fib_path_list_flags_2_path_flags(flags),
+                                     dpo));
+
+    /*
+     * we don't share path-lists. we can do PIC on them so why bother.
+     */
+    pl = fib_path_list_resolve(pl);
+
+    return (new_index);
+}
+
+/*
+ * fib_path_list_copy_and_path_add
+ *
+ * Create a copy of a path-list and append one more path to it.
+ * The path-list returned could either have been newly created, or
+ * can be a shared path-list from the data-base.
+ *
+ * rpaths must be a vector holding exactly one route-path (asserted).
+ * The copy inherits the next-hop protocol of the original list and
+ * uses the flags passed, after fib_path_list_flags_fixup().
+ * Returns the index of the path-list the caller should use.
+ */
+fib_node_index_t
+fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
+ fib_path_list_flags_t flags,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t path_index, path_list_index, *orig_path_index;
+ fib_path_list_t *path_list, *orig_path_list;
+ fib_node_index_t pi;
+
+ ASSERT(1 == vec_len(rpaths));
+
+ /*
+ * alloc the new list before we retrieve the old one, lest
+ * the alloc result in a realloc
+ */
+ path_list = fib_path_list_alloc(&path_list_index);
+
+ orig_path_list = fib_path_list_get(orig_path_list_index);
+
+ FIB_PATH_LIST_DBG(orig_path_list, "copy-add");
+
+ flags = fib_path_list_flags_fixup(flags);
+ path_list->fpl_flags = flags;
+ path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto;
+ vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths));
+ pi = 0;
+
+ vec_foreach (orig_path_index, orig_path_list->fpl_paths)
+ {
+ path_index = fib_path_copy(*orig_path_index, path_list_index);
+ path_list->fpl_paths[pi++] = path_index;
+ }
+ path_index = fib_path_create(path_list_index,
+ path_list->fpl_nh_proto,
+ fib_path_list_flags_2_path_flags(flags),
+ rpaths);
+ path_list->fpl_paths[pi] = path_index;
+
+ /*
+ * we sort the paths since the key for the path-list is
+ * the description of the paths it contains. The paths need to
+ * be sorted else this description will differ.
+ */
+ vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort);
+
+ FIB_PATH_LIST_DBG(path_list, "path-added");
+
+ /*
+ * If a shared path list is requested, consult the DB for a match
+ */
+ if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
+ {
+ fib_node_index_t exist_path_list_index;
+ /*
+ * check for a matching path-list in the DB.
+ * If we find one then we can return the existing one and destroy the
+ * new one just created.
+ */
+ exist_path_list_index = fib_path_list_db_find(path_list);
+ if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
+ {
+ fib_path_list_destroy(path_list);
+
+ path_list_index = exist_path_list_index;
+ }
+ else
+ {
+ /*
+ * if there was not a matching path-list, then this
+ * new one will need inserting into the DB and resolving.
+ */
+ fib_path_list_db_insert(path_list_index);
+
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+ else
+ {
+ /*
+ * no shared path list requested. resolve and use the one
+ * just created.
+ */
+ path_list = fib_path_list_resolve(path_list);
+ }
+
+ return (path_list_index);
+}
+
+/*
+ * fib_path_list_copy_and_path_remove
+ *
+ * Copy the path-list excluding the path passed.
+ * If the path is the last one, then the index returned will be
+ * FIB_NODE_INDEX_INVALID, i.e. the path-list is toast.
+ *
+ * rpaths must be a vector holding exactly one route-path (asserted).
+ * A temporary path is built from it and compared, with fib_path_cmp(),
+ * against each path of the original list; only paths that compare
+ * different are copied.
+ */
+fib_node_index_t
+fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
+ fib_path_list_flags_t flags,
+ const fib_route_path_t *rpaths)
+{
+ fib_node_index_t path_index, *orig_path_index, path_list_index, tmp_path_index;
+ fib_path_list_t *path_list, *orig_path_list;
+ fib_node_index_t pi;
+
+ ASSERT(1 == vec_len(rpaths));
+
+ path_list = fib_path_list_alloc(&path_list_index);
+
+ flags = fib_path_list_flags_fixup(flags);
+ orig_path_list = fib_path_list_get(orig_path_list_index);
+
+ FIB_PATH_LIST_DBG(orig_path_list, "copy-remove");
+
+ path_list->fpl_flags = flags;
+ path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto;
+ /*
+ * allocate as many paths as we might need in one go, rather than
+ * using vec_add to do a few at a time.
+ */
+ if (vec_len(orig_path_list->fpl_paths) > 1)
+ {
+ vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths) - 2);
+ }
+ pi = 0;
+
+ /*
+ * create a representation of the path to be removed, so it
+ * can be used as a comparison object during the copy.
+ */
+ tmp_path_index = fib_path_create(path_list_index,
+ path_list->fpl_nh_proto,
+ fib_path_list_flags_2_path_flags(flags),
+ rpaths);
+
+ vec_foreach (orig_path_index, orig_path_list->fpl_paths)
+ {
+ if (0 != fib_path_cmp(tmp_path_index, *orig_path_index)) {
+ path_index = fib_path_copy(*orig_path_index, path_list_index);
+ if (pi < vec_len(path_list->fpl_paths))
+ {
+ path_list->fpl_paths[pi++] = path_index;
+ }
+ else
+ {
+ /*
+ * this is the unlikely case that the path being
+ * removed does not match one in the path-list, so
+ * we end up with as many paths as we started with.
+ * the paths vector was sized above with the expectation
+ * that we would have 1 less.
+ */
+ vec_add1(path_list->fpl_paths, path_index);
+ }
+ }
+ }
+
+ /*
+ * done with the temporary now
+ */
+ fib_path_destroy(tmp_path_index);
+
+ /*
+ * if there are no paths, then the new path-list is aborted
+ */
+ if (0 == vec_len(path_list->fpl_paths)) {
+ FIB_PATH_LIST_DBG(path_list, "last-path-removed");
+
+ fib_path_list_destroy(path_list);
+
+ path_list_index = FIB_NODE_INDEX_INVALID;
+ } else {
+ /*
+ * we sort the paths since the key for the path-list is
+ * the description of the paths it contains. The paths need to
+ * be sorted else this description will differ.
+ */
+ vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort);
+
+ /*
+ * If a shared path list is requested, consult the DB for a match
+ */
+ if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
+ {
+ fib_node_index_t exist_path_list_index;
+
+ /*
+ * check for a matching path-list in the DB.
+ * If we find one then we can return the existing one and destroy the
+ * new one just created.
+ */
+ exist_path_list_index = fib_path_list_db_find(path_list);
+ if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
+ {
+ fib_path_list_destroy(path_list);
+
+ path_list_index = exist_path_list_index;
+ }
+ else
+ {
+ /*
+ * if there was not a matching path-list, then this
+ * new one will need inserting into the DB and resolving.
+ */
+ fib_path_list_db_insert(path_list_index);
+
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+ else
+ {
+ /*
+ * no shared path list requested. resolve and use the one
+ * just created.
+ */
+ path_list = fib_path_list_resolve(path_list);
+ }
+ }
+
+ return (path_list_index);
+}
+
+/*
+ * fib_path_list_contribute_forwarding
+ *
+ * Fill in the DPO that a user of this path-list should use for
+ * forwarding with the chain type requested. The work is done by
+ * fib_path_list_mk_lb().
+ */
+void
+fib_path_list_contribute_forwarding (fib_node_index_t path_list_index,
+                                     fib_forward_chain_type_t type,
+                                     dpo_id_t *dpo)
+{
+    fib_path_list_mk_lb(fib_path_list_get(path_list_index), type, dpo);
+}
+
+/*
+ * fib_path_list_get_adj
+ *
+ * Return the adjacency index of the first path in the path-list.
+ * The chain type argument is currently not consulted.
+ */
+adj_index_t
+fib_path_list_get_adj (fib_node_index_t path_list_index,
+                       fib_forward_chain_type_t type)
+{
+    fib_path_list_t *pl = fib_path_list_get(path_list_index);
+    fib_node_index_t first_path = pl->fpl_paths[0];
+
+    return (fib_path_get_adj(first_path));
+}
+
+/*
+ * fib_path_list_recursive_loop_detect
+ *
+ * Run recursive loop detection on each path of the path-list and
+ * set or clear FIB_PATH_LIST_FLAG_LOOPED on the list accordingly.
+ *
+ * @param path_list_index the path-list to check
+ * @param entry_indicies  pointer to the vector of entry indices
+ *                        visited so far; it is not modified here, each
+ *                        path is given its own copy.
+ * @return non-zero if at least one path is looped (the sum of the
+ *         per-path results), zero otherwise.
+ */
+int
+fib_path_list_recursive_loop_detect (fib_node_index_t path_list_index,
+                                     fib_node_index_t **entry_indicies)
+{
+    fib_node_index_t *path_index;
+    int is_looped, list_looped;
+    fib_path_list_t *path_list;
+
+    list_looped = 0;
+    path_list = fib_path_list_get(path_list_index);
+
+    vec_foreach (path_index, path_list->fpl_paths)
+    {
+        fib_node_index_t *copy, **copy_ptr;
+
+        /*
+         * we need a copy of the nodes visited so that when we add entries
+         * we explore on the nth path and a loop is detected, those entries
+         * are not again searched for the n+1 path and so finding a loop
+         * that does not exist.
+         */
+        copy = vec_dup(*entry_indicies);
+        copy_ptr = &copy;
+
+        is_looped = fib_path_recursive_loop_detect(*path_index, copy_ptr);
+        list_looped += is_looped;
+
+        /*
+         * the copy is private to this iteration. release it (via 'copy',
+         * which tracks any realloc done through copy_ptr) so that one
+         * vector is not leaked per path.
+         */
+        vec_free(copy);
+    }
+
+    FIB_PATH_LIST_DBG(path_list, "loop-detect: eval:%d", list_looped);
+
+    if (list_looped)
+    {
+        path_list->fpl_flags |= FIB_PATH_LIST_FLAG_LOOPED;
+    }
+    else
+    {
+        path_list->fpl_flags &= ~FIB_PATH_LIST_FLAG_LOOPED;
+    }
+
+    return (list_looped);
+}
+
+/*
+ * fib_path_list_child_add
+ *
+ * Add a child to the path-list's FIB node. Returns the value handed
+ * back by fib_node_child_add(), which is what
+ * fib_path_list_child_remove() takes to undo the addition.
+ */
+u32
+fib_path_list_child_add (fib_node_index_t path_list_index,
+                         fib_node_type_t child_type,
+                         fib_node_index_t child_index)
+{
+    u32 sibling;
+
+    sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST, path_list_index,
+                                 child_type, child_index);
+
+    return (sibling);
+}
+
+/*
+ * fib_path_list_child_remove
+ *
+ * Remove the child identified by 'si' from the path-list's FIB node.
+ */
+void
+fib_path_list_child_remove (fib_node_index_t path_list_index,
+                            u32 si)
+{
+    fib_node_child_remove(FIB_NODE_TYPE_PATH_LIST, path_list_index, si);
+}
+
+/*
+ * fib_path_list_lock
+ *
+ * Take a lock on the path-list's FIB node. A no-op when passed
+ * FIB_NODE_INDEX_INVALID.
+ */
+void
+fib_path_list_lock(fib_node_index_t path_list_index)
+{
+    fib_path_list_t *pl;
+
+    if (FIB_NODE_INDEX_INVALID == path_list_index)
+    {
+        return;
+    }
+
+    pl = fib_path_list_get(path_list_index);
+    fib_node_lock(&pl->fpl_node);
+
+    FIB_PATH_LIST_DBG(pl, "lock");
+}
+
+/*
+ * fib_path_list_unlock
+ *
+ * Release a lock on the path-list's FIB node. A no-op when passed
+ * FIB_NODE_INDEX_INVALID.
+ */
+void
+fib_path_list_unlock (fib_node_index_t path_list_index)
+{
+    fib_path_list_t *pl;
+
+    if (FIB_NODE_INDEX_INVALID == path_list_index)
+    {
+        return;
+    }
+
+    pl = fib_path_list_get(path_list_index);
+
+    /* trace before unlocking, as the original ordering does */
+    FIB_PATH_LIST_DBG(pl, "unlock");
+
+    fib_node_unlock(&pl->fpl_node);
+}
+
+/*
+ * fib_path_list_pool_size
+ *
+ * Number of elements currently in the path-list pool.
+ */
+u32
+fib_path_list_pool_size (void)
+{
+    u32 n_elts = pool_elts(fib_path_list_pool);
+
+    return (n_elts);
+}
+
+/*
+ * fib_path_list_db_size
+ *
+ * Number of elements currently in the path-list data-base hash.
+ */
+u32
+fib_path_list_db_size (void)
+{
+    u32 n_elts = hash_elts(fib_path_list_db);
+
+    return (n_elts);
+}
+
+/*
+ * fib_path_list_walk
+ *
+ * Invoke 'func' on each path of the path-list, in vector order,
+ * stopping at the first call that returns zero.
+ */
+void
+fib_path_list_walk (fib_node_index_t path_list_index,
+                    fib_path_list_walk_fn_t func,
+                    void *ctx)
+{
+    fib_node_index_t *pi;
+    fib_path_list_t *pl;
+
+    pl = fib_path_list_get(path_list_index);
+
+    vec_foreach(pi, pl->fpl_paths)
+    {
+        if (0 == func(path_list_index, *pi, ctx))
+        {
+            break;
+        }
+    }
+}
+
+
+/*
+ * fib_path_list_module_init
+ *
+ * Register the path-list node type and its virtual function table,
+ * then create the hash used as the path-list data-base.
+ */
+void
+fib_path_list_module_init (void)
+{
+    fib_node_register_type (FIB_NODE_TYPE_PATH_LIST, &fib_path_list_vft);
+
+    fib_path_list_db =
+        hash_create2 (0,                            /* elts */
+                      0,                            /* user */
+                      sizeof (fib_node_index_t),    /* value_bytes */
+                      fib_path_list_db_hash_key_sum,
+                      fib_path_list_db_hash_key_equal,
+                      0,                            /* format pair */
+                      0);                           /* format pair arg */
+}
+
+/*
+ * show_fib_path_list_command
+ *
+ * CLI handler: with a numeric argument show that path-list and its
+ * children, otherwise show every path-list in the pool.
+ */
+static clib_error_t *
+show_fib_path_list_command (vlib_main_t * vm,
+                            unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+    fib_path_list_t *path_list;
+    fib_node_index_t pli;
+    u8 *s;
+
+    if (!unformat (input, "%d", &pli))
+    {
+        /*
+         * no index given: show all
+         */
+        vlib_cli_output (vm, "FIB Path Lists");
+        pool_foreach(path_list, fib_path_list_pool,
+        ({
+            vlib_cli_output (vm, "%U", format_fib_path_list, path_list);
+        }));
+
+        return (NULL);
+    }
+
+    if (pool_is_free_index(fib_path_list_pool, pli))
+    {
+        vlib_cli_output (vm, "path list %d invalid", pli);
+
+        return (NULL);
+    }
+
+    /*
+     * show one in detail
+     */
+    path_list = fib_path_list_get(pli);
+    s = fib_path_list_format(pli, NULL);
+    s = format(s, "children:");
+    s = fib_node_children_format(path_list->fpl_node.fn_children, s);
+    vlib_cli_output (vm, "%s", s);
+    vec_free(s);
+
+    return (NULL);
+}
+
+/*
+ * CLI registration for "show fib path list".
+ */
+VLIB_CLI_COMMAND (show_fib_path_list, static) = {
+    .path = "show fib path list",
+    .short_help = "show fib path list",
+    .function = show_fib_path_list_command,
+};
diff --git a/vnet/vnet/fib/fib_path_list.h b/vnet/vnet/fib/fib_path_list.h
new file mode 100644
index 00000000000..42e07abdd4b
--- /dev/null
+++ b/vnet/vnet/fib/fib_path_list.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_PATH_LIST_H__
+#define __FIB_PATH_LIST_H__
+
+#include <vlib/vlib.h>
+#include <vnet/adj/adj.h>
+
+#include "fib_node.h"
+#include "fib_path.h"
+
+/**
+ * Enumeration of path-list attributes. Each value is the bit position
+ * of the corresponding FIB_PATH_LIST_FLAG_* flag.
+ */
+typedef enum fib_path_list_attribute_t_ {
+ /**
+ * Marker. Add new flags after this one.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_FIRST = 0,
+ /**
+ * This path list is shareable. Shareable path-lists
+ * are inserted into the path-list data-base.
+ * All path-lists are inherently shareable, the reason we share some and
+ * not others is to limit the size of the path-list database. This DB must
+ * be searched for each route update.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST,
+ /**
+ * explicit drop path-list. Used when the entry source needs to
+ * force a drop, despite the fact the path info is present.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_DROP,
+ /**
+ * explicit local path-list.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_LOCAL,
+ /**
+ * exclusive path-list. Exclusive means the path will resolve via the
+ * exclusive (user provided) adj.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE,
+ /**
+ * resolved path-list
+ */
+ FIB_PATH_LIST_ATTRIBUTE_RESOLVED,
+ /**
+ * looped path-list. one path looped implies the whole list is
+ */
+ FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+ /**
+ * Marker. Add new flags before this one, and then update it.
+ */
+ FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+} fib_path_list_attribute_t;
+
+/**
+ * Path-list flags: one bit per fib_path_list_attribute_t value.
+ */
+typedef enum fib_path_list_flags_t_ {
+    FIB_PATH_LIST_FLAG_NONE      = 0,
+    FIB_PATH_LIST_FLAG_SHARED    = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED),
+    FIB_PATH_LIST_FLAG_DROP      = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP),
+    FIB_PATH_LIST_FLAG_LOCAL     = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL),
+    FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE),
+    FIB_PATH_LIST_FLAG_RESOLVED  = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED),
+    FIB_PATH_LIST_FLAG_LOOPED    = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED),
+} fib_path_list_flags_t;
+
+/**
+ * Initialiser for an array of attribute names, indexed by
+ * fib_path_list_attribute_t. Entries are listed in enum order.
+ */
+#define FIB_PATH_LIST_ATTRIBUTES {                        \
+    [FIB_PATH_LIST_ATTRIBUTE_SHARED]    = "shared",       \
+    [FIB_PATH_LIST_ATTRIBUTE_DROP]      = "drop",         \
+    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",        \
+    [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive",    \
+    [FIB_PATH_LIST_ATTRIBUTE_RESOLVED]  = "resolved",     \
+    [FIB_PATH_LIST_ATTRIBUTE_LOOPED]    = "looped",       \
+}
+
+/**
+ * Loop header iterating _item over every path-list attribute, from
+ * FIB_PATH_LIST_ATTRIBUTE_FIRST to FIB_PATH_LIST_ATTRIBUTE_LAST inclusive.
+ */
+#define FOR_EACH_PATH_LIST_ATTRIBUTE(_item)             \
+    for (_item = FIB_PATH_LIST_ATTRIBUTE_FIRST;         \
+         _item <= FIB_PATH_LIST_ATTRIBUTE_LAST;         \
+         _item++)
+
+/*
+ * Path-list creation. Each function returns the index of the path-list
+ * the caller should use.
+ */
+extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags,
+                                             const fib_route_path_t *paths);
+extern fib_node_index_t fib_path_list_create_special(fib_protocol_t nh_proto,
+                                                     fib_path_list_flags_t flags,
+                                                     const dpo_id_t *dpo);
+
+/*
+ * Copy a path-list while adding/removing one path. 'path' must be a
+ * vector holding exactly one route-path. The remove variant returns
+ * FIB_NODE_INDEX_INVALID when no paths remain.
+ */
+extern fib_node_index_t fib_path_list_copy_and_path_add(
+    fib_node_index_t pl_index,
+    fib_path_list_flags_t flags,
+    const fib_route_path_t *path);
+extern fib_node_index_t fib_path_list_copy_and_path_remove(
+    fib_node_index_t pl_index,
+    fib_path_list_flags_t flags,
+    const fib_route_path_t *path);
+
+/*
+ * Forwarding contributions.
+ */
+extern void fib_path_list_contribute_forwarding (fib_node_index_t path_list_index,
+                                                 fib_forward_chain_type_t type,
+                                                 dpo_id_t *dpo);
+/* return type matches the definition in fib_path_list.c */
+extern adj_index_t fib_path_list_get_adj(fib_node_index_t path_list_index,
+                                         fib_forward_chain_type_t type);
+
+/*
+ * Child management. fib_path_list_child_add() returns the sibling
+ * index that fib_path_list_child_remove() takes.
+ */
+extern u32 fib_path_list_child_add(fib_node_index_t pl_index,
+                                   fib_node_type_t type,
+                                   fib_node_index_t child_index);
+extern void fib_path_list_child_remove(fib_node_index_t pl_index,
+                                       u32 sibling_index);
+extern void fib_path_list_back_walk(fib_node_index_t pl_index,
+                                    fib_node_back_walk_ctx_t *ctx);
+
+/*
+ * Locking. Both accept FIB_NODE_INDEX_INVALID, in which case they
+ * do nothing.
+ */
+extern void fib_path_list_lock(fib_node_index_t pl_index);
+extern void fib_path_list_unlock(fib_node_index_t pl_index);
+
+extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index,
+                                               fib_node_index_t **entry_indicies);
+extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index);
+extern int fib_path_list_is_looped(fib_node_index_t path_list_index);
+
+/*
+ * Formatting. The u8* argument is the vector to append to; the
+ * (possibly reallocated) vector is returned.
+ */
+extern u8 * fib_path_list_format(fib_node_index_t pl_index,
+                                 u8 * s);
+extern u8 * fib_path_list_adjs_format(fib_node_index_t pl_index,
+                                      u32 indent,
+                                      u8 * s);
+extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index,
+                                                const fib_node_index_t *pis);
+/**
+ * A callback function type for walking a path-list's paths.
+ * Returning zero stops the walk.
+ */
+typedef int (*fib_path_list_walk_fn_t)(fib_node_index_t pl_index,
+                                       fib_node_index_t path_index,
+                                       void *ctx);
+
+extern void fib_path_list_walk(fib_node_index_t pl_index,
+                               fib_path_list_walk_fn_t func,
+                               void *ctx);
+
+extern void fib_path_list_module_init(void);
+
+/*
+ * functions for testing.
+ */
+u32 fib_path_list_pool_size(void);
+u32 fib_path_list_db_size(void);
+
+#endif
diff --git a/vnet/vnet/fib/fib_table.c b/vnet/vnet/fib/fib_table.c
new file mode 100644
index 00000000000..84c8708851c
--- /dev/null
+++ b/vnet/vnet/fib/fib_table.c
@@ -0,0 +1,1052 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry_cover.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+/*
+ * fib_table_get
+ *
+ * Return the FIB table at 'index' in the table pool of the protocol
+ * given. Asserts and returns NULL for an unknown protocol.
+ */
+fib_table_t *
+fib_table_get (fib_node_index_t index,
+               fib_protocol_t proto)
+{
+    if (FIB_PROTOCOL_IP4 == proto)
+    {
+        return (pool_elt_at_index(ip4_main.fibs, index));
+    }
+    if (FIB_PROTOCOL_IP6 == proto)
+    {
+        return (pool_elt_at_index(ip6_main.fibs, index));
+    }
+    if (FIB_PROTOCOL_MPLS == proto)
+    {
+        return (pool_elt_at_index(mpls_main.fibs, index));
+    }
+
+    ASSERT(0);
+    return (NULL);
+}
+
+/*
+ * fib_table_lookup_i
+ *
+ * Dispatch a lookup of the prefix to the protocol specific table.
+ * Returns FIB_NODE_INDEX_INVALID for an unknown protocol.
+ */
+static inline fib_node_index_t
+fib_table_lookup_i (fib_table_t *fib_table,
+                    const fib_prefix_t *prefix)
+{
+    fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fei = ip4_fib_table_lookup(&fib_table->v4,
+                                   &prefix->fp_addr.ip4,
+                                   prefix->fp_len);
+        break;
+    case FIB_PROTOCOL_IP6:
+        fei = ip6_fib_table_lookup(fib_table->ft_index,
+                                   &prefix->fp_addr.ip6,
+                                   prefix->fp_len);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fei = mpls_fib_table_lookup(&fib_table->mpls,
+                                    prefix->fp_label,
+                                    prefix->fp_eos);
+        break;
+    }
+
+    return (fei);
+}
+
+/*
+ * fib_table_lookup
+ *
+ * Look the prefix up in the table identified by fib_index and the
+ * prefix's protocol.
+ */
+fib_node_index_t
+fib_table_lookup (u32 fib_index,
+                  const fib_prefix_t *prefix)
+{
+    fib_table_t *table = fib_table_get(fib_index, prefix->fp_proto);
+
+    return (fib_table_lookup_i(table, prefix));
+}
+
+/*
+ * fib_table_lookup_exact_match_i
+ *
+ * Dispatch an exact-match lookup of the prefix to the protocol
+ * specific table. MPLS uses the same lookup function as
+ * fib_table_lookup_i(). Returns FIB_NODE_INDEX_INVALID for an
+ * unknown protocol.
+ */
+static inline fib_node_index_t
+fib_table_lookup_exact_match_i (const fib_table_t *fib_table,
+                                const fib_prefix_t *prefix)
+{
+    fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fei = ip4_fib_table_lookup_exact_match(&fib_table->v4,
+                                               &prefix->fp_addr.ip4,
+                                               prefix->fp_len);
+        break;
+    case FIB_PROTOCOL_IP6:
+        fei = ip6_fib_table_lookup_exact_match(fib_table->ft_index,
+                                               &prefix->fp_addr.ip6,
+                                               prefix->fp_len);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fei = mpls_fib_table_lookup(&fib_table->mpls,
+                                    prefix->fp_label,
+                                    prefix->fp_eos);
+        break;
+    }
+
+    return (fei);
+}
+
+/*
+ * fib_table_lookup_exact_match
+ *
+ * Exact-match lookup of the prefix in the table identified by
+ * fib_index and the prefix's protocol.
+ */
+fib_node_index_t
+fib_table_lookup_exact_match (u32 fib_index,
+                              const fib_prefix_t *prefix)
+{
+    fib_table_t *table = fib_table_get(fib_index, prefix->fp_proto);
+
+    return (fib_table_lookup_exact_match_i(table, prefix));
+}
+
+/*
+ * fib_table_get_less_specific_i
+ *
+ * Find the entry covering the prefix. Always FIB_NODE_INDEX_INVALID
+ * for MPLS prefixes.
+ */
+static fib_node_index_t
+fib_table_get_less_specific_i (fib_table_t *fib_table,
+                               const fib_prefix_t *prefix)
+{
+    fib_prefix_t cover_pfx;
+
+    if (FIB_PROTOCOL_MPLS == prefix->fp_proto)
+    {
+        return (FIB_NODE_INDEX_INVALID);
+    }
+
+    /*
+     * in the absence of a tree structure for the table that allows for
+     * an O(1) parent get, a cheeky way to find the cover is to LPM for
+     * the prefix with mask-1.
+     * there should always be a cover, though it may be the default
+     * route. the default route's cover is the default route.
+     */
+    cover_pfx = *prefix;
+
+    if (0 != cover_pfx.fp_len)
+    {
+        cover_pfx.fp_len -= 1;
+    }
+
+    return (fib_table_lookup_i(fib_table, &cover_pfx));
+}
+
+/*
+ * fib_table_get_less_specific
+ *
+ * Find the entry covering the prefix in the table identified by
+ * fib_index and the prefix's protocol.
+ */
+fib_node_index_t
+fib_table_get_less_specific (u32 fib_index,
+                             const fib_prefix_t *prefix)
+{
+    fib_table_t *table = fib_table_get(fib_index, prefix->fp_proto);
+
+    return (fib_table_get_less_specific_i(table, prefix));
+}
+
+/*
+ * fib_table_entry_remove
+ *
+ * Remove the prefix from the protocol specific table, decrement the
+ * table's total route count and release the lock taken on the entry
+ * by fib_table_entry_insert().
+ */
+static void
+fib_table_entry_remove (fib_table_t *fib_table,
+                        const fib_prefix_t *prefix,
+                        fib_node_index_t fib_entry_index)
+{
+    vlib_smp_unsafe_warning();
+
+    fib_table->ft_total_route_counts--;
+
+    if (FIB_PROTOCOL_IP4 == prefix->fp_proto)
+    {
+        ip4_fib_table_entry_remove(&fib_table->v4,
+                                   &prefix->fp_addr.ip4,
+                                   prefix->fp_len);
+    }
+    else if (FIB_PROTOCOL_IP6 == prefix->fp_proto)
+    {
+        ip6_fib_table_entry_remove(fib_table->ft_index,
+                                   &prefix->fp_addr.ip6,
+                                   prefix->fp_len);
+    }
+    else if (FIB_PROTOCOL_MPLS == prefix->fp_proto)
+    {
+        mpls_fib_table_entry_remove(&fib_table->mpls,
+                                    prefix->fp_label,
+                                    prefix->fp_eos);
+    }
+
+    fib_entry_unlock(fib_entry_index);
+}
+
+/*
+ * fib_table_post_insert_actions
+ *
+ * After an entry has been inserted, tell its covering entry that a
+ * new more specific entry now exists beneath it. Nothing is done for
+ * MPLS, which has no cover relationships.
+ */
+static void
+fib_table_post_insert_actions (fib_table_t *fib_table,
+                               const fib_prefix_t *prefix,
+                               fib_node_index_t fib_entry_index)
+{
+    fib_node_index_t cover_index;
+
+    if (FIB_PROTOCOL_MPLS != prefix->fp_proto)
+    {
+        cover_index = fib_table_get_less_specific_i(fib_table, prefix);
+
+        /*
+         * the indices are the same when the default route is first
+         * added; there is then no separate cover to inform.
+         */
+        if (cover_index != fib_entry_index)
+        {
+            fib_entry_cover_change_notify(cover_index, fib_entry_index);
+        }
+    }
+}
+
+/*
+ * fib_table_entry_insert
+ *
+ * Lock the entry, count it, insert it in the protocol specific table
+ * and then run the post insert actions.
+ */
+static void
+fib_table_entry_insert (fib_table_t *fib_table,
+                        const fib_prefix_t *prefix,
+                        fib_node_index_t fib_entry_index)
+{
+    vlib_smp_unsafe_warning();
+
+    fib_entry_lock(fib_entry_index);
+    fib_table->ft_total_route_counts++;
+
+    if (FIB_PROTOCOL_IP4 == prefix->fp_proto)
+    {
+        ip4_fib_table_entry_insert(&fib_table->v4,
+                                   &prefix->fp_addr.ip4,
+                                   prefix->fp_len,
+                                   fib_entry_index);
+    }
+    else if (FIB_PROTOCOL_IP6 == prefix->fp_proto)
+    {
+        ip6_fib_table_entry_insert(fib_table->ft_index,
+                                   &prefix->fp_addr.ip6,
+                                   prefix->fp_len,
+                                   fib_entry_index);
+    }
+    else if (FIB_PROTOCOL_MPLS == prefix->fp_proto)
+    {
+        mpls_fib_table_entry_insert(&fib_table->mpls,
+                                    prefix->fp_label,
+                                    prefix->fp_eos,
+                                    fib_entry_index);
+    }
+
+    fib_table_post_insert_actions(fib_table, prefix, fib_entry_index);
+}
+
+/*
+ * fib_table_fwding_dpo_update
+ *
+ * Install the DPO for the prefix in the forwarding table of the
+ * prefix's protocol. Nothing is done for an unknown protocol.
+ *
+ * @param fib_index index of the FIB table
+ * @param prefix    the prefix whose forwarding is updated
+ * @param dpo       the DPO to install
+ */
+void
+fib_table_fwding_dpo_update (u32 fib_index,
+                             const fib_prefix_t *prefix,
+                             const dpo_id_t *dpo)
+{
+    vlib_smp_unsafe_warning();
+
+    /*
+     * the protocol specific functions are called as statements: a
+     * void function may not return an expression in ISO C.
+     */
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        ip4_fib_table_fwding_dpo_update(ip4_fib_get(fib_index),
+                                        &prefix->fp_addr.ip4,
+                                        prefix->fp_len,
+                                        dpo);
+        break;
+    case FIB_PROTOCOL_IP6:
+        ip6_fib_table_fwding_dpo_update(fib_index,
+                                        &prefix->fp_addr.ip6,
+                                        prefix->fp_len,
+                                        dpo);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        mpls_fib_forwarding_table_update(mpls_fib_get(fib_index),
+                                         prefix->fp_label,
+                                         prefix->fp_eos,
+                                         dpo);
+        break;
+    }
+}
+
+/*
+ * fib_table_fwding_dpo_remove
+ *
+ * Remove the DPO for the prefix from the forwarding table of the
+ * prefix's protocol. Nothing is done for an unknown protocol.
+ *
+ * @param fib_index index of the FIB table
+ * @param prefix    the prefix whose forwarding is removed
+ * @param dpo       the DPO being removed; passed on for IP4 and IP6,
+ *                  not used for MPLS where the entry is reset.
+ */
+void
+fib_table_fwding_dpo_remove (u32 fib_index,
+                             const fib_prefix_t *prefix,
+                             const dpo_id_t *dpo)
+{
+    vlib_smp_unsafe_warning();
+
+    /*
+     * the protocol specific functions are called as statements: a
+     * void function may not return an expression in ISO C.
+     */
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index),
+                                        &prefix->fp_addr.ip4,
+                                        prefix->fp_len,
+                                        dpo);
+        break;
+    case FIB_PROTOCOL_IP6:
+        ip6_fib_table_fwding_dpo_remove(fib_index,
+                                        &prefix->fp_addr.ip6,
+                                        prefix->fp_len,
+                                        dpo);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        mpls_fib_forwarding_table_reset(mpls_fib_get(fib_index),
+                                        prefix->fp_label,
+                                        prefix->fp_eos);
+        break;
+    }
+}
+
+
+/*
+ * fib_table_entry_special_dpo_add
+ *
+ * Add a special (DPO) source to the entry for the prefix, creating
+ * and inserting the entry if the table has no exact match. The
+ * per-source route count is incremented when the source is newly
+ * present on the entry. Returns the entry's index.
+ */
+fib_node_index_t
+fib_table_entry_special_dpo_add (u32 fib_index,
+                                 const fib_prefix_t *prefix,
+                                 fib_source_t source,
+                                 fib_entry_flag_t flags,
+                                 const dpo_id_t *dpo)
+{
+    fib_node_index_t fib_entry_index;
+    fib_table_t *fib_table;
+
+    fib_table = fib_table_get(fib_index, prefix->fp_proto);
+    fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID != fib_entry_index)
+    {
+        /*
+         * the entry exists: add the source to it and count the route
+         * only if the entry was not already sourced this way.
+         */
+        int sourced_before;
+
+        sourced_before = fib_entry_is_sourced(fib_entry_index, source);
+        fib_entry_special_add(fib_entry_index, source, flags, dpo);
+
+        if (sourced_before != fib_entry_is_sourced(fib_entry_index, source))
+        {
+            fib_table->ft_src_route_counts[source]++;
+        }
+    }
+    else
+    {
+        /*
+         * no such entry yet: create it and put it in the table.
+         */
+        fib_entry_index = fib_entry_create_special(fib_index, prefix,
+                                                   source, flags,
+                                                   dpo);
+
+        fib_table_entry_insert(fib_table, prefix, fib_entry_index);
+        fib_table->ft_src_route_counts[source]++;
+    }
+
+    return (fib_entry_index);
+}
+
+/*
+ * fib_table_entry_special_add
+ *
+ * Add a special source to the entry for the prefix. The DPO used is
+ * an adjacency DPO when adj_index is valid, otherwise the drop DPO
+ * for the prefix's protocol. Returns the entry's index.
+ */
+fib_node_index_t
+fib_table_entry_special_add (u32 fib_index,
+                             const fib_prefix_t *prefix,
+                             fib_source_t source,
+                             fib_entry_flag_t flags,
+                             adj_index_t adj_index)
+{
+    fib_node_index_t fei;
+    dpo_id_t via = DPO_NULL;
+
+    if (ADJ_INDEX_INVALID == adj_index)
+    {
+        dpo_copy(&via, drop_dpo_get(fib_proto_to_dpo(prefix->fp_proto)));
+    }
+    else
+    {
+        dpo_set(&via, DPO_ADJACENCY, FIB_PROTOCOL_MAX, adj_index);
+    }
+
+    fei = fib_table_entry_special_dpo_add(fib_index, prefix, source,
+                                          flags, &via);
+
+    /* the temporary DPO is no longer needed once the entry has it */
+    dpo_unlock(&via);
+
+    return (fei);
+}
+
+/*
+ * fib_table_entry_special_dpo_update
+ *
+ * Update the DPO a special source contributes to an existing entry.
+ * This is done as an add of the new DPO followed by a remove of the
+ * source, against the entry's own prefix and table.
+ * NOTE(review): presumably this relies on the source being
+ * reference counted on the entry, so the remove only undoes the
+ * extra reference taken by the add - confirm against fib_entry.c.
+ */
+void
+fib_table_entry_special_dpo_update (fib_node_index_t fib_entry_index,
+                                    fib_source_t source,
+                                    fib_entry_flag_t flags,
+                                    const dpo_id_t *dpo)
+{
+    fib_prefix_t pfx;
+    u32 table_index;
+
+    fib_entry_get_prefix(fib_entry_index, &pfx);
+    table_index = fib_entry_get_fib_index(fib_entry_index);
+
+    fib_table_entry_special_dpo_add(table_index, &pfx, source, flags, dpo);
+    fib_table_entry_special_remove(table_index, &pfx, source);
+}
+
+void
+fib_table_entry_special_remove (u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source)
+{
+ /*
+ * Remove a special source from the entry for the prefix.
+ *
+ * 1 is it present
+ * yes => remove source
+ * 2 - is it still sourced?
+ * no => cover walk
+ *
+ * The per-source route count is decremented when the entry's
+ * is-sourced state for this source changes across the removal.
+ */
+ fib_node_index_t fib_entry_index;
+ fib_table_t *fib_table;
+
+ fib_table = fib_table_get(fib_index, prefix->fp_proto);
+ fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ {
+ /*
+ * removing an entry that does not exist. i'll allow it.
+ */
+ }
+ else
+ {
+ fib_entry_src_flag_t src_flag;
+ int was_sourced;
+
+ /*
+ * don't nobody go nowhere: hold a lock on the entry so that it
+ * can still be queried after it is removed from the table below.
+ */
+ fib_entry_lock(fib_entry_index);
+ was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+
+ src_flag = fib_entry_special_remove(fib_entry_index, source);
+
+ if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag))
+ {
+ /*
+ * last source gone. remove from the table
+ */
+ fib_table_entry_remove(fib_table, prefix, fib_entry_index);
+
+ /*
+ * now the entry is no longer in the table, we can
+ * inform the entries that it covers to re-calculate their cover
+ */
+ fib_entry_cover_change_notify(fib_entry_index,
+ FIB_NODE_INDEX_INVALID);
+ }
+ /*
+ * else
+ * still has sources, leave it be.
+ */
+ if (was_sourced != fib_entry_is_sourced(fib_entry_index, source))
+ {
+ fib_table->ft_src_route_counts[source]--;
+ }
+
+ fib_entry_unlock(fib_entry_index);
+ }
+}
+
+/**
+ * fib_table_route_path_fixup
+ *
+ * Convert attached hosts to attached next-hops.
+ *
+ * This special case is required because an attached path will link to a
+ * glean, and the FIB entry will have the interface or API/CLI source. When
+ * the ARP/ND process completes, that source (which will provide a
+ * complete adjacency) will be lower priority and so the FIB entry will
+ * remain linked to a glean and traffic will never reach the hosts. For
+ * an ATTACHED_HOST path we can link the path directly to the [incomplete]
+ * adjacency.
+ *
+ * Concretely: when the prefix is a host prefix, the path has a zero
+ * next-hop address and an interface is set (not ~0), the path's
+ * next-hop address is overwritten with the prefix's address.
+ */
+static void
+fib_table_route_path_fixup (const fib_prefix_t *prefix,
+ fib_route_path_t *path)
+{
+ if (fib_prefix_is_host(prefix) &&
+ ip46_address_is_zero(&path->frp_addr) &&
+ path->frp_sw_if_index != ~0)
+ {
+ path->frp_addr = prefix->fp_addr;
+ }
+}
+
+/**
+ * Add a single path to the entry for @c prefix, creating the entry if it
+ * does not exist.
+ *
+ * The individual next-hop arguments are packed into one fib_route_path_t
+ * (a NULL @c next_hop is replaced by the all-zero address), passed through
+ * fib_table_route_path_fixup() and handed to fib_table_entry_path_add2()
+ * as a one-element vector.
+ *
+ * @return the index of the entry that fib_table_entry_path_add2() reports.
+ */
+fib_node_index_t
+fib_table_entry_path_add (u32 fib_index,
+                          const fib_prefix_t *prefix,
+                          fib_source_t source,
+                          fib_entry_flag_t flags,
+                          fib_protocol_t next_hop_proto,
+                          const ip46_address_t *next_hop,
+                          u32 next_hop_sw_if_index,
+                          u32 next_hop_fib_index,
+                          u32 next_hop_weight,
+                          mpls_label_t next_hop_label,
+                          fib_route_path_flags_t path_flags)
+{
+    fib_route_path_t *path_vec = NULL;
+    fib_node_index_t fei;
+    fib_route_path_t path = {
+        .frp_proto = next_hop_proto,
+        .frp_addr = (NULL != next_hop ? *next_hop : zero_addr),
+        .frp_sw_if_index = next_hop_sw_if_index,
+        .frp_fib_index = next_hop_fib_index,
+        .frp_weight = next_hop_weight,
+        .frp_flags = path_flags,
+        .frp_label = next_hop_label,
+    };
+
+    fib_table_route_path_fixup(prefix, &path);
+
+    /* the multi-path API takes a vector, so wrap the one path in one */
+    vec_add1(path_vec, path);
+    fei = fib_table_entry_path_add2(fib_index, prefix, source, flags, path_vec);
+    vec_free(path_vec);
+
+    return (fei);
+}
+
+/**
+ * Add the paths in @c rpath to the entry for @c prefix on behalf of
+ * @c source.
+ *
+ * If no entry matches the prefix exactly, one is created from the paths and
+ * inserted into the table. Otherwise the paths are added to the existing
+ * entry. The table's per-source route count is incremented when an entry is
+ * created, or when the existing entry's sourced-by-@c source state changed.
+ *
+ * @return the index of the created or existing entry.
+ */
+fib_node_index_t
+fib_table_entry_path_add2 (u32 fib_index,
+                           const fib_prefix_t *prefix,
+                           fib_source_t source,
+                           fib_entry_flag_t flags,
+                           const fib_route_path_t *rpath)
+{
+    fib_table_t *table;
+    fib_node_index_t fei;
+
+    table = fib_table_get(fib_index, prefix->fp_proto);
+    fei = fib_table_lookup_exact_match_i(table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID != fei)
+    {
+        /* the entry exists already; extend it */
+        const int sourced_before = fib_entry_is_sourced(fei, source);
+
+        fib_entry_path_add(fei, source, flags, rpath);
+
+        if (fib_entry_is_sourced(fei, source) != sourced_before)
+        {
+            table->ft_src_route_counts[source]++;
+        }
+        return (fei);
+    }
+
+    /* first time this prefix is seen in the table */
+    fei = fib_entry_create(fib_index, prefix, source, flags, rpath);
+
+    fib_table_entry_insert(table, prefix, fei);
+    table->ft_src_route_counts[source]++;
+
+    return (fei);
+}
+
+/**
+ * Remove the paths in @c rpath, contributed by @c source, from the entry
+ * that exactly matches @c prefix.
+ *
+ * A prefix with no exact-match entry is silently ignored. When the removal
+ * leaves the entry with no source, the entry is taken out of the table and
+ * the entries it covered are told to re-calculate their cover. The table's
+ * per-source route count is decremented if the entry's sourced-by-@c source
+ * state changed.
+ */
+void
+fib_table_entry_path_remove2 (u32 fib_index,
+                              const fib_prefix_t *prefix,
+                              fib_source_t source,
+                              const fib_route_path_t *rpath)
+{
+    fib_entry_src_flag_t remaining;
+    fib_node_index_t fei;
+    fib_table_t *table;
+    int sourced_before;
+
+    table = fib_table_get(fib_index, prefix->fp_proto);
+    fei = fib_table_lookup_exact_match_i(table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == fei)
+    {
+        /* removing an entry that does not exist is tolerated */
+        return;
+    }
+
+    /* take a lock on the entry for the duration of the removal */
+    fib_entry_lock(fei);
+    sourced_before = fib_entry_is_sourced(fei, source);
+
+    remaining = fib_entry_path_remove(fei, source, rpath);
+
+    if (0 == (remaining & FIB_ENTRY_SRC_FLAG_ADDED))
+    {
+        /* that was the last source; the entry leaves the table */
+        fib_table_entry_remove(table, prefix, fei);
+
+        /*
+         * with the entry out of the table, the entries it covered can
+         * be told to go and find themselves a new cover
+         */
+        fib_entry_cover_change_notify(fei, FIB_NODE_INDEX_INVALID);
+    }
+
+    /* account for the source only when its state actually changed */
+    if (fib_entry_is_sourced(fei, source) != sourced_before)
+    {
+        table->ft_src_route_counts[source]--;
+    }
+
+    fib_entry_unlock(fei);
+}
+
+/**
+ * Remove a single path from the entry for @c prefix.
+ *
+ * The individual next-hop arguments are packed into one fib_route_path_t
+ * (a NULL @c next_hop is replaced by the all-zero address), passed through
+ * fib_table_route_path_fixup() and handed to fib_table_entry_path_remove2()
+ * as a one-element vector. No label is supplied by this API, so the path's
+ * frp_label keeps the zero value given by the initialiser.
+ */
+void
+fib_table_entry_path_remove (u32 fib_index,
+                             const fib_prefix_t *prefix,
+                             fib_source_t source,
+                             fib_protocol_t next_hop_proto,
+                             const ip46_address_t *next_hop,
+                             u32 next_hop_sw_if_index,
+                             u32 next_hop_fib_index,
+                             u32 next_hop_weight,
+                             fib_route_path_flags_t path_flags)
+{
+    fib_route_path_t *path_vec = NULL;
+    fib_route_path_t path = {
+        .frp_proto = next_hop_proto,
+        .frp_addr = (NULL != next_hop ? *next_hop : zero_addr),
+        .frp_sw_if_index = next_hop_sw_if_index,
+        .frp_fib_index = next_hop_fib_index,
+        .frp_weight = next_hop_weight,
+        .frp_flags = path_flags,
+    };
+
+    fib_table_route_path_fixup(prefix, &path);
+
+    /* the multi-path API takes a vector, so wrap the one path in one */
+    vec_add1(path_vec, path);
+    fib_table_entry_path_remove2(fib_index, prefix, source, path_vec);
+    vec_free(path_vec);
+}
+
+/**
+ * Adapter that gives fib_route_path_cmp() the untyped two-pointer signature
+ * expected by vec_sort_with_function().
+ */
+static int
+fib_route_path_cmp_for_sort (void * v1,
+                             void * v2)
+{
+    int order;
+
+    order = fib_route_path_cmp(v1, v2);
+
+    return (order);
+}
+
+/**
+ * Replace the paths that @c source contributes to the entry for @c prefix
+ * with @c paths, creating the entry if it does not exist.
+ *
+ * NOTE: although @c paths is const-qualified, the vector is sorted in place
+ * (through a cast) before it is used, so the caller's vector may be
+ * re-ordered by this call.
+ *
+ * The table's per-source route count is incremented when an entry is
+ * created, or when the existing entry's sourced-by-@c source state changed.
+ *
+ * @return the index of the created or existing entry.
+ */
+fib_node_index_t
+fib_table_entry_update (u32 fib_index,
+                        const fib_prefix_t *prefix,
+                        fib_source_t source,
+                        fib_entry_flag_t flags,
+                        const fib_route_path_t *paths)
+{
+    fib_route_path_t *sortable;
+    fib_table_t *table;
+    fib_node_index_t fei;
+
+    table = fib_table_get(fib_index, prefix->fp_proto);
+    fei = fib_table_lookup_exact_match_i(table, prefix);
+
+    /*
+     * order the paths given by the control plane, so that both the paths
+     * and the extensions kept on the entry end up sorted.
+     */
+    sortable = (fib_route_path_t *) paths; /* deliberately drops const */
+    vec_sort_with_function(sortable, fib_route_path_cmp_for_sort);
+
+    if (FIB_NODE_INDEX_INVALID != fei)
+    {
+        /* the entry exists already; swap its paths */
+        const int sourced_before = fib_entry_is_sourced(fei, source);
+
+        fib_entry_update(fei, source, flags, paths);
+
+        if (fib_entry_is_sourced(fei, source) != sourced_before)
+        {
+            table->ft_src_route_counts[source]++;
+        }
+        return (fei);
+    }
+
+    /* first time this prefix is seen in the table */
+    fei = fib_entry_create(fib_index, prefix, source, flags, paths);
+
+    fib_table_entry_insert(table, prefix, fei);
+    table->ft_src_route_counts[source]++;
+
+    return (fei);
+}
+
+/**
+ * Update the entry for @c prefix so that @c source contributes exactly one
+ * path, creating the entry if it does not exist.
+ *
+ * The individual next-hop arguments are packed into one fib_route_path_t
+ * (a NULL @c next_hop is replaced by the all-zero address), passed through
+ * fib_table_route_path_fixup() and handed to fib_table_entry_update() as a
+ * one-element vector.
+ *
+ * @return the index of the entry that fib_table_entry_update() reports.
+ */
+fib_node_index_t
+fib_table_entry_update_one_path (u32 fib_index,
+                                 const fib_prefix_t *prefix,
+                                 fib_source_t source,
+                                 fib_entry_flag_t flags,
+                                 fib_protocol_t next_hop_proto,
+                                 const ip46_address_t *next_hop,
+                                 u32 next_hop_sw_if_index,
+                                 u32 next_hop_fib_index,
+                                 u32 next_hop_weight,
+                                 mpls_label_t next_hop_label,
+                                 fib_route_path_flags_t path_flags)
+{
+    fib_route_path_t *path_vec = NULL;
+    fib_node_index_t fei;
+    fib_route_path_t path = {
+        .frp_proto = next_hop_proto,
+        .frp_addr = (NULL != next_hop ? *next_hop : zero_addr),
+        .frp_sw_if_index = next_hop_sw_if_index,
+        .frp_fib_index = next_hop_fib_index,
+        .frp_weight = next_hop_weight,
+        .frp_flags = path_flags,
+        .frp_label = next_hop_label,
+    };
+
+    fib_table_route_path_fixup(prefix, &path);
+
+    /* the multi-path API takes a vector, so wrap the one path in one */
+    vec_add1(path_vec, path);
+    fei = fib_table_entry_update(fib_index, prefix, source, flags, path_vec);
+    vec_free(path_vec);
+
+    return (fei);
+}
+
+/**
+ * Common implementation of entry deletion for a known entry index.
+ *
+ * Deletes @c source from the entry. When that leaves the entry with no
+ * source, the entry is taken out of the table and the entries it covered
+ * are told to re-calculate their cover. The table's per-source route count
+ * is decremented if the entry's sourced-by-@c source state changed.
+ */
+static void
+fib_table_entry_delete_i (u32 fib_index,
+                          fib_node_index_t fib_entry_index,
+                          const fib_prefix_t *prefix,
+                          fib_source_t source)
+{
+    fib_table_t *table = fib_table_get(fib_index, prefix->fp_proto);
+    const int sourced_before = fib_entry_is_sourced(fib_entry_index, source);
+    fib_entry_src_flag_t remaining;
+
+    /* take a lock on the entry for the duration of the delete */
+    fib_entry_lock(fib_entry_index);
+
+    remaining = fib_entry_delete(fib_entry_index, source);
+
+    if (0 == (remaining & FIB_ENTRY_SRC_FLAG_ADDED))
+    {
+        /* that was the last source; the entry leaves the table */
+        fib_table_entry_remove(table, prefix, fib_entry_index);
+
+        /*
+         * with the entry out of the table, the entries it covered can
+         * be told to go and find themselves a new cover
+         */
+        fib_entry_cover_change_notify(fib_entry_index,
+                                      FIB_NODE_INDEX_INVALID);
+    }
+
+    /* account for the source only when its state actually changed */
+    if (fib_entry_is_sourced(fib_entry_index, source) != sourced_before)
+    {
+        table->ft_src_route_counts[source]--;
+    }
+
+    fib_entry_unlock(fib_entry_index);
+}
+
+/**
+ * Delete @c source from the entry that exactly matches @c prefix.
+ *
+ * If no such entry exists a warning is logged and nothing else happens.
+ */
+void
+fib_table_entry_delete (u32 fib_index,
+                        const fib_prefix_t *prefix,
+                        fib_source_t source)
+{
+    fib_node_index_t fei;
+
+    fei = fib_table_lookup_exact_match(fib_index, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == fei)
+    {
+        /* deleting something that is not there: allowed, but noted */
+        clib_warning("%U not in FIB", format_fib_prefix, prefix);
+        return;
+    }
+
+    fib_table_entry_delete_i(fib_index, fei, prefix, source);
+}
+
+/**
+ * Delete @c source from the entry identified by @c fib_entry_index.
+ *
+ * The prefix and the FIB index are read back from the entry itself and
+ * passed on to the common delete implementation.
+ */
+void
+fib_table_entry_delete_index (fib_node_index_t fib_entry_index,
+                              fib_source_t source)
+{
+    fib_prefix_t pfx;
+    u32 fib_index;
+
+    fib_entry_get_prefix(fib_entry_index, &pfx);
+    fib_index = fib_entry_get_fib_index(fib_entry_index);
+
+    fib_table_entry_delete_i(fib_index, fib_entry_index, &pfx, source);
+}
+
+/**
+ * Attach an MPLS local label to the entry for @c prefix.
+ *
+ * The entry is added as a special entry sourced by FIB_SOURCE_MPLS with no
+ * DPO, and the label is then stored as that source's data.
+ *
+ * @return the index of the entry reported by the special add.
+ */
+fib_node_index_t
+fib_table_entry_local_label_add (u32 fib_index,
+                                 const fib_prefix_t *prefix,
+                                 mpls_label_t label)
+{
+    fib_node_index_t fei;
+
+    fei = fib_table_entry_special_dpo_add(fib_index,
+                                          prefix,
+                                          FIB_SOURCE_MPLS,
+                                          FIB_ENTRY_FLAG_NONE,
+                                          NULL);
+
+    fib_entry_set_source_data(fei, FIB_SOURCE_MPLS, &label);
+
+    return (fei);
+}
+
+/**
+ * Remove the MPLS local label @c label from the entry for @c prefix.
+ *
+ * Nothing is done when the prefix has no exact-match entry, when the entry
+ * has no FIB_SOURCE_MPLS data, or when the stored label differs from
+ * @c label. Otherwise the stored label is overwritten with
+ * MPLS_LABEL_INVALID and the FIB_SOURCE_MPLS special entry is removed.
+ */
+void
+fib_table_entry_local_label_remove (u32 fib_index,
+                                    const fib_prefix_t *prefix,
+                                    mpls_label_t label)
+{
+    mpls_label_t invalid_label = MPLS_LABEL_INVALID;
+    fib_node_index_t fei;
+    const void *stored;
+
+    fei = fib_table_lookup_exact_match(fib_index, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == fei)
+    {
+        return;
+    }
+
+    stored = fib_entry_get_source_data(fei, FIB_SOURCE_MPLS);
+
+    if (NULL == stored)
+    {
+        return;
+    }
+
+    /* only the label that was actually installed may be removed */
+    if (label != *(const mpls_label_t *) stored)
+    {
+        return;
+    }
+
+    fib_entry_set_source_data(fei, FIB_SOURCE_MPLS, &invalid_label);
+    fib_table_entry_special_remove(fib_index, prefix, FIB_SOURCE_MPLS);
+}
+
+/**
+ * Dispatch to the protocol specific lookup of the FIB index bound to
+ * @c sw_if_index.
+ *
+ * @return the protocol specific result, or ~0 when @c proto is none of the
+ *         handled protocols.
+ */
+u32
+fib_table_get_index_for_sw_if_index (fib_protocol_t proto,
+                                     u32 sw_if_index)
+{
+    u32 fib_index = ~0;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fib_index = ip4_fib_table_get_index_for_sw_if_index(sw_if_index);
+        break;
+    case FIB_PROTOCOL_IP6:
+        fib_index = ip6_fib_table_get_index_for_sw_if_index(sw_if_index);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fib_index = mpls_fib_table_get_index_for_sw_if_index(sw_if_index);
+        break;
+    }
+
+    return (fib_index);
+}
+
+/**
+ * Dispatch to the protocol specific getter of the flow hash configuration
+ * of the table at @c fib_index.
+ *
+ * @return the protocol specific result, or 0 when @c proto is none of the
+ *         handled protocols.
+ */
+flow_hash_config_t
+fib_table_get_flow_hash_config (u32 fib_index,
+                                fib_protocol_t proto)
+{
+    flow_hash_config_t config = 0;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        config = ip4_fib_table_get_flow_hash_config(fib_index);
+        break;
+    case FIB_PROTOCOL_IP6:
+        config = ip6_fib_table_get_flow_hash_config(fib_index);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        config = mpls_fib_table_get_flow_hash_config(fib_index);
+        break;
+    }
+
+    return (config);
+}
+
+
+/**
+ * Look up the table bound to @c sw_if_index for @c proto and report its
+ * table ID.
+ *
+ * @return the table's ft_table_id, or ~0 if fib_table_get() yields NULL.
+ */
+u32
+fib_table_get_table_id_for_sw_if_index (fib_protocol_t proto,
+                                        u32 sw_if_index)
+{
+    fib_table_t *fib_table;
+    u32 fib_index;
+
+    fib_index = fib_table_get_index_for_sw_if_index(proto, sw_if_index);
+    fib_table = fib_table_get(fib_index, proto);
+
+    if (NULL == fib_table)
+    {
+        return (~0);
+    }
+
+    return (fib_table->ft_table_id);
+}
+
+/**
+ * Dispatch to the protocol specific translation of a table ID into a FIB
+ * index.
+ *
+ * @return the protocol specific result, or ~0 when @c proto is none of the
+ *         handled protocols.
+ */
+u32
+fib_table_find (fib_protocol_t proto,
+                u32 table_id)
+{
+    u32 fib_index = ~0;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fib_index = ip4_fib_index_from_table_id(table_id);
+        break;
+    case FIB_PROTOCOL_IP6:
+        fib_index = ip6_fib_index_from_table_id(table_id);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fib_index = mpls_fib_index_from_table_id(table_id);
+        break;
+    }
+
+    return (fib_index);
+}
+
+/**
+ * Find, or create if absent, the table with ID @c table_id for @c proto and
+ * take a lock on it, via the protocol specific implementation.
+ *
+ * The table is given a "<proto>-VRF:<table_id>" description the first time
+ * it is seen here.
+ *
+ * @return the index of the table, or ~0 when @c proto is none of the
+ *         handled protocols.
+ */
+u32
+fib_table_find_or_create_and_lock (fib_protocol_t proto,
+                                   u32 table_id)
+{
+    fib_table_t *fib_table;
+    fib_node_index_t fi;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fi = ip4_fib_table_find_or_create_and_lock(table_id);
+        break;
+    case FIB_PROTOCOL_IP6:
+        fi = ip6_fib_table_find_or_create_and_lock(table_id);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fi = mpls_fib_table_find_or_create_and_lock(table_id);
+        break;
+    default:
+        return (~0);
+    }
+
+    fib_table = fib_table_get(fi, proto);
+
+    /*
+     * Describe the table only once. When the table was found rather than
+     * created it already carries a description; assigning a freshly
+     * formatted vector over the top of it would leak the previous one on
+     * every call.
+     */
+    if (NULL == fib_table->ft_desc)
+    {
+        fib_table->ft_desc = format(NULL, "%U-VRF:%d",
+                                    format_fib_protocol, proto,
+                                    table_id);
+    }
+
+    return (fi);
+}
+
+/**
+ * Create a new table with no table ID for @c proto and take a lock on it,
+ * via the protocol specific implementation. The printf-style @c fmt and its
+ * arguments are formatted onto the table's description.
+ *
+ * @return the index of the new table, or ~0 when @c proto is none of the
+ *         handled protocols.
+ */
+u32
+fib_table_create_and_lock (fib_protocol_t proto,
+                           const char *const fmt,
+                           ...)
+{
+    fib_table_t *fib_table;
+    fib_node_index_t fi;
+    va_list ap;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fi = ip4_fib_table_create_and_lock();
+        break;
+    case FIB_PROTOCOL_IP6:
+        fi = ip6_fib_table_create_and_lock();
+        break;
+    case FIB_PROTOCOL_MPLS:
+        fi = mpls_fib_table_create_and_lock();
+        break;
+    default:
+        return (~0);
+    }
+
+    fib_table = fib_table_get(fi, proto);
+
+    /*
+     * The argument list is opened only around its single use, so that the
+     * early return above cannot leave a va_start without a matching va_end.
+     */
+    va_start(ap, fmt);
+    fib_table->ft_desc = va_format(fib_table->ft_desc, fmt, &ap);
+    va_end(ap);
+
+    return (fi);
+}
+
+/**
+ * Free the table's description and hand the table to the protocol specific
+ * destroy function selected by ft_proto.
+ */
+static void
+fib_table_destroy (fib_table_t *fib_table)
+{
+    const fib_protocol_t proto = fib_table->ft_proto;
+
+    vec_free(fib_table->ft_desc);
+
+    if (FIB_PROTOCOL_IP4 == proto)
+    {
+        ip4_fib_table_destroy(&fib_table->v4);
+    }
+    else if (FIB_PROTOCOL_IP6 == proto)
+    {
+        /* unlike the other two, the IPv6 destroy takes the table's index */
+        ip6_fib_table_destroy(fib_table->ft_index);
+    }
+    else if (FIB_PROTOCOL_MPLS == proto)
+    {
+        mpls_fib_table_destroy(&fib_table->mpls);
+    }
+}
+
+/**
+ * Drop one lock from the table; the table is destroyed when the lock count
+ * reaches zero.
+ */
+void
+fib_table_unlock (u32 fib_index,
+                  fib_protocol_t proto)
+{
+    fib_table_t *table = fib_table_get(fib_index, proto);
+
+    if (0 == --table->ft_locks)
+    {
+        fib_table_destroy(table);
+    }
+}
+/**
+ * Take one more lock on the table.
+ */
+void
+fib_table_lock (u32 fib_index,
+                fib_protocol_t proto)
+{
+    fib_table_get(fib_index, proto)->ft_locks++;
+}
+
+/**
+ * Report the table's per-source route counter for @c source.
+ */
+u32
+fib_table_get_num_entries (u32 fib_index,
+                           fib_protocol_t proto,
+                           fib_source_t source)
+{
+    const fib_table_t *table = fib_table_get(fib_index, proto);
+
+    return (table->ft_src_route_counts[source]);
+}
+
+/**
+ * Format function that appends a table's description to @c s.
+ *
+ * Consumes two arguments from @c ap: the FIB index, then the protocol
+ * (read as an int, since that is what the enum is promoted to when passed
+ * through a variable argument list).
+ *
+ * NOTE(review): this takes the va_list by value, whereas
+ * format_test_interface_name in fib_test.c takes a va_list pointer -
+ * confirm which convention callers of this function rely on.
+ */
+u8*
+format_fib_table_name (u8* s, va_list ap)
+{
+    fib_node_index_t fib_index;
+    fib_protocol_t proto;
+
+    fib_index = va_arg(ap, fib_node_index_t);
+    proto = va_arg(ap, int);
+
+    return (format(s, "%v", fib_table_get(fib_index, proto)->ft_desc));
+}
+
+void
+fib_table_flush (u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source)
+{
+ // FIXME: flushing is not implemented; none of the arguments are used and every call runs straight into ASSERT(0).
+ ASSERT(0);
+}
diff --git a/vnet/vnet/fib/fib_table.h b/vnet/vnet/fib/fib_table.h
new file mode 100644
index 00000000000..d7c604f9de9
--- /dev/null
+++ b/vnet/vnet/fib/fib_table.h
@@ -0,0 +1,732 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_TABLE_H__
+#define __FIB_TABLE_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/packet.h>
+
+/**
+ * @brief
+ * A protocol independent FIB table.
+ * One instance wraps one of the protocol specific FIBs (IPv4, IPv6 or
+ * MPLS) and adds the state kept for every table regardless of protocol:
+ * lock count, table ID, index, flow hash configuration, route counters
+ * and a description.
+ */
+typedef struct fib_table_t_
+{
+ /**
+ * A union of the protocol specific FIBs that provide the
+ * underlying LPM mechanism.
+ * This element is first in the struct so that it is in the
+ * first cache line.
+ */
+ union {
+ ip4_fib_t v4;
+ ip6_fib_t v6;
+ mpls_fib_t mpls;
+ };
+
+ /**
+ * Which protocol this table serves. Used to switch on the union above.
+ */
+ fib_protocol_t ft_proto;
+
+ /**
+ * Number of locks on the table. Incremented by fib_table_lock() and
+ * decremented by fib_table_unlock().
+ */
+ u16 ft_locks;
+
+ /**
+ * Table ID (hash key) for this FIB.
+ */
+ u32 ft_table_id;
+
+ /**
+ * Index into FIB vector.
+ */
+ fib_node_index_t ft_index;
+
+ /**
+ * Flow hash configuration
+ */
+ u32 ft_flow_hash_config;
+
+ /**
+ * Per-source route counters, indexed by fib_source_t.
+ */
+ u32 ft_src_route_counts[FIB_SOURCE_MAX];
+
+ /**
+ * Total route counters
+ */
+ u32 ft_total_route_counts;
+
+ /**
+ * Table description; a u8 vector, see format_fib_table_name().
+ */
+ u8* ft_desc;
+} fib_table_t;
+
+/**
+ * @brief
+ * Format the description/name of the table
+ */
+extern u8* format_fib_table_name(u8* s, va_list ap);
+
+/**
+ * @brief
+ * Perform a longest prefix match in the non-forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the fib_entry_t for the best match, which may be the default route
+ */
+extern fib_node_index_t fib_table_lookup(u32 fib_index,
+ const fib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Perform an exact match in the non-forwarding table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the fib_entry_t for the exact match, or INVALID
+ * if there is no match.
+ */
+extern fib_node_index_t fib_table_lookup_exact_match(u32 fib_index,
+ const fib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Get the less specific (covering) prefix
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to lookup
+ *
+ * @return
+ * The index of the less specific fib_entry_t.
+ */
+extern fib_node_index_t fib_table_get_less_specific(u32 fib_index,
+ const fib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Add a 'special' entry to the FIB that links to the adj passed
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead the client/source provides the adj to link to.
+ * This add is reference counting per-source. So n 'removes' are required
+ * for n 'adds', if the entry is no longer required.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param adj_index
+ * The adjacency to link to.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or exists already).
+ */
+extern fib_node_index_t fib_table_entry_special_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ adj_index_t adj_index);
+
+/**
+ * @brief
+ * Add a 'special' entry to the FIB that links to the DPO passed
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead the client/source provides the DPO to link to.
+ * This add is reference counting per-source. So n 'removes' are required
+ * for n 'adds', if the entry is no longer required.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param dpo
+ * The DPO to link to.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_special_dpo_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t stype,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Update a 'special' entry to the FIB that links to the DPO passed
+ * A special entry is an entry that the FIB is not expected to resolve
+ * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ * Instead the client/source provides the DPO to link to.
+ * Special entries are add/remove reference counted per-source. So n
+ * 'removes' are required for n 'adds', if the entry is no longer required.
+ * An 'update' can only be used after an 'add' and is therefore assumed to act
+ * on the reference instance of that add (an update is implemented as add/remove
+ * pair).
+ *
+ * @param fib_entry_index
+ * The index of the FIB entry to update
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param dpo
+ * The DPO to link to.
+ *
+ * @return
+ * Nothing; the function returns void.
+ */
+extern void fib_table_entry_special_dpo_update (fib_node_index_t fib_entry_index,
+ fib_source_t source,
+ fib_entry_flag_t stype,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Remove a 'special' entry from the FIB.
+ * This add is reference counting per-source. So n 'removes' are required
+ * for n 'adds', if the entry is no longer required.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix to remove
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ */
+extern void fib_table_entry_special_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Add one path to an entry (aka route) in the FIB. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param next_hop_proto
+ * The protocol of the next hop. This cannot be derived in the event that
+ * the next hop is all zeros.
+ *
+ * @param next_hop
+ * The address of the next-hop.
+ *
+ * @param next_hop_sw_if_index
+ * The index of the interface.
+ *
+ * @param next_hop_fib_index
+ * The fib index of the next-hop for recursive resolution
+ *
+ * @param next_hop_weight
+ * [un]equal cost path weight
+ *
+ * @param next_hop_label
+ * The path's out-going label. INVALID if there is none.
+ *
+ * @param pf
+ * Flags for the path
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_path_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ fib_protocol_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t next_hop_label,
+ fib_route_path_flags_t pf);
+/**
+ * @brief
+ * Add n paths to an entry (aka route) in the FIB. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param rpath
+ * A vector of paths.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *rpath);
+
+/**
+ * @brief
+ * remove one path to an entry (aka route) in the FIB. If this is the entry's
+ * last path, then the entry will be removed, unless it has other sources.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param next_hop_proto
+ * The protocol of the next hop. This cannot be derived in the event that
+ * the next hop is all zeros.
+ *
+ * @param next_hop
+ * The address of the next-hop.
+ *
+ * @param next_hop_sw_if_index
+ * The index of the interface.
+ *
+ * @param next_hop_fib_index
+ * The fib index of the next-hop for recursive resolution
+ *
+ * @param next_hop_weight
+ * [un]equal cost path weight
+ *
+ * @param pf
+ * Flags for the path
+ */
+extern void fib_table_entry_path_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_protocol_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ fib_route_path_flags_t pf);
+
+/**
+ * @brief
+ * Remove n paths to an entry (aka route) in the FIB. If this is the entry's
+ * last path, then the entry will be removed, unless it has other sources.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param paths
+ * A vector of paths.
+ */
+extern void fib_table_entry_path_remove2(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ const fib_route_path_t *paths);
+
+/**
+ * @brief
+ * Update an entry to have a new set of paths. If the entry does not
+ * exist, it will be created.
+ * The difference between an 'path-add' and an update, is that path-add is
+ * an incremental addition of paths, whereas an update is a wholesale swap.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param paths
+ * A vector of paths.
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_update(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ const fib_route_path_t *paths);
+
+/**
+ * @brief
+ * Update the entry to have just one path. If the entry does not
+ * exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to add
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ *
+ * @param flags
+ * Flags for the entry.
+ *
+ * @param next_hop_proto
+ * The protocol of the next hop. This cannot be derived in the event that
+ * the next hop is all zeros.
+ *
+ * @param next_hop
+ * The address of the next-hop.
+ *
+ * @param next_hop_sw_if_index
+ * The index of the interface.
+ *
+ * @param next_hop_fib_index
+ * The fib index of the next-hop for recursive resolution
+ *
+ * @param next_hop_weight
+ * [un]equal cost path weight
+ *
+ * @param next_hop_label
+ * The path's out-going label. INVALID if there is none.
+ *
+ * @param pf
+ * Flags for the path
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source,
+ fib_entry_flag_t flags,
+ fib_protocol_t next_hop_proto,
+ const ip46_address_t *next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t next_hop_label,
+ fib_route_path_flags_t pf);
+
+/**
+ * @brief
+ * Add a MPLS local label for the prefix/route. If the entry does not
+ * exist, it will be created. In theory more than one local label can be
+ * added, but this is not yet supported.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to which to add the label
+ *
+ * @param label
+ * The MPLS label to add
+ *
+ * @return
+ * the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t fib_table_entry_local_label_add(u32 fib_index,
+ const fib_prefix_t *prefix,
+ mpls_label_t label);
+/**
+ * @brief
+ * remove a MPLS local label for the prefix/route.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry from which to remove the label
+ *
+ * @param label
+ * The MPLS label to remove
+ */
+extern void fib_table_entry_local_label_remove(u32 fib_index,
+ const fib_prefix_t *prefix,
+ mpls_label_t label);
+
+/**
+ * @brief
+ * Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param prefix
+ * The prefix for the entry to remove
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ */
+extern void fib_table_entry_delete(u32 fib_index,
+ const fib_prefix_t *prefix,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param entry_index
+ * The index of the FIB entry
+ *
+ * @param source
+ * The ID of the client/source adding the entry.
+ */
+extern void fib_table_entry_delete_index(fib_node_index_t entry_index,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Flush all entries from a table for the source
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the entries in the table
+ *
+ * @param source
+ * the source to flush
+ */
+extern void fib_table_flush(u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Get the index of the FIB bound to the interface
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param sw_if_index
+ * The interface index
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Get the Table-ID of the FIB bound to the interface
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param sw_if_index
+ * The interface index
+ *
+ * @return table_id
+ * The Table-ID of the FIB
+ */
+extern u32 fib_table_get_table_id_for_sw_if_index(fib_protocol_t proto,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES NOT create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @return fib_index
+ * The index of the FIB, which may be INVALID.
+ */
+extern u32 fib_table_find(fib_protocol_t proto, u32 table_id);
+
+
+/**
+ * @brief
+ * Get the index of the FIB for a Table-ID. This DOES create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table_id
+ * The Table-ID
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto,
+ u32 table_id);
+
+/**
+ * @brief
+ * Create a new table with no table ID. This means it does not get
+ * added to the hash-table and so can only be found by using the index returned.
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param fmt
+ * A string to describe the table
+ *
+ * @return fib_index
+ * The index of the FIB
+ */
+extern u32 fib_table_create_and_lock(fib_protocol_t proto,
+ const char *const fmt,
+ ...);
+
+/**
+ * @brief
+ * Get the flow hash configuration used by the table
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @return The flow hash config
+ */
+extern flow_hash_config_t fib_table_get_flow_hash_config(u32 fib_index,
+ fib_protocol_t proto);
+
+/**
+ * @brief
+ * Release a reference counting lock on the table. When the last lock
+ * has gone, the FIB is deleted.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+extern void fib_table_unlock(u32 fib_index,
+ fib_protocol_t proto);
+
+/**
+ * @brief
+ * Take a reference counting lock on the table.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ */
+extern void fib_table_lock(u32 fib_index,
+ fib_protocol_t proto);
+
+/**
+ * @brief
+ * Return the number of entries in the FIB added by a given source.
+ *
+ * @param fib_index
+ * The index of the FIB
+ *
+ * @param proto
+ * The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ * The source whose entries are to be counted
+ *
+ * @return number of sourced entries.
+ */
+extern u32 fib_table_get_num_entries(u32 fib_index,
+ fib_protocol_t proto,
+ fib_source_t source);
+
+/**
+ * @brief
+ * Get a pointer to a FIB table
+ */
+extern fib_table_t *fib_table_get(fib_node_index_t index,
+ fib_protocol_t proto);
+
+#endif
diff --git a/vnet/vnet/fib/fib_test.c b/vnet/vnet/fib/fib_test.c
new file mode 100644
index 00000000000..898005e57fb
--- /dev/null
+++ b/vnet/vnet/fib/fib_test.c
@@ -0,0 +1,6330 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+
+#include <vnet/mpls/mpls.h>
+
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_node_list.h>
+
+/*
+ * Evaluate a test condition exactly once and report the outcome.
+ *
+ * Prints "PASS:<line>: <comment>" or "FAIL:<line>: <comment>" to stderr
+ * via fformat, where <line> is __LINE__ at the point of use and _comment
+ * is a format string consuming the optional _args.
+ *
+ * The macro is a statement expression whose value is the evaluated
+ * condition (non-zero on pass), so it can be used inside an 'if'.
+ */
+#define FIB_TEST_I(_cond, _comment, _args...) \
+({ \
+ int _evald = (_cond); \
+ if (!(_evald)) { \
+ fformat(stderr, "FAIL:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } else { \
+ fformat(stderr, "PASS:%d: " _comment "\n", \
+ __LINE__, ##_args); \
+ } \
+ _evald; \
+})
+/*
+ * Run FIB_TEST_I and, if the condition failed, return from the enclosing
+ * function. The expansion contains a bare 'return;', so it is meant for
+ * use inside functions returning void.
+ *
+ * NOTE(review): the ASSERT placed after the 'return;' can never execute;
+ * a failure is therefore only reported on stderr and never aborts.
+ * NOTE(review): the macro expands to a plain brace block rather than a
+ * do { } while (0), so 'FIB_TEST(...);' directly before an 'else' would
+ * not parse as intended.
+ */
+#define FIB_TEST(_cond, _comment, _args...) \
+{ \
+ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \
+ return;\
+ ASSERT(!("FAIL: " _comment)); \
+ } \
+}
+
+/**
+ * An 'I'm not fussed if this is not efficient' store of test data
+ */
+typedef struct test_main_t_ {
+ /**
+ * HW interface indices, one per test interface
+ */
+ u32 hw_if_indicies[4];
+ /**
+ * HW interfaces, one per test interface
+ */
+ vnet_hw_interface_t * hw[4];
+
+} test_main_t;
+/* The single, file-local instance of the test data store */
+static test_main_t test_main;
+
+/*
+ * Device-name formatter for the fake ethernet device class used by these
+ * tests. It reads the device instance from the argument list and appends
+ * "test-eth<instance>" to s, a name distinct from "fake-ethX".
+ */
+static u8 *
+format_test_interface_name (u8 * s, va_list * args)
+{
+    u32 instance;
+
+    instance = va_arg (*args, u32);
+    s = format (s, "test-eth%d", instance);
+
+    return (s);
+}
+
+/*
+ * TX function of the test device class. The tests are not expected to
+ * transmit, so reaching here is flagged with a warning; every vector in
+ * the frame is then claimed as handled.
+ */
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame)
+{
+    uword n_claimed;
+
+    clib_warning ("you shouldn't be here, leaking buffers...");
+    n_claimed = frame->n_vectors;
+
+    return (n_claimed);
+}
+
+/*
+ * The file-local device class for the test interfaces: names are produced
+ * by format_test_interface_name and TX is handled by dummy_interface_tx.
+ */
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+};
+
+/*
+ * 6 byte MAC address template shared by all test interfaces. The last
+ * byte is overwritten with the interface instance before registration.
+ */
+static u8 *hw_address;
+
+/**
+ * Create the test interfaces.
+ *
+ * For each of the ninterfaces instances: register an ethernet interface,
+ * record its HW interface index and HW interface in test_main, bind its
+ * sw_if_index to FIB index 0 for both IPv4 and IPv6, and set it admin up.
+ *
+ * Each step is checked with FIB_TEST, so the function returns early on
+ * the first failure.
+ *
+ * @param ninterfaces
+ * The number of interfaces to create; must not exceed the capacity of
+ * test_main.hw_if_indicies.
+ */
+static void
+fib_test_mk_intf (u32 ninterfaces)
+{
+    clib_error_t * error = NULL;
+    test_main_t *tm = &test_main;
+    u32 i;
+
+    ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies));
+
+    /*
+     * (re)build the address template in place. Sizing the vector once and
+     * writing by index keeps it at 6 bytes however many times this function
+     * is called, rather than appending another 6 bytes on each call.
+     */
+    vec_validate(hw_address, 5);
+    for (i=0; i<6; i++)
+    {
+        hw_address[i] = 0xd0+i;
+    }
+
+    for (i = 0; i < ninterfaces; i++)
+    {
+        /* make the address unique per interface */
+        hw_address[5] = i;
+
+        error = ethernet_register_interface(vnet_get_main(),
+                                            ethernet_hw_interface_class.index,
+                                            i /* instance */,
+                                            hw_address,
+                                            &tm->hw_if_indicies[i],
+                                            /* flag change */ 0);
+
+        FIB_TEST((NULL == error), "ADD interface %d", i);
+
+        tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+                                          tm->hw_if_indicies[i]);
+        vec_validate (ip4_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index);
+        vec_validate (ip6_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index);
+        ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+        ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+        error = vnet_sw_interface_set_flags(vnet_get_main(),
+                                            tm->hw[i]->sw_if_index,
+                                            VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+        FIB_TEST((NULL == error), "UP interface %d", i);
+    }
+    /*
+     * re-eval after the inevitable realloc
+     */
+    for (i = 0; i < ninterfaces; i++)
+    {
+        tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+                                          tm->hw_if_indicies[i]);
+    }
+}
+
+/*
+ * Check that the recursive prefix _rec_prefix forwards via _via_prefix:
+ * bucket 0 of the load-balance contributed by the exact-match entry for
+ * _rec_prefix must compare equal (dpo_cmp) to the IP forwarding DPO
+ * contributed by the entry found by a lookup of _via_prefix.
+ *
+ * Both arguments are pointers to fib_prefix_t. The macro uses a variable
+ * named 'fib_index' from the calling scope and, being built on FIB_TEST,
+ * returns from the enclosing function on failure.
+ */
+#define FIB_TEST_REC_FORW(_rec_prefix, _via_prefix) \
+{ \
+ const dpo_id_t *_rec_dpo = fib_entry_contribute_ip_forwarding( \
+ fib_table_lookup_exact_match(fib_index, (_rec_prefix))); \
+ const dpo_id_t *_via_dpo = fib_entry_contribute_ip_forwarding( \
+ fib_table_lookup(fib_index, (_via_prefix))); \
+ FIB_TEST(!dpo_cmp(_via_dpo, \
+ load_balance_get_bucket(_rec_dpo->dpoi_index, 0)), \
+ "%U is recursive via %U", \
+ format_fib_prefix, (_rec_prefix), \
+ format_fib_prefix, _via_prefix); \
+}
+
+/*
+ * Check that bucket _bucket of the load-balance contributed by the
+ * exact-match entry for _prefix is of type DPO_ADJACENCY and that its
+ * index equals the adjacency index _ai.
+ *
+ * _prefix is a pointer to fib_prefix_t. The macro uses a variable named
+ * 'fib_index' from the calling scope and, being built on FIB_TEST,
+ * returns from the enclosing function on the first failed check.
+ *
+ * NOTE(review): _bucket and _ai are expanded without parentheses, so pass
+ * simple expressions only.
+ */
+#define FIB_TEST_LB_BUCKET_VIA_ADJ(_prefix, _bucket, _ai) \
+{ \
+ const dpo_id_t *_dpo = fib_entry_contribute_ip_forwarding( \
+ fib_table_lookup_exact_match(fib_index, (_prefix))); \
+ const dpo_id_t *_dpo1 = \
+ load_balance_get_bucket(_dpo->dpoi_index, _bucket); \
+ FIB_TEST(DPO_ADJACENCY == _dpo1->dpoi_type, "type is %U", \
+ format_dpo_type, _dpo1->dpoi_type); \
+ FIB_TEST((_ai == _dpo1->dpoi_index), \
+ "%U bucket %d resolves via %U", \
+ format_fib_prefix, (_prefix), \
+ _bucket, \
+ format_dpo_id, _dpo1, 0); \
+}
+
+static void
+fib_test_v4 (void)
+{
+ /*
+ * In the default table check for the presence and correct forwarding
+ * of the special entries
+ */
+ fib_node_index_t dfrt, fei, ai, ai2, locked_ai, ai_01, ai_02, ai_03;
+ const dpo_id_t *dpo, *dpo1, *dpo2, *dpo_drop;
+ const ip_adjacency_t *adj;
+ const load_balance_t *lb;
+ test_main_t *tm;
+ u32 fib_index;
+ int ii;
+
+ /* via 10.10.10.1 */
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ };
+ /* via 10.10.10.2 */
+ ip46_address_t nh_10_10_10_2 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ };
+
+ tm = &test_main;
+
+ /* Find or create FIB table 11 */
+ fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11);
+
+ for (ii = 0; ii < 4; ii++)
+ {
+ ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
+ }
+
+ fib_prefix_t pfx_0_0_0_0_s_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ {0}
+ },
+ },
+ };
+
+ fib_prefix_t pfx = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ {0}
+ },
+ },
+ };
+
+ dpo_drop = drop_dpo_get(DPO_PROTO_IP4);
+
+ dfrt = fib_table_lookup(fib_index, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)),
+ "Default route is DROP");
+
+ pfx.fp_len = 32;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all zeros route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "all 0s route is DROP");
+
+ pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xffffffff);
+ pfx.fp_len = 32;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all ones route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "all 1s route is DROP");
+
+ pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xe0000000);
+ pfx.fp_len = 8;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all-mcast route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "all-mcast route is DROP");
+
+ pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xf0000000);
+ pfx.fp_len = 8;
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "class-e route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "class-e route is DROP");
+
+ /*
+ * at this stage there are 5 entries in the test FIB (plus 5 in the default),
+ * all of which are special sourced and so none of which share path-lists.
+ * There are also 6 entries, and 6 non-shared path-lists, in the v6 default
+ * table
+ */
+#define NBR (5+5+6)
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add interface routes.
+ * validate presence of /24 attached and /32 receive.
+ * test for the presence of the receive address in the glean and local adj
+ */
+ fib_prefix_t local_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ },
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1, // weight
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present");
+ FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) ==
+ fib_entry_get_flags(fei)),
+ "Flags set on attached interface");
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &adj->sub_type.glean.receive_addr)),
+ "attached interface adj is receive ok");
+
+ local_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1, // weight
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+ FIB_TEST(((FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED) ==
+ fib_entry_get_flags(fei)),
+ "Flags set on local interface");
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+ receive_dpo_t *rd = receive_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &rd->rd_addr)),
+ "local interface adj is receive ok");
+
+ FIB_TEST((2 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_INTERFACE)),
+ "2 Interface Source'd prefixes");
+
+ /*
+ * +2 interface routes +2 non-shared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Modify the default route to be via an adj not yet known.
+ * this sources the default route with the API source, which has
+ * a higher preference than the DEFAULT_ROUTE source
+ */
+ pfx.fp_addr.ip4.as_u32 = 0;
+ pfx.fp_len = 0;
+ fib_table_entry_path_add(fib_index, &pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx);
+ FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)),
+ "Flags set on API route");
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&nh_10_10_10_1, &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ FIB_TEST((1 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_API)),
+ "1 API Source'd prefixes");
+
+ /*
+ * find the adj in the shared db
+ */
+ locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((locked_ai == ai), "ADJ NBR DB find");
+ adj_unlock(locked_ai);
+
+ /*
+ * +1 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+3 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove the API source from the default route. We expected
+ * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP
+ */
+ pfx.fp_addr.ip4.as_u32 = 0;
+ pfx.fp_len = 0;
+ fib_table_entry_path_remove(fib_index, &pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // non-recursive path, so no FIB index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx);
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "Default route is DROP");
+
+ /*
+ * -1 shared-path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 ARP entries => a complete ADJ plus adj-fib.
+ */
+ fib_prefix_t pfx_10_10_10_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.1 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ fib_prefix_t pfx_10_10_10_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.2 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ },
+ };
+ fib_prefix_t pfx_11_11_11_11_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 11.11.11.11 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0b0b0b0b),
+ },
+ };
+ u8 eth_addr[] = {
+ 0xde, 0xde, 0xde, 0xba, 0xba, 0xba,
+ };
+
+ /*
+ * Add a route via an incomplete ADJ. then complete the ADJ
+ * Expect the route LB is updated to use complete adj type.
+ */
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_11_11_11_11_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_ATTACHED,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY_INCOMPLETE == dpo1->dpoi_type,
+ "11.11.11.11/32 via incomplete adj");
+
+ ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created");
+ adj = adj_get(ai_01);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_01, eth_addr);
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type,
+ "11.11.11.11/32 via complete adj");
+
+ /*
+ * add the adj fib
+ */
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)),
+ "Flags set on adj-fib");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_11_11_11_11_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ eth_addr[5] = 0xb2;
+
+ ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created");
+ adj = adj_get(ai_02);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_02, eth_addr);
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ FIB_TEST((ai_01 != ai_02), "ADJs are different");
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_10_10_10_2_s_32);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+
+ /*
+ * +2 adj-fibs, and their non-shared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 routes via the first ADJ. Ensure path-list sharing
+ */
+ fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 1.1.1.1/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.1 resolves via 10.10.10.1");
+
+ /*
+ * +1 entry and a shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /* 1.1.2.0/24 */
+ fib_prefix_t pfx_1_1_2_0_s_24 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010200),
+ }
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1");
+
+ /*
+ * +1 entry only
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * modify 1.1.2.0/24 to use multipath.
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type);
+ FIB_TEST((ai_01 == dpo1->dpoi_index),
+ "1.1.2.0/24 bucket 0 resolves via 10.10.10.1 (%d=%d)",
+ ai_01, dpo1->dpoi_index);
+
+ dpo1 = load_balance_get_bucket(dpo->dpoi_index, 1);
+ FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type);
+ FIB_TEST((ai_02 == dpo1->dpoi_index),
+ "1.1.2.0/24 bucket 1 resolves via 10.10.10.2");
+
+ /*
+ * +1 shared-pathlist
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * revert the modify
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1");
+
+ /*
+ * -1 shared-pathlist
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is %d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 recursive routes:
+ * 100.100.100.100/32 via 1.1.1.1/32 => the via entry is installed.
+ * 100.100.100.101/32 via 1.1.1.1/32 => the via entry is installed.
+ */
+ fib_prefix_t bgp_100_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 100.100.100.100/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x64646464),
+ },
+ };
+ /* via 1.1.1.1 */
+ ip46_address_t nh_1_1_1_1 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_100_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_1_1_1_1,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_100_pfx, &pfx_1_1_1_1_s_32);
+
+ /*
+ * +1 entry and +1 shared-path-list
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ fib_prefix_t bgp_101_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 100.100.100.101/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x64646465),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_101_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_1_1_1_1,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_101_pfx, &pfx_1_1_1_1_s_32);
+
+ /*
+ * +1 entry, but the recursive path-list is shared.
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * An EXCLUSIVE route; one where the user (me) provides the exclusive
+ * adjacency through which the route will resolve
+ */
+ fib_prefix_t ex_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 4.4.4.4/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x04040404),
+ },
+ };
+
+ fib_table_entry_special_add(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ locked_ai);
+ fei = fib_table_lookup_exact_match(fib_index, &ex_pfx);
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Exclusive route links to user adj");
+
+ fib_table_entry_special_remove(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &ex_pfx),
+ "Exclusive reoute removed");
+
+ /*
+ * An EXCLUSIVE route; one where the user (me) provides the exclusive
+ * DPO (here a lookup DPO) through which the route will resolve
+ */
+ dpo_id_t ex_dpo = DPO_NULL;
+
+ lookup_dpo_add_or_lock_w_fib_index(fib_index,
+ DPO_PROTO_IP4,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ &ex_dpo);
+
+ fib_table_entry_special_dpo_add(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &ex_dpo);
+ fei = fib_table_lookup_exact_match(fib_index, &ex_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(&ex_dpo, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "exclusive remote uses lookup DPO");
+
+ fib_table_entry_special_remove(fib_index,
+ &ex_pfx,
+ FIB_SOURCE_SPECIAL);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &ex_pfx),
+ "Exclusive reoute removed");
+ dpo_reset(&ex_dpo);
+
+ /*
+ * Add a recursive route:
+ * 200.200.200.200/32 via 1.1.1.2/32 => the via entry is NOT installed.
+ */
+ fib_prefix_t bgp_200_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 200.200.200.200/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c8),
+ },
+ };
+ /* via 1.1.1.2 */
+ fib_prefix_t pfx_1_1_1_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010102),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32);
+
+ /*
+ * the adj should be recursive via drop, since the route resolves via
+ * the default route, which is itself a DROP
+ */
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(load_balance_is_drop(dpo1), "1.1.1.2/32 is drop");
+
+ /*
+ * +2 entry and +1 shared-path-list
+ */
+ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Unequal Cost load-balance. 3:1 ratio. fits in a 4 bucket LB
+ */
+ fib_prefix_t pfx_1_2_3_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01020304),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 3,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.4/32 presnet");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 4),
+ "1.2.3.4/32 LB has %d bucket",
+ lb->lb_n_buckets);
+
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 0, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 1, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 2, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 3, ai_02);
+
+ fib_table_entry_delete(fib_index,
+ &pfx_1_2_3_4_s_32,
+ FIB_SOURCE_API);
+
+ /*
+ * Unequal Cost load-balance. 4:1 ratio.
+ * fits in a 16 bucket LB with ratio 13:3
+ */
+ fib_prefix_t pfx_1_2_3_5_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01020305),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_2_3_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 4,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.5/32 presnet");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 16),
+ "1.2.3.5/32 LB has %d bucket",
+ lb->lb_n_buckets);
+
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 0, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 1, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 2, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 3, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 4, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 5, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 6, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 7, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 8, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 9, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 10, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 11, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 12, ai_01);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 13, ai_02);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 14, ai_02);
+ FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 15, ai_02);
+
+ fib_table_entry_delete(fib_index,
+ &pfx_1_2_3_5_s_32,
+ FIB_SOURCE_API);
+
+ /*
+ * Add a recursive route:
+ * 200.200.200.201/32 via 1.1.1.200/32 => the via entry is NOT installed.
+ */
+ fib_prefix_t bgp_201_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 200.200.200.201/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c9),
+ },
+ };
+ /* via 1.1.1.200 */
+ fib_prefix_t pfx_1_1_1_200_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x010101c8),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_200_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // nexthop in same fib as route
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32);
+ FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)),
+ "Flags set on RR via non-attached");
+
+ /*
+ * +2 entry (BGP & RR) and +1 shared-path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * insert a route that covers the missing 1.1.1.2/32. we expect
+ * 200.200.200.200/32 and 200.200.200.201/32 to resolve through it.
+ */
+ fib_prefix_t pfx_1_1_1_0_s_24 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 1.1.1.0/24 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010100),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_0_s_24,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.0/24 resolves via 10.10.10.1");
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.2/32 resolves via 10.10.10.1");
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "1.1.1.200/24 resolves via 10.10.10.1");
+
+ /*
+ * +1 entry. 1.1.1.1/32 already uses 10.10.10.1 so no new path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * the recursive adj for 200.200.200.200 should be updated.
+ */
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32);
+
+ /*
+ * insert a more specific route than 1.1.1.0/24 that also covers the
+ * missing 1.1.1.2/32, but not 1.1.1.200/32. we expect
+ * 200.200.200.200 to resolve through it.
+ */
+ fib_prefix_t pfx_1_1_1_0_s_28 = {
+ .fp_len = 28,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 1.1.1.0/28 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010100),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "1.1.1.0/24 resolves via 10.10.10.2");
+
+ /*
+ * +1 entry. +1 shared path-list
+ */
+ FIB_TEST((5 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+9 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+14 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * the recursive adj for 200.200.200.200 should be updated.
+ * 200.200.200.201 remains unchanged.
+ */
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32);
+
+ /*
+ * remove this /28. 200.200.200.200/32 should revert back to via 1.1.1.0/24
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28) ==
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.0/28 removed");
+ FIB_TEST((fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28) ==
+ fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24)),
+ "1.1.1.0/28 lookup via /24");
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32);
+
+ /*
+ * -1 entry. -1 shared path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove 1.1.1.0/24. 200.200.200.200/32 should revert back to via 0.0.0.0/0
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_0_s_24,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_24) ==
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.0/24 removed");
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "1.1.1.2/32 route is DROP");
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "1.1.1.200/32 route is DROP");
+
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32);
+
+ /*
+ * -1 entry
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * insert the missing 1.1.1.2/32
+ */
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai = ai_01), "1.1.1.2/32 resolves via 10.10.10.1");
+
+ FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32);
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32);
+
+ /*
+ * no change. 1.1.1.2/32 was already there RR sourced.
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove 200.200.200.201/32 which does not have a valid via FIB
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_200_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * -2 entries (BGP and RR). -1 shared path-list;
+ */
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.201/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32) ==
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.200/32 removed");
+
+ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove 200.200.200.200/32 which does have a valid via FIB
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.200/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32) !=
+ FIB_NODE_INDEX_INVALID),
+ "1.1.1.2/32 still present");
+
+ /*
+ * -1 entry (BGP, the RR source is also API sourced). -1 shared path-list;
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+9 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A recursive prefix that has a 2 path load-balance.
+ * It also shares a next-hop with other BGP prefixes and hence
+ * test the ref counting of RR sourced prefixes and 2 level LB.
+ */
+ const fib_prefix_t bgp_102 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 100.100.100.102/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x64646466),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_102);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "100.100.100.102/32 presnet");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 2), "Recursive LB has %d bucket", lb->lb_n_buckets);
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "First via 10.10.10.1");
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 1)),
+ "Second via 10.10.10.1");
+
+ fib_table_entry_path_remove(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_102,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_102);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "100.100.100.102/32 removed");
+
+ /*
+ * remove the remaining recursives
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_100_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_101_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_100_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "100.100.100.100/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_101_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "100.100.100.101/32 removed");
+
+ /*
+ * -2 entry (2*BGP, the RR source is also API sourced). -1 shared path-list;
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add a recursive route via a connected cover, using an adj-fib that does exist
+ */
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ ~0, // no index provided.
+ fib_index, // Same as route's FIB
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * +1 entry. +1 shared path-list (recursive via 10.10.10.1)
+ */
+ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "200.200.200.200/32 is recursive via adj for 10.10.10.1");
+
+ FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)),
+ "Flags set on RR via existing attached");
+
+ /*
+ * Add a recursive route via a connected cover, using an adj-fib that does
+ * not exist
+ */
+ ip46_address_t nh_10_10_10_3 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03),
+ };
+ fib_prefix_t pfx_10_10_10_3 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = nh_10_10_10_3,
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_3,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * +2 entries (BGP and RR). +1 shared path-list (recursive via 10.10.10.3) and
+ * one unshared non-recursive via 10.10.10.3
+ */
+ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai == ai_03), "adj for 10.10.10.3/32 is via adj for 10.10.10.3");
+ FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) ==
+ fib_entry_get_flags(fei)),
+ "Flags set on RR via non-existing attached");
+
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 10.10.10.3");
+
+ adj_unlock(ai_03);
+
+ /*
+ * remove the recursives
+ */
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_201_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_3,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.201/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) ==
+ FIB_NODE_INDEX_INVALID),
+ "200.200.200.200/32 removed");
+ FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3) ==
+ FIB_NODE_INDEX_INVALID),
+ "10.10.10.3/32 removed");
+
+ /*
+ * -3 entries (2*BGP and RR). -2 shared path-list (recursive via 10.10.10.3 &
+ * 10.10.10.1) and one unshared non-recursive via 10.10.10.3
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+
+ /*
+ * RECURSION LOOPS
+ * Add 5.5.5.5/32 -> 5.5.5.6/32 -> 5.5.5.7/32 -> 5.5.5.5/32
+ */
+ fib_prefix_t pfx_5_5_5_5_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x05050505),
+ },
+ };
+ fib_prefix_t pfx_5_5_5_6_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x05050506),
+ },
+ };
+ fib_prefix_t pfx_5_5_5_7_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x05050507),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_7_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_7_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_5_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ /*
+ * +3 entries, +3 shared path-list
+ */
+ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * All the entries have only looped paths, so they are all drop
+ */
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.6/32 is via adj for DROP");
+
+ /*
+ * provide 5.5.5.6/32 with alternate path.
+ * this will allow only 5.5.5.6/32 to forward with this path, the others
+ * are still drop since the loop is still present.
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo1->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.6 LB has %d bucket", lb->lb_n_buckets);
+
+ dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type);
+ FIB_TEST((ai_01 == dpo2->dpoi_index),
+ "5.5.5.6 bucket 0 resolves via 10.10.10.2");
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+
+ /*
+ * remove the alternate path for 5.5.5.6/32
+ * back to all drop
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.6/32 is via adj for DROP");
+
+ /*
+ * break the loop by giving 5.5.5.5/32 a new set of paths
+ * expect all to forward via this new path.
+ */
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ lb = load_balance_get(dpo1->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.5 LB has %d bucket", lb->lb_n_buckets);
+
+ dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0);
+ FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type);
+ FIB_TEST((ai_01 == dpo2->dpoi_index),
+ "5.5.5.5 bucket 0 resolves via 10.10.10.2");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_7_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo2->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets);
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo2->dpoi_index, 0)),
+ "5.5.5.5.7 via 5.5.5.5");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo1->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets);
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "5.5.5.5.6 via 5.5.5.7");
+
+ /*
+ * revert back to the loop. so we can remove the prefixes with
+ * the loop intact
+ */
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.7/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.5/32 is via adj for DROP");
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "LB for 5.5.5.6/32 is via adj for DROP");
+
+ /*
+ * remove all the 5.5.5.x/32 prefixes
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_5_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_7_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_7_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_5_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ /*
+ * -3 entries, -3 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Single level loop 5.5.5.6/32 via 5.5.5.6/32
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "1-level 5.5.5.6/32 loop is via adj for DROP");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_5_5_5_6_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_5_5_5_6_s_32.fp_addr,
+ ~0, // no index provided.
+ fib_index, // same as route's FIB
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32),
+ "1-level 5.5.5.6/32 loop is removed");
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A recursive route with recursion constraints.
+ * 200.200.200.200/32 via 1.1.1.1 is recurse via host constrained
+ */
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_1_1_1_1,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1");
+
+ /*
+ * save the load-balance. we expect it to be inplace modified
+ */
+ lb = load_balance_get(dpo1->dpoi_index);
+
+ /*
+ * add a covering prefix for the via fib that would otherwise serve
+ * as the resolving route when the host is removed
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai == ai_01),
+ "adj for 1.1.1.0/28 is via adj for 1.1.1.1");
+
+ /*
+ * remove the host via FIB - expect the BGP prefix to be drop
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for DROP");
+
+ /*
+ * add the via-entry host route back. expect to resolve again
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1");
+
+ /*
+ * add another path for the recursive. it will then have 2.
+ */
+ fib_prefix_t pfx_1_1_1_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010103),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_3_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_3_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32);
+ dpo2 = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32);
+ dpo1 = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 1)),
+ "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.3");
+
+ /*
+ * expect the lb-map used by the recursive's load-balance is using both buckets
+ */
+ load_balance_map_t *lbm;
+ index_t lbmi;
+
+ lb = load_balance_get(dpo->dpoi_index);
+ lbmi = lb->lb_map;
+ load_balance_map_lock(lbmi);
+ lbm = load_balance_map_get(lbmi);
+
+ FIB_TEST(lbm->lbm_buckets[0] == 0,
+ "LB maps's bucket 0 is %d",
+ lbm->lbm_buckets[0]);
+ FIB_TEST(lbm->lbm_buckets[1] == 1,
+ "LB maps's bucket 1 is %d",
+ lbm->lbm_buckets[1]);
+
+ /*
+ * withdraw one of the /32 via-entries.
+ * that ECMP path will be unresolved and forwarding should continue on the
+ * other available path. this is an iBGP PIC edge failover.
+ * Test the forwarding changes without re-fetching the adj from the
+ * recursive entry. this ensures it's the same one that is updated; i.e. an
+ * inplace-modify.
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)),
+ "post PIC 200.200.200.200/32 was inplace modified");
+
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 0)),
+ "post PIC adj for 200.200.200.200/32 is recursive"
+ " via adj for 1.1.1.3");
+
+ /*
+ * the LB map that was locked above should have been modified to remove
+ * the path that was down, and thus its bucket points to a path that is
+ * still up.
+ */
+ FIB_TEST(lbm->lbm_buckets[0] == 1,
+ "LB maps's bucket 0 is %d",
+ lbm->lbm_buckets[0]);
+ FIB_TEST(lbm->lbm_buckets[1] == 1,
+ "LB maps's bucket 1 is %d",
+ lbm->lbm_buckets[1]);
+
+ load_balance_map_unlock(lb->lb_map);
+
+ /*
+ * add it back. again
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)),
+ "post PIC recovery adj for 200.200.200.200/32 is recursive "
+ "via adj for 1.1.1.1");
+ FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 1)),
+ "post PIC recovery adj for 200.200.200.200/32 is recursive "
+ "via adj for 1.1.1.3");
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(lb == load_balance_get(dpo->dpoi_index),
+ "post PIC 200.200.200.200/32 was inplace modified");
+
+ /*
+ * add a 3rd path. this makes the LB 16 buckets.
+ */
+ fib_table_entry_path_add(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(lb == load_balance_get(dpo->dpoi_index),
+ "200.200.200.200/32 was inplace modified for 3rd path");
+ FIB_TEST(16 == lb->lb_n_buckets,
+ "200.200.200.200/32 was inplace modified for 3rd path to 16 buckets");
+
+ lbmi = lb->lb_map;
+ load_balance_map_lock(lbmi);
+ lbm = load_balance_map_get(lbmi);
+
+ for (ii = 0; ii < 16; ii++)
+ {
+ FIB_TEST(lbm->lbm_buckets[ii] == ii,
+ "LB Map for 200.200.200.200/32 at %d is %d",
+ ii, lbm->lbm_buckets[ii]);
+ }
+
+ /*
+ * trigger PIC by removing the first via-entry
+ * the first 6 buckets of the map should map to the next 6
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(lb == load_balance_get(dpo->dpoi_index),
+ "200.200.200.200/32 was inplace modified for 3rd path");
+ FIB_TEST(2 == lb->lb_n_buckets,
+ "200.200.200.200/32 was inplace modified for 3rd path remove to 2 buckets");
+
+ for (ii = 0; ii < 6; ii++)
+ {
+ FIB_TEST(lbm->lbm_buckets[ii] == ii+6,
+ "LB Map for 200.200.200.200/32 at %d is %d",
+ ii, lbm->lbm_buckets[ii]);
+ }
+ for (ii = 6; ii < 16; ii++)
+ {
+ FIB_TEST(lbm->lbm_buckets[ii] == ii,
+ "LB Map for 200.200.200.200/32 at %d is %d",
+ ii, lbm->lbm_buckets[ii]);
+ }
+
+
+ /*
+ * tidy up
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_1_1_1_1,
+ ~0,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ fib_table_entry_path_remove(fib_index,
+ &bgp_200_pfx,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_3_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_3_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_0_s_28,
+ FIB_SOURCE_API);
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)),
+ "1.1.1.1/28 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32)),
+ "1.1.1.3/32 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &bgp_200_pfx)),
+ "200.200.200.200/32 removed");
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A route whose paths are built up iteratively and then removed
+ * all at once
+ */
+ fib_prefix_t pfx_4_4_4_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 4.4.4.4/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x04040404),
+ },
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(FIB_NODE_INDEX_INVALID !=
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 present");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 removed");
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A route with multiple paths at once
+ */
+ fib_route_path_t *r_paths = NULL;
+
+ for (ii = 0; ii < 4; ii++)
+ {
+ fib_route_path_t r_path = {
+ .frp_proto = FIB_PROTOCOL_IP4,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
+ },
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_weight = 1,
+ .frp_fib_index = ~0,
+ };
+ vec_add1(r_paths, r_path);
+ }
+
+ fib_table_entry_update(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ r_paths);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets);
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 removed");
+ vec_free(r_paths);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * A deag route
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &zero_addr,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ lookup_dpo_t *lkd = lookup_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST((fib_index == lkd->lkd_fib_index),
+ "4.4.4.4/32 is deag in %d %U",
+ lkd->lkd_fib_index,
+ format_dpo_id, dpo, 0);
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_4_4_4_s_32,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
+ "4.4.4.4/32 removed");
+ vec_free(r_paths);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * CLEANUP
+ * remove: 1.1.1.2/32, 1.1.2.0/24 and 1.1.1.1/32
+ * all of which are via 10.10.10.1, Itf1
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_2_0_s_24,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32),
+ "1.1.1.1/32 removed");
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32),
+ "1.1.1.2/32 removed");
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_1_1_2_0_s_24),
+ "1.1.2.0/24 removed");
+
+ /*
+ * -3 entries and -1 shared path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * An attached-host route. Expect to link to the incomplete adj
+ */
+ fib_prefix_t pfx_4_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 4.1.1.1/32 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x04010101),
+ },
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_4_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &zero_addr,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_4_1_1_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.1.1.1/32 present");
+ ai = fib_entry_get_adj(fei);
+
+ ai2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &pfx_4_1_1_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((ai == ai2), "Attached-host link to incomplete ADJ");
+ adj_unlock(ai2);
+
+ /*
+ * +1 entry and +1 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ fib_table_entry_delete(fib_index,
+ &pfx_4_1_1_1_s_32,
+ FIB_SOURCE_API);
+
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add a v6 prefix via v4 next-hops
+ */
+ fib_prefix_t pfx_2001_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000),
+ },
+ };
+ fei = fib_table_entry_path_add(0, //default v6 table
+ &pfx_2001_s_64,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(0, &pfx_2001_s_64);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "2001::/64 present");
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP),
+ "2001::/64 via ARP-adj");
+ FIB_TEST((adj->ia_link == FIB_LINK_IP6),
+ "2001::/64 is link type v6");
+ FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP4),
+ "2001::/64 ADJ-adj is NH proto v4");
+ fib_table_entry_delete(0, &pfx_2001_s_64, FIB_SOURCE_API);
+
+
+ /*
+ * CLEANUP
+ * remove adj-fibs:
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ);
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32),
+ "10.10.10.1/32 adj-fib removed");
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32),
+ "10.10.10.2/32 adj-fib removed");
+
+ /*
+ * -2 entries and -2 non-shared path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * unlock the 2 adjacencies for which this test provided a rewrite.
+ * These are the last locks on these adjs. they should thus go away.
+ */
+ adj_unlock(ai_02);
+ adj_unlock(ai_01);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /*
+ * CLEANUP
+ * remove the interface prefixes
+ */
+ local_pfx.fp_len = 32;
+ fib_table_entry_special_remove(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx),
+ "10.10.10.10/32 adj-fib removed");
+
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx),
+ "10.10.10.10/24 adj-fib removed");
+
+ /*
+ * -2 entries and -2 non-shared path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Last but not least, remove the VRF
+ */
+ FIB_TEST((0 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_API)),
+ "NO API Source'd prefixes");
+ FIB_TEST((0 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_RR)),
+ "NO RR Source'd prefixes");
+ FIB_TEST((0 == fib_table_get_num_entries(fib_index,
+ FIB_PROTOCOL_IP4,
+ FIB_SOURCE_INTERFACE)),
+ "NO INterface Source'd prefixes");
+
+ fib_table_unlock(fib_index, FIB_PROTOCOL_IP4);
+
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NBR-5 == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NBR-5 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ return;
+}
+
+static void
+fib_test_v6 (void)
+{
+ /*
+ * In the default table check for the presence and correct forwarding
+ * of the special entries
+ */
+ fib_node_index_t dfrt, fei, ai, locked_ai, ai_01, ai_02;
+ const dpo_id_t *dpo, *dpo_drop;
+ const ip_adjacency_t *adj;
+ const receive_dpo_t *rd;
+ test_main_t *tm;
+ u32 fib_index;
+ int ii;
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /* via 2001:0:0:1::2 */
+ ip46_address_t nh_2001_2 = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000002),
+ },
+ },
+ };
+
+ tm = &test_main;
+
+ dpo_drop = drop_dpo_get(DPO_PROTO_IP6);
+
+ /* Find or create FIB table 11 */
+ fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11);
+
+ for (ii = 0; ii < 4; ii++)
+ {
+ ip6_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
+ }
+
+ fib_prefix_t pfx_0_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ {0, 0},
+ },
+ },
+ };
+
+ dfrt = fib_table_lookup(fib_index, &pfx_0_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)),
+ "Default route is DROP");
+
+ dpo = fib_entry_contribute_ip_forwarding(dfrt);
+ FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup(
+ &ip6_main,
+ 1,
+ &pfx_0_0.fp_addr.ip6)),
+ "default-route; fwd and non-fwd tables match");
+
+ // FIXME - check specials.
+
+ /*
+ * At this stage there is one v4 FIB with 5 routes and two v6 FIBs
+ * each with 6 entries. All entries are special so no path-list sharing.
+ */
+#define NPS (5+6+6)
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is %d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add interface routes.
+ * validate presence of /64 attached and /128 receive.
+ * test for the presence of the receive address in the glean and local adj
+ *
+ * receive on 2001:0:0:1::1/128
+ */
+ fib_prefix_t local_pfx = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ },
+ }
+ };
+
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present");
+
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &adj->sub_type.glean.receive_addr)),
+ "attached interface adj is receive ok");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup(
+ &ip6_main,
+ 1,
+ &local_pfx.fp_addr.ip6)),
+ "attached-route; fwd and non-fwd tables match");
+
+ local_pfx.fp_len = 128;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &local_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+ rd = receive_dpo_get(dpo->dpoi_index);
+
+ FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr,
+ &rd->rd_addr)),
+ "local interface adj is receive ok");
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup(
+ &ip6_main,
+ 1,
+ &local_pfx.fp_addr.ip6)),
+ "local-route; fwd and non-fwd tables match");
+
+ /*
+ * +2 entries. +2 unshared path-lists
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty");
+ FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Modify the default route to be via an adj not yet known.
+ * this sources the default route with the API source, which is
+ * a higher preference to the DEFAULT_ROUTE source
+ */
+ fib_table_entry_path_add(fib_index, &pfx_0_0,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_0_0);
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present");
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&nh_2001_2, &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ /*
+ * find the adj in the shared db
+ */
+ locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((locked_ai == ai), "ADJ NBR DB find");
+ adj_unlock(locked_ai);
+
+ /*
+ * no more entries. +1 shared path-list
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+3 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * remove the API source from the default route. We expected
+ * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP
+ */
+ fib_table_entry_path_remove(fib_index, &pfx_0_0,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_0_0);
+
+ FIB_TEST((fei == dfrt), "default route same index");
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)),
+ "Default route is DROP");
+
+ /*
+ * no more entries. -1 shared path-list
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 ARP entries => 2 complete ADJs plus adj-fibs.
+ */
+ fib_prefix_t pfx_2001_1_2_s_128 = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000002),
+ },
+ },
+ }
+ };
+ fib_prefix_t pfx_2001_1_3_s_128 = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000001),
+ [1] = clib_host_to_net_u64(0x0000000000000003),
+ },
+ },
+ }
+ };
+ u8 eth_addr[] = {
+ 0xde, 0xde, 0xde, 0xba, 0xba, 0xba,
+ };
+
+ ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ &pfx_2001_1_2_s_128.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created");
+ adj = adj_get(ai_01);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_01, eth_addr);
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2001_1_2_s_128,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &pfx_2001_1_2_s_128.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2001_1_2_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+
+ eth_addr[5] = 0xb2;
+
+ ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ &pfx_2001_1_3_s_128.fp_addr,
+ tm->hw[0]->sw_if_index);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created");
+ adj = adj_get(ai_02);
+ FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index),
+ "adj is incomplete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+
+ adj_nbr_update_rewrite(ai_02, eth_addr);
+ FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
+ "adj is complete");
+ FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr,
+ &adj->sub_type.nbr.next_hop)),
+ "adj nbr next-hop ok");
+ FIB_TEST((ai_01 != ai_02), "ADJs are different");
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2001_1_3_s_128,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &pfx_2001_1_3_s_128.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2001_1_3_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+
+ /*
+ * +2 entries, +2 unshared path-lists.
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+4 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+4 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add 2 routes via the first ADJ. ensure path-list sharing
+ */
+ fib_prefix_t pfx_2001_a_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x200100000000000a),
+ [1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ },
+ }
+ };
+ fib_prefix_t pfx_2001_b_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x200100000000000b),
+ [1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ },
+ }
+ };
+
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_a_s_64,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_2001_a_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1");
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_b_s_64,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_2001_b_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1");
+
+ /*
+ * +2 entries, +1 shared path-list.
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * add a v4 prefix via a v6 next-hop
+ */
+ fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = 0x01010101,
+ },
+ };
+ fei = fib_table_entry_path_add(0, // default table
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &nh_2001_2,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ FIB_TEST(fei == fib_table_lookup_exact_match(0, &pfx_1_1_1_1_s_32),
+ "1.1.1.1/32 o v6 route present");
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP),
+ "1.1.1.1/32 via ARP-adj");
+ FIB_TEST((adj->ia_link == FIB_LINK_IP4),
+ "1.1.1.1/32 ADJ-adj is link type v4");
+ FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP6),
+ "1.1.1.1/32 ADJ-adj is NH proto v6");
+ fib_table_entry_delete(0, &pfx_1_1_1_1_s_32, FIB_SOURCE_API);
+
+ /*
+ * An attached route
+ */
+ fib_prefix_t pfx_2001_c_s_64 = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x200100000000000c),
+ [1] = clib_host_to_net_u64(0x0000000000000000),
+ },
+ },
+ }
+ };
+ fib_table_entry_path_add(fib_index,
+ &pfx_2001_c_s_64,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_ATTACHED,
+ FIB_PROTOCOL_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached route present");
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_GLEAN),
+ "2001:0:0:c/64 attached resolves via glean");
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_2001_c_s_64,
+ FIB_SOURCE_CLI,
+ FIB_PROTOCOL_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached route removed");
+
+ /*
+ * Shutdown the interface on which we have a connected and through
+ * which the routes are reachable.
+ * This will result in the connected, adj-fibs, and routes linking to drop
+ * The local/for-us prefix continues to receive.
+ */
+ clib_error_t * error;
+
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ ~VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "Interface shutdown OK");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001::b/64 resolves via drop");
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001::a/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::3/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::2/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1::1/128 not drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "2001:0:0:1/64 resolves via drop");
+
+ /*
+ * no change
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * shutdown one of the other interfaces, then add a connected.
+ * and swap one of the routes to it.
+ */
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[1]->sw_if_index,
+ ~VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "Interface 1 shutdown OK");
+
+ fib_prefix_t connected_pfx = {
+ .fp_len = 64,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = {
+ /* 2001:0:0:2::1/64 */
+ .as_u64 = {
+ [0] = clib_host_to_net_u64(0x2001000000000002),
+ [1] = clib_host_to_net_u64(0x0000000000000001),
+ },
+ },
+ }
+ };
+ fib_table_entry_update_one_path(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP6,
+ NULL,
+ tm->hw[1]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &connected_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST(!dpo_cmp(dpo, dpo_drop),
+ "2001:0:0:2/64 not resolves via drop");
+
+ connected_pfx.fp_len = 128;
+ fib_table_entry_update_one_path(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP6,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &connected_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ dpo = load_balance_get_bucket(dpo->dpoi_index, 0);
+ FIB_TEST((DPO_RECEIVE == dpo->dpoi_type),
+ "local interface adj is local");
+ rd = receive_dpo_get(dpo->dpoi_index);
+ FIB_TEST((0 == ip46_address_cmp(&connected_pfx.fp_addr,
+ &rd->rd_addr)),
+ "local interface adj is receive ok");
+
+ /*
+ * +2 entries, +2 unshared path-lists
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+
+ /*
+ * bring the interface back up. we expected the routes to return
+ * to normal forwarding.
+ */
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ FIB_TEST((NULL == error), "Interface bring-up OK");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ ai = fib_entry_get_adj(fei);
+ FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ ai = fib_entry_get_adj(fei);
+ adj = adj_get(ai);
+ FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index),
+ "attached interface adj is glean");
+
+ /*
+ * Delete the interface that the routes resolve through.
+ * Again no routes are removed. They all point to drop.
+ *
+ * This is considered an error case. The control plane should
+ * not remove interfaces through which routes resolve, but
+ * such things can happen. ALL affected routes will drop.
+ */
+ vnet_delete_hw_interface(vnet_get_main(), tm->hw_if_indicies[0]);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::3/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::2/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::1/128 is drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1/64 resolves via drop");
+
+ /*
+ * no change
+ */
+ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * Add the interface back. routes stay unresolved.
+ */
+ error = ethernet_register_interface(vnet_get_main(),
+ test_interface_device_class.index,
+ 0 /* instance */,
+ hw_address,
+ &tm->hw_if_indicies[0],
+ /* flag change */ 0);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001::b/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::3/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::2/64 resolves via drop");
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1::1/128 is drop");
+ local_pfx.fp_len = 64;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+ "2001:0:0:1/64 resolves via drop");
+
+ /*
+ * CLEANUP ALL the routes
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_c_s_64,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_a_s_64,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_b_s_64,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_1_3_s_128,
+ FIB_SOURCE_ADJ);
+ fib_table_entry_delete(fib_index,
+ &pfx_2001_1_2_s_128,
+ FIB_SOURCE_ADJ);
+ local_pfx.fp_len = 64;
+ fib_table_entry_delete(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ local_pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ connected_pfx.fp_len = 64;
+ fib_table_entry_delete(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE);
+ connected_pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index, &connected_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64)),
+ "2001::a/64 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64)),
+ "2001::b/64 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128)),
+ "2001:0:0:1::3/128 removed");
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128)),
+ "2001:0:0:1::3/128 removed");
+ local_pfx.fp_len = 64;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx)),
+ "2001:0:0:1/64 removed");
+ local_pfx.fp_len = 128;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &local_pfx)),
+ "2001:0:0:1::1/128 removed");
+ connected_pfx.fp_len = 64;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &connected_pfx)),
+ "2001:0:0:2/64 removed");
+ connected_pfx.fp_len = 128;
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, &connected_pfx)),
+ "2001:0:0:2::1/128 removed");
+
+ /*
+ * -8 entries. -7 path-lists (1 was shared).
+ */
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ /*
+ * now remove the VRF
+ */
+ fib_table_unlock(fib_index, FIB_PROTOCOL_IP6);
+
+ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
+ fib_path_list_db_size());
+ FIB_TEST((NPS-6 == fib_path_list_pool_size()), "path list pool size is%d",
+ fib_path_list_pool_size());
+ FIB_TEST((NPS-6 == fib_entry_pool_size()), "entry pool size is %d",
+ fib_entry_pool_size());
+
+ adj_unlock(ai_02);
+ adj_unlock(ai_01);
+
+ /*
+ * return the interfaces to up state
+ */
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ error = vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[1]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+}
+
+/*
+ * Test the recursive route handling for GRE tunnels.
+ *
+ * NOTE: every statement in the body of this function is commented out,
+ * so calling it currently performs no checks and has no side effects.
+ * The disabled code is kept as a record of the intended test sequence:
+ *  - add connected and local interface routes on two interfaces,
+ *  - add the route (1.1.1.0/24) used to resolve the tunnel destination,
+ *  - create an MPLS encap and an MPLS-GRE tunnel (and try a duplicate),
+ *  - check the tunnel's midchain adj is stacked on the route's adj and
+ *    re-stacks when the route gains a second path or loses all paths,
+ *  - complete the neighbour adjs and check the multipath adj follows,
+ *  - remove the tunnel, the encap and the interface routes.
+ *
+ * NOTE(review): the reason the test is disabled is not visible in this
+ * file; confirm whether the APIs used in the disabled code are still
+ * available before re-enabling it.
+ */
+static void
+fib_test_gre (void)
+{
+ /* fib_node_index_t fei; */
+ /* u32 fib_index = 0; */
+ /* test_main_t *tm; */
+ /* u32 ii; */
+
+ /* tm = &test_main; */
+
+ /* for (ii = 0; ii < 4; ii++) */
+ /* { */
+ /* ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = 0; */
+ /* } */
+
+ /* /\* */
+ /* * add interface routes. We'll assume this works. It's more rigorously */
+ /* * tested elsewhere. */
+ /* *\/ */
+ /* fib_prefix_t local_pfx = { */
+ /* .fp_len = 24, */
+ /* .fp_proto = FIB_PROTOCOL_IP4, */
+ /* .fp_addr = { */
+ /* .ip4 = { */
+ /* /\* 10.10.10.10 *\/ */
+ /* .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), */
+ /* }, */
+ /* }, */
+ /* }; */
+
+ /* fib_table_entry_update_one_path(fib_index, &local_pfx, */
+ /* FIB_SOURCE_INTERFACE, */
+ /* (FIB_ENTRY_FLAG_CONNECTED | */
+ /* FIB_ENTRY_FLAG_ATTACHED), */
+ /* NULL, */
+ /* tm->hw[0]->sw_if_index, */
+ /* ~0, */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */
+ /* "attached interface route present"); */
+
+ /* local_pfx.fp_len = 32; */
+ /* fib_table_entry_update_one_path(fib_index, &local_pfx, */
+ /* FIB_SOURCE_INTERFACE, */
+ /* (FIB_ENTRY_FLAG_CONNECTED | */
+ /* FIB_ENTRY_FLAG_LOCAL), */
+ /* NULL, */
+ /* tm->hw[0]->sw_if_index, */
+ /* ~0, // invalid fib index */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */
+
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */
+ /* "local interface route present"); */
+
+ /* fib_prefix_t local2_pfx = { */
+ /* .fp_len = 24, */
+ /* .fp_proto = FIB_PROTOCOL_IP4, */
+ /* .fp_addr = { */
+ /* .ip4 = { */
+ /* /\* 10.10.11.11 *\/ */
+ /* .as_u32 = clib_host_to_net_u32(0x0a0a0b0b), */
+ /* }, */
+ /* }, */
+ /* }; */
+
+ /* fib_table_entry_update_one_path(fib_index, &local2_pfx, */
+ /* FIB_SOURCE_INTERFACE, */
+ /* (FIB_ENTRY_FLAG_CONNECTED | */
+ /* FIB_ENTRY_FLAG_ATTACHED), */
+ /* NULL, */
+ /* tm->hw[1]->sw_if_index, */
+ /* ~0, */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */
+ /* "attached interface route present"); */
+
+ /* local2_pfx.fp_len = 32; */
+ /* fib_table_entry_update_one_path(fib_index, &local2_pfx, */
+ /* FIB_SOURCE_INTERFACE, */
+ /* (FIB_ENTRY_FLAG_CONNECTED | */
+ /* FIB_ENTRY_FLAG_LOCAL), */
+ /* NULL, */
+ /* tm->hw[0]->sw_if_index, */
+ /* ~0, // invalid fib index */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */
+
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */
+ /* "local interface route present"); */
+
+ /* /\* */
+ /* * Add the route that will be used to resolve the tunnel's destination */
+ /* *\/ */
+ /* fib_prefix_t route_pfx = { */
+ /* .fp_len = 24, */
+ /* .fp_proto = FIB_PROTOCOL_IP4, */
+ /* .fp_addr = { */
+ /* .ip4 = { */
+ /* /\* 1.1.1.0/24 *\/ */
+ /* .as_u32 = clib_host_to_net_u32(0x01010100), */
+ /* }, */
+ /* }, */
+ /* }; */
+ /* /\* 10.10.10.2 *\/ */
+ /* ip46_address_t nh_10_10_10_2 = { */
+ /* .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), */
+ /* }; */
+
+ /* fib_table_entry_path_add(fib_index, &route_pfx, */
+ /* FIB_SOURCE_API, */
+ /* FIB_ENTRY_FLAG_NONE, */
+ /* &nh_10_10_10_2, */
+ /* tm->hw[0]->sw_if_index, */
+ /* ~0, */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != */
+ /* fib_table_lookup_exact_match(fib_index, &local_pfx)), */
+ /* "route present"); */
+
+ /* /\* */
+ /* * Add a tunnel */
+ /* *\/ */
+ /* /\* 1.1.1.1 *\/ */
+ /* fib_prefix_t tun_dst_pfx = { */
+ /* .fp_len = 32, */
+ /* .fp_proto = FIB_PROTOCOL_IP4, */
+ /* .fp_addr = { */
+ /* .ip4.as_u32 = clib_host_to_net_u32(0x01010101), */
+ /* }, */
+ /* }; */
+ /* /\* 10.10.10.10 *\/ */
+ /* ip4_address_t tun_src = { */
+ /* .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), */
+ /* }; */
+ /* /\* 172.16.0.1 *\/ */
+ /* ip4_address_t tun_itf = { */
+ /* .as_u32 = clib_host_to_net_u32(0xac100001), */
+ /* }; */
+ /* fib_prefix_t tun_itf_pfx = { */
+ /* .fp_len = 30, */
+ /* .fp_proto = FIB_PROTOCOL_IP4, */
+ /* .fp_addr = { */
+ /* .ip4 = tun_itf, */
+ /* }, */
+ /* }; */
+ /* u32 *encap_labels = NULL; */
+ /* u32 label = 0xbaba; */
+ /* u32 encap_index; */
+ /* u32 tunnel_sw_if_index; */
+
+ /* int rv; */
+
+ /* /\* */
+ /* * First we need the MPLS Encap present */
+ /* * */
+ /* * Pretty sure this is broken. the wiki say the 1st aparamter address */
+ /* * should be the tunnel's interface address, which makes some sense. But */
+ /* * the code for tunnel creation checks for the tunnel's destination */
+ /* * address. curious... */
+ /* *\/ */
+ /* vec_add1(encap_labels, label); */
+ /* rv = vnet_mpls_add_del_encap(&tun_dst_pfx.fp_addr.ip4, */
+ /* 0, // inner VRF */
+ /* encap_labels, */
+ /* ~0, // policy_tunnel_index, */
+ /* 0, // no_dst_hash, */
+ /* &encap_index, */
+ /* 1); // ADD */
+ /* FIB_TEST((0 == rv), "MPLS encap created"); */
+
+ /* /\* */
+ /* * now create the tunnel */
+ /* *\/ */
+ /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */
+ /* &tun_dst_pfx.fp_addr.ip4, */
+ /* &tun_itf_pfx.fp_addr.ip4, */
+ /* tun_itf_pfx.fp_len, */
+ /* 0, // inner VRF */
+ /* 0, // outer VRF */
+ /* &tunnel_sw_if_index, */
+ /* 0, // l2 only */
+ /* 1); // ADD */
+ /* FIB_TEST((0 == rv), "Tunnel created"); */
+
+ /* /\* */
+ /* * add it again. just for giggles. */
+ /* *\/ */
+ /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */
+ /* &tun_dst_pfx.fp_addr.ip4, */
+ /* &tun_itf_pfx.fp_addr.ip4, */
+ /* tun_itf_pfx.fp_len, */
+ /* 0, // inner VRF */
+ /* 0, // outer VRF */
+ /* &tunnel_sw_if_index, */
+ /* 0, // l2 only */
+ /* 1); // ADD */
+ /* FIB_TEST((0 != rv), "Duplicate Tunnel not created"); */
+
+ /* /\* */
+ /* * Find the route added for the tunnel subnet and check that */
+ /* * it has a midchin adj that is stacked on the adj used to reach the */
+ /* * tunnel destination */
+ /* *\/ */
+ /* ip_adjacency_t *midchain_adj, *route_adj, *adjfib_adj; */
+ /* adj_index_t midchain_ai, route_ai, adjfib_ai1, adjfib_ai2; */
+ /* ip_lookup_main_t *lm; */
+
+ /* lm = &ip4_main.lookup_main; */
+
+ /* fei = fib_table_lookup_exact_match(fib_index, &tun_itf_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "tun itf route present"); */
+ /* midchain_ai = fib_entry_contribute_forwarding(fei); */
+ /* midchain_adj = adj_get(midchain_ai); */
+
+ /* FIB_TEST((IP_LOOKUP_NEXT_MIDCHAIN == midchain_adj->lookup_next_index), */
+ /* "Tunnel interface links to midchain"); */
+
+ /* fei = fib_table_lookup_exact_match(fib_index, &route_pfx); */
+ /* route_ai = fib_entry_contribute_forwarding(fei); */
+ /* FIB_TEST((midchain_adj->sub_type.midchain.adj_index == route_ai), */
+ /* "tunnel midchain it stacked on route adj"); */
+
+ /* /\* */
+ /* * update the route to the tunnel's destination to load-balance via */
+ /* * interface 1. */
+ /* *\/ */
+ /* /\* 10.10.11.2 *\/ */
+ /* ip46_address_t nh_10_10_11_2 = { */
+ /* .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0b02), */
+ /* }; */
+
+ /* fib_table_entry_path_add(fib_index, &route_pfx, */
+ /* FIB_SOURCE_API, */
+ /* FIB_ENTRY_FLAG_NONE, */
+ /* &nh_10_10_11_2, */
+ /* tm->hw[1]->sw_if_index, */
+ /* ~0, */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+
+ /* /\* */
+ /* * the tunnels midchain should have re-stacked. This tests that the */
+ /* * route re-resolution backwalk works to a tunnel interface. */
+ /* *\/ */
+ /* fei = fib_table_lookup_exact_match(fib_index, &route_pfx); */
+ /* FIB_TEST((route_ai != fib_entry_contribute_forwarding(fei)), "route changed"); */
+ /* route_ai = fib_entry_contribute_forwarding(fei); */
+
+ /* midchain_adj = adj_get(midchain_ai); */
+
+ /* FIB_TEST((midchain_adj->sub_type.midchain.adj_index == route_ai), */
+ /* "tunnel midchain has re-stacked on route adj"); */
+
+ /* route_adj = adj_get(route_ai); */
+
+ /* FIB_TEST((2 == route_adj->n_adj), "Route adj is multipath"); */
+
+ /* /\* */
+ /* * At this stage both nieghbour adjs are incomplete, so the same should */
+ /* * be true of the multipath adj */
+ /* *\/ */
+ /* FIB_TEST((IP_LOOKUP_NEXT_ARP == route_adj->lookup_next_index), */
+ /* "Adj0 is ARP: %d", route_adj->lookup_next_index); */
+ /* FIB_TEST((IP_LOOKUP_NEXT_ARP == (route_adj+1)->lookup_next_index), */
+ /* "Adj1 is ARP"); */
+
+ /* /\* */
+ /* * do the equivalent of creating an ARP entry for 10.10.10.2. */
+ /* * This will complete the adj, and this */
+ /* * change should be refelct in the multipath too. */
+ /* *\/ */
+ /* u8* rewrite = NULL, byte = 0xd; */
+ /* vec_add(rewrite, &byte, 6); */
+
+ /* adjfib_ai1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, */
+ /* FIB_LINK_IP4, */
+ /* &nh_10_10_10_2, */
+ /* tm->hw[0]->sw_if_index); */
+ /* adj_nbr_update_rewrite(FIB_PROTOCOL_IP4, */
+ /* adjfib_ai1, */
+ /* rewrite); */
+ /* adjfib_adj = adj_get(adjfib_ai1); */
+ /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adjfib_adj->lookup_next_index), */
+ /* "Adj-fib10 adj is rewrite"); */
+
+ /* adjfib_ai2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, */
+ /* FIB_LINK_IP4, */
+ /* &nh_10_10_11_2, */
+ /* tm->hw[1]->sw_if_index); */
+ /* adj_nbr_update_rewrite(FIB_PROTOCOL_IP4, */
+ /* adjfib_ai2, */
+ /* rewrite); */
+
+ /* adjfib_adj = adj_get(adjfib_ai2); */
+
+ /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adjfib_adj->lookup_next_index), */
+ /* "Adj-fib11 adj is rewrite"); */
+
+ /* fei = fib_table_lookup_exact_match(fib_index, &route_pfx); */
+ /* FIB_TEST((route_ai != fib_entry_contribute_forwarding(fei)), "route changed"); */
+ /* route_ai = fib_entry_contribute_forwarding(fei); */
+ /* route_adj = adj_get(route_ai); */
+ /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == route_adj->lookup_next_index), */
+ /* "Adj0 is rewrite"); */
+ /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == (route_adj+1)->lookup_next_index), */
+ /* "Adj1 is rewrite"); */
+
+ /* /\* */
+ /* * CLEANUP */
+ /* *\/ */
+ /* adj_index_t drop_ai = adj_get_special(FIB_PROTOCOL_IP4, */
+ /* ADJ_SPECIAL_TYPE_DROP); */
+
+ /* /\* */
+ /* * remove the route that the tunnel resovles via. expect */
+ /* * it to now resolve via the default route, which is drop */
+ /* *\/ */
+ /* fib_table_entry_path_remove(fib_index, &route_pfx, */
+ /* FIB_SOURCE_API, */
+ /* &nh_10_10_10_2, */
+ /* tm->hw[0]->sw_if_index, */
+ /* ~0, */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* fib_table_entry_path_remove(fib_index, &route_pfx, */
+ /* FIB_SOURCE_API, */
+ /* &nh_10_10_11_2, */
+ /* tm->hw[1]->sw_if_index, */
+ /* ~0, */
+ /* 1, */
+ /* FIB_ROUTE_PATH_FLAG_NONE); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID != */
+ /* fib_table_lookup_exact_match(fib_index, &local_pfx)), */
+ /* "route present"); */
+ /* midchain_adj = adj_get(midchain_ai); */
+ /* FIB_TEST((midchain_adj->sub_type.midchain.adj_index == drop_ai), */
+ /* "tunnel midchain has re-stacked on drop"); */
+
+ /* /\* */
+ /* * remove the tunnel and its MPLS encaps */
+ /* *\/ */
+ /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */
+ /* &tun_dst_pfx.fp_addr.ip4, */
+ /* &tun_itf_pfx.fp_addr.ip4, */
+ /* tun_itf_pfx.fp_len, */
+ /* 0, // inner VRF */
+ /* 0, // outer VRF */
+ /* &tunnel_sw_if_index, */
+ /* 0, // l2 only */
+ /* 0); // DEL */
+ /* FIB_TEST((0 == rv), "Tunnel removed"); */
+ /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */
+ /* &tun_dst_pfx.fp_addr.ip4, */
+ /* &tun_itf_pfx.fp_addr.ip4, */
+ /* tun_itf_pfx.fp_len, */
+ /* 0, // inner VRF */
+ /* 0, // outer VRF */
+ /* &tunnel_sw_if_index, */
+ /* 0, // l2 only */
+ /* 0); // DEL */
+ /* FIB_TEST((0 != rv), "No existant Tunnel not removed"); */
+
+ /* rv = vnet_mpls_add_del_encap(&tun_dst_pfx.fp_addr.ip4, */
+ /* 0, // inner VRF */
+ /* encap_labels, */
+ /* ~0, // policy_tunnel_index, */
+ /* 0, // no_dst_hash, */
+ /* NULL, */
+ /* 0); // ADD */
+ /* FIB_TEST((0 == rv), "MPLS encap deleted"); */
+
+ /* vec_free(encap_labels); */
+
+ /* /\* */
+ /* * no more FIB entries expected */
+ /* *\/ */
+ /* fei = fib_table_lookup_exact_match(fib_index, &tun_itf_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "tun itf route removed"); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &tun_dst_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "tun dst route removed"); */
+
+ /* /\* */
+ /* * CLEANUP the connecteds */
+ /* *\/ */
+ /* local2_pfx.fp_len = 24; */
+ /* fib_table_entry_delete(fib_index, &local2_pfx, */
+ /* FIB_SOURCE_INTERFACE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */
+ /* "attached interface route remove"); */
+
+ /* local2_pfx.fp_len = 32; */
+ /* fib_table_entry_special_remove(fib_index, &local2_pfx, */
+ /* FIB_SOURCE_INTERFACE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */
+ /* "local interface route removed"); */
+ /* local_pfx.fp_len = 24; */
+ /* fib_table_entry_delete(fib_index, &local_pfx, */
+ /* FIB_SOURCE_INTERFACE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */
+ /* "attached interface route remove"); */
+
+ /* local_pfx.fp_len = 32; */
+ /* fib_table_entry_special_remove(fib_index, &local_pfx, */
+ /* FIB_SOURCE_INTERFACE); */
+ /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */
+ /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */
+ /* "local interface route removed"); */
+}
+
+/*
+ * Test Attached Exports
+ */
+static void
+fib_test_ae (void)
+{
+ const dpo_id_t *dpo, *dpo_drop;
+ const u32 fib_index = 0;
+ fib_node_index_t fei;
+ test_main_t *tm;
+ ip4_main_t *im;
+
+ tm = &test_main;
+ im = &ip4_main;
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ /*
+ * add interface routes. We'll assume this works. It's more rigorously
+ * tested elsewhere.
+ */
+ fib_prefix_t local_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ /* 10.10.10.10 */
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ },
+ },
+ };
+
+ vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
+ im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+
+ dpo_drop = drop_dpo_get(DPO_PROTO_IP4);
+
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "attached interface route present");
+
+ local_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "local interface route present");
+
+ /*
+ * Add an 2 ARP entry => a complete ADJ plus adj-fib.
+ */
+ fib_prefix_t pfx_10_10_10_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.1 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ fib_node_index_t ai;
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 created");
+ ai = fib_entry_get_adj(fei);
+
+ /*
+ * create another FIB table into which routes will be imported
+ */
+ u32 import_fib_index1;
+
+ import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11);
+
+ /*
+ * Add an attached route in the import FIB
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created");
+
+ /*
+ * check for the presence of the adj-fibs in the import table
+ */
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "adj-fib1 Import uses same adj as export");
+
+ /*
+ * check for the presence of the local in the import table
+ */
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * Add another adj-fib in the export table. Expect this
+ * to get magically exported;
+ */
+ fib_prefix_t pfx_10_10_10_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.2 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 present");
+ ai = fib_entry_get_adj(fei);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Import uses same adj as export");
+
+ /*
+ * create a 2nd FIB table into which routes will be imported
+ */
+ u32 import_fib_index2;
+
+ import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12);
+
+ /*
+ * Add an attached route in the import FIB
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index2,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created");
+
+ /*
+ * check for the presence of all the adj-fibs and local in the import table
+ */
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported");
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * add a 3rd adj-fib. expect it to be exported to both tables.
+ */
+ fib_prefix_t pfx_10_10_10_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.10.10.3 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03),
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_3_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 present");
+ ai = fib_entry_get_adj(fei);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB1");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Import uses same adj as export");
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB2");
+ FIB_TEST((ai == fib_entry_get_adj(fei)),
+ "Import uses same adj as export");
+
+ /*
+ * remove the 3rd adj fib. we expect it to be removed from both FIBs
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_3_s_32,
+ FIB_SOURCE_ADJ);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 remved");
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB1");
+
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB2");
+
+ /*
+ * remove the attached route from the 2nd FIB. expect the imported
+ * entries to be removed
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(import_fib_index2,
+ &local_pfx,
+ FIB_SOURCE_API);
+ fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached export removed");
+
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB2");
+ fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB2");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB2");
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 still in FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 still in FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local still in FIB1");
+
+ /*
+ * modify the route in FIB1 so it is no longer attached. expect the imported
+ * entries to be removed
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_2_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1");
+
+ /*
+ * modify it back to attached. expect the adj-fibs back
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported in FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported in FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported in FIB1");
+
+ /*
+ * add a covering attached next-hop for the interface address, so we have
+ * a valid adj to find when we check the forwarding tables
+ */
+ fib_prefix_t pfx_10_0_0_0_s_8 = {
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ /* 10.0.0.0 */
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a000000),
+ },
+ };
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_10_0_0_0_s_8,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_3_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ /*
+ * remove the route in the export fib. expect the adj-fibs to be removed
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "Delete export: ADJ-fib1 removed from FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1");
+
+ /*
+ * the adj-fibs in the export VRF are present in the FIB table,
+ * but not installed in forwarding, since they have no attached cover.
+ * Consequently a lookup in the MTRIE gives the adj for the covering
+ * route 10.0.0.0/8.
+ */
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export");
+
+ index_t lbi;
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.1 forwards on \n%U not \n%U",
+ format_load_balance, lbi, 0,
+ format_dpo_id, dpo, 0);
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0);
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_3_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.3 forwards on %U", format_dpo_id, dpo, 0);
+
+ /*
+ * add the export prefix back, but not as attached.
+ * No adj-fibs in export nor import tables
+ */
+ local_pfx.fp_len = 24;
+ fei = fib_table_entry_update_one_path(fib_index,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_10_10_10_1_s_32.fp_addr,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "non-attached in export: ADJ-fib1 in export");
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.1 forwards on %U", format_dpo_id, dpo, 0);
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export");
+ lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4);
+ FIB_TEST(lbi == dpo->dpoi_index,
+ "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0);
+
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1");
+
+ /*
+ * modify the export prefix so it is attached. expect all covereds to return
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(fib_index,
+ &local_pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * modify the export prefix so connected. no change.
+ */
+ local_pfx.fp_len = 24;
+ fib_table_entry_update_one_path(fib_index, &local_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(fib_index, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)),
+ "Adj-fib1 is not drop in export");
+ fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported");
+ local_pfx.fp_len = 32;
+ fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported");
+
+ /*
+ * CLEANUP
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_10_0_0_0_s_8,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_1_s_32,
+ FIB_SOURCE_ADJ);
+ fib_table_entry_delete(fib_index,
+ &pfx_10_10_10_2_s_32,
+ FIB_SOURCE_ADJ);
+ local_pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &local_pfx,
+ FIB_SOURCE_INTERFACE);
+ local_pfx.fp_len = 24;
+ fib_table_entry_delete(import_fib_index1,
+ &local_pfx,
+ FIB_SOURCE_API);
+
+ fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4);
+ fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+}
+
+/**
+ * The kind of forwarding object a test expects to find in one bucket
+ * of a load-balance. Selects which member of the union in
+ * fib_test_lb_bucket_t describes the expectation.
+ */
+typedef enum fib_test_lb_bucket_type_t_ {
+ /* an MPLS label DPO that stacks on an (incomplete) adjacency */
+ FT_LB_LABEL_O_ADJ = 0,
+ /* an MPLS label DPO that stacks on another load-balance */
+ FT_LB_LABEL_O_LB = 1,
+ /* a load-balance stacked directly in the bucket */
+ FT_LB_O_LB = 2,
+ /* a drop DPO */
+ FT_LB_SPECIAL = 3,
+ /* an adjacency, complete or incomplete */
+ FT_LB_ADJ = 4,
+} fib_test_lb_bucket_type_t;
+
+/**
+ * Description of the forwarding object a test expects in one bucket of a
+ * load-balance. 'type' selects which member of the anonymous union holds
+ * the expected values; fib_test_validate_lb_v() compares them against the
+ * DPO actually found in the bucket.
+ */
+typedef struct fib_test_lb_bucket_t_ {
+ /* which of the union members below is in use */
+ fib_test_lb_bucket_type_t type;
+
+ union
+ {
+ /* FT_LB_LABEL_O_ADJ: expected label header values and the adj beneath */
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ /* NOTE(review): ttl is not compared by fib_test_validate_lb_v() */
+ u8 ttl;
+ adj_index_t adj;
+ } label_o_adj;
+ /* FT_LB_LABEL_O_LB: expected label header values and the LB beneath */
+ struct
+ {
+ mpls_eos_bit_t eos;
+ mpls_label_t label;
+ /* NOTE(review): ttl is not compared by fib_test_validate_lb_v() */
+ u8 ttl;
+ index_t lb;
+ } label_o_lb;
+ /* FT_LB_ADJ: index of the expected adjacency */
+ struct
+ {
+ index_t adj;
+ } adj;
+ /* FT_LB_O_LB: index of the expected load-balance */
+ struct
+ {
+ index_t lb;
+ } lb;
+ /* FT_LB_SPECIAL: index of the expected drop DPO */
+ struct
+ {
+ index_t adj;
+ } special;
+ };
+} fib_test_lb_bucket_t;
+
+/*
+ * Evaluate a test condition via FIB_TEST_I and make the enclosing
+ * function return 0 (failure) if it does not hold. Only usable in
+ * functions returning int.
+ *
+ * Wrapped in do { } while (0) so that an invocation followed by ';' is
+ * exactly one statement and is safe in an unbraced if/else.
+ */
+#define FIB_TEST_LB(_cond, _comment, _args...) \
+do { \
+ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \
+ return (0); \
+ } \
+} while (0)
+
+/**
+ * Compare every bucket of a load-balance against a list of expectations.
+ *
+ * @param lb the load-balance to inspect
+ * @param n_buckets the number of buckets the load-balance must have, and
+ * the number of expectations supplied in ap
+ * @param ap n_buckets pointers to fib_test_lb_bucket_t, one per
+ * bucket, in bucket order
+ *
+ * @return non-zero if the bucket count and every bucket match, 0 as soon
+ * as any check fails.
+ */
+static int
+fib_test_validate_lb_v (const load_balance_t *lb,
+ u16 n_buckets,
+ va_list ap)
+{
+ const dpo_id_t *dpo;
+ int bucket;
+
+ FIB_TEST_LB((n_buckets == lb->lb_n_buckets), "n_buckets = %d", lb->lb_n_buckets);
+
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ const fib_test_lb_bucket_t *exp;
+
+ exp = va_arg(ap, fib_test_lb_bucket_t*);
+ dpo = load_balance_get_bucket_i(lb, bucket);
+
+ switch (exp->type)
+ {
+ case FT_LB_LABEL_O_ADJ:
+ {
+ const mpls_label_dpo_t *mld;
+ mpls_label_t hdr;
+
+ FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ /* the label header is held in network order */
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+ hdr = clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+ exp->label_o_adj.label),
+ "bucket %d stacks on label %d",
+ bucket,
+ exp->label_o_adj.label);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+ exp->label_o_adj.eos),
+ "bucket %d stacks on label %d %U",
+ bucket,
+ exp->label_o_adj.label,
+ format_mpls_eos_bit, exp->label_o_adj.eos);
+
+ FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type),
+ "bucket %d label stacks on %U",
+ bucket,
+ format_dpo_type, mld->mld_dpo.dpoi_type);
+
+ FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index),
+ "bucket %d label stacks on adj %d",
+ bucket,
+ exp->label_o_adj.adj);
+ }
+ break;
+ case FT_LB_LABEL_O_LB:
+ {
+ const mpls_label_dpo_t *mld;
+ mpls_label_t hdr;
+
+ FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+
+ /* the label header is held in network order */
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+ hdr = clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+ exp->label_o_lb.label),
+ "bucket %d stacks on label %d",
+ bucket,
+ exp->label_o_lb.label);
+
+ FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+ exp->label_o_lb.eos),
+ "bucket %d stacks on label %d %U",
+ bucket,
+ exp->label_o_lb.label,
+ format_mpls_eos_bit, exp->label_o_lb.eos);
+
+ FIB_TEST_LB((DPO_LOAD_BALANCE == mld->mld_dpo.dpoi_type),
+ "bucket %d label stacks on %U",
+ bucket,
+ format_dpo_type, mld->mld_dpo.dpoi_type);
+
+ FIB_TEST_LB((exp->label_o_lb.lb == mld->mld_dpo.dpoi_index),
+ "bucket %d label stacks on LB %d",
+ bucket,
+ exp->label_o_lb.lb);
+ }
+ break;
+ case FT_LB_ADJ:
+ /*
+ * a bucket of the wrong DPO type is a validation failure, even
+ * if its index happens to match the expected one.
+ */
+ FIB_TEST_LB(((DPO_ADJACENCY == dpo->dpoi_type) ||
+ (DPO_ADJACENCY_INCOMPLETE == dpo->dpoi_type)),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+ "bucket %d stacks on adj %d",
+ bucket,
+ exp->adj.adj);
+ break;
+ case FT_LB_O_LB:
+ FIB_TEST_LB((DPO_LOAD_BALANCE == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->lb.lb == dpo->dpoi_index),
+ "bucket %d stacks on lb %d",
+ bucket,
+ exp->lb.lb);
+ break;
+ case FT_LB_SPECIAL:
+ FIB_TEST_LB((DPO_DROP == dpo->dpoi_type),
+ "bucket %d stacks on %U",
+ bucket,
+ format_dpo_type, dpo->dpoi_type);
+ FIB_TEST_LB((exp->special.adj == dpo->dpoi_index),
+ "bucket %d stacks on drop %d",
+ bucket,
+ exp->special.adj);
+ break;
+ default:
+ /* an expectation of unknown type cannot be validated */
+ FIB_TEST_LB(0,
+ "bucket %d has unknown expected type %d",
+ bucket,
+ exp->type);
+ break;
+ }
+ }
+ return (!0);
+}
+
+/**
+ * Validate the forwarding a FIB entry contributes for a given chain type.
+ *
+ * @param fei the FIB entry to inspect
+ * @param fct the forwarding chain type to request from the entry
+ * @param n_buckets the number of buckets the resulting load-balance must
+ * have
+ * @param ... n_buckets pointers to fib_test_lb_bucket_t, one per
+ * bucket, in bucket order
+ *
+ * @return non-zero if the entry contributes a load-balance whose buckets
+ * all match, 0 otherwise.
+ */
+static int
+fib_test_validate_entry (fib_node_index_t fei,
+ fib_forward_chain_type_t fct,
+ u16 n_buckets,
+ ...)
+{
+ dpo_id_t dpo = DPO_NULL;
+ va_list ap;
+ int res;
+
+ va_start(ap, n_buckets);
+
+ fib_entry_contribute_forwarding(fei, fct, &dpo);
+
+ /*
+ * do not return early on a type mismatch: the DPO obtained above must
+ * be reset and the va_list ended on every path.
+ */
+ if (!FIB_TEST_I((DPO_LOAD_BALANCE == dpo.dpoi_type),
+ "Entry links to %U",
+ format_dpo_type, dpo.dpoi_type))
+ {
+ res = 0;
+ }
+ else
+ {
+ res = fib_test_validate_lb_v(load_balance_get(dpo.dpoi_index),
+ n_buckets,
+ ap);
+ }
+
+ dpo_reset(&dpo);
+
+ va_end(ap);
+
+ return (res);
+}
+
+/*
+ * Test MPLS label handling: out-going labels on paths, local labels,
+ * labelled recursive routes and PIC-core failover of the via-entry.
+ */
+static void
+fib_test_label (void)
+{
+ fib_node_index_t fei, ai_mpls_10_10_10_1, ai_v4_10_10_11_1, ai_v4_10_10_11_2, ai_mpls_10_10_11_2, ai_mpls_10_10_11_1;
+ const u32 fib_index = 0;
+ test_main_t *tm;
+ ip4_main_t *im;
+ int lb_count;
+
+ /* remembered so the final check can detect leaked load-balances */
+ lb_count = pool_elts(load_balance_pool);
+ tm = &test_main;
+ im = &ip4_main;
+
+ /*
+ * add interface routes. We'll assume this works. It's more rigorously
+ * tested elsewhere.
+ */
+ fib_prefix_t local0_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ /* 10.10.10.10 */
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a0a),
+ },
+ },
+ };
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
+ im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+
+ fib_table_entry_update_one_path(fib_index, &local0_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local0_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "attached interface route present");
+
+ local0_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local0_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local0_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "local interface route present");
+
+ fib_prefix_t local1_pfx = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ /* 10.10.11.10 */
+ .as_u32 = clib_host_to_net_u32(0x0a0a0b0a),
+ },
+ },
+ };
+
+ vec_validate(im->fib_index_by_sw_if_index, tm->hw[1]->sw_if_index);
+ im->fib_index_by_sw_if_index[tm->hw[1]->sw_if_index] = fib_index;
+
+ fib_table_entry_update_one_path(fib_index, &local1_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[1]->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local1_pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "attached interface route present");
+
+ local1_pfx.fp_len = 32;
+ fib_table_entry_update_one_path(fib_index, &local1_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP4,
+ NULL,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup_exact_match(fib_index, &local1_pfx);
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei),
+ "local interface route present");
+
+ ip46_address_t nh_10_10_10_1 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+ },
+ };
+ ip46_address_t nh_10_10_11_1 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0b01),
+ },
+ };
+ ip46_address_t nh_10_10_11_2 = {
+ .ip4 = {
+ .as_u32 = clib_host_to_net_u32(0x0a0a0b02),
+ },
+ };
+
+ /*
+ * take a lock on each adjacency the expected buckets refer to. The
+ * locks are released in the cleanup at the end of the test.
+ */
+ ai_v4_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index);
+ ai_v4_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &nh_10_10_11_2,
+ tm->hw[1]->sw_if_index);
+ ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_MPLS,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index);
+ ai_mpls_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_MPLS,
+ &nh_10_10_11_2,
+ tm->hw[1]->sw_if_index);
+ ai_mpls_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_MPLS,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index);
+
+ /*
+ * Add an entry with one path with a real out-going label
+ */
+ fib_prefix_t pfx_1_1_1_1_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010101),
+ },
+ };
+ fib_test_lb_bucket_t l99_eos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 99,
+ .eos = MPLS_EOS,
+ },
+ };
+ fib_test_lb_bucket_t l99_neos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 99,
+ .eos = MPLS_NON_EOS,
+ },
+ };
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ 99,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.1.1.1/32 created");
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l99_eos_o_10_10_10_1),
+ "1.1.1.1/32 LB 1 bucket via label 99 over 10.10.10.1");
+
+ /*
+ * add a path with an implicit NULL label
+ */
+ fib_test_lb_bucket_t a_o_10_10_11_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_v4_10_10_11_1,
+ },
+ };
+ fib_test_lb_bucket_t a_mpls_o_10_10_11_1 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_mpls_10_10_11_1,
+ },
+ };
+
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_IETF_IMPLICIT_NULL_LABEL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1");
+
+ /*
+ * assign the route a local label
+ */
+ fib_table_entry_local_label_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ 24001);
+
+ fib_prefix_t pfx_24001_eos = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 24001,
+ .fp_eos = MPLS_EOS,
+ };
+ fib_prefix_t pfx_24001_neos = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = 24001,
+ .fp_eos = MPLS_NON_EOS,
+ };
+
+ /*
+ * The EOS entry should link to both the paths,
+ * and use an ip adj for the imp-null
+ * The NON-EOS entry should link to both the paths,
+ * and use an mpls adj for the imp-null
+ */
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1),
+ "24001/eos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1");
+
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 2,
+ &l99_neos_o_10_10_10_1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "mpls-adj via 10.10.11.1");
+
+ /*
+ * add an unlabelled path, this is excluded from the neos chains,
+ */
+ fib_test_lb_bucket_t adj_o_10_10_11_2 = {
+ .type = FT_LB_ADJ,
+ .adj = {
+ .adj = ai_v4_10_10_11_2,
+ },
+ };
+
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_11_2,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 16, // 3 choices spread over 16 buckets
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 16 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1, "
+ "adj over 10.10.11.2");
+
+ /*
+ * get and lock a reference to the non-eos of the via entry 1.1.1.1/32
+ */
+ dpo_id_t non_eos_1_1_1_1 = DPO_NULL;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &non_eos_1_1_1_1);
+
+ /*
+ * n-eos has only the 2 labelled paths
+ */
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 2,
+ &l99_neos_o_10_10_10_1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj-mpls over 10.10.11.1");
+
+ /*
+ * A labelled recursive
+ */
+ fib_prefix_t pfx_2_2_2_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020202),
+ },
+ };
+ fib_test_lb_bucket_t l1600_eos_o_1_1_1_1 = {
+ .type = FT_LB_LABEL_O_LB,
+ .label_o_lb = {
+ .lb = non_eos_1_1_1_1.dpoi_index,
+ .label = 1600,
+ .eos = MPLS_EOS,
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_2_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ 1600,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l1600_eos_o_1_1_1_1),
+ "2.2.2.2/32 LB 1 buckets via: "
+ "label 1600 over 1.1.1.1");
+
+ /*
+ * we are holding a lock on the non-eos LB of the via-entry.
+ * do a PIC-core failover by shutting the link of the via-entry.
+ *
+ * shut down the link with the valid label
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ 0);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 1 buckets via: "
+ "adj-mpls over 10.10.11.1");
+
+ /*
+ * test that the pre-failover load-balance has been in-place
+ * modified
+ */
+ dpo_id_t current = DPO_NULL;
+ fib_entry_contribute_forwarding(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &current);
+
+ FIB_TEST(!dpo_cmp(&non_eos_1_1_1_1,
+ &current),
+ "PIC-core LB inplace modified %U %U",
+ format_dpo_id, &non_eos_1_1_1_1, 0,
+ format_dpo_id, &current, 0);
+
+ dpo_reset(&non_eos_1_1_1_1);
+ dpo_reset(&current);
+
+ /*
+ * no-shut the link with the valid label
+ */
+ vnet_sw_interface_set_flags(vnet_get_main(),
+ tm->hw[0]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 16, // 3 choices spread over 16 buckets
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 16 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 16, // 3 choices spread over 16 buckets
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &l99_eos_o_10_10_10_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 16 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 2,
+ &l99_neos_o_10_10_10_1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj-mpls over 10.10.11.1");
+
+ /*
+ * remove the first path with the valid label
+ */
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &a_o_10_10_11_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "adj over 10.10.11.1, "
+ "adj-v4 over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &a_mpls_o_10_10_11_1),
+ "24001/neos LB 1 buckets via: "
+ "adj-mpls over 10.10.11.1");
+
+ /*
+ * remove the other path with a valid label
+ */
+ fib_test_lb_bucket_t bucket_drop = {
+ .type = FT_LB_SPECIAL,
+ .special = {
+ .adj = 1,
+ },
+ };
+
+ fib_table_entry_path_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 1 buckets via: "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 1 buckets via: "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &bucket_drop),
+ "24001/neos LB 1 buckets via: DROP");
+
+ /*
+ * add back the path with the valid label
+ */
+ fib_table_entry_path_add(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ 99,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_eos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "24001/eos LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
+ &pfx_24001_neos);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ 1,
+ &l99_neos_o_10_10_10_1),
+ "24001/neos LB 1 buckets via: "
+ "label 99 over 10.10.10.1");
+
+ /*
+ * remove the local label
+ */
+ fib_table_entry_local_label_remove(fib_index,
+ &pfx_1_1_1_1_s_32,
+ 24001);
+
+ fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l99_eos_o_10_10_10_1,
+ &adj_o_10_10_11_2),
+ "1.1.1.1/32 LB 2 buckets via: "
+ "label 99 over 10.10.10.1, "
+ "adj over 10.10.11.2");
+
+ FIB_TEST((FIB_NODE_INDEX_INVALID ==
+ mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID)),
+ "No more MPLS FIB entries => table removed");
+
+ /*
+ * add another via-entry for the recursive
+ */
+ fib_prefix_t pfx_1_1_1_2_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x01010102),
+ },
+ };
+ fib_test_lb_bucket_t l101_eos_o_10_10_10_1 = {
+ .type = FT_LB_LABEL_O_ADJ,
+ .label_o_adj = {
+ .adj = ai_mpls_10_10_10_1,
+ .label = 101,
+ .eos = MPLS_EOS,
+ },
+ };
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_10_1,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ 101,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l101_eos_o_10_10_10_1),
+ "1.1.1.2/32 LB 1 buckets via: "
+ "label 101 over 10.10.10.1");
+
+ /*
+ * fetch the non-eos forwarding of both via-entries; their LB indices
+ * are what the recursive's label buckets are expected to stack on.
+ */
+ dpo_id_t non_eos_1_1_1_2 = DPO_NULL;
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index,
+ &pfx_1_1_1_1_s_32),
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &non_eos_1_1_1_1);
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index,
+ &pfx_1_1_1_2_s_32),
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ &non_eos_1_1_1_2);
+
+ fib_test_lb_bucket_t l1601_eos_o_1_1_1_2 = {
+ .type = FT_LB_LABEL_O_LB,
+ .label_o_lb = {
+ .lb = non_eos_1_1_1_2.dpoi_index,
+ .label = 1601,
+ .eos = MPLS_EOS,
+ },
+ };
+ l1600_eos_o_1_1_1_1.label_o_lb.lb = non_eos_1_1_1_1.dpoi_index;
+
+ fei = fib_table_entry_path_add(fib_index,
+ &pfx_2_2_2_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_2_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ 1601,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l1600_eos_o_1_1_1_1,
+ &l1601_eos_o_1_1_1_2),
+ "2.2.2.2/32 LB 2 buckets via: "
+ "label 1600 via 1.1.1.1, "
+ "label 1601 via 1.1.1.2");
+
+ /*
+ * update the via-entry so its only path has an imp-null label.
+ * the LB for the recursive still uses this via.
+ */
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_IETF_IMPLICIT_NULL_LABEL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &a_o_10_10_11_1),
+ "1.1.1.2/32 LB 1 buckets via: "
+ "adj 10.10.11.1");
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 2,
+ &l1600_eos_o_1_1_1_1,
+ &l1601_eos_o_1_1_1_2),
+ "2.2.2.2/32 LB 2 buckets via: "
+ "label 1600 via 1.1.1.1, "
+ "label 1601 via 1.1.1.2");
+
+ /*
+ * update the via-entry so it no longer has labelled paths.
+ * the LB for the recursive should exclude this via from its LB
+ */
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &nh_10_10_11_1,
+ tm->hw[1]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &a_o_10_10_11_1),
+ "1.1.1.2/32 LB 1 buckets via: "
+ "adj 10.10.11.1");
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l1600_eos_o_1_1_1_1),
+ "2.2.2.2/32 LB 1 buckets via: "
+ "label 1600 via 1.1.1.1");
+
+ dpo_reset(&non_eos_1_1_1_1);
+ dpo_reset(&non_eos_1_1_1_2);
+
+ /*
+ * Add a recursive with no out-labels. We expect to use the IP of the via
+ */
+ fib_prefix_t pfx_2_2_2_3_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020203),
+ },
+ };
+ dpo_id_t ip_1_1_1_1 = DPO_NULL;
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_2_3_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fib_entry_contribute_forwarding(fib_table_lookup(fib_index,
+ &pfx_1_1_1_1_s_32),
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ &ip_1_1_1_1);
+
+ fib_test_lb_bucket_t ip_o_1_1_1_1 = {
+ .type = FT_LB_O_LB,
+ .lb = {
+ .lb = ip_1_1_1_1.dpoi_index,
+ },
+ };
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_3_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_1),
+ "2.2.2.3/32 LB 1 buckets via: "
+ "ip 1.1.1.1");
+
+ /*
+ * Add a recursive with an imp-null out-label.
+ * We expect to use the IP of the via
+ *
+ * NOTE(review): the path below is added with MPLS_LABEL_INVALID, not
+ * MPLS_IETF_IMPLICIT_NULL_LABEL, so as written this repeats the
+ * no-out-label case above - confirm which was intended.
+ */
+ fib_prefix_t pfx_2_2_2_4_s_32 = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x02020204),
+ },
+ };
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx_2_2_2_4_s_32,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP4,
+ &pfx_1_1_1_1_s_32.fp_addr,
+ ~0,
+ fib_index,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_4_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &ip_o_1_1_1_1),
+ "2.2.2.4/32 LB 1 buckets via: "
+ "ip 1.1.1.1");
+
+ dpo_reset(&ip_1_1_1_1);
+
+ /*
+ * cleanup
+ */
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_2_s_32,
+ FIB_SOURCE_API);
+
+ fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32);
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &l1600_eos_o_1_1_1_1),
+ "2.2.2.2/32 LB 1 buckets via: "
+ "label 1600 via 1.1.1.1");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_1_1_1_1_s_32,
+ FIB_SOURCE_API);
+
+ /* fei still refers to 2.2.2.2/32, whose only via has now gone */
+ FIB_TEST(fib_test_validate_entry(fei,
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ 1,
+ &bucket_drop),
+ "2.2.2.2/32 LB 1 buckets via: DROP");
+
+ fib_table_entry_delete(fib_index,
+ &pfx_2_2_2_2_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2_2_2_3_s_32,
+ FIB_SOURCE_API);
+ fib_table_entry_delete(fib_index,
+ &pfx_2_2_2_4_s_32,
+ FIB_SOURCE_API);
+
+ adj_unlock(ai_mpls_10_10_10_1);
+ adj_unlock(ai_mpls_10_10_11_2);
+ adj_unlock(ai_v4_10_10_11_1);
+ adj_unlock(ai_v4_10_10_11_2);
+ adj_unlock(ai_mpls_10_10_11_1);
+
+ FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+ adj_nbr_db_size());
+
+ local0_pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &local0_pfx,
+ FIB_SOURCE_INTERFACE);
+ local0_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local0_pfx,
+ FIB_SOURCE_INTERFACE);
+ local1_pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &local1_pfx,
+ FIB_SOURCE_INTERFACE);
+ local1_pfx.fp_len = 24;
+ fib_table_entry_delete(fib_index,
+ &local1_pfx,
+ FIB_SOURCE_INTERFACE);
+
+ /*
+ * +1 for the drop LB in the MPLS tables.
+ */
+ FIB_TEST(lb_count+1 == pool_elts(load_balance_pool),
+ "Load-balance resources freed %d of %d",
+ lb_count+1, pool_elts(load_balance_pool));
+}
+
+#define N_TEST_CHILDREN 4 /* number of child nodes hung off the test parent */
+#define PARENT_INDEX 0 /* slot of fib_test_nodes[] that holds the parent */
+
+typedef struct fib_node_test_t_
+{
+ fib_node_t node; /* first member: the callbacks in this file cast the
+ * fib_node_t pointer back to a fib_node_test_t */
+ u32 sibling; /* value returned by fib_node_child_add() for this node */
+ u32 index; /* this node's slot in fib_test_nodes[] */
+ fib_node_back_walk_ctx_t *ctxs; /* vector; one context copy is appended
+ * per back-walk notification received */
+ u32 destroyed; /* set to 1 by the last-lock-gone callback */
+} fib_node_test_t;
+
+static fib_node_test_t fib_test_nodes[N_TEST_CHILDREN+1]; /* [0] is the parent, [1..N_TEST_CHILDREN] the children */
+
+#define PARENT() (&fib_test_nodes[PARENT_INDEX].node)
+/*
+ * Iterate _tc over the child slots 1..N_TEST_CHILDREN of fib_test_nodes[].
+ * Relies on an integer variable named 'ii' declared by the caller; inside
+ * the loop body it holds the slot index of the current child.
+ */
+#define FOR_EACH_TEST_CHILD(_tc) \
+ for (ii = 1, (_tc) = &fib_test_nodes[1]; \
+ ii < N_TEST_CHILDREN+1; \
+ ii++, (_tc) = &fib_test_nodes[ii])
+
+/**
+ * Map a test-node index onto the fib_node_t embedded in that slot of
+ * fib_test_nodes[]. Installed as the fnv_get member of the test VFT.
+ */
+static fib_node_t *
+fib_test_child_get_node (fib_node_index_t index)
+{
+    fib_node_test_t *tn = &fib_test_nodes[index];
+
+    return (&tn->node);
+}
+
+/*
+ * Selects what a child does, in addition to recording the visit, each time
+ * it is back-walked:
+ *   1 - spawn a sync walk from the child
+ *   2 - spawn a high priority async walk from the child
+ *   any other value - nothing more
+ */
+static int fib_test_walk_spawns_walks;
+
+/**
+ * Back-walk callback of the test node type.
+ *
+ * Appends a copy of the walk context to the visited node's ctxs vector, so
+ * the tests can count the visits, optionally spawns a further walk from the
+ * node (see fib_test_walk_spawns_walks) and always asks for the walk to
+ * continue.
+ */
+static fib_node_back_walk_rc_t
+fib_test_child_back_walk_notify (fib_node_t *node,
+                                 fib_node_back_walk_ctx_t *ctx)
+{
+    fib_node_test_t *child = (fib_node_test_t*) node;
+
+    vec_add1(child->ctxs, *ctx);
+
+    switch (fib_test_walk_spawns_walks)
+    {
+    case 1:
+        fib_walk_sync(FIB_NODE_TYPE_TEST, child->index, ctx);
+        break;
+    case 2:
+        fib_walk_async(FIB_NODE_TYPE_TEST, child->index,
+                       FIB_WALK_PRIORITY_HIGH, ctx);
+        break;
+    default:
+        break;
+    }
+
+    return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Last-lock-gone callback of the test node type: flag the node as
+ * destroyed so the test can check that every lock was released.
+ */
+static void
+fib_test_child_last_lock_gone (fib_node_t *node)
+{
+    ((fib_node_test_t *) node)->destroyed = 1;
+}
+
+/**
+ * The virtual function table of the test node type. It is registered for
+ * FIB_NODE_TYPE_TEST by fib_test_walk and supplies the get-node,
+ * last-lock-gone and back-walk callbacks defined in this file.
+ */
+static const fib_node_vft_t fib_test_child_vft = {
+ .fnv_get = fib_test_child_get_node,
+ .fnv_last_lock = fib_test_child_last_lock_gone,
+ .fnv_back_walk = fib_test_child_back_walk_notify,
+};
+
+/*
+ * Hand-written declarations of two FIB walk functions used by the walk test.
+ *
+ * fib_walk_process_queues: the function (that should have been static but
+ * isn't so I can do this) that processes the walk from the async queue.
+ * The test calls it directly, passing the quota as the second argument.
+ *
+ * fib_walk_queue_get_size: used by the test to check how many walks are
+ * still queued at a given priority.
+ */
+f64 fib_walk_process_queues(vlib_main_t * vm,
+ const f64 quota);
+u32 fib_walk_queue_get_size(fib_walk_priority_t prio);
+
+/**
+ * Unit test of the FIB graph walks.
+ *
+ * Builds one parent test node with N_TEST_CHILDREN children, then drives
+ * async and sync back-walks from the parent. After each step it checks how
+ * many times every child was notified (the length of its ctxs vector), the
+ * size of the high priority walk queue and the size of the parent's child
+ * list.
+ *
+ * Steps, in order: a single async walk; processing an empty queue; high vs
+ * low priority ordering; two walks that merge; two walks that do not merge;
+ * a walk advanced one child at a time with a zero quota; an async walk
+ * catching a parked async walk; a sync walk catching a parked async walk;
+ * walks of a graph containing a loop, with the children spawning sync and
+ * then async walks of their own.
+ *
+ * The test drains the async queues itself via fib_walk_process_queues.
+ */
+static void
+fib_test_walk (void)
+{
+    fib_node_back_walk_ctx_t high_ctx = {}, low_ctx = {};
+    fib_node_test_t *tc;
+    vlib_main_t *vm;
+    u32 ii;
+
+    vm = vlib_get_main();
+    fib_node_register_type(FIB_NODE_TYPE_TEST, &fib_test_child_vft);
+
+    /*
+     * init a fake node on which we will add children
+     */
+    fib_node_init(&fib_test_nodes[PARENT_INDEX].node,
+                  FIB_NODE_TYPE_TEST);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        fib_node_init(&tc->node, FIB_NODE_TYPE_TEST);
+        fib_node_lock(&tc->node);
+        tc->ctxs = NULL;
+        tc->index = ii;
+        tc->sibling = fib_node_child_add(FIB_NODE_TYPE_TEST,
+                                         PARENT_INDEX,
+                                         FIB_NODE_TYPE_TEST, ii);
+    }
+
+    /*
+     * enqueue a walk across the parent's children.
+     *
+     * NOTE(review): this step (and the zero quanta steps further down) sets
+     * the reason to FIB_NODE_BW_REASON_RESOLVE, whereas the priority and
+     * merge steps use FIB_NODE_BW_REASON_FLAG_RESOLVE for the same field.
+     * Confirm which of the two is intended here.
+     */
+    high_ctx.fnbw_reason = FIB_NODE_BW_REASON_RESOLVE;
+
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+    /*
+     * while the walk is queued the parent's child list is one longer
+     * than the number of test children.
+     */
+    FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children pre-walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * give the walk a large amount of time so it gets to the end
+     */
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        FIB_TEST(1 == vec_len(tc->ctxs),
+                 "%d child visitsed %d times",
+                 ii, vec_len(tc->ctxs));
+        vec_free(tc->ctxs);
+    }
+    FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is empty post walk");
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * walk again. should be no increase in the number of visits, since
+     * the walk will have terminated.
+     */
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        FIB_TEST(0 == vec_len(tc->ctxs),
+                 "%d child visitsed %d times",
+                 ii, vec_len(tc->ctxs));
+    }
+
+    /*
+     * schedule a low and high priority walk. expect the high to be performed
+     * before the low.
+     * schedule the high prio walk first so that it is further from the head
+     * of the dependency list. that way it won't merge with the low one.
+     */
+    high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+    low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
+
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_LOW, &low_ctx);
+
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason,
+                 "%d child visitsed by high prio walk", ii);
+        FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason,
+                 "%d child visitsed by low prio walk", ii);
+        vec_free(tc->ctxs);
+    }
+    FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is empty post prio walk");
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post prio walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * schedule 2 walks of the same priority that can be merged.
+     * expect that each child is thus visited only once.
+     */
+    high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+    low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &low_ctx);
+
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        FIB_TEST(1 == vec_len(tc->ctxs),
+                 "%d child visitsed %d times during merge walk",
+                 ii, vec_len(tc->ctxs));
+        vec_free(tc->ctxs);
+    }
+    FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is empty post merge walk");
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post merge walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * schedule 2 walks of the same priority that cannot be merged.
+     * expect that each child is thus visited twice and in the order
+     * in which the walks were scheduled.
+     */
+    high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
+    low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
+
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &low_ctx);
+
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason,
+                 "%d child visitsed by high prio walk", ii);
+        FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason,
+                 "%d child visitsed by low prio walk", ii);
+        vec_free(tc->ctxs);
+    }
+    FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is empty post no-merge walk");
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post no-merge walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * schedule a walk that makes one child's worth of progress.
+     * we do this by giving the queue draining process zero
+     * time quanta. it's a do..while loop, so it does something.
+     */
+    high_ctx.fnbw_reason = FIB_NODE_BW_REASON_RESOLVE;
+
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+    fib_walk_process_queues(vm, 0);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        if (ii == N_TEST_CHILDREN)
+        {
+            FIB_TEST(1 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in zero quanta walk",
+                     ii, vec_len(tc->ctxs));
+        }
+        else
+        {
+            FIB_TEST(0 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in 0 quanta walk",
+                     ii, vec_len(tc->ctxs));
+        }
+    }
+    FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is not empty post zero quanta walk");
+    FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post zero qunta walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * another one step
+     */
+    fib_walk_process_queues(vm, 0);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        if (ii >= N_TEST_CHILDREN-1)
+        {
+            FIB_TEST(1 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in 2nd zero quanta walk",
+                     ii, vec_len(tc->ctxs));
+        }
+        else
+        {
+            FIB_TEST(0 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in 2nd 0 quanta walk",
+                     ii, vec_len(tc->ctxs));
+        }
+    }
+    /*
+     * messages name the 2nd step so that a failure here can be told apart
+     * from a failure of the identical checks after the 1st step.
+     */
+    FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is not empty post 2nd zero quanta walk");
+    FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post 2nd zero quanta walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * schedule another walk that will catch-up and merge.
+     */
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        if (ii >= N_TEST_CHILDREN-1)
+        {
+            FIB_TEST(2 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in 2nd zero quanta merge walk",
+                     ii, vec_len(tc->ctxs));
+            vec_free(tc->ctxs);
+        }
+        else
+        {
+            FIB_TEST(1 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in 2nd 0 quanta merge walk",
+                     ii, vec_len(tc->ctxs));
+            vec_free(tc->ctxs);
+        }
+    }
+    /*
+     * the check is that the queue has drained, so the message says so.
+     */
+    FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is empty post 2nd zero quanta merge walk");
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post 2nd zero qunta merge walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * park an async walk in the middle of the list, then have a sync walk
+     * catch it. same expectations as async catches async.
+     */
+    high_ctx.fnbw_reason = FIB_NODE_BW_REASON_RESOLVE;
+
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+
+    fib_walk_process_queues(vm, 0);
+    fib_walk_process_queues(vm, 0);
+
+    fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        if (ii >= N_TEST_CHILDREN-1)
+        {
+            FIB_TEST(2 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in sync catches async walk",
+                     ii, vec_len(tc->ctxs));
+            vec_free(tc->ctxs);
+        }
+        else
+        {
+            FIB_TEST(1 == vec_len(tc->ctxs),
+                     "%d child visitsed %d times in sync catches async walk",
+                     ii, vec_len(tc->ctxs));
+            vec_free(tc->ctxs);
+        }
+    }
+    FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH),
+             "Queue is empty post sync catches async walk");
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post sync catches async walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * make the parent a child of one of its children, thus inducing a routing loop.
+     */
+    fib_test_nodes[PARENT_INDEX].sibling =
+        fib_node_child_add(FIB_NODE_TYPE_TEST,
+                           1, // the first child
+                           FIB_NODE_TYPE_TEST,
+                           PARENT_INDEX);
+
+    /*
+     * execute a sync walk from the parent. each child visited spawns more sync
+     * walks. we expect the walk to terminate.
+     */
+    fib_test_walk_spawns_walks = 1;
+
+    fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        /*
+         * child 1 - which is last in the list - has the loop.
+         * the other children are thus visited first. then we meet
+         * child 1. we go round the loop again, visiting the other children.
+         * then we meet the walk in the dep list and bail. child 1 is not
+         * visited again.
+         */
+        if (1 == ii)
+        {
+            FIB_TEST(1 == vec_len(tc->ctxs),
+                     "child %d visitsed %d times during looped sync walk",
+                     ii, vec_len(tc->ctxs));
+        }
+        else
+        {
+            FIB_TEST(2 == vec_len(tc->ctxs),
+                     "child %d visitsed %d times during looped sync walk",
+                     ii, vec_len(tc->ctxs));
+        }
+        vec_free(tc->ctxs);
+    }
+    FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children),
+             "Parent has %d children post sync loop walk",
+             fib_node_list_get_size(PARENT()->fn_children));
+
+    /*
+     * the walk doesn't reach the max depth because the infra knows that sync
+     * meets sync implies a loop and bails early.
+     */
+    FIB_TEST(high_ctx.fnbw_depth == 9,
+             "Walk context depth %d post sync loop walk",
+             high_ctx.fnbw_depth);
+
+    /*
+     * execute an async walk of the graph loop, with each child spawning
+     * sync walks
+     */
+    high_ctx.fnbw_depth = 0;
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        /*
+         * we don't really care how many times the children are visited,
+         * as long as it is at least once.
+         */
+        FIB_TEST(1 <= vec_len(tc->ctxs),
+                 "child %d visitsed %d times during looped aync spawns sync walk",
+                 ii, vec_len(tc->ctxs));
+        vec_free(tc->ctxs);
+    }
+
+    /*
+     * execute an async walk of the graph loop, with each child spawning
+     * async walks
+     */
+    fib_test_walk_spawns_walks = 2;
+    high_ctx.fnbw_depth = 0;
+    fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                   FIB_WALK_PRIORITY_HIGH, &high_ctx);
+
+    fib_walk_process_queues(vm, 1);
+
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        /*
+         * we don't really care how many times the children are visited,
+         * as long as it is at least once.
+         */
+        FIB_TEST(1 <= vec_len(tc->ctxs),
+                 "child %d visitsed %d times during looped async spawns async walk",
+                 ii, vec_len(tc->ctxs));
+        vec_free(tc->ctxs);
+    }
+
+
+    /*
+     * break the loop again: the parent stops being a child of child 1.
+     */
+    fib_node_child_remove(FIB_NODE_TYPE_TEST,
+                          1, // the first child
+                          fib_test_nodes[PARENT_INDEX].sibling);
+
+    /*
+     * cleanup
+     */
+    FOR_EACH_TEST_CHILD(tc)
+    {
+        fib_node_child_remove(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+                              tc->sibling);
+        fib_node_deinit(&tc->node);
+        fib_node_unlock(&tc->node);
+    }
+    fib_node_deinit(PARENT());
+
+    /*
+     * The parent will be destroyed when the last lock on it goes.
+     * this test ensures all the walk objects are unlocking it.
+     */
+    FIB_TEST((1 == fib_test_nodes[PARENT_INDEX].destroyed),
+             "Parent was destroyed");
+}
+
+/**
+ * Unit test of MPLS deaggregation entries in the label FIB.
+ *
+ * MPLS-enables the first test interface, then checks three entries by
+ * looking at the lookup DPO found in bucket 0 of each entry's contributed
+ * load-balance:
+ *  - the IPv6 explicit-null special entry,
+ *  - an EOS deag route added for deag_label,
+ *  - a non-EOS deag route added for deag_label.
+ * The two added routes are deleted again and checked to be gone. Finally
+ * the interface is MPLS-disabled and the load-balance pool size is compared
+ * with its size at entry.
+ */
+static void
+lfib_test_deagg (void)
+{
+    const mpls_label_t deag_label = 50;
+    const u32 lfib_index = 0;
+    const u32 fib_index = 0;
+    dpo_id_t dpo = DPO_NULL;
+    const dpo_id_t *dpo1;
+    fib_node_index_t lfe;
+    lookup_dpo_t *lkd;
+    test_main_t *tm;
+    int lb_count;
+
+    tm = &test_main;
+    lb_count = pool_elts(load_balance_pool);
+
+    FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
+             adj_nbr_db_size());
+
+    /*
+     * MPLS enable an interface so we get the MPLS table created
+     */
+    mpls_sw_interface_enable_disable(&mpls_main,
+                                     tm->hw[0]->sw_if_index,
+                                     1);
+
+    /*
+     * Test the specials stack properly.
+     * The messages below print the explicit-null label, which is the
+     * label of the entry being checked in this step.
+     */
+    fib_prefix_t exp_null_v6_pfx = {
+        .fp_proto = FIB_PROTOCOL_MPLS,
+        .fp_eos = MPLS_EOS,
+        .fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+        .fp_payload_proto = DPO_PROTO_IP6,
+    };
+    lfe = fib_table_lookup(lfib_index, &exp_null_v6_pfx);
+    FIB_TEST((FIB_NODE_INDEX_INVALID != lfe),
+             "%U/%U present",
+             format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+             format_mpls_eos_bit, MPLS_EOS);
+    fib_entry_contribute_forwarding(lfe,
+                                    FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                                    &dpo);
+    dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0);
+    lkd = lookup_dpo_get(dpo1->dpoi_index);
+
+    FIB_TEST((fib_index == lkd->lkd_fib_index),
+             "%U/%U is deag in %d %U",
+             format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+             format_mpls_eos_bit, MPLS_EOS,
+             lkd->lkd_fib_index,
+             format_dpo_id, &dpo, 0);
+    FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input),
+             "%U/%U is dst deag",
+             format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+             format_mpls_eos_bit, MPLS_EOS);
+    FIB_TEST((LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table),
+             "%U/%U is lookup in interface's table",
+             format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+             format_mpls_eos_bit, MPLS_EOS);
+    FIB_TEST((DPO_PROTO_IP6 == lkd->lkd_proto),
+             "%U/%U is %U dst deag",
+             format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+             format_mpls_eos_bit, MPLS_EOS,
+             format_dpo_proto, lkd->lkd_proto);
+
+
+    /*
+     * A deag route for EOS
+     */
+    fib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_MPLS,
+        .fp_eos = MPLS_EOS,
+        .fp_label = deag_label,
+        .fp_payload_proto = DPO_PROTO_IP4,
+    };
+    lfe = fib_table_entry_path_add(lfib_index,
+                                   &pfx,
+                                   FIB_SOURCE_CLI,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   FIB_PROTOCOL_IP4,
+                                   &zero_addr,
+                                   ~0,
+                                   fib_index,
+                                   1,
+                                   MPLS_LABEL_INVALID,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+
+    FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)),
+             "%U/%U present",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_EOS);
+
+    fib_entry_contribute_forwarding(lfe,
+                                    FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                                    &dpo);
+    dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0);
+    lkd = lookup_dpo_get(dpo1->dpoi_index);
+
+    FIB_TEST((fib_index == lkd->lkd_fib_index),
+             "%U/%U is deag in %d %U",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_EOS,
+             lkd->lkd_fib_index,
+             format_dpo_id, &dpo, 0);
+    FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input),
+             "%U/%U is dst deag",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_EOS);
+    FIB_TEST((DPO_PROTO_IP4 == lkd->lkd_proto),
+             "%U/%U is %U dst deag",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_EOS,
+             format_dpo_proto, lkd->lkd_proto);
+
+    fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI);
+
+    FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index,
+                                                         &pfx)),
+             "%U/%U not present",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_EOS);
+
+    /*
+     * A deag route for non-EOS
+     */
+    pfx.fp_eos = MPLS_NON_EOS;
+    lfe = fib_table_entry_path_add(lfib_index,
+                                   &pfx,
+                                   FIB_SOURCE_CLI,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   FIB_PROTOCOL_IP4,
+                                   &zero_addr,
+                                   ~0,
+                                   lfib_index,
+                                   1,
+                                   MPLS_LABEL_INVALID,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+
+    FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)),
+             "%U/%U present",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_NON_EOS);
+
+    fib_entry_contribute_forwarding(lfe,
+                                    FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+                                    &dpo);
+    dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0);
+    lkd = lookup_dpo_get(dpo1->dpoi_index);
+
+    FIB_TEST((fib_index == lkd->lkd_fib_index),
+             "%U/%U is deag in %d %U",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_NON_EOS,
+             lkd->lkd_fib_index,
+             format_dpo_id, &dpo, 0);
+    FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input),
+             "%U/%U is dst deag",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_NON_EOS);
+
+    FIB_TEST((DPO_PROTO_MPLS == lkd->lkd_proto),
+             "%U/%U is %U dst deag",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_NON_EOS,
+             format_dpo_proto, lkd->lkd_proto);
+
+    fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI);
+
+    /*
+     * pfx is the non-EOS prefix at this point, so report it as such.
+     */
+    FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index,
+                                                         &pfx)),
+             "%U/%U not present",
+             format_mpls_unicast_label, deag_label,
+             format_mpls_eos_bit, MPLS_NON_EOS);
+
+
+    mpls_sw_interface_enable_disable(&mpls_main,
+                                     tm->hw[0]->sw_if_index,
+                                     0);
+
+    dpo_reset(&dpo);
+    /*
+     * +1 for the drop LB in the MPLS tables.
+     */
+    FIB_TEST(lb_count+1 == pool_elts(load_balance_pool),
+             "Load-balance resources freed %d of %d",
+             lb_count+1, pool_elts(load_balance_pool));
+}
+
+/**
+ * CLI handler for the label FIB unit tests: create the test interfaces
+ * and run the deaggregation test. Always returns NULL; none of the
+ * arguments is examined.
+ */
+static clib_error_t *
+lfib_test (vlib_main_t * vm,
+           unformat_input_t * input,
+           vlib_cli_command_t * cmd_arg)
+{
+    fib_test_mk_intf(4);
+    lfib_test_deagg();
+
+    return NULL;
+}
+
+/**
+ * CLI handler for the FIB unit tests.
+ *
+ * Creates the test interfaces, then runs the group of tests selected by
+ * the first keyword of the input ("ip", "gre", "label", "ae" or "walk").
+ * With no recognised keyword every group except the walk tests is run.
+ * Always returns NULL.
+ */
+static clib_error_t *
+fib_test (vlib_main_t * vm,
+          unformat_input_t * input,
+          vlib_cli_command_t * cmd_arg)
+{
+    fib_test_mk_intf(4);
+
+    if (unformat (input, "ip"))
+    {
+        fib_test_v4();
+        fib_test_v6();
+        return (NULL);
+    }
+    if (unformat (input, "gre"))
+    {
+        fib_test_gre();
+        return (NULL);
+    }
+    if (unformat (input, "label"))
+    {
+        fib_test_label();
+        return (NULL);
+    }
+    if (unformat (input, "ae"))
+    {
+        fib_test_ae();
+        return (NULL);
+    }
+    if (unformat (input, "walk"))
+    {
+        fib_test_walk();
+        return (NULL);
+    }
+
+    /*
+     * No keyword given: run the full suite.
+     *
+     * The walk UT aren't run as part of the full suite, since the
+     * fib-walk process must be disabled in order for the tests to work
+     *
+     * fib_test_walk();
+     */
+    fib_test_v4();
+    fib_test_v6();
+    fib_test_gre();
+    fib_test_ae();
+    fib_test_label();
+
+    return (NULL);
+}
+
+VLIB_CLI_COMMAND (test_fib_command, static) = { /* "test fib" CLI command; handled by fib_test */
+ .path = "test fib",
+ .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM",
+ .function = fib_test,
+};
+
+VLIB_CLI_COMMAND (test_lfib_command, static) = { /* "test lfib" CLI command; handled by lfib_test */
+ .path = "test lfib",
+ .short_help = "mpls label fib unit tests - DO NOT RUN ON A LIVE SYSTEM",
+ .function = lfib_test,
+};
+
+/**
+ * Init function of the FIB test module. There is nothing to set up;
+ * it reports success by returning a null error.
+ */
+clib_error_t *
+fib_test_init (vlib_main_t *vm)
+{
+    return (NULL);
+}
+
+VLIB_INIT_FUNCTION (fib_test_init);
diff --git a/vnet/vnet/fib/fib_types.c b/vnet/vnet/fib/fib_types.c
new file mode 100644
index 00000000000..bf76c5536e6
--- /dev/null
+++ b/vnet/vnet/fib/fib_types.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/fib/fib_types.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * arrays of protocol, link and forwarding-chain names.
+ * Each is initialised from the matching FIB_PROTOCOLS / FIB_LINKS /
+ * FIB_FORW_CHAINS macro and is indexed with the enum value by the
+ * format functions in this file.
+ */
+static const char* fib_protocol_names[] = FIB_PROTOCOLS;
+static const char* fib_link_names[] = FIB_LINKS;
+static const char* fib_forw_chain_names[] = FIB_FORW_CHAINS;
+
+/**
+ * Format function: append the name of a FIB protocol to the vector s.
+ * The protocol is read from the argument list as an int and used, without
+ * a range check, as the index into fib_protocol_names.
+ *
+ * NOTE(review): this function takes the va_list by value, whereas
+ * format_fib_forw_chain_type in this file takes a pointer to it. Confirm
+ * which form the format code passes to its user functions.
+ */
+u8 *
+format_fib_protocol (u8 * s, va_list ap)
+{
+    fib_protocol_t proto;
+
+    /* the enum is promoted to int when passed through varargs */
+    proto = va_arg(ap, int);
+    s = format (s, "%s", fib_protocol_names[proto]);
+
+    return (s);
+}
+
+/**
+ * Format function: append the name of a FIB link type to the vector s.
+ * The link type is read from the argument list as an int and used, without
+ * a range check, as the index into fib_link_names.
+ *
+ * NOTE(review): this function takes the va_list by value, whereas
+ * format_fib_forw_chain_type in this file takes a pointer to it. Confirm
+ * which form the format code passes to its user functions.
+ */
+u8 *
+format_fib_link (u8 * s, va_list ap)
+{
+    fib_link_t link;
+
+    /* the enum is promoted to int when passed through varargs */
+    link = va_arg(ap, int);
+    s = format (s, "%s", fib_link_names[link]);
+
+    return (s);
+}
+
+/**
+ * Format function: append the name of a forwarding chain type to the
+ * vector s. The chain type is read from the argument list as an int and
+ * used, without a range check, as the index into fib_forw_chain_names.
+ */
+u8 *
+format_fib_forw_chain_type (u8 * s, va_list * args)
+{
+    fib_forward_chain_type_t chain_type;
+
+    chain_type = va_arg(*args, int);
+    s = format (s, "%s", fib_forw_chain_names[chain_type]);
+
+    return (s);
+}
+
+/**
+ * Fill in pfx as the host prefix of addr: protocol IP4 with length 32 when
+ * addr is an IPv4 address, protocol IP6 with length 128 otherwise. The
+ * address is copied as is. An all-zero address is asserted against.
+ */
+void
+fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+                           fib_prefix_t *pfx)
+{
+    ASSERT(!ip46_address_is_zero(addr));
+
+    if (ip46_address_is_ip4(addr))
+    {
+        pfx->fp_proto = FIB_PROTOCOL_IP4;
+        pfx->fp_len = 32;
+    }
+    else
+    {
+        pfx->fp_proto = FIB_PROTOCOL_IP6;
+        pfx->fp_len = 128;
+    }
+    pfx->fp_addr = *addr;
+}
+
+/**
+ * Compare two prefixes.
+ *
+ * The protocols are compared first. For equal protocols, IP prefixes are
+ * compared by length and then by address; MPLS prefixes by label and then
+ * by EOS bit.
+ *
+ * @return 0 when the compared fields are all equal, otherwise the non-zero
+ *         result of the first comparison that differed.
+ */
+int
+fib_prefix_cmp (const fib_prefix_t *p1,
+                const fib_prefix_t *p2)
+{
+    int res;
+
+    res = (p1->fp_proto - p2->fp_proto);
+
+    if (0 != res)
+    {
+        return (res);
+    }
+
+    switch (p1->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+    case FIB_PROTOCOL_IP6:
+        res = (p1->fp_len - p2->fp_len);
+
+        if (0 != res)
+        {
+            break;
+        }
+        res = ip46_address_cmp(&p1->fp_addr, &p2->fp_addr);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        res = (p1->fp_label - p2->fp_label);
+
+        if (0 != res)
+        {
+            break;
+        }
+        res = (p1->fp_eos - p2->fp_eos);
+        break;
+    }
+
+    return (res);
+}
+
+/**
+ * Test whether p1 covers p2.
+ *
+ * For IP prefixes the answer is that of the per-protocol
+ * ipX_destination_matches_route function, called with p1's address, p2's
+ * address and p1's length. The protocol is taken from p1 only. For MPLS
+ * the answer is always 0.
+ */
+int
+fib_prefix_is_cover (const fib_prefix_t *p1,
+                     const fib_prefix_t *p2)
+{
+    int covers = 0;
+
+    switch (p1->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        covers = ip4_destination_matches_route(&ip4_main,
+                                               &p1->fp_addr.ip4,
+                                               &p2->fp_addr.ip4,
+                                               p1->fp_len);
+        break;
+    case FIB_PROTOCOL_IP6:
+        covers = ip6_destination_matches_route(&ip6_main,
+                                               &p1->fp_addr.ip6,
+                                               &p2->fp_addr.ip6,
+                                               p1->fp_len);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        break;
+    }
+
+    return (covers);
+}
+
+/**
+ * Test whether a prefix is a host prefix: length 32 for IP4, length 128
+ * for IP6. An MPLS prefix always counts as a host prefix.
+ *
+ * @return non-zero for a host prefix, 0 otherwise (including for a
+ *         protocol value not handled by the switch).
+ */
+int
+fib_prefix_is_host (const fib_prefix_t *prefix)
+{
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_MPLS:
+        return (1);
+    case FIB_PROTOCOL_IP6:
+        return (128 == prefix->fp_len);
+    case FIB_PROTOCOL_IP4:
+        return (32 == prefix->fp_len);
+    }
+    return (0);
+}
+
+/**
+ * Format function: append a prefix, given as a fib_prefix_t pointer, to
+ * the vector s.
+ *
+ * IP addresses are masked with the FIB mask for the prefix length before
+ * being printed; MPLS prefixes print as label:eos-bit. In every case
+ * "/<length>" is appended.
+ */
+u8 *
+format_fib_prefix (u8 * s, va_list * args)
+{
+    fib_prefix_t *pfx = va_arg (*args, fib_prefix_t *);
+
+    /*
+     * handled per protocol so that ::/0 is printed correctly.
+     */
+    switch (pfx->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+    {
+        ip4_address_t masked4 = pfx->fp_addr.ip4;
+
+        masked4.as_u32 &= ip4_main.fib_masks[pfx->fp_len];
+        s = format (s, "%U", format_ip4_address, &masked4);
+        break;
+    }
+    case FIB_PROTOCOL_IP6:
+    {
+        ip6_address_t masked6 = pfx->fp_addr.ip6;
+
+        ip6_address_mask(&masked6, &(ip6_main.fib_masks[pfx->fp_len]));
+        s = format (s, "%U", format_ip6_address, &masked6);
+        break;
+    }
+    case FIB_PROTOCOL_MPLS:
+        s = format (s, "%U:%U",
+                    format_mpls_unicast_label, pfx->fp_label,
+                    format_mpls_eos_bit, pfx->fp_eos);
+        break;
+    }
+
+    return (format (s, "/%d", pfx->fp_len));
+}
+
+/**
+ * Compare two route paths.
+ *
+ * The next-hop addresses are compared first, then the interfaces. The FIB
+ * index is compared only when both of those are equal and the address of
+ * rpath1 is all-zero.
+ *
+ * @return 0 when the paths compare equal, otherwise the non-zero result of
+ *         the first comparison that differed.
+ */
+int
+fib_route_path_cmp (const fib_route_path_t *rpath1,
+                    const fib_route_path_t *rpath2)
+{
+    int res = ip46_address_cmp(&rpath1->frp_addr,
+                               &rpath2->frp_addr);
+
+    if (0 == res)
+    {
+        res = vnet_sw_interface_compare(vnet_get_main(),
+                                        rpath1->frp_sw_if_index,
+                                        rpath2->frp_sw_if_index);
+    }
+    if ((0 == res) && ip46_address_is_zero(&rpath1->frp_addr))
+    {
+        res = rpath1->frp_fib_index - rpath2->frp_fib_index;
+    }
+
+    return (res);
+}
+
+/**
+ * Convert a FIB protocol to the matching DPO protocol.
+ * A value outside the enum trips the ASSERT; 0 is returned after it.
+ */
+dpo_proto_t
+fib_proto_to_dpo (fib_protocol_t fib_proto)
+{
+    /* no default case: every enum value is listed explicitly */
+    switch (fib_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        return (DPO_PROTO_IP4);
+    case FIB_PROTOCOL_IP6:
+        return (DPO_PROTO_IP6);
+    case FIB_PROTOCOL_MPLS:
+        return (DPO_PROTO_MPLS);
+    }
+
+    ASSERT(0);
+    return (0);
+}
+
+/**
+ * Convert a DPO protocol to the matching FIB protocol.
+ * A value outside the enum trips the ASSERT; 0 is returned after it.
+ */
+fib_protocol_t
+dpo_proto_to_fib (dpo_proto_t dpo_proto)
+{
+    /* no default case: every enum value is listed explicitly */
+    switch (dpo_proto)
+    {
+    case DPO_PROTO_IP4:
+        return (FIB_PROTOCOL_IP4);
+    case DPO_PROTO_IP6:
+        return (FIB_PROTOCOL_IP6);
+    case DPO_PROTO_MPLS:
+        return (FIB_PROTOCOL_MPLS);
+    }
+
+    ASSERT(0);
+    return (0);
+}
+
+/**
+ * Convert a FIB protocol to the link type of the same family.
+ * A value outside the enum trips the ASSERT; 0 is returned after it.
+ */
+fib_link_t
+fib_proto_to_link (fib_protocol_t proto)
+{
+    /* no default case: every enum value is listed explicitly */
+    switch (proto)
+    {
+    case FIB_PROTOCOL_MPLS:
+        return (FIB_LINK_MPLS);
+    case FIB_PROTOCOL_IP6:
+        return (FIB_LINK_IP6);
+    case FIB_PROTOCOL_IP4:
+        return (FIB_LINK_IP4);
+    }
+
+    ASSERT(0);
+    return (0);
+}
+
+/**
+ * Convert a FIB protocol to a forwarding chain type: the unicast chain
+ * for IP4 and IP6, the non-EOS chain for MPLS.
+ * A value outside the enum trips the ASSERT; the IP4 unicast chain type
+ * is returned after it.
+ */
+fib_forward_chain_type_t
+fib_proto_to_forw_chain_type (fib_protocol_t proto)
+{
+    /* no default case: every enum value is listed explicitly */
+    switch (proto)
+    {
+    case FIB_PROTOCOL_MPLS:
+        return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+    case FIB_PROTOCOL_IP6:
+        return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+    case FIB_PROTOCOL_IP4:
+        return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+    }
+
+    ASSERT(0);
+    return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+/**
+ * Convert a forwarding chain type to a link type.
+ *
+ * The MPLS EOS chain cannot be converted: it trips the ASSERT and
+ * FIB_LINK_IP4 is returned after it, as it is for any value not handled
+ * by the switch.
+ */
+fib_link_t
+fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct)
+{
+    switch (fct)
+    {
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+        return (FIB_LINK_IP4);
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+        return (FIB_LINK_IP6);
+    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+        return (FIB_LINK_MPLS);
+    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+        /*
+         * insufficient information to convert
+         */
+        ASSERT(0);
+        break;
+    }
+
+    return (FIB_LINK_IP4);
+}
+
+/**
+ * Convert a forwarding chain type to a DPO protocol.
+ *
+ * The MPLS EOS chain cannot be converted: it trips the ASSERT and
+ * DPO_PROTO_IP4 is returned after it, as it is for any value not handled
+ * by the switch.
+ */
+dpo_proto_t
+fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct)
+{
+    switch (fct)
+    {
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+        return (DPO_PROTO_IP4);
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+        return (DPO_PROTO_IP6);
+    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+        /*
+         * insufficient information to convert
+         */
+        ASSERT(0);
+        break;
+    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+        return (DPO_PROTO_MPLS);
+    }
+    /*
+     * the fall-back is a dpo_proto_t constant, matching the return type,
+     * not a fib_link_t one.
+     */
+    return (DPO_PROTO_IP4);
+}
diff --git a/vnet/vnet/fib/fib_types.h b/vnet/vnet/fib/fib_types.h
new file mode 100644
index 00000000000..4ebd68d1450
--- /dev/null
+++ b/vnet/vnet/fib/fib_types.h
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_TYPES_H__
+#define __FIB_TYPES_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A typedef of a node index.
+ * We make this typedef so the code becomes easier for a human to parse.
+ */
+typedef u32 fib_node_index_t;
+/**
+ * All-ones sentinel: an index that does not refer to any node.
+ */
+#define FIB_NODE_INDEX_INVALID ((fib_node_index_t)(~0))
+
+/**
+ * Protocol Type. packed so it consumes a u8 only.
+ * In debug images (CLIB_DEBUG > 0) the first enumerator is 1 rather than 0.
+ * NOTE(review): presumably so that a zeroed, never-set protocol is not
+ * mistaken for IP4 in debug images - confirm.
+ */
+typedef enum fib_protocol_t_ {
+#if CLIB_DEBUG > 0
+ FIB_PROTOCOL_IP4 = 1,
+#else
+ FIB_PROTOCOL_IP4 = 0,
+#endif
+ FIB_PROTOCOL_IP6,
+ FIB_PROTOCOL_MPLS,
+} __attribute__ ((packed)) fib_protocol_t;
+
+/**
+ * Initialiser for an array of protocol names indexed by fib_protocol_t.
+ */
+#define FIB_PROTOCOLS { \
+ [FIB_PROTOCOL_IP4] = "ipv4", \
+ [FIB_PROTOCOL_IP6] = "ipv6", \
+ [FIB_PROTOCOL_MPLS] = "MPLS", \
+}
+
+/**
+ * Definition outside of enum so it does not need to be included in non-defaulted
+ * switch statements
+ */
+#define FIB_PROTOCOL_MAX (FIB_PROTOCOL_MPLS + 1)
+
+/**
+ * Not part of the enum so it does not have to be handled in switch statements.
+ * Note the value is one greater than FIB_PROTOCOL_MAX.
+ */
+#define FIB_PROTOCOL_NONE (FIB_PROTOCOL_MAX+1)
+
+/**
+ * Link Type. This maps directly into the ethertype.
+ * In debug images (CLIB_DEBUG > 0) the first enumerator is 1 rather than 0.
+ */
+typedef enum fib_link_t_ {
+#if CLIB_DEBUG > 0
+ FIB_LINK_IP4 = 1,
+#else
+ FIB_LINK_IP4 = 0,
+#endif
+ FIB_LINK_IP6,
+ FIB_LINK_MPLS,
+} __attribute__ ((packed)) fib_link_t;
+
+/**
+ * Definition outside of enum so it does not need to be included in non-defaulted
+ * switch statements
+ */
+#define FIB_LINK_NUM (FIB_LINK_MPLS+1)
+
+/**
+ * Initialiser for an array of link type names indexed by fib_link_t.
+ */
+#define FIB_LINKS { \
+ [FIB_LINK_IP4] = "ipv4", \
+ [FIB_LINK_IP6] = "ipv6", \
+ [FIB_LINK_MPLS] = "mpls", \
+}
+
+/**
+ * Iterate _item over every link type, FIB_LINK_IP4 to FIB_LINK_MPLS inclusive.
+ */
+#define FOR_EACH_FIB_LINK(_item) \
+ for (_item = FIB_LINK_IP4; \
+ _item <= FIB_LINK_MPLS; \
+ _item++)
+
+/**
+ * Iterate _item over the IP link types only (FIB_LINK_IP4 and FIB_LINK_IP6).
+ */
+#define FOR_EACH_FIB_IP_LINK(_item) \
+ for (_item = FIB_LINK_IP4; \
+ _item <= FIB_LINK_IP6; \
+ _item++)
+
+/**
+ * @brief Convert from a protocol to a link type
+ */
+fib_link_t fib_proto_to_link (fib_protocol_t proto);
+
+/**
+ * FIB output chain type. When a child object requests a forwarding contribution
+ * from a parent, it does so for a particular scenario. This enumerates those
+ * scenarios.
+ */
+typedef enum fib_forward_chain_type_t_ {
+ /**
+ * Contribute an object that is to be used to forward IP4 packets
+ */
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+ /**
+ * Contribute an object that is to be used to forward IP6 packets
+ */
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP6,
+ /**
+ * Contribute an object that is to be used to forward non-end-of-stack
+ * MPLS packets
+ */
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ /**
+ * Contribute an object that is to be used to forward end-of-stack
+ * MPLS packets. This is a convenient ID for clients. A real EOS chain
+ * must be payload protocol specific. This
+ * option is converted into one of the other three internally.
+ */
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+} __attribute__ ((packed)) fib_forward_chain_type_t;
+
+/**
+ * Initialiser for an array of chain type names indexed by
+ * fib_forward_chain_type_t.
+ */
+#define FIB_FORW_CHAINS { \
+ [FIB_FORW_CHAIN_TYPE_UNICAST_IP4] = "unicast-ip4", \
+ [FIB_FORW_CHAIN_TYPE_UNICAST_IP6] = "unicast-ip6", \
+ [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS] = "mpls-neos", \
+ [FIB_FORW_CHAIN_TYPE_MPLS_EOS] = "mpls-eos", \
+}
+
+/**
+ * The number of chain types. Outside of the enum so it does not need to be
+ * handled in switch statements.
+ */
+#define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_MPLS_EOS+1)
+
+/**
+ * Iterate _item over every chain type, first to last inclusive.
+ */
+#define FOR_EACH_FIB_FORW_CHAIN(_item) \
+ for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \
+ _item <= FIB_FORW_CHAIN_TYPE_MPLS_EOS; \
+ _item++)
+
+/**
+ * @brief Convert from a chain type to the adjacencies link type
+ */
+extern fib_link_t fib_forw_chain_type_to_link_type(fib_forward_chain_type_t fct);
+
+/**
+ * @brief Convert from a payload-protocol to a chain type.
+ */
+extern fib_forward_chain_type_t fib_proto_to_forw_chain_type(fib_protocol_t proto);
+
+/**
+ * @brief Convert from a chain type to the DPO proto it will install
+ */
+extern dpo_proto_t fib_forw_chain_type_to_dpo_proto(fib_forward_chain_type_t fct);
+
+/**
+ * Aggregate type for a prefix
+ */
+typedef struct fib_prefix_t_ {
+ /**
+ * The mask length
+ */
+ u16 fp_len;
+
+ /**
+ * protocol type
+ */
+ fib_protocol_t fp_proto;
+
+ /**
+ * Pad to keep the address 4 byte aligned
+ */
+ u8 ___fp___pad;
+
+ union {
+ /**
+ * The address type is not derivable from the fp_addr member alone,
+ * which is why fp_proto is carried as well.
+ * If it's v4, then the first 3 u32s of the address will be 0.
+ * v6 addresses (even v4 mapped ones) have at least 2 u32s assigned
+ * to non-zero values. However, when the address is all zeros one
+ * cannot decide.
+ */
+ ip46_address_t fp_addr;
+
+ /**
+ * The members of an MPLS prefix; they share storage with fp_addr.
+ */
+ struct {
+ mpls_label_t fp_label;
+ mpls_eos_bit_t fp_eos;
+ /**
+ * This protocol determines the payload protocol of packets
+ * that will be forwarded by this entry once the label is popped.
+ * For a non-eos entry it will be MPLS.
+ */
+ dpo_proto_t fp_payload_proto;
+ };
+ };
+} fib_prefix_t;
+
+/*
+ * compile time check that the members ahead of the union occupy exactly
+ * 4 bytes.
+ */
+_Static_assert(STRUCT_OFFSET_OF(fib_prefix_t, fp_addr) == 4,
+ "FIB Prefix's address is 4 byte aligned.");
+
+/**
+ * \brief Compare two prefixes for equality
+ */
+extern int fib_prefix_cmp(const fib_prefix_t *p1,
+ const fib_prefix_t *p2);
+
+/**
+ * \brief Compare two prefixes for covering relationship
+ *
+ * \return non-zero if the first prefix is a cover for the second
+ */
+extern int fib_prefix_is_cover(const fib_prefix_t *p1,
+ const fib_prefix_t *p2);
+
+/**
+ * \brief Return true if the prefix is a host prefix
+ */
+extern int fib_prefix_is_host(const fib_prefix_t *p);
+
+extern u8 * format_fib_prefix(u8 * s, va_list * args);
+extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args);
+
+extern dpo_proto_t fib_proto_to_dpo(fib_protocol_t fib_proto);
+extern fib_protocol_t dpo_proto_to_fib(dpo_proto_t dpo_proto);
+
+/**
+ * Enumeration of special path/entry types
+ */
+typedef enum fib_special_type_t_ {
+ /**
+ * Marker. Add new types after this one.
+ */
+ FIB_SPECIAL_TYPE_FIRST = 0,
+ /**
+ * Local/for-us paths
+ */
+ FIB_SPECIAL_TYPE_LOCAL = FIB_SPECIAL_TYPE_FIRST,
+ /**
+ * drop paths
+ */
+ FIB_SPECIAL_TYPE_DROP,
+ /**
+ * Marker. Add new types before this one, then update it.
+ */
+ FIB_SPECIAL_TYPE_LAST = FIB_SPECIAL_TYPE_DROP,
+} __attribute__ ((packed)) fib_special_type_t;
+
+/**
+ * The maximum number of types.
+ * The macro name keeps its historical spelling; the expansion refers to
+ * the enumerators as they are actually spelt in fib_special_type_t.
+ */
+#define FIB_SPEICAL_TYPE_MAX (FIB_SPECIAL_TYPE_LAST + 1)
+
+/**
+ * Iterate _item over every special type, FIB_SPECIAL_TYPE_FIRST to
+ * FIB_SPECIAL_TYPE_LAST inclusive.
+ */
+#define FOR_EACH_FIB_SPEICAL_TYPE(_item) \
+    for (_item = FIB_SPECIAL_TYPE_FIRST; \
+         _item <= FIB_SPECIAL_TYPE_LAST; _item++)
+
+extern u8 * format_fib_protocol(u8 * s, va_list ap);
+extern u8 * format_fib_link(u8 *s, va_list ap);
+
+/**
+ * Path flags from the control plane. Each flag is a distinct bit.
+ */
+typedef enum fib_route_path_flags_t_
+{
+ FIB_ROUTE_PATH_FLAG_NONE = 0,
+ /**
+ * Recursion constraint of via a host prefix
+ */
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST = (1 << 0),
+ /**
+ * Recursion constraint of via an attached prefix
+ */
+ FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED = (1 << 1),
+} fib_route_path_flags_t;
+
+/**
+ * @brief
+ * A representation of a path as described by a route producer.
+ * These parameters will determine the path 'type', of which there are:
+ * 1) Attached-next-hop:
+ * a single peer on a link.
+ * It is 'attached' because it is in the same sub-net as the router, on a link
+ * directly connected to the router.
+ * It is 'next-hop' since the next-hop address of the peer is known.
+ * 2) Attached:
+ * the next-hop is not known, but we can ARP for it.
+ * 3) Recursive:
+ * The next-hop is known but the interface is not. So to find the adj to use
+ * we must recursively resolve the next-hop.
+ * 4) Deaggregate (deag):
+ * A further lookup is required.
+ */
+typedef struct fib_route_path_t_ {
+ /**
+ * The protocol of the address below. We need this since the all
+ * zeros address is ambiguous.
+ */
+ fib_protocol_t frp_proto;
+ /**
+ * The next-hop address.
+ * Will be NULL for attached paths.
+ * Will be all zeros for attached-next-hop paths on a p2p interface
+ * Will be all zeros for a deag path.
+ */
+ ip46_address_t frp_addr;
+ /**
+ * The interface.
+ * Will be invalid for recursive paths.
+ */
+ u32 frp_sw_if_index;
+ /**
+ * The FIB index to lookup the nexthop
+ * Only valid for recursive paths.
+ */
+ u32 frp_fib_index;
+ /**
+ * [un]equal cost path weight
+ */
+ u32 frp_weight;
+ /**
+ * flags on the path
+ */
+ fib_route_path_flags_t frp_flags;
+ /**
+ * The outgoing MPLS label. INVALID implies no label.
+ */
+ mpls_label_t frp_label;
+} fib_route_path_t;
+
+#endif
diff --git a/vnet/vnet/fib/fib_walk.c b/vnet/vnet/fib/fib_walk.c
new file mode 100644
index 00000000000..79e3ad0b242
--- /dev/null
+++ b/vnet/vnet/fib/fib_walk.c
@@ -0,0 +1,775 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_node_list.h>
+
+/**
+ * The flags on a walk. Each flag is a distinct bit.
+ */
+typedef enum fib_walk_flags_t_
+{
+ /**
+ * A synchronous walk.
+ * This walk will run to completion, i.e. visit ALL the children.
+ * It is a depth first traversal of the graph.
+ */
+ FIB_WALK_FLAG_SYNC = (1 << 0),
+ /**
+ * An asynchronous walk.
+ * This walk will be scheduled to run in the background. It will thus visit
+ * the children at a later point in time.
+ * It is a depth first traversal of the graph.
+ */
+ FIB_WALK_FLAG_ASYNC = (1 << 1),
+ /**
+ * An indication that the walk is currently executing.
+ */
+ FIB_WALK_FLAG_EXECUTING = (1 << 2),
+} fib_walk_flags_t;
+
+/**
+ * A representation of a graph walk from a parent object to its children
+ */
+typedef struct fib_walk_t_
+{
+ /**
+ * FIB node linkage. This object is not in the FIB object graph,
+ * but it is present in other nodes' dependency lists, so it needs to
+ * be pointerable to.
+ */
+ fib_node_t fw_node;
+
+ /**
+ * the walk's flags
+ */
+ fib_walk_flags_t fw_flags;
+
+ /**
+ * Sibling index in the dependency list
+ */
+ u32 fw_dep_sibling;
+
+ /**
+ * Sibling index in the list of all walks
+ */
+ u32 fw_prio_sibling;
+
+ /**
+ * Pointer to the node whose dependants this walk is walking
+ */
+ fib_node_ptr_t fw_parent;
+
+ /**
+ * Number of nodes visited by this walk. saved for debugging purposes.
+ */
+ u32 fw_n_visits;
+
+ /**
+ * The reasons this walk is occurring.
+ * This is a vector ordered in time. The reasons at the front were started
+ * first, and so should be acted on first when a node is visited.
+ */
+ fib_node_back_walk_ctx_t *fw_ctx;
+} fib_walk_t;
+
+/**
+ * @brief The pool of all walk objects.
+ * Walks are referred to by their index in this pool.
+ */
+static fib_walk_t *fib_walk_pool;
+
+/**
+ * @brief There's only one event type sent to the walk process
+ */
+#define FIB_WALK_EVENT 0
+
+/**
+ * Statistics maintained per-walk queue
+ */
+typedef enum fib_walk_queue_stats_t_
+{
+ /* counts walks placed on the queue */
+ FIB_WALK_SCHEDULED,
+ /* counts walks the walk process has finished with and destroyed */
+ FIB_WALK_COMPLETED,
+} fib_walk_queue_stats_t;
+/* the number of statistics; outside of the enum */
+#define FIB_WALK_QUEUE_STATS_NUM (FIB_WALK_COMPLETED+1)
+
+/* initialiser for an array of names indexed by fib_walk_queue_stats_t */
+#define FIB_WALK_QUEUE_STATS { \
+ [FIB_WALK_SCHEDULED] = "scheduled", \
+ [FIB_WALK_COMPLETED] = "completed", \
+}
+
+/* iterate _wqs over every statistic */
+#define FOR_EACH_FIB_WALK_QUEUE_STATS(_wqs) \
+ for ((_wqs) = FIB_WALK_SCHEDULED; \
+ (_wqs) < FIB_WALK_QUEUE_STATS_NUM; \
+ (_wqs)++)
+
+/**
+ * The names of the walk stats
+ */
+static const char * const fib_walk_queue_stats_names[] = FIB_WALK_QUEUE_STATS;
+
+/**
+ * A representation of one queue of walks
+ */
+typedef struct fib_walk_queue_t_
+{
+ /**
+ * Queue stats, indexed by fib_walk_queue_stats_t
+ */
+ u64 fwq_stats[FIB_WALK_QUEUE_STATS_NUM];
+
+ /**
+ * The node list which acts as the queue
+ */
+ fib_node_list_t fwq_queue;
+} fib_walk_queue_t;
+
+/**
+ * A set of priority queues for outstanding walks, one queue per priority
+ */
+typedef struct fib_walk_queues_t_
+{
+ fib_walk_queue_t fwqs_queues[FIB_WALK_PRIORITY_NUM];
+} fib_walk_queues_t;
+
+/**
+ * The global queues of outstanding walks
+ */
+static fib_walk_queues_t fib_walk_queues;
+
+/**
+ * The names of the walk priorities
+ */
+static const char * const fib_walk_priority_names[] = FIB_WALK_PRIORITIES;
+
+/**
+ * Format function (for use with %U) that appends the name of a walk
+ * priority to the string s.
+ * NOTE(review): format_fib_prefix and format_fib_forw_chain_type are
+ * declared taking a 'va_list *', whereas this function takes a 'va_list'
+ * by value - confirm this matches what the %U handling passes on every
+ * supported target. Changing it requires the prototype in fib_walk.h to
+ * change at the same time.
+ */
+u8*
+format_fib_walk_priority (u8 *s, va_list ap)
+{
+ fib_walk_priority_t prio = va_arg(ap, fib_walk_priority_t);
+
+ /* the priority indexes the names array, so it must be in range */
+ ASSERT(prio < FIB_WALK_PRIORITY_NUM);
+
+ return (format(s, "%s", fib_walk_priority_names[prio]));
+}
+/**
+ * @brief Format function (for use with %U) that appends the name of a
+ * walk queue statistic.
+ *
+ * Format functions invoked via %U are handed a pointer to the caller's
+ * va_list, so the argument is consumed through that pointer.
+ *
+ * @param s  The string to append to.
+ * @param ap Pointer to the argument list; one fib_walk_queue_stats_t
+ *           is consumed.
+ * @return The extended string.
+ */
+static u8*
+format_fib_walk_queue_stats (u8 *s, va_list *ap)
+{
+    fib_walk_queue_stats_t wqs = va_arg(*ap, fib_walk_queue_stats_t);
+
+    /* the statistic indexes the names array, so it must be in range */
+    ASSERT(wqs < FIB_WALK_QUEUE_STATS_NUM);
+
+    return (format(s, "%s", fib_walk_queue_stats_names[wqs]));
+}
+
+/**
+ * @brief Get the pool index of a walk object
+ */
+static index_t
+fib_walk_get_index (fib_walk_t *fwalk)
+{
+    index_t walk_index;
+
+    /* the index is the object's offset from the base of the pool */
+    walk_index = fwalk - fib_walk_pool;
+
+    return (walk_index);
+}
+
+/**
+ * @brief Get the walk object at a given pool index
+ */
+static fib_walk_t *
+fib_walk_get (index_t fwi)
+{
+    fib_walk_t *walk;
+
+    walk = pool_elt_at_index(fib_walk_pool, fwi);
+
+    return (walk);
+}
+
+/**
+ * @brief Get the number of walks on the queue of the given priority.
+ *
+ * Deliberately not static; the unit tests use it.
+ */
+u32
+fib_walk_queue_get_size (fib_walk_priority_t prio)
+{
+    fib_walk_queue_t *queue;
+
+    queue = &fib_walk_queues.fwqs_queues[prio];
+
+    return (fib_node_list_get_size(queue->fwq_queue));
+}
+
+/**
+ * @brief Get the index of the walk at the front of a priority queue.
+ *
+ * The return value of fib_node_list_get_front is not checked here; the
+ * caller in this file only calls this once it has seen a non-zero
+ * queue size.
+ * NOTE(review): wp is presumably left unset if the queue is empty - confirm
+ * before calling this without first checking the size.
+ */
+static fib_node_index_t
+fib_walk_queue_get_front (fib_walk_priority_t prio)
+{
+ fib_node_ptr_t wp;
+
+ fib_node_list_get_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, &wp);
+
+ return (wp.fnp_index);
+}
+
+/**
+ * @brief Destroy a walk object.
+ *
+ * Removes the walk from its priority queue (if it was on one) and from its
+ * parent's dependency list, releases the vector of back-walk contexts and
+ * returns the object to the pool.
+ *
+ * @param fwalk The walk to destroy. Must not be used after this call.
+ */
+static void
+fib_walk_destroy (fib_walk_t *fwalk)
+{
+    /* only walks that were queued have a sibling in a priority queue */
+    if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
+    {
+        fib_node_list_elt_remove(fwalk->fw_prio_sibling);
+    }
+    fib_node_child_remove(fwalk->fw_parent.fnp_type,
+                          fwalk->fw_parent.fnp_index,
+                          fwalk->fw_dep_sibling);
+
+    /*
+     * the context vector is allocated afresh each time a walk is
+     * allocated, so it must be released here or it is leaked.
+     */
+    vec_free(fwalk->fw_ctx);
+
+    fib_node_deinit(&fwalk->fw_node);
+    pool_put(fib_walk_pool, fwalk);
+}
+
+/**
+ * Return code from fib_walk_advance, i.e. when advancing a walk
+ */
+typedef enum fib_walk_advance_rc_t_
+{
+ /**
+ * The walk is complete
+ */
+ FIB_WALK_ADVANCE_DONE,
+ /**
+ * the walk has more work
+ */
+ FIB_WALK_ADVANCE_MORE,
+ /**
+ * The walk merged with the one in front, i.e. visiting the next node
+ * returned FIB_NODE_BACK_WALK_MERGE
+ */
+ FIB_WALK_ADVANCE_MERGE,
+} fib_walk_advance_rc_t;
+
+/**
+ * @brief Advance the walk one element in its work list
+ *
+ * The node after the walk in the parent's dependency list is visited once
+ * for each context in the walk's vector, then the walk is moved past it.
+ *
+ * @param fwi The index of the walk to advance.
+ * @return FIB_WALK_ADVANCE_MERGE if a visit reported a merge,
+ *         FIB_WALK_ADVANCE_MORE if there are further nodes to visit,
+ *         FIB_WALK_ADVANCE_DONE otherwise.
+ */
+static fib_walk_advance_rc_t
+fib_walk_advance (fib_node_index_t fwi)
+{
+ fib_node_back_walk_ctx_t *ctx;
+ fib_node_back_walk_rc_t wrc;
+ fib_node_ptr_t sibling;
+ fib_walk_t *fwalk;
+ int more_elts;
+
+ /*
+ * this walk function is re-entrant - walks can spawn walks.
+ * fib_walk_t objects come from a pool, so they can realloc. we need
+ * to re-fetch from said pool at the appropriate times.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ more_elts = fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &sibling);
+
+ if (more_elts)
+ {
+ /*
+ * NOTE(review): ctx points into fwalk->fw_ctx for the duration of the
+ * visit. fib_walk_back_walk_notify can append to a walk's fw_ctx
+ * vector; confirm that cannot happen to this walk's vector while it
+ * is being iterated here.
+ */
+ vec_foreach(ctx, fwalk->fw_ctx)
+ {
+ wrc = fib_node_back_walk_one(&sibling, ctx);
+
+ /* the visit may have allocated walks; re-fetch before use */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_n_visits++;
+
+ if (FIB_NODE_BACK_WALK_MERGE == wrc)
+ {
+ /*
+ * this walk has merged with the one further along the node's
+ * dependency list.
+ */
+ return (FIB_WALK_ADVANCE_MERGE);
+ }
+ }
+ /*
+ * move forward to the next node to visit
+ */
+ more_elts = fib_node_list_advance(fwalk->fw_dep_sibling);
+ }
+
+ if (more_elts)
+ {
+ return (FIB_WALK_ADVANCE_MORE);
+ }
+
+ return (FIB_WALK_ADVANCE_DONE);
+}
+
+/**
+ * First guesses as to good values.
+ * SHORT_SLEEP and LONG_SLEEP are the wait times the walk process uses
+ * between servicing the queues: short when work remains, long when all
+ * the queues were drained. QUOTA is the time budget for one servicing of
+ * the queues, measured with vlib_time_now().
+ * NOTE(review): presumably all three are in seconds - confirm.
+ */
+#define SHORT_SLEEP 1e-8
+#define LONG_SLEEP 1e-3
+#define QUOTA 1e-4
+
+/**
+ * @brief Service the queues
+ * This is not declared static so that it can be unit tested - i know i know...
+ *
+ * The queues are serviced in priority order. The walk at the front of a
+ * queue is advanced until it has no more work or the quota has elapsed.
+ * Each walk taken from a queue is advanced at least once, even when the
+ * quota has already been used.
+ *
+ * @param vm    The vlib main; used to read the current time.
+ * @param quota The time allowed for this invocation, compared against the
+ *              elapsed vlib_time_now().
+ * @return SHORT_SLEEP if the quota expired with a walk still in progress,
+ *         LONG_SLEEP if all the queues were drained.
+ */
+f64
+fib_walk_process_queues (vlib_main_t * vm,
+ const f64 quota)
+{
+ fib_walk_priority_t prio;
+ fib_walk_advance_rc_t rc;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+
+ f64 sleep_time, start_time; // , vector_rate;
+
+ start_time = vlib_time_now(vm);
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ while (0 != fib_walk_queue_get_size(prio))
+ {
+ fwi = fib_walk_queue_get_front(prio);
+
+ /*
+ * set this walk as executing
+ */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
+
+ do
+ {
+ rc = fib_walk_advance(fwi);
+ } while (((vlib_time_now(vm) - start_time) < quota) &&
+ (FIB_WALK_ADVANCE_MORE == rc));
+
+ /*
+ * if this walk has no more work then pop it from the queue
+ * and move on to the next. This covers both a walk that is done
+ * and one that merged with the walk in front of it.
+ */
+ if (FIB_WALK_ADVANCE_MORE != rc)
+ {
+ /* advancing may have moved the pool; re-fetch before use */
+ fwalk = fib_walk_get(fwi);
+ fib_walk_destroy(fwalk);
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++;
+ }
+ else
+ {
+ /*
+ * passed our work quota. sleep time.
+ * the walk stays on the queue, no longer marked as executing.
+ */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_flags &= ~FIB_WALK_FLAG_EXECUTING;
+ sleep_time = SHORT_SLEEP;
+ goto that_will_do_for_now;
+ }
+ }
+ }
+ /*
+ * got to the end of all the work
+ */
+ sleep_time = LONG_SLEEP;
+
+that_will_do_for_now:
+ return (sleep_time);
+}
+
+/**
+ * @brief The 'fib-walk' process's main loop.
+ *
+ * Waits for an event or for the sleep time to expire, then services the
+ * walk queues with a budget of QUOTA. The value returned from servicing
+ * the queues is the sleep time for the next wait. The loop never exits.
+ */
+static uword
+fib_walk_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ f64 sleep_time;
+
+ sleep_time = SHORT_SLEEP;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock(vm, sleep_time);
+
+ /*
+ * there may be lots of events queued between the processes,
+ * but the walks we want to schedule are in the priority queues,
+ * so we ignore the process events.
+ */
+ vlib_process_get_events(vm, NULL);
+
+ sleep_time = fib_walk_process_queues(vm, QUOTA);
+ }
+
+ /*
+ * Unreached
+ */
+ ASSERT(!"WTF");
+ return 0;
+}
+
+/**
+ * Registration of the 'fib-walk' process node, whose function is
+ * fib_walk_process.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
+ .function = fib_walk_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "fib-walk",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Allocate a new walk object
+ *
+ * The object is taken from the walk pool and every member is initialised.
+ * It is not yet linked into the parent's dependency list nor into a
+ * priority queue; both sibling indices are set to invalid.
+ *
+ * @param parent_type  The type of the node whose children will be walked.
+ * @param parent_index The index of that node.
+ * @param flags        The initial flags for the walk.
+ * @param ctx          The back-walk context; a copy of it is stored.
+ * @return The new walk object.
+ */
+static fib_walk_t *
+fib_walk_alloc (fib_node_type_t parent_type,
+                fib_node_index_t parent_index,
+                fib_walk_flags_t flags,
+                fib_node_back_walk_ctx_t *ctx)
+{
+    fib_walk_t *fwalk;
+
+    pool_get(fib_walk_pool, fwalk);
+
+    fib_node_init(&fwalk->fw_node, FIB_NODE_TYPE_WALK);
+
+    fwalk->fw_flags = flags;
+    fwalk->fw_dep_sibling = FIB_NODE_INDEX_INVALID;
+    fwalk->fw_prio_sibling = FIB_NODE_INDEX_INVALID;
+    fwalk->fw_parent.fnp_index = parent_index;
+    fwalk->fw_parent.fnp_type = parent_type;
+    fwalk->fw_ctx = NULL;
+    /*
+     * the visit counter is incremented and displayed, so it must not
+     * start with whatever the recycled pool element last held.
+     */
+    fwalk->fw_n_visits = 0;
+
+    /*
+     * make a copy of the backwalk context so the depth count remains
+     * the same for each sibling visited. This is important in the case
+     * where a parent has a loop via one child, but all the others are not.
+     * if the looped child were visited first, the depth count would exceed
+     * the max and the walk would terminate before it reached the other
+     * siblings.
+     */
+    vec_add1(fwalk->fw_ctx, *ctx);
+
+    return (fwalk);
+}
+
+/**
+ * @brief Enqueue a walk onto the appropriate priority queue. Then signal
+ * the background process there is work to do.
+ *
+ * NOTE(review): the walk is pushed onto the front of the list and the walk
+ * process also services the front, so the most recently queued walk of a
+ * priority is serviced first - confirm that is the intended order.
+ *
+ * @param prio  The priority of the queue to add the walk to.
+ * @param fwalk The walk to enqueue.
+ * @return The walk's sibling index in the queue.
+ */
+static index_t
+fib_walk_prio_queue_enquue (fib_walk_priority_t prio,
+ fib_walk_t *fwalk)
+{
+ index_t sibling;
+
+ sibling = fib_node_list_push_front(fib_walk_queues.fwqs_queues[prio].fwq_queue,
+ 0,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_SCHEDULED]++;
+
+ /*
+ * poke the fib-walk process to perform the async walk.
+ * we are not passing it specific data, hence the last two args,
+ * the process will drain the queues
+ */
+ vlib_process_signal_event(vlib_get_main(),
+ fib_walk_process_node.index,
+ FIB_WALK_EVENT,
+ FIB_WALK_EVENT);
+
+ return (sibling);
+}
+
+/**
+ * @brief Schedule a walk of a node's children to run in the background.
+ *
+ * The depth count in the context is incremented. If that takes it beyond
+ * the maximum graph depth nothing is scheduled. Otherwise a walk object is
+ * allocated, added to the parent's dependency list and placed on the queue
+ * of the requested priority.
+ *
+ * @param parent_type  The type of the node whose children are to be walked.
+ * @param parent_index The index of that node.
+ * @param prio         The priority of the queue to schedule the walk on.
+ * @param ctx          The back-walk context.
+ */
+void
+fib_walk_async (fib_node_type_t parent_type,
+                fib_node_index_t parent_index,
+                fib_walk_priority_t prio,
+                fib_node_back_walk_ctx_t *ctx)
+{
+    fib_walk_t *walk;
+
+    ++ctx->fnbw_depth;
+
+    if (ctx->fnbw_depth > FIB_NODE_GRAPH_MAX_DEPTH)
+    {
+        /*
+         * too deep; the graph contains a loop. give up.
+         */
+        return;
+    }
+
+    walk = fib_walk_alloc(parent_type, parent_index,
+                          FIB_WALK_FLAG_ASYNC, ctx);
+
+    /* link the walk in as a child of the node being walked */
+    walk->fw_dep_sibling = fib_node_child_add(parent_type,
+                                              parent_index,
+                                              FIB_NODE_TYPE_WALK,
+                                              fib_walk_get_index(walk));
+
+    /* and schedule it */
+    walk->fw_prio_sibling = fib_walk_prio_queue_enquue(prio, walk);
+}
+
+/**
+ * @brief Back walk all the children of a FIB node.
+ *
+ * note this is a synchronous depth first walk. Children visited may propagate
+ * the walk to their children. Other children node types may not propagate,
+ * synchronously but instead queue the walk for later async completion.
+ *
+ * The depth count in the context is incremented. If that takes it beyond
+ * the maximum graph depth the function returns without walking.
+ *
+ * @param parent_type  The type of the node whose children are to be walked.
+ * @param parent_index The index of that node.
+ * @param ctx          The back-walk context.
+ */
+void
+fib_walk_sync (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_advance_rc_t rc;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+
+ if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
+ {
+ /*
+ * The walk has reached the maximum depth. there is a loop in the graph.
+ * bail.
+ */
+ return;
+ }
+
+ fwalk = fib_walk_alloc(parent_type,
+ parent_index,
+ FIB_WALK_FLAG_SYNC,
+ ctx);
+
+ fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
+ parent_index,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+ fwi = fib_walk_get_index(fwalk);
+
+ while (1)
+ {
+ /*
+ * set this walk as executing
+ */
+ fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
+
+ do
+ {
+ rc = fib_walk_advance(fwi);
+ } while (FIB_WALK_ADVANCE_MORE == rc);
+
+
+ /*
+ * this walk function is re-entrant - walks can spawn walks.
+ * fib_walk_t objects come from a pool, so they can realloc. we need
+ * to re-fetch from said pool at the appropriate times.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_WALK_ADVANCE_MERGE == rc)
+ {
+ /*
+ * this sync walk merged with a walk in front.
+ * by requesting a sync walk the client wanted all children walked,
+ * so we ditch the walk object in hand and continue with the one
+ * we merged into
+ */
+ fib_node_ptr_t merged_walk;
+
+ fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &merged_walk);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index);
+ ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type);
+
+ fib_walk_destroy(fwalk);
+
+ fwi = merged_walk.fnp_index;
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_WALK_FLAG_EXECUTING & fwalk->fw_flags)
+ {
+ /*
+ * we are executing a sync walk, and we have met with another
+ * walk that is also executing. since only one walk executes at once
+ * (there is no multi-threading) this implies we have met ourselves
+ * and hence there is a loop in the graph.
+ * This function is re-entrant, so the walk object we met is being
+ * acted on in a stack frame below this one. We must therefore not
+ * continue with it now, but let the stack unwind and allow the
+ * appropriate frame to read the depth count and bail.
+ */
+ fwalk = NULL;
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * the walk reached the end of the dependency list.
+ */
+ break;
+ }
+ }
+
+ /*
+ * fwalk is NULL only when the walk we merged into is owned by another
+ * stack frame; in all other cases the walk in hand is finished with.
+ */
+ if (NULL != fwalk)
+ {
+ fib_walk_destroy(fwalk);
+ }
+}
+
+/**
+ * @brief Get the FIB node linkage of the walk object at the given index.
+ * Installed as the fnv_get entry of the walk's virtual function table.
+ */
+static fib_node_t *
+fib_walk_get_node (fib_node_index_t index)
+{
+    fib_walk_t *walk = fib_walk_get(index);
+
+    return (&walk->fw_node);
+}
+
+/**
+ * Walk objects are not parents, nor are they locked, so this function is
+ * not expected to be called; it asserts if it is.
+ * Installed as the fnv_last_lock entry of the walk's virtual function table.
+ */
+static void
+fib_walk_last_lock_gone (fib_node_t *node)
+{
+ ASSERT(0);
+}
+
+/**
+ * @brief Recover the walk object from a pointer to its embedded FIB node.
+ */
+static fib_walk_t*
+fib_walk_get_from_node (fib_node_t *node)
+{
+    char *walk_base;
+
+    /* step back from the fw_node member to the start of the object */
+    walk_base = ((char*)node) - STRUCT_OFFSET_OF(fib_walk_t, fw_node);
+
+    return ((fib_walk_t*)walk_base);
+}
+
+/**
+ * @brief Another back walk has reached this walk.
+ * Merge them so there is only one left. It is this walk, the one being
+ * visited, that remains, so the arriving context is merged or copied onto it.
+ *
+ * @param node The FIB node linkage of the walk being visited.
+ * @param ctx  The context of the walk that has caught up.
+ * @return Always FIB_NODE_BACK_WALK_MERGE.
+ */
+static fib_node_back_walk_rc_t
+fib_walk_back_walk_notify (fib_node_t *node,
+                           fib_node_back_walk_ctx_t *ctx)
+{
+    fib_node_back_walk_ctx_t *queued;
+    fib_walk_t *walk;
+
+    walk = fib_walk_get_from_node(node);
+
+    /*
+     * look for a context already held by this walk that has the same
+     * reason as the one arriving; if there is one the two can be merged.
+     */
+    vec_foreach(queued, walk->fw_ctx)
+    {
+        if (queued->fnbw_reason != ctx->fnbw_reason)
+        {
+            continue;
+        }
+
+        /*
+         * keep the larger of the two depth values. in the presence of a
+         * loop the same walk will merge with itself; were the smaller
+         * depth kept, the walk would never end.
+         */
+        if (ctx->fnbw_depth > queued->fnbw_depth)
+        {
+            queued->fnbw_depth = ctx->fnbw_depth;
+        }
+
+        return (FIB_NODE_BACK_WALK_MERGE);
+    }
+
+    /*
+     * nothing to merge with, so the walk in front must perform a different
+     * action to the one that has caught up. the one in front was scheduled
+     * first, so the new context goes at the back of the list.
+     */
+    vec_add1(walk->fw_ctx, *ctx);
+
+    return (FIB_NODE_BACK_WALK_MERGE);
+}
+
+/**
+ * The FIB walk's graph node virtual function table.
+ * Registered for FIB_NODE_TYPE_WALK by fib_walk_module_init.
+ */
+static const fib_node_vft_t fib_walk_vft = {
+ .fnv_get = fib_walk_get_node,
+ .fnv_last_lock = fib_walk_last_lock_gone,
+ .fnv_back_walk = fib_walk_back_walk_notify,
+};
+
+/**
+ * @brief Initialise the walk module: create the node list behind each
+ * priority queue, then register the walk node type with its virtual
+ * function table.
+ */
+void
+fib_walk_module_init (void)
+{
+    fib_walk_priority_t prio;
+
+    for (prio = FIB_WALK_PRIORITY_HIGH;
+         prio < FIB_WALK_PRIORITY_NUM;
+         prio++)
+    {
+        fib_walk_queue_t *queue = &fib_walk_queues.fwqs_queues[prio];
+
+        queue->fwq_queue = fib_node_list_create();
+    }
+
+    fib_node_register_type(FIB_NODE_TYPE_WALK, &fib_walk_vft);
+}
+
+/**
+ * @brief Format function (for use with %U) that appends a one line
+ * description of a walk: its parent, visit count and flags.
+ *
+ * Format functions invoked via %U are handed a pointer to the caller's
+ * va_list, so the argument is consumed through that pointer.
+ *
+ * @param s  The string to append to.
+ * @param ap Pointer to the argument list; one fib_node_index_t (the walk's
+ *           index) is consumed.
+ * @return The extended string.
+ */
+static u8*
+format_fib_walk (u8* s, va_list *ap)
+{
+    fib_node_index_t fwi = va_arg(*ap, fib_node_index_t);
+    fib_walk_t *fwalk;
+
+    fwalk = fib_walk_get(fwi);
+
+    return (format(s, " parent:{%s:%d} visits:%d flags:%d",
+                   fib_node_type_get_name(fwalk->fw_parent.fnp_type),
+                   fwalk->fw_parent.fnp_index,
+                   fwalk->fw_n_visits,
+                   fwalk->fw_flags));
+}
+
+/**
+ * @brief CLI handler for 'show fib walk'.
+ *
+ * For each priority queue this prints the statistics, the occupancy and
+ * one line per walk currently on the queue.
+ *
+ * @param vm    The vlib main; used for output.
+ * @param input Unused.
+ * @param cmd   Unused.
+ * @return Always NULL, i.e. no error.
+ */
+static clib_error_t *
+fib_walk_show (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+    fib_walk_queue_stats_t wqs;
+    fib_walk_priority_t prio;
+    fib_node_ptr_t sibling;
+    fib_node_index_t fwi;
+    fib_walk_t *fwalk;
+    int more_elts;
+
+    vlib_cli_output(vm, "FIB Walk queues:");
+
+    FOR_EACH_FIB_WALK_PRIORITY(prio)
+    {
+        vlib_cli_output(vm, " %U priority queue:",
+                        format_fib_walk_priority, prio);
+        vlib_cli_output(vm, " Stats: ");
+
+        FOR_EACH_FIB_WALK_QUEUE_STATS(wqs)
+        {
+            /*
+             * the counters are u64, so they need a 64 bit conversion;
+             * a plain %d consumes only an int from the argument list.
+             */
+            vlib_cli_output(vm, " %U:%llu",
+                            format_fib_walk_queue_stats, wqs,
+                            fib_walk_queues.fwqs_queues[prio].fwq_stats[wqs]);
+        }
+        vlib_cli_output(vm, " Occupancy:%d",
+                        fib_node_list_get_size(
+                            fib_walk_queues.fwqs_queues[prio].fwq_queue));
+
+        more_elts = fib_node_list_get_front(
+                        fib_walk_queues.fwqs_queues[prio].fwq_queue,
+                        &sibling);
+
+        /* step through the queue, from the front, one walk at a time */
+        while (more_elts)
+        {
+            ASSERT(FIB_NODE_INDEX_INVALID != sibling.fnp_index);
+            ASSERT(FIB_NODE_TYPE_WALK == sibling.fnp_type);
+
+            fwi = sibling.fnp_index;
+            fwalk = fib_walk_get(fwi);
+
+            vlib_cli_output(vm, " %U", format_fib_walk, fwi);
+
+            more_elts = fib_node_list_elt_get_next(fwalk->fw_prio_sibling,
+                                                   &sibling);
+        }
+    }
+    return (NULL);
+}
+
+/**
+ * Registration of the 'show fib walk' CLI command, handled by fib_walk_show.
+ */
+VLIB_CLI_COMMAND (fib_walk_show_command, static) = {
+ .path = "show fib walk",
+ .short_help = "show fib walk",
+ .function = fib_walk_show,
+};
diff --git a/vnet/vnet/fib/fib_walk.h b/vnet/vnet/fib/fib_walk.h
new file mode 100644
index 00000000000..7ae99d0d8aa
--- /dev/null
+++ b/vnet/vnet/fib/fib_walk.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_WALK_H__
+#define __FIB_WALK_H__
+
+#include <vnet/fib/fib_node.h>
+
+/**
+ * @brief Walk priorities.
+ * Strict priorities. All walks at priority n are completed before n+1 is started.
+ * Increasing numerical value implies decreasing priority.
+ */
+typedef enum fib_walk_priority_t_
+{
+ FIB_WALK_PRIORITY_HIGH = 0,
+ FIB_WALK_PRIORITY_LOW = 1,
+} fib_walk_priority_t;
+
+/**
+ * The number of priorities. Outside of the enum so it does not need to be
+ * handled in switch statements.
+ */
+#define FIB_WALK_PRIORITY_NUM (FIB_WALK_PRIORITY_LOW+1)
+
+/**
+ * Initialiser for an array of priority names indexed by fib_walk_priority_t.
+ */
+#define FIB_WALK_PRIORITIES { \
+ [FIB_WALK_PRIORITY_HIGH] = "high", \
+ [FIB_WALK_PRIORITY_LOW] = "low", \
+}
+
+/**
+ * Iterate _prio over every priority, highest first.
+ */
+#define FOR_EACH_FIB_WALK_PRIORITY(_prio) \
+ for ((_prio) = FIB_WALK_PRIORITY_HIGH; \
+ (_prio) < FIB_WALK_PRIORITY_NUM; \
+ (_prio)++)
+
+/**
+ * @brief Initialise the walk module: create the priority queues and
+ * register the walk node type.
+ */
+extern void fib_walk_module_init(void);
+
+/**
+ * @brief Schedule a walk of the children of the given node, to be run later
+ * by the 'fib-walk' process from the queue of the given priority.
+ * The depth count in ctx is incremented.
+ */
+extern void fib_walk_async(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_walk_priority_t prio,
+ fib_node_back_walk_ctx_t *ctx);
+
+/**
+ * @brief Walk the children of the given node before returning.
+ * The depth count in ctx is incremented.
+ */
+extern void fib_walk_sync(fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_back_walk_ctx_t *ctx);
+
+/**
+ * @brief Format function that appends the name of a walk priority.
+ */
+extern u8* format_fib_walk_priority(u8 *s, va_list ap);
+
+#endif
+
diff --git a/vnet/vnet/fib/ip4_fib.c b/vnet/vnet/fib/ip4_fib.c
new file mode 100644
index 00000000000..21ebb7afafc
--- /dev/null
+++ b/vnet/vnet/fib/ip4_fib.c
@@ -0,0 +1,542 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip4_fib.h>
+
+/*
+ * A table of prefixes to be added to tables and the sources for them
+ */
+typedef struct ip4_fib_table_special_prefix_t_ {
+ fib_prefix_t ift_prefix;
+ fib_source_t ift_source;
+ fib_entry_flag_t ift_flag;
+} ip4_fib_table_special_prefix_t;
+
+/* Special prefixes added to every new IPv4 FIB; addresses are in host byte order */
+static const ip4_fib_table_special_prefix_t ip4_specials[] = {
+    {
+        /* 0.0.0.0/0 */
+        .ift_prefix = {
+            .fp_addr = {
+                .ip4.data_u32 = 0,
+            },
+            .fp_len = 0,
+            .fp_proto = FIB_PROTOCOL_IP4,
+        },
+        .ift_source = FIB_SOURCE_DEFAULT_ROUTE,
+        .ift_flag = FIB_ENTRY_FLAG_DROP,
+    },
+    {
+        /* 0.0.0.0/32 */
+        .ift_prefix = {
+            .fp_addr = {
+                .ip4.data_u32 = 0,
+            },
+            .fp_len = 32,
+            .fp_proto = FIB_PROTOCOL_IP4,
+        },
+        .ift_source = FIB_SOURCE_DEFAULT_ROUTE,
+        .ift_flag = FIB_ENTRY_FLAG_DROP,
+    },
+    {
+        /*
+         * 240.0.0.0/4
+         * drop class E (240.0.0.0 - 255.255.255.255; a /8 only covers 240.x.x.x)
+         */
+        .ift_prefix = {
+            .fp_addr = {
+                .ip4.data_u32 = 0xf0000000,
+            },
+            .fp_len = 4,
+            .fp_proto = FIB_PROTOCOL_IP4,
+        },
+        .ift_source = FIB_SOURCE_SPECIAL,
+        .ift_flag = FIB_ENTRY_FLAG_DROP,
+    },
+    {
+        /*
+         * 224.0.0.0/4
+         * drop all mcast (224.0.0.0 - 239.255.255.255; a /8 only covers 224.x.x.x)
+         */
+        .ift_prefix = {
+            .fp_addr = {
+                .ip4.data_u32 = 0xe0000000,
+            },
+            .fp_len = 4,
+            .fp_proto = FIB_PROTOCOL_IP4,
+        },
+        .ift_source = FIB_SOURCE_SPECIAL,
+        .ift_flag = FIB_ENTRY_FLAG_DROP,
+    },
+    {
+        /*
+         * 255.255.255.255/32
+         * drop, but we'll allow it to be usurped by the likes of DHCP
+         */
+        .ift_prefix = {
+            .fp_addr = {
+                .ip4.data_u32 = 0xffffffff,
+            },
+            .fp_len = 32,
+            .fp_proto = FIB_PROTOCOL_IP4,
+        },
+        .ift_source = FIB_SOURCE_DEFAULT_ROUTE,
+        .ift_flag = FIB_ENTRY_FLAG_DROP,
+    }
+};
+
+/* Allocate and initialise an IPv4 FIB for table_id, lock it, add the special prefixes and return its pool index. */
+static u32
+ip4_create_fib_with_table_id (u32 table_id)
+{
+ fib_table_t *fib_table;
+
+ pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ memset(fib_table, 0, sizeof(*fib_table));
+
+ fib_table->ft_proto = FIB_PROTOCOL_IP4;
+ fib_table->ft_index =
+ fib_table->v4.index =
+ (fib_table - ip4_main.fibs); /* the table's index is its position in the pool */
+
+ hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index); /* NOTE(review): also done for table_id ~0, but ip4_fib_table_destroy skips hash_unset for ~0 - confirm intended */
+
+ fib_table->ft_table_id =
+ fib_table->v4.table_id =
+ table_id;
+ fib_table->ft_flow_hash_config =
+ fib_table->v4.flow_hash_config =
+ IP_FLOW_HASH_DEFAULT;
+ fib_table->v4.fwd_classify_table_index = ~0;
+ fib_table->v4.rev_classify_table_index = ~0;
+
+ fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4);
+
+ ip4_mtrie_init(&fib_table->v4.mtrie);
+
+ /*
+ * add the special entries into the new FIB; each address in ip4_specials
+ * is passed through clib_host_to_net_u32 on a local copy before being added
+ */
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ {
+ fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
+
+ prefix.fp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
+
+ fib_table_entry_special_add(fib_table->ft_index,
+ &prefix,
+ ip4_specials[ii].ift_source,
+ ip4_specials[ii].ift_flag,
+ ADJ_INDEX_INVALID);
+ }
+
+ return (fib_table->ft_index);
+}
+
+void
+ip4_fib_table_destroy (ip4_fib_t *fib)
+{
+ fib_table_t *fib_table = (fib_table_t*)fib;
+ int ii;
+
+ /*
+ * remove all the specials we added when the table was created.
+ */
+ for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ {
+ fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
+
+ prefix.fp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
+
+ fib_table_entry_special_remove(fib_table->ft_index,
+ &prefix,
+ ip4_specials[ii].ift_source);
+ }
+
+ /*
+ * validate no more routes.
+ */
+ ASSERT(0 == fib_table->ft_total_route_counts);
+ FOR_EACH_FIB_SOURCE(ii)
+ {
+ ASSERT(0 == fib_table->ft_src_route_counts[ii]);
+ }
+
+ if (~0 != fib_table->ft_table_id)
+ {
+ hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
+ }
+ pool_put(ip4_main.fibs, fib_table);
+}
+
+
+u32
+ip4_fib_table_find_or_create_and_lock (u32 table_id)
+{
+ u32 index;
+
+ index = ip4_fib_index_from_table_id(table_id);
+ if (~0 == index)
+ return ip4_create_fib_with_table_id(table_id);
+
+ fib_table_lock(index, FIB_PROTOCOL_IP4);
+
+ return (index);
+}
+
+u32
+ip4_fib_table_create_and_lock (void)
+{
+ return (ip4_create_fib_with_table_id(~0));
+}
+
+u32
+ip4_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ if (sw_if_index >= vec_len(ip4_main.fib_index_by_sw_if_index))
+ {
+ /*
+ * This is the case for interfaces that are not yet mapped to
+ * a IP table
+ */
+ return (~0);
+ }
+ return (ip4_main.fib_index_by_sw_if_index[sw_if_index]);
+}
+
+flow_hash_config_t
+ip4_fib_table_get_flow_hash_config (u32 fib_index)
+{
+ return (ip4_fib_get(fib_index)->flow_hash_config);
+}
+
+/*
+ * ip4_fib_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_fib_table_lookup_exact_match (const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ u32 key;
+
+ hash = fib->fib_entry_by_dst_address[len];
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+
+ result = hash_get(hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+/*
+ * ip4_fib_table_lookup_lb
+ *
+ * Longest prefix match
+ */
+index_t
+ip4_fib_table_lookup_lb (ip4_fib_t *fib,
+ const ip4_address_t *addr)
+{
+ fib_node_index_t fei;
+
+ fei = ip4_fib_table_lookup(fib, addr, 32);
+
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ const dpo_id_t *dpo;
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ return (dpo->dpoi_index);
+ }
+ return (INDEX_INVALID);
+}
+
+/*
+ * ip4_fib_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_fib_table_lookup (const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ i32 mask_len;
+ u32 key;
+
+ for (mask_len = len; mask_len >= 0; mask_len--)
+ {
+ hash = fib->fib_entry_by_dst_address[mask_len];
+ key = (addr->data_u32 & ip4_main.fib_masks[mask_len]);
+
+ result = hash_get (hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+void
+ip4_fib_table_entry_insert (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ uword * hash, * result;
+ u32 key;
+
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+ hash = fib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result) {
+ /*
+ * adding a new entry
+ */
+ if (NULL == hash) {
+ hash = hash_create (32 /* elts */, sizeof (uword));
+ hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
+ }
+ hash = hash_set(hash, key, fib_entry_index);
+ fib->fib_entry_by_dst_address[len] = hash;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+}
+
+void
+ip4_fib_table_entry_remove (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ u32 key;
+ /* Remove the entry for addr/len from this FIB's hash for mask length len. */
+ key = (addr->data_u32 & ip4_main.fib_masks[len]); /* key is the address masked to len bits */
+ hash = fib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result)
+ {
+ /*
+ * removing a non-existent entry. i'll allow it.
+ */
+ }
+ else
+ {
+ hash_unset(hash, key);
+ }
+
+ fib->fib_entry_by_dst_address[len] = hash; /* store the hash pointer back into the FIB */
+}
+
+void
+ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD
+}
+
+void
+ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE
+}
+
+static void
+ip4_fib_table_show_all (ip4_fib_t *fib,
+ vlib_main_t * vm)
+{
+ fib_node_index_t *fib_entry_indicies;
+ fib_node_index_t *fib_entry_index;
+ int i;
+
+ fib_entry_indicies = NULL;
+
+ for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
+ {
+ uword * hash = fib->fib_entry_by_dst_address[i];
+
+ if (NULL != hash)
+ {
+ hash_pair_t * p;
+
+ hash_foreach_pair (p, hash,
+ ({
+ vec_add1(fib_entry_indicies, p->value[0]);
+ }));
+ }
+ }
+
+ vec_sort_with_function(fib_entry_indicies, fib_entry_cmp_for_sort);
+
+ vec_foreach(fib_entry_index, fib_entry_indicies)
+ {
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ *fib_entry_index,
+ FIB_ENTRY_FORMAT_BRIEF);
+ }
+
+ vec_free(fib_entry_indicies);
+}
+
+static void
+ip4_fib_table_show_one (ip4_fib_t *fib,
+ vlib_main_t * vm,
+ ip4_address_t *address,
+ u32 mask_len)
+{
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ ip4_fib_table_lookup(fib, address, mask_len),
+ FIB_ENTRY_FORMAT_DETAIL);
+}
+
+/* CLI handler for 'show ip fib': parses the filter options, rejects a mask
+ * length above 32, then prints every selected IPv4 FIB as a per-length
+ * summary, a full dump, or the entry matching one prefix (plus the mtrie). */
+static clib_error_t *
+ip4_show_fib (vlib_main_t * vm,
+              unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+    ip4_main_t * im4 = &ip4_main;
+    fib_table_t * fib_table;
+    int verbose = 1, matching = 0, mtrie = 0;
+    ip4_address_t matching_address;
+    u32 matching_mask = 32;
+    int i, table_id = -1, fib_index = ~0;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "brief") || unformat (input, "summary")
+            || unformat (input, "sum"))
+            verbose = 0;
+        else if (unformat (input, "mtrie"))
+            mtrie = 1;
+        else if (unformat (input, "%U/%d",
+                           unformat_ip4_address, &matching_address, &matching_mask))
+            matching = 1;
+        else if (unformat (input, "%U", unformat_ip4_address, &matching_address))
+            matching = 1;
+        else if (unformat (input, "table %d", &table_id))
+            ;
+        else if (unformat (input, "index %d", &fib_index))
+            ;
+        else
+            break;
+    }
+
+    /* the mask indexes fib_masks[] and fib_entry_by_dst_address[] in the lookup */
+    if (matching_mask > 32)
+        return clib_error_return (0, "invalid mask length %d", matching_mask);
+
+    pool_foreach (fib_table, im4->fibs,
+    ({
+        ip4_fib_t *fib = &fib_table->v4;
+
+        if (table_id >= 0 && table_id != (int)fib->table_id)
+            continue;
+        if (fib_index != ~0 && fib_index != (int)fib->index)
+            continue;
+
+        vlib_cli_output (vm, "%U, fib_index %d, flow hash: %U",
+                         format_fib_table_name, fib->index, FIB_PROTOCOL_IP4,
+                         fib->index,
+                         format_ip_flow_hash_config, fib->flow_hash_config);
+
+        /* Show summary? */
+        if (! verbose)
+        {
+            vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+            for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
+            {
+                uword * hash = fib->fib_entry_by_dst_address[i];
+                uword n_elts = hash_elts (hash);
+                if (n_elts > 0)
+                    vlib_cli_output (vm, "%20d%16d", i, n_elts);
+            }
+            continue;
+        }
+
+        if (!matching)
+        {
+            ip4_fib_table_show_all(fib, vm);
+        }
+        else
+        {
+            ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask);
+        }
+
+        if (mtrie)
+            vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
+    }));
+
+    return 0;
+}
+
+/*?
+ * Show FIB/route entries
+ *
+ * @cliexpar
+ * @cliexstart{show ip fib}
+ * Display the IPv4 FIB.
+ * This command will run for a long time when the FIBs comprise millions of entries.
+ * vpp# sh ip fib
+ * Table 0
+ * Destination Packets Bytes Adjacency
+ * 6.0.0.0/8 0 0 weight 1, index 3
+ * arp fake-eth0 6.0.0.1/8
+ * 6.0.0.1/32 0 0 weight 1, index 4
+ * local 6.0.0.1/8
+ *
+ * And so forth. Use 'show ip fib summary' for a summary:
+ *
+ * vpp# sh ip fib summary
+ * Table 0
+ * Prefix length Count
+ * 8 1
+ * 32 4
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+    .path = "show ip fib",
+    .short_help = "show ip fib [brief|summary] [mtrie] [table <n>] [index <n>] [<ip4-addr>[/<mask>]]", /* lists only the options ip4_show_fib parses */
+    .function = ip4_show_fib,
+};
diff --git a/vnet/vnet/fib/ip4_fib.h b/vnet/vnet/fib/ip4_fib.h
new file mode 100644
index 00000000000..cf312cdc629
--- /dev/null
+++ b/vnet/vnet/fib/ip4_fib.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 FIB
+ *
+ * FIBs are composed of two prefix data-bases (aka tables). The non-forwarding
+ * table contains all the routes that the control plane has programmed, the
+ * forwarding table contains the sub-set of those routes that can be used to
+ * forward packets.
+ * In the IPv4 FIB the non-forwarding table is an array of hash tables indexed
+ * by mask length, the forwarding table is an mtrie
+ *
+ * This IPv4 FIB is used by the protocol independent FIB. So directly using
+ * these APIs in client code is not encouraged. However, this IPv4 FIB can be
+ * used if all the client wants is an IPv4 prefix data-base
+ */
+
+#ifndef __IP4_FIB_H__
+#define __IP4_FIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+
+extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+extern fib_node_index_t ip4_fib_table_lookup_exact_match(const ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_table_entry_remove(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_table_entry_insert(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip4_fib_table_destroy(ip4_fib_t *fib);
+
+extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
+ const ip4_address_t * dst);
+
+/**
+ * @brief Get the FIB at the given index
+ */
+static inline ip4_fib_t *
+ip4_fib_get (u32 index)
+{
+ return (&(pool_elt_at_index(ip4_main.fibs, index)->v4));
+}
+
+always_inline u32
+ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst)
+{
+ return (ip4_fib_table_lookup_lb(
+ ip4_fib_get(vec_elt (im->fib_index_by_sw_if_index, sw_if_index)),
+ dst));
+}
+
+/**
+ * @brief Get or create an IPv4 fib.
+ *
+ * Get or create an IPv4 fib with the provided table ID and take a lock on it.
+ *
+ * @param table_id
+ * The table ID used to retrieve or, if none exists yet, create the fib.
+ * No ID is picked on the caller's behalf: \c ~0 is looked up like any other
+ * value (ip4_fib_table_create_and_lock() always creates a table with ID ~0).
+ * @returns The index of the retrieved or created fib.
+ *
+ */
+extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id);
+extern u32 ip4_fib_table_create_and_lock(void);
+
+
+static inline
+u32 ip4_fib_index_from_table_id (u32 table_id)
+{
+ ip4_main_t * im = &ip4_main;
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, table_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+extern u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+extern flow_hash_config_t ip4_fib_table_get_flow_hash_config(u32 fib_index);
+
+
+always_inline index_t
+ip4_fib_forwarding_lookup (u32 fib_index,
+ const ip4_address_t * addr)
+{
+ ip4_fib_mtrie_leaf_t leaf;
+ ip4_fib_mtrie_t * mtrie;
+
+ mtrie = &ip4_fib_get(fib_index)->mtrie;
+
+ leaf = IP4_FIB_MTRIE_LEAF_ROOT;
+ leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0);
+ leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1);
+ leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
+ leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
+
+ /* Handle default route. */
+ leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf);
+
+ return (ip4_fib_mtrie_leaf_get_adj_index(leaf));
+}
+
+
+#endif
+
diff --git a/vnet/vnet/fib/ip6_fib.c b/vnet/vnet/fib/ip6_fib.c
new file mode 100644
index 00000000000..772ce74430b
--- /dev/null
+++ b/vnet/vnet/fib/ip6_fib.c
@@ -0,0 +1,698 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/fib_table.h>
+
+static void
+vnet_ip6_fib_init (u32 fib_index)
+{
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 0,
+ .fp_addr = {
+ .ip6 = {
+ { 0, 0, },
+ },
+ }
+ };
+
+ /*
+ * Add the default route.
+ */
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_DEFAULT_ROUTE,
+ FIB_ENTRY_FLAG_DROP,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * Add ff02::1:ff00:0/104 via local route for all tables.
+ * This is required for neighbor discovery to work.
+ */
+ ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0);
+ pfx.fp_len = 104;
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * Add all-routers multicast address via local route for all tables
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_routers);
+ pfx.fp_len = 128;
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * Add all-nodes multicast address via local route for all tables
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ pfx.fp_len = 128;
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * Add all-mldv2 multicast address via local route for all tables
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+ pfx.fp_len = 128;
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL,
+ ADJ_INDEX_INVALID);
+
+ /*
+ * all link local for us
+ */
+ pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+ pfx.fp_addr.ip6.as_u64[1] = 0;
+ pfx.fp_len = 10;
+ fib_table_entry_special_add(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_LOCAL,
+ ADJ_INDEX_INVALID);
+}
+
+static u32
+create_fib_with_table_id (u32 table_id)
+{
+ fib_table_t *fib_table;
+
+ pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ memset(fib_table, 0, sizeof(*fib_table));
+
+ fib_table->ft_proto = FIB_PROTOCOL_IP6;
+ fib_table->ft_index =
+ fib_table->v6.index =
+ (fib_table - ip6_main.fibs);
+
+ hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index);
+
+ fib_table->ft_table_id =
+ fib_table->v6.table_id =
+ table_id;
+ fib_table->ft_flow_hash_config =
+ fib_table->v6.flow_hash_config =
+ IP_FLOW_HASH_DEFAULT;
+
+ vnet_ip6_fib_init(fib_table->ft_index);
+ fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6);
+
+ return (fib_table->ft_index);
+}
+
+u32
+ip6_fib_table_find_or_create_and_lock (u32 table_id)
+{
+ uword * p;
+
+ p = hash_get (ip6_main.fib_index_by_table_id, table_id);
+ if (NULL == p)
+ return create_fib_with_table_id(table_id);
+
+ fib_table_lock(p[0], FIB_PROTOCOL_IP6);
+
+ return (p[0]);
+}
+
+u32
+ip6_fib_table_create_and_lock (void)
+{
+ return (create_fib_with_table_id(~0));
+}
+
+void
+ip6_fib_table_destroy (u32 fib_index)
+{
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 0,
+ .fp_addr = {
+ .ip6 = {
+ { 0, 0, },
+ },
+ }
+ };
+
+ /*
+ * the default route.
+ */
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_DEFAULT_ROUTE);
+
+
+ /*
+ * ff02::1:ff00:0/104
+ */
+ ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0);
+ pfx.fp_len = 104;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all-routers multicast address
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_routers);
+ pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all-nodes multicast address
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all-mldv2 multicast address
+ */
+ ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+ pfx.fp_len = 128;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ /*
+ * all link local
+ */
+ pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+ pfx.fp_addr.ip6.as_u64[1] = 0;
+ pfx.fp_len = 10;
+ fib_table_entry_special_remove(fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL);
+
+ fib_table_t *fib_table = fib_table_get(fib_index, FIB_PROTOCOL_IP6);
+ fib_source_t source;
+
+ /*
+ * validate no more routes.
+ */
+ ASSERT(0 == fib_table->ft_total_route_counts);
+ FOR_EACH_FIB_SOURCE(source)
+ {
+ ASSERT(0 == fib_table->ft_src_route_counts[source]);
+ }
+
+ if (~0 != fib_table->ft_table_id)
+ {
+ hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
+ }
+ pool_put(ip6_main.fibs, fib_table);
+}
+
+fib_node_index_t
+ip6_fib_table_lookup (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len)
+{
+ const ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv, value;
+ int i, n_p, rv;
+ u64 fib;
+ /* Search the non-forwarding table for addr, trying only prefix lengths <= len; returns the stored value or FIB_NODE_INDEX_INVALID. */
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ n_p = vec_len (table->prefix_lengths_in_search_order);
+
+ kv.key[0] = addr->as_u64[0];
+ kv.key[1] = addr->as_u64[1];
+ fib = ((u64)((fib_index))<<32); /* fib index occupies the upper 32 bits of key[2] */
+
+ /*
+ * start search from a mask length same length or shorter.
+ * we don't want matches longer than the mask passed
+ */
+ i = 0;
+ while (i < n_p && table->prefix_lengths_in_search_order[i] > len)
+ {
+ i++;
+ }
+
+ for (; i < n_p; i++)
+ {
+ int dst_address_length = table->prefix_lengths_in_search_order[i];
+ ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length];
+
+ ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
+ // Lengths are expected longest-first, so each mask is shorter than the previous one and the key can be masked cumulatively.
+ kv.key[0] &= mask->as_u64[0];
+ kv.key[1] &= mask->as_u64[1];
+ kv.key[2] = fib | dst_address_length;
+
+ rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+ip6_fib_table_lookup_exact_match (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len)
+{
+ const ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv, value;
+ ip6_address_t *mask;
+ u64 fib;
+ int rv;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+
+ rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value);
+ if (rv == 0)
+ return value.value;
+
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+compute_prefix_lengths_in_search_order (ip6_fib_table_instance_t *table)
+{
+ int i;
+ vec_reset_length (table->prefix_lengths_in_search_order);
+ /* Note: bitmap reversed so this is in fact a longest prefix match */
+ clib_bitmap_foreach (i, table->non_empty_dst_address_length_bitmap,
+ ({
+ int dst_address_length = 128 - i;
+ vec_add1(table->prefix_lengths_in_search_order, dst_address_length);
+ }));
+}
+
+void
+ip6_fib_table_entry_remove (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0);
+
+ /* refcount accounting */
+ ASSERT (table->dst_address_length_refcounts[len] > 0);
+ if (--table->dst_address_length_refcounts[len] == 0)
+ {
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 0);
+ compute_prefix_lengths_in_search_order (table);
+ }
+}
+
+void
+ip6_fib_table_entry_insert (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+ kv.value = fib_entry_index;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1);
+
+ table->dst_address_length_refcounts[len]++;
+
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 1);
+ compute_prefix_lengths_in_search_order (table);
+}
+
+u32
+ip6_fib_table_fwding_lookup (ip6_main_t * im,
+ u32 fib_index,
+ const ip6_address_t * dst)
+{
+ const ip6_fib_table_instance_t *table;
+ int i, len;
+ int rv;
+ BVT(clib_bihash_kv) kv, value;
+ u64 fib;
+ /* Search the forwarding table for dst over every stored prefix length; note the im argument is not used, the global ip6_main is. */
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING];
+ len = vec_len (table->prefix_lengths_in_search_order);
+
+ kv.key[0] = dst->as_u64[0];
+ kv.key[1] = dst->as_u64[1];
+ fib = ((u64)((fib_index))<<32); /* fib index occupies the upper 32 bits of key[2] */
+
+ for (i = 0; i < len; i++)
+ {
+ int dst_address_length = table->prefix_lengths_in_search_order[i];
+ ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length];
+
+ ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
+ // Lengths are expected longest-first, so each mask is shorter than the previous one and the key can be masked cumulatively.
+ kv.key[0] &= mask->as_u64[0];
+ kv.key[1] &= mask->as_u64[1];
+ kv.key[2] = fib | dst_address_length;
+
+ rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value);
+ if (rv == 0)
+ return value.value;
+ }
+
+ /* default route is always present */
+ ASSERT(0);
+ return 0;
+}
+
+u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im,
+ u32 sw_if_index,
+ const ip6_address_t * dst)
+{
+ u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+ return ip6_fib_table_fwding_lookup(im, fib_index, dst);
+}
+
+flow_hash_config_t
+ip6_fib_table_get_flow_hash_config (u32 fib_index)
+{
+ return (ip6_fib_get(fib_index)->flow_hash_config);
+}
+
+u32
+ip6_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+ if (sw_if_index >= vec_len(ip6_main.fib_index_by_sw_if_index))
+ {
+ /*
+ * This is the case for interfaces that are not yet mapped to
+ * a IP table
+ */
+ return (~0);
+ }
+ return (ip6_main.fib_index_by_sw_if_index[sw_if_index]);
+}
+
+void
+ip6_fib_table_fwding_dpo_update (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+ kv.value = dpo->dpoi_index;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1);
+
+ table->dst_address_length_refcounts[len]++;
+
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 1);
+ compute_prefix_lengths_in_search_order (table);
+}
+
+void
+ip6_fib_table_fwding_dpo_remove (u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip6_fib_table_instance_t *table;
+ BVT(clib_bihash_kv) kv;
+ ip6_address_t *mask;
+ u64 fib;
+
+ table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING];
+ mask = &ip6_main.fib_masks[len];
+ fib = ((u64)((fib_index))<<32);
+
+ kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+ kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+ kv.key[2] = fib | len;
+ kv.value = dpo->dpoi_index;
+
+ BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0);
+
+ /* refcount accounting */
+ ASSERT (table->dst_address_length_refcounts[len] > 0);
+ if (--table->dst_address_length_refcounts[len] == 0)
+ {
+ table->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
+ 128 - len, 0);
+ compute_prefix_lengths_in_search_order (table);
+ }
+}
+
+typedef struct ip6_fib_show_ctx_t_ {
+ u32 fib_index;
+ fib_node_index_t *entries;
+} ip6_fib_show_ctx_t;
+
+static void
+ip6_fib_table_collect_entries (clib_bihash_kv_24_8_t * kvp,
+ void *arg)
+{
+ ip6_fib_show_ctx_t *ctx = arg;
+
+ if ((kvp->key[2] >> 32) == ctx->fib_index)
+ {
+ vec_add1(ctx->entries, kvp->value);
+ }
+}
+
+static void
+ip6_fib_table_show_all (ip6_fib_t *fib,
+ vlib_main_t * vm)
+{
+ fib_node_index_t *fib_entry_index;
+ ip6_fib_show_ctx_t ctx = {
+ .fib_index = fib->index,
+ .entries = NULL,
+ };
+ ip6_main_t *im = &ip6_main;
+
+ BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+ ip6_fib_table_collect_entries,
+ &ctx);
+
+ vec_sort_with_function(ctx.entries, fib_entry_cmp_for_sort);
+
+ vec_foreach(fib_entry_index, ctx.entries)
+ {
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ *fib_entry_index,
+ FIB_ENTRY_FORMAT_BRIEF);
+ }
+
+ vec_free(ctx.entries);
+}
+
+static void
+ip6_fib_table_show_one (ip6_fib_t *fib,
+ vlib_main_t * vm,
+ ip6_address_t *address,
+ u32 mask_len)
+{
+ vlib_cli_output(vm, "%U",
+ format_fib_entry,
+ ip6_fib_table_lookup(fib->index, address, mask_len),
+ FIB_ENTRY_FORMAT_DETAIL);
+}
+
+typedef struct {
+ u32 fib_index;
+ u64 count_by_prefix_length[129];
+} count_routes_in_fib_at_prefix_length_arg_t;
+
+static void count_routes_in_fib_at_prefix_length
+(BVT(clib_bihash_kv) * kvp, void *arg)
+{
+ count_routes_in_fib_at_prefix_length_arg_t * ap = arg;
+ int mask_width;
+
+ if ((kvp->key[2]>>32) != ap->fib_index)
+ return;
+
+ mask_width = kvp->key[2] & 0xFF;
+
+ ap->count_by_prefix_length[mask_width]++;
+}
+
+static clib_error_t *
+ip6_show_fib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
+ ip6_main_t * im6 = &ip6_main;
+ fib_table_t *fib_table;
+ ip6_fib_t * fib;
+ int verbose, matching;
+ ip6_address_t matching_address;
+ u32 mask_len = 128;
+ int table_id = -1, fib_index = ~0;
+
+ verbose = 1;
+ matching = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "brief") ||
+ unformat (input, "summary") ||
+ unformat (input, "sum"))
+ verbose = 0;
+
+ else if (unformat (input, "%U/%d",
+ unformat_ip6_address, &matching_address, &mask_len))
+ matching = 1;
+
+ else if (unformat (input, "%U", unformat_ip6_address, &matching_address))
+ matching = 1;
+
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else if (unformat (input, "index %d", &fib_index))
+ ;
+ else
+ break;
+ }
+
+ pool_foreach (fib_table, im6->fibs,
+ ({
+ fib = &(fib_table->v6);
+ if (table_id >= 0 && table_id != (int)fib->table_id)
+ continue;
+ if (fib_index != ~0 && fib_index != (int)fib->index)
+ continue;
+
+ vlib_cli_output (vm, "%s, fib_index %d, flow hash: %U",
+ fib_table->ft_desc, fib->index,
+ format_ip_flow_hash_config, fib->flow_hash_config);
+
+ /* Show summary? */
+ if (! verbose)
+ {
+ BVT(clib_bihash) * h = &im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash;
+ int len;
+
+ vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+
+ memset (ca, 0, sizeof(*ca));
+ ca->fib_index = fib->index;
+
+ BV(clib_bihash_foreach_key_value_pair)
+ (h, count_routes_in_fib_at_prefix_length, ca);
+
+ for (len = 128; len >= 0; len--)
+ {
+ if (ca->count_by_prefix_length[len])
+ vlib_cli_output (vm, "%=20d%=16lld",
+ len, ca->count_by_prefix_length[len]);
+ }
+ continue;
+ }
+
+ if (!matching)
+ {
+ ip6_fib_table_show_all(fib, vm);
+ }
+ else
+ {
+ ip6_fib_table_show_one(fib, vm, &matching_address, mask_len);
+ }
+ }));
+
+ return 0;
+}
+
+/*?
+ * Show FIB6/route entries
+ *
+ * @cliexpar
+ * @cliexstart{show ip6 fib}
+ * Display the IPv6 FIB.
+ * This command will run for a long time when the FIBs comprise millions of entries.
+ * See 'show ip fib'
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
+    .path = "show ip6 fib",
+    .short_help = "show ip6 fib [brief|summary] [table <n>] [index <n>] [<ip6-addr>[/<mask>]]",
+    .function = ip6_show_fib,
+};
diff --git a/vnet/vnet/fib/ip6_fib.h b/vnet/vnet/fib/ip6_fib.h
new file mode 100644
index 00000000000..f6af993a3c2
--- /dev/null
+++ b/vnet/vnet/fib/ip6_fib.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IP6_FIB_H__
+#define __IP6_FIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/load_balance.h>
+
+extern fib_node_index_t ip6_fib_table_lookup(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len);
+extern fib_node_index_t ip6_fib_table_lookup_exact_match(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len);
+
+extern void ip6_fib_table_entry_remove(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len);
+
+extern void ip6_fib_table_entry_insert(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip6_fib_table_destroy(u32 fib_index);
+
+extern void ip6_fib_table_fwding_dpo_update(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+extern void ip6_fib_table_fwding_dpo_remove(u32 fib_index,
+ const ip6_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im,
+ u32 sw_if_index,
+ const ip6_address_t * dst);
+u32 ip6_fib_table_fwding_lookup(ip6_main_t * im,
+ u32 fib_index,
+ const ip6_address_t * dst);
+
+/**
+ * @brief Resolve the RX adjacency for a packet from its source address.
+ *
+ * If the buffer's RX adjacency index is still unset (~0), the packet's
+ * source address is looked up in the forwarding table of the RX interface.
+ * When bucket 0 of the resulting load-balance is an adjacency, its index is
+ * stored in the buffer. The (possibly still unset) RX adjacency index held
+ * in the buffer is returned.
+ */
+always_inline u32
+ip6_src_lookup_for_packet (ip6_main_t * im,
+                           vlib_buffer_t * b,
+                           ip6_header_t * i)
+{
+    const dpo_id_t *bucket;
+    index_t lb_index;
+
+    /* a value is already stored in the buffer - nothing more to do */
+    if (vnet_buffer (b)->ip.adj_index[VLIB_RX] != ~0)
+        return vnet_buffer (b)->ip.adj_index[VLIB_RX];
+
+    lb_index = ip6_fib_table_fwding_lookup_with_if_index(
+                   im,
+                   vnet_buffer (b)->sw_if_index[VLIB_RX],
+                   &i->src_address);
+
+    bucket = load_balance_get_bucket_i(load_balance_get(lb_index), 0);
+
+    if (dpo_is_adj(bucket))
+        vnet_buffer (b)->ip.adj_index[VLIB_RX] = bucket->dpoi_index;
+
+    return vnet_buffer (b)->ip.adj_index[VLIB_RX];
+}
+
+/**
+ * \brief Get or create an IPv6 fib.
+ *
+ * Get or create an IPv6 fib with the provided table ID.
+ *
+ * \param table_id
+ * When set to \c ~0, an arbitrary and unused fib ID is picked.
+ * Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * \returns A u32 identifying the retrieved or created fib (presumably its
+ * FIB index - confirm against the implementation).
+ *
+ */
+extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id);
+extern u32 ip6_fib_table_create_and_lock(void);
+
+/**
+ * Return the v6 member of the table stored at 'index' in ip6_main.fibs.
+ * The index must name an in-use pool element; this is checked by ASSERT only.
+ */
+static inline ip6_fib_t *
+ip6_fib_get (fib_node_index_t index)
+{
+    ASSERT(!pool_is_free_index(ip6_main.fibs, index));
+
+    return (&(pool_elt_at_index (ip6_main.fibs, index)->v6));
+}
+
+/**
+ * Map a table ID to the value stored for it in
+ * ip6_main.fib_index_by_table_id, or ~0 when the ID is not present.
+ */
+static inline
+u32 ip6_fib_index_from_table_id (u32 table_id)
+{
+    uword * entry;
+
+    entry = hash_get (ip6_main.fib_index_by_table_id, table_id);
+
+    if (entry)
+        return entry[0];
+
+    return ~0;
+}
+
+extern u32 ip6_fib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+extern flow_hash_config_t ip6_fib_table_get_flow_hash_config(u32 fib_index);
+
+#endif
+
diff --git a/vnet/vnet/fib/mpls_fib.c b/vnet/vnet/fib/mpls_fib.c
new file mode 100644
index 00000000000..8f1ccef9061
--- /dev/null
+++ b/vnet/vnet/fib/mpls_fib.c
@@ -0,0 +1,439 @@
+/*
+ * mpls_fib.c: The Label/MPLS FIB
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * An MPLS_FIB table;
+ *
+ * The entries in the table are programmed with one or more MOIs. These MOIs
+ * may result in different forwarding actions for end-of-stack (EOS) and non-EOS
+ * packets. Whether the two actions are the same more often than they are
+ * different, or vice versa, is a function of the deployment in which the router
+ * is used and thus not predictable.
+ * The design choice to make with an MPLS_FIB table is:
+ * 1 - 20 bit key: label only.
+ * When the EOS and non-EOS actions differ the result is a 'EOS-choice' object.
+ * 2 - 21 bit key: label and EOS-bit.
+ * The result is then the specific action based on EOS-bit.
+ *
+ * 20 bit key:
+ * Advantages:
+ * - lower memory overhead, since there are fewer DB entries.
+ * Disadvantages:
+ * - slower DP performance in the case the chains differ, as more objects are
+ * encountered in the switch path
+ *
+ * 21 bit key:
+ * Advantages:
+ * - faster DP performance
+ * Disadvantages
+ * - increased memory footprint.
+ *
+ * Switching between schemes based on observed/measured action similarity is not
+ * considered on the grounds of complexity and flip-flopping.
+ *
+ * VPP mantra - favour performance over memory. We choose a 21 bit key.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/**
+ * All lookups in an MPLS_FIB table must result in a DPO of type load-balance.
+ * This is the default result which links to drop.
+ * It starts as INDEX_INVALID and is created the first time a table is
+ * created; every table shares the same index.
+ */
+static index_t mpls_fib_drop_dpo_index = INDEX_INVALID;
+
+/**
+ * Flow-hash configuration assigned to each newly created MPLS FIB table.
+ * FIXME: placeholder value.
+ */
+#define MPLS_FLOW_HASH_DEFAULT 0
+
+/**
+ * Build the table key from a label and its EOS bit: the label occupies the
+ * upper bits and the EOS bit is bit 0.
+ */
+static inline u32
+mpls_fib_entry_mk_key (mpls_label_t label,
+                       mpls_eos_bit_t eos)
+{
+    ASSERT(eos <= 1);
+
+    return ((label << 1) | eos);
+}
+
+/**
+ * Map a table ID to the FIB index registered for it in
+ * mpls_main.fib_index_by_table_id, or FIB_NODE_INDEX_INVALID when the ID
+ * has no entry.
+ */
+u32
+mpls_fib_index_from_table_id (u32 table_id)
+{
+    uword * entry;
+
+    entry = hash_get (mpls_main.fib_index_by_table_id, table_id);
+
+    if (entry)
+        return entry[0];
+
+    return FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * Create a new MPLS FIB table bound to table_id and return its pool index.
+ *
+ * The table is allocated from mpls_main.fibs, registered in
+ * mpls_main.fib_index_by_table_id and locked once. Every slot of its
+ * forwarding array is pointed at the shared drop load-balance, and
+ * FIB_SOURCE_SPECIAL entries are added for the router-alert label and for
+ * the IPv4 and IPv6 explicit-null labels.
+ */
+static u32
+mpls_fib_create_with_table_id (u32 table_id)
+{
+ dpo_id_t dpo = DPO_NULL;
+ fib_table_t *fib_table;
+ mpls_eos_bit_t eos;
+ mpls_fib_t *mf;
+ int i;
+
+ pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ memset(fib_table, 0, sizeof(*fib_table));
+
+ fib_table->ft_proto = FIB_PROTOCOL_MPLS;
+ fib_table->ft_index =
+ (fib_table - mpls_main.fibs);
+
+ /*
+ * NOTE(review): the table ID is registered unconditionally, including the
+ * ~0 passed by mpls_fib_table_create_and_lock, whereas
+ * mpls_fib_table_destroy skips the hash_unset for ~0 - confirm intent.
+ */
+ hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index);
+
+ fib_table->ft_table_id =
+ table_id;
+ fib_table->ft_flow_hash_config =
+ MPLS_FLOW_HASH_DEFAULT;
+ /*
+ * NOTE(review): these write the v4 member of an MPLS table; how v4 and
+ * mpls are laid out inside fib_table_t is not visible here - confirm
+ * these stores are intended.
+ */
+ fib_table->v4.fwd_classify_table_index = ~0;
+ fib_table->v4.rev_classify_table_index = ~0;
+
+ fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS);
+
+ /* create the shared drop load-balance on first use */
+ if (INDEX_INVALID == mpls_fib_drop_dpo_index)
+ {
+ mpls_fib_drop_dpo_index = load_balance_create(1, DPO_PROTO_MPLS, 0);
+ load_balance_set_bucket(mpls_fib_drop_dpo_index,
+ 0,
+ drop_dpo_get(DPO_PROTO_MPLS));
+ }
+
+ mf = &fib_table->mpls;
+ mf->mf_entries = hash_create(0, sizeof(fib_node_index_t));
+ for (i = 0; i < MPLS_FIB_DB_SIZE; i++)
+ {
+ /*
+ * initialise each DPO in the data-path lookup table
+ * to be the special MPLS drop
+ */
+ mf->mf_lbs[i] = mpls_fib_drop_dpo_index;
+ }
+
+ /*
+ * non-default forwarding for the special labels.
+ */
+ fib_prefix_t prefix = {
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ /*
+ * PUNT the router alert, both EOS and non-eos
+ */
+ prefix.fp_label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ FOR_EACH_MPLS_EOS_BIT(eos)
+ {
+ prefix.fp_eos = eos;
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ punt_dpo_get(DPO_PROTO_MPLS));
+ }
+
+ /*
+ * IPv4 explicit NULL EOS lookup in the interface's IPv4 table
+ */
+ prefix.fp_label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ prefix.fp_payload_proto = DPO_PROTO_IP4;
+ prefix.fp_eos = MPLS_EOS;
+
+ /*
+ * NOTE(review): the same 'dpo' is passed to each of the four lookup-DPO
+ * calls below and is not reset before returning; whether that leaves a
+ * lock held depends on helpers not visible here - confirm.
+ */
+ lookup_dpo_add_or_lock_w_fib_index(0, // unused
+ DPO_PROTO_IP4,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ /* IPv4 explicit NULL, non-EOS: lookup with an MPLS payload */
+ prefix.fp_payload_proto = DPO_PROTO_MPLS;
+ prefix.fp_eos = MPLS_NON_EOS;
+
+ lookup_dpo_add_or_lock_w_fib_index(0, // unused
+ DPO_PROTO_MPLS,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ /*
+ * IPv6 explicit NULL EOS lookup in the interface's IPv6 table
+ */
+ prefix.fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ prefix.fp_payload_proto = DPO_PROTO_IP6;
+ prefix.fp_eos = MPLS_EOS;
+
+ lookup_dpo_add_or_lock_w_fib_index(0, //unused
+ DPO_PROTO_IP6,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ /* IPv6 explicit NULL, non-EOS: lookup with an MPLS payload */
+ prefix.fp_payload_proto = DPO_PROTO_MPLS;
+ prefix.fp_eos = MPLS_NON_EOS;
+ lookup_dpo_add_or_lock_w_fib_index(0, // unused
+ DPO_PROTO_MPLS,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ &dpo);
+ fib_table_entry_special_dpo_add(fib_table->ft_index,
+ &prefix,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+
+ return (fib_table->ft_index);
+}
+
+/**
+ * Return the index of the MPLS FIB table bound to table_id, creating the
+ * table when no such binding exists. A lock is taken on the table on both
+ * paths (the create path locks inside mpls_fib_create_with_table_id).
+ */
+u32
+mpls_fib_table_find_or_create_and_lock (u32 table_id)
+{
+    u32 index;
+
+    index = mpls_fib_index_from_table_id(table_id);
+
+    /* compare against the sentinel the lookup actually returns */
+    if (FIB_NODE_INDEX_INVALID == index)
+        return mpls_fib_create_with_table_id(table_id);
+
+    fib_table_lock(index, FIB_PROTOCOL_MPLS);
+
+    return (index);
+}
+/**
+ * Create a new MPLS FIB table using ~0 as its table ID and return the
+ * index of the new table.
+ */
+u32
+mpls_fib_table_create_and_lock (void)
+{
+    u32 fib_index;
+
+    fib_index = mpls_fib_create_with_table_id(~0);
+
+    return (fib_index);
+}
+
+/**
+ * Tear down an MPLS FIB table: delete the FIB_SOURCE_SPECIAL entries for
+ * the special labels (both EOS values), drop the table-ID binding unless
+ * the ID is ~0, delete the entry hash and return the table to the pool.
+ */
+void
+mpls_fib_table_destroy (mpls_fib_t *mf)
+{
+    mpls_label_t special_labels[] = {
+        MPLS_IETF_ROUTER_ALERT_LABEL,
+        MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL,
+        MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL,
+    };
+    fib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_MPLS,
+    };
+    fib_table_t *table = (fib_table_t*)mf;
+    mpls_eos_bit_t eos;
+    u32 n;
+
+    for (n = 0; n < ARRAY_LEN(special_labels); n++)
+    {
+        FOR_EACH_MPLS_EOS_BIT(eos)
+        {
+            pfx.fp_label = special_labels[n];
+            pfx.fp_eos = eos;
+
+            fib_table_entry_delete(table->ft_index, &pfx, FIB_SOURCE_SPECIAL);
+        }
+    }
+
+    if (~0 != table->ft_table_id)
+    {
+        hash_unset(mpls_main.fib_index_by_table_id, table->ft_table_id);
+    }
+
+    hash_delete(mf->mf_entries);
+    pool_put(mpls_main.fibs, table);
+}
+
+/**
+ * Find the FIB entry index stored for (label, eos) in the table's entry
+ * hash. Returns FIB_NODE_INDEX_INVALID when there is no such entry.
+ */
+fib_node_index_t
+mpls_fib_table_lookup (const mpls_fib_t *mf,
+                       mpls_label_t label,
+                       mpls_eos_bit_t eos)
+{
+    uword *entry;
+
+    entry = hash_get(mf->mf_entries, mpls_fib_entry_mk_key(label, eos));
+
+    if (NULL != entry)
+        return entry[0];
+
+    return FIB_NODE_INDEX_INVALID;
+}
+
+/**
+ * Record FIB entry index lfei under the key built from (label, eos) in the
+ * table's entry hash.
+ */
+void
+mpls_fib_table_entry_insert (mpls_fib_t *mf,
+                             mpls_label_t label,
+                             mpls_eos_bit_t eos,
+                             fib_node_index_t lfei)
+{
+    u32 entry_key = mpls_fib_entry_mk_key(label, eos);
+
+    hash_set(mf->mf_entries, entry_key, lfei);
+}
+
+/**
+ * Remove the key built from (label, eos) from the table's entry hash.
+ */
+void
+mpls_fib_table_entry_remove (mpls_fib_t *mf,
+                             mpls_label_t label,
+                             mpls_eos_bit_t eos)
+{
+    u32 entry_key = mpls_fib_entry_mk_key(label, eos);
+
+    hash_unset(mf->mf_entries, entry_key);
+}
+
+/**
+ * Point the forwarding slot for (label, eos) at the load-balance named by
+ * dpo. The DPO must be of type DPO_LOAD_BALANCE (checked by ASSERT).
+ */
+void
+mpls_fib_forwarding_table_update (mpls_fib_t *mf,
+                                  mpls_label_t label,
+                                  mpls_eos_bit_t eos,
+                                  const dpo_id_t *dpo)
+{
+    mpls_label_t slot;
+
+    ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
+
+    slot = mpls_fib_entry_mk_key(label, eos);
+    mf->mf_lbs[slot] = dpo->dpoi_index;
+}
+
+/**
+ * Point the forwarding slot for (label, eos) back at the shared drop
+ * load-balance.
+ */
+void
+mpls_fib_forwarding_table_reset (mpls_fib_t *mf,
+                                 mpls_label_t label,
+                                 mpls_eos_bit_t eos)
+{
+    mpls_label_t slot;
+
+    slot = mpls_fib_entry_mk_key(label, eos);
+    mf->mf_lbs[slot] = mpls_fib_drop_dpo_index;
+}
+
+/**
+ * Return the flow-hash configuration for an MPLS FIB table.
+ *
+ * FIXME: fib_index is ignored; the default that every table is created
+ * with is returned instead of the per-table value.
+ */
+flow_hash_config_t
+mpls_fib_table_get_flow_hash_config (u32 fib_index)
+{
+    /* same constant that mpls_fib_create_with_table_id stores in the table */
+    return (MPLS_FLOW_HASH_DEFAULT);
+}
+
+/**
+ * Print every entry of an MPLS FIB table in detail format, in the order
+ * produced by fib_entry_cmp_for_sort.
+ */
+static void
+mpls_fib_table_show_all (const mpls_fib_t *mpls_fib,
+                         vlib_main_t * vm)
+{
+    fib_node_index_t *sorted = NULL, *cursor, entry_index;
+    mpls_label_t key;
+
+    /* collect the entry indices so they can be sorted before printing */
+    hash_foreach(key, entry_index, mpls_fib->mf_entries,
+    ({
+        vec_add1(sorted, entry_index);
+    }));
+
+    vec_sort_with_function(sorted, fib_entry_cmp_for_sort);
+
+    vec_foreach(cursor, sorted)
+    {
+        vlib_cli_output (vm, "%U",
+                         format_fib_entry, *cursor,
+                         FIB_ENTRY_FORMAT_DETAIL);
+    }
+
+    vec_free(sorted);
+}
+
+/**
+ * Print, in detail format, the entries found for one label: the label is
+ * looked up once per EOS bit value and each hit is printed.
+ */
+static void
+mpls_fib_table_show_one (const mpls_fib_t *mpls_fib,
+                         mpls_label_t label,
+                         vlib_main_t * vm)
+{
+    fib_node_index_t entry_index;
+    mpls_eos_bit_t eos;
+
+    FOR_EACH_MPLS_EOS_BIT(eos)
+    {
+        entry_index = mpls_fib_table_lookup(mpls_fib, label, eos);
+
+        if (FIB_NODE_INDEX_INVALID != entry_index)
+        {
+            vlib_cli_output (vm, "%U",
+                             format_fib_entry, entry_index,
+                             FIB_ENTRY_FORMAT_DETAIL);
+        }
+    }
+}
+
+/**
+ * CLI handler for 'show mpls fib'.
+ *
+ * Accepts an optional label (a bare number) and an optional
+ * 'table <n>' filter. For every MPLS FIB table that passes the filter a
+ * header line is printed, followed by either all entries or only the
+ * entries for the given label. Always returns 0 (no error).
+ */
+static clib_error_t *
+mpls_fib_show (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+    fib_table_t * fib_table;
+    mpls_label_t label;
+    int table_id;
+
+    table_id = -1;
+    label = MPLS_LABEL_INVALID;
+
+    /* a brief/summary mode is not implemented for this command */
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "%d", &label))
+            continue;
+        else if (unformat (input, "table %d", &table_id))
+            ;
+        else
+            break;
+    }
+
+    pool_foreach (fib_table, mpls_main.fibs,
+    ({
+        /* table_id < 0 means no filter was given */
+        if (table_id >= 0 && table_id != (int)fib_table->ft_table_id)
+            continue;
+
+        /*
+         * ft_index is set to the table's pool index at creation; the
+         * previous 'mpls_main.fibs - fib_table' had its operands reversed
+         * and printed a negative index.
+         */
+        vlib_cli_output (vm, "%v, fib_index %d",
+                         fib_table->ft_desc, fib_table->ft_index);
+
+        if (MPLS_LABEL_INVALID == label)
+        {
+            mpls_fib_table_show_all(&(fib_table->mpls), vm);
+        }
+        else
+        {
+            mpls_fib_table_show_one(&(fib_table->mpls), label, vm);
+        }
+    }));
+
+    return 0;
+}
+
+VLIB_CLI_COMMAND (mpls_fib_show_command, static) = {
+    .path = "show mpls fib",
+    /* lists only what mpls_fib_show parses: a label and a table filter */
+    .short_help = "show mpls fib [<label>] [table <n>]",
+    .function = mpls_fib_show,
+};
diff --git a/vnet/vnet/fib/mpls_fib.h b/vnet/vnet/fib/mpls_fib.h
new file mode 100644
index 00000000000..42c9a865276
--- /dev/null
+++ b/vnet/vnet/fib/mpls_fib.h
@@ -0,0 +1,106 @@
+/*
+ * mpls_fib.h: The Label/MPLS FIB
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_FIB_TABLE_H__
+#define __MPLS_FIB_TABLE_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_table.h>
+
+/**
+ * Return the mpls member of the table stored at 'index' in mpls_main.fibs,
+ * or NULL when that pool index is free.
+ */
+static inline mpls_fib_t*
+mpls_fib_get (fib_node_index_t index)
+{
+    if (pool_is_free_index(mpls_main.fibs, index))
+        return (NULL);
+
+    return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls));
+}
+
+extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id);
+extern u32 mpls_fib_table_create_and_lock(void);
+// extern mpls_fib_t * mpls_fib_find(u32 table_id);
+extern u32 mpls_fib_index_from_table_id(u32 table_id);
+
+extern u8 *format_mpls_fib_table_name(u8 * s, va_list * args);
+
+extern fib_node_index_t mpls_fib_table_entry_add_from_ip_fib_entry (
+ u32 table_id,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_node_index_t fib_entry_index);
+
+
+extern fib_node_index_t mpls_fib_table_lookup(const mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos);
+
+extern void mpls_fib_table_entry_remove(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos);
+extern void mpls_fib_table_entry_insert(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ fib_node_index_t fei);
+extern void mpls_fib_table_destroy(mpls_fib_t *mf);
+
+
+
+extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos,
+ const dpo_id_t *dpo);
+extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf,
+ mpls_label_t label,
+ mpls_eos_bit_t eos);
+
+/**
+ * @brief
+ * Lookup a label and EOS bit in the MPLS_FIB table to retrieve the
+ * load-balance index to be used for packet forwarding.
+ *
+ * The key is built from the header's label and S bit in the same layout as
+ * the table's entry keys (label shifted left by one, S bit in bit 0).
+ * The table pointer returned by mpls_fib_get is not checked for NULL.
+ */
+static inline index_t
+mpls_fib_table_forwarding_lookup (u32 mpls_fib_index,
+                                  const mpls_unicast_header_t *hdr)
+{
+    mpls_label_t host_word;
+    u32 lb_key;
+
+    /* the header word is stored in network byte order */
+    host_word = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+    lb_key = (vnet_mpls_uc_get_label(host_word) << 1) |
+             vnet_mpls_uc_get_s(host_word);
+
+    return (mpls_fib_get(mpls_fib_index)->mf_lbs[lb_key]);
+}
+
+/**
+ * Return the MPLS FIB index bound to an interface, read from
+ * mpls_main.fib_index_by_sw_if_index.
+ *
+ * sw_if_index must be a valid index into that vector; this is checked by
+ * ASSERT only.
+ */
+static inline u32
+mpls_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+    mpls_main_t *mm = &mpls_main;
+
+    /* the vector must be longer than the index that is about to be read */
+    ASSERT(vec_len(mm->fib_index_by_sw_if_index) > sw_if_index);
+
+    return (mm->fib_index_by_sw_if_index[sw_if_index]);
+}
+
+extern flow_hash_config_t mpls_fib_table_get_flow_hash_config(u32 fib_index);
+
+#endif
diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c
index f00977c8cd6..9f8adc79ff0 100644
--- a/vnet/vnet/gre/gre.c
+++ b/vnet/vnet/gre/gre.c
@@ -17,14 +17,10 @@
#include <vnet/vnet.h>
#include <vnet/gre/gre.h>
+#include <vnet/adj/adj.h>
gre_main_t gre_main;
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4;
- gre_header_t gre;
-}) ip4_and_gre_header_t;
-
typedef struct {
union {
ip4_and_gre_header_t ip4_and_gre;
@@ -233,179 +229,39 @@ gre_interface_tx (vlib_main_t * vm,
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/*
- * As long as we have enough pkts left to process two pkts
- * and prefetch two pkts...
+ * FIXME DUAL LOOP
*/
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t * b0, * b1;
- ip4_header_t * ip0, * ip1;
- ip4_and_gre_union_t * h0, * h1;
- u32 bi0, next0, bi1, next1;
- __attribute__((unused)) u8 error0, error1;
- u16 gre_protocol0, gre_protocol1;
-
- /* Prefetch the next iteration */
- {
- vlib_buffer_t * p2, * p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- /*
- * Prefetch packet data. We expect to overwrite
- * the inbound L2 header with an ip header and a
- * gre header. Might want to prefetch the last line
- * of rewrite space as well; need profile data
- */
- CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
- CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
- }
-
- /* Pick up the next two buffer indices */
- bi0 = from[0];
- bi1 = from[1];
-
- /* Speculatively enqueue them where we sent the last buffer */
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = t->outer_fib_index;
-
- if (PREDICT_FALSE(t->teb))
- {
- gre_protocol0 = clib_net_to_host_u16(GRE_PROTOCOL_teb);
- gre_protocol1 = clib_net_to_host_u16(GRE_PROTOCOL_teb);
- }
- else
- {
- ip0 = vlib_buffer_get_current (b0);
- gre_protocol0 = clib_net_to_host_u16 (0x800);
- gre_protocol0 =
- ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ?
- 0x86DD : gre_protocol0;
-
- ip1 = vlib_buffer_get_current (b1);
- gre_protocol1 = clib_net_to_host_u16 (0x800);
- gre_protocol1 =
- ((ip1->ip_version_and_header_length & 0xF0) == 0x60) ?
- 0x86DD : gre_protocol1;
- }
-
- vlib_buffer_advance (b0, -sizeof(*h0));
- vlib_buffer_advance (b1, -sizeof(*h1));
-
- h0 = vlib_buffer_get_current (b0);
- h1 = vlib_buffer_get_current (b1);
- h0->as_u64[0] = 0;
- h0->as_u64[1] = 0;
- h0->as_u64[2] = 0;
-
- h1->as_u64[0] = 0;
- h1->as_u64[1] = 0;
- h1->as_u64[2] = 0;
-
- ip0 = &h0->ip4_and_gre.ip4;
- h0->ip4_and_gre.gre.protocol = gre_protocol0;
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_GRE;
-
- ip1 = &h1->ip4_and_gre.ip4;
- h1->ip4_and_gre.gre.protocol = gre_protocol1;
- ip1->ip_version_and_header_length = 0x45;
- ip1->ttl = 254;
- ip1->protocol = IP_PROTOCOL_GRE;
-
- ip0->length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- ip1->length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
- ip0->src_address.as_u32 = t->tunnel_src.as_u32;
- ip1->src_address.as_u32 = t->tunnel_src.as_u32;
- ip0->dst_address.as_u32 = t->tunnel_dst.as_u32;
- ip1->dst_address.as_u32 = t->tunnel_dst.as_u32;
- ip0->checksum = ip4_header_checksum (ip0);
- ip1->checksum = ip4_header_checksum (ip1);
-
- /* ip4_lookup will route to the tunnel partner */
- next0 = GRE_OUTPUT_NEXT_LOOKUP;
- next1 = GRE_OUTPUT_NEXT_LOOKUP;
- error0 = GRE_ERROR_NONE;
- error1 = GRE_ERROR_NONE;
-
- /*
- * Enqueue 2 pkts. This macro deals with next0 != next1,
- * acquiring enqueue rights to the indicated next
- * node input frame, etc.
- */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
while (n_left_from > 0 && n_left_to_next > 0)
{
- vlib_buffer_t * b0;
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
ip4_header_t * ip0;
- ip4_and_gre_union_t * h0;
- u32 bi0, next0;
- __attribute__((unused)) u8 error0;
- u16 gre_protocol0;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
ip0 = vlib_buffer_get_current (b0);
- if (PREDICT_FALSE(t->teb))
- {
- gre_protocol0 = clib_net_to_host_u16(GRE_PROTOCOL_teb);
- }
- else
- {
- gre_protocol0 = clib_net_to_host_u16 (0x800);
- gre_protocol0 =
- ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ?
- 0x86DD : gre_protocol0;
- }
-
- vlib_buffer_advance (b0, -sizeof(*h0));
-
- h0 = vlib_buffer_get_current (b0);
- h0->as_u64[0] = 0;
- h0->as_u64[1] = 0;
- h0->as_u64[2] = 0;
-
- ip0 = &h0->ip4_and_gre.ip4;
- h0->ip4_and_gre.gre.protocol = gre_protocol0;
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_GRE;
+
+ /* Fixup the checksum and len fields in the GRE tunnel encap
+ * that was applied at the midchain node */
ip0->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- ip0->src_address.as_u32 = t->tunnel_src.as_u32;
- ip0->dst_address.as_u32 = t->tunnel_dst.as_u32;
ip0->checksum = ip4_header_checksum (ip0);
- next0 = GRE_OUTPUT_NEXT_LOOKUP;
- error0 = GRE_ERROR_NONE;
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get(adj_index0);
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
diff --git a/vnet/vnet/gre/gre.h b/vnet/vnet/gre/gre.h
index ad599d2f09e..beb13d989ee 100644
--- a/vnet/vnet/gre/gre.h
+++ b/vnet/vnet/gre/gre.h
@@ -25,6 +25,7 @@
#include <vnet/ip/ip4_packet.h>
#include <vnet/pg/pg.h>
#include <vnet/ip/format.h>
+#include <vnet/adj/adj_types.h>
extern vnet_hw_interface_class_t gre_hw_interface_class;
@@ -50,12 +51,44 @@ typedef struct {
} gre_protocol_info_t;
typedef struct {
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /**
+ * The tunnel's source/local address
+ */
ip4_address_t tunnel_src;
+ /**
+ * The tunnel's destination/remote address
+ */
ip4_address_t tunnel_dst;
+ /**
+ * The FIB in which the src/dst addresses are present
+ */
u32 outer_fib_index;
u32 hw_if_index;
u32 sw_if_index;
u8 teb;
+
+ /**
+ * The FIB entry sourced by the tunnel for its destination prefix
+ */
+ fib_node_index_t fib_entry_index;
+
+ /**
+ * The tunnel is a child of the FIB entry for its destination. This is
+ * so it receives updates when the forwarding information for that entry
+ * changes.
+ * The tunnel's sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+
+ /**
+ * The index of the midchain adjacency created for this tunnel
+ */
+ adj_index_t adj_index[FIB_LINK_NUM];
} gre_tunnel_t;
typedef struct {
@@ -80,6 +113,15 @@ typedef struct {
vnet_main_t * vnet_main;
} gre_main_t;
+/**
+ * @brief IPv4 and GRE header.
+ *
+ * An IPv4 header immediately followed by a GRE header, declared with
+ * CLIB_PACKED. Used as the layout of the tunnel encapsulation.
+*/
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ gre_header_t gre;
+}) ip4_and_gre_header_t;
+
always_inline gre_protocol_info_t *
gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol)
{
diff --git a/vnet/vnet/gre/interface.c b/vnet/vnet/gre/interface.c
index 864c384b992..10e9ff9be8c 100644
--- a/vnet/vnet/gre/interface.c
+++ b/vnet/vnet/gre/interface.c
@@ -19,10 +19,24 @@
#include <vnet/pg/pg.h>
#include <vnet/gre/gre.h>
#include <vnet/ip/format.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/mpls/mpls.h>
+
+/**
+ * Build the 64-bit tunnel DB key: source address in the upper 32 bits,
+ * destination address in the lower 32 bits. out_fib_index is not used.
+ */
+static inline u64
+gre_mk_key (const ip4_address_t *src,
+            const ip4_address_t *dst,
+            u32 out_fib_index)
+{
+    u64 hi = (u64)src->as_u32;
+    u64 lo = (u64)dst->as_u32;
+
+    // FIXME. the fib index should be part of the key
+    return ((hi << 32) | lo);
+}
-u8 * format_gre_tunnel (u8 * s, va_list * args)
+static u8 *
+format_gre_tunnel (u8 * s, va_list * args)
{
gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *);
+ int detail = va_arg (*args, int);
gre_main_t * gm = &gre_main;
s = format (s,
@@ -32,11 +46,193 @@ u8 * format_gre_tunnel (u8 * s, va_list * args)
format_ip4_address, &t->tunnel_dst,
(t->teb ? "teb" : "ip"),
t->outer_fib_index);
+ if (detail)
+ {
+ s = format (s, "\n fib-entry:%d adj-ip4:%d adj-ip6:%d adj-mpls:%d",
+ t->fib_entry_index,
+ t->adj_index[FIB_LINK_IP4],
+ t->adj_index[FIB_LINK_IP6],
+ t->adj_index[FIB_LINK_MPLS]);
+ }
+
return s;
}
-int vnet_gre_add_del_tunnel
- (vnet_gre_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
+/**
+ * Look a tunnel up in the tunnel DB by its key. Returns the tunnel, or
+ * NULL when no tunnel is stored under that key.
+ */
+static gre_tunnel_t *
+gre_tunnel_db_find (const ip4_address_t *src,
+                    const ip4_address_t *dst,
+                    u32 out_fib_index)
+{
+    gre_main_t * gm = &gre_main;
+    uword * slot;
+    u64 db_key;
+
+    db_key = gre_mk_key(src, dst, out_fib_index);
+    slot = hash_get (gm->tunnel_by_key, db_key);
+
+    if (NULL != slot)
+        return (pool_elt_at_index (gm->tunnels, slot[0]));
+
+    return (NULL);
+}
+
+/**
+ * Add a tunnel to the tunnel DB: its pool index is stored under the key
+ * built from its source, destination and outer FIB index.
+ */
+static void
+gre_tunnel_db_add (const gre_tunnel_t *t)
+{
+    gre_main_t * gm = &gre_main;
+    u64 db_key = gre_mk_key(&t->tunnel_src, &t->tunnel_dst, t->outer_fib_index);
+
+    hash_set (gm->tunnel_by_key, db_key, t - gm->tunnels);
+}
+
+/**
+ * Remove a tunnel from the tunnel DB, using the key built from its source,
+ * destination and outer FIB index.
+ */
+static void
+gre_tunnel_db_remove (const gre_tunnel_t *t)
+{
+    gre_main_t * gm = &gre_main;
+    u64 db_key = gre_mk_key(&t->tunnel_src, &t->tunnel_dst, t->outer_fib_index);
+
+    hash_unset (gm->tunnel_by_key, db_key);
+}
+
+/**
+ * Recover the enclosing gre_tunnel_t from a pointer to its embedded
+ * fib_node_t by subtracting the member's offset. Debug builds assert that
+ * the node really is of type FIB_NODE_TYPE_GRE_TUNNEL.
+ */
+static gre_tunnel_t *
+gre_tunnel_from_fib_node (fib_node_t *node)
+{
+    char *base;
+
+#if (CLIB_DEBUG > 0)
+    ASSERT(FIB_NODE_TYPE_GRE_TUNNEL == node->fn_type);
+#endif
+    base = ((char*)node) - STRUCT_OFFSET_OF(gre_tunnel_t, node);
+
+    return ((gre_tunnel_t*) base);
+}
+
+/*
+ * gre_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency.
+ *
+ * Each valid per-link adjacency of the tunnel is stacked on the IP
+ * forwarding contributed by the tunnel's FIB entry.
+ */
+static void
+gre_tunnel_stack (gre_tunnel_t *gt)
+{
+ fib_link_t linkt;
+
+ /*
+ * find the adjacency that is contributed by the FIB entry
+ * that this tunnel resolves via, and use it as the next adj
+ * in the midchain
+ */
+ FOR_EACH_FIB_LINK(linkt)
+ {
+ /* link types for which the tunnel has no adjacency are skipped */
+ if (ADJ_INDEX_INVALID != gt->adj_index[linkt])
+ {
+ adj_nbr_midchain_stack(
+ gt->adj_index[linkt],
+ fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
+ }
+ }
+}
+
+/**
+ * Back-walk callback for the tunnel's FIB node: re-stack the tunnel that
+ * owns the node, then let the walk continue.
+ */
+static fib_node_back_walk_rc_t
+gre_tunnel_back_walk (fib_node_t *node,
+                      fib_node_back_walk_ctx_t *ctx)
+{
+    gre_tunnel_t *tunnel;
+
+    tunnel = gre_tunnel_from_fib_node(node);
+    gre_tunnel_stack(tunnel);
+
+    return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Return the FIB node embedded in the tunnel stored at 'index' in the
+ * tunnel pool.
+ */
+static fib_node_t*
+gre_tunnel_fib_node_get (fib_node_index_t index)
+{
+    gre_main_t * gm = &gre_main;
+    gre_tunnel_t * tunnel = pool_elt_at_index(gm->tunnels, index);
+
+    return (&tunnel->node);
+}
+
+/**
+ * Last-lock-gone callback for the tunnel's FIB node. It is not expected to
+ * be called, so it asserts.
+ */
+static void
+gre_tunnel_last_lock_gone (fib_node_t *node)
+{
+    /*
+     * The GRE tunnel is a root of the graph. As such
+     * it never has children and thus is never locked.
+     */
+    ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by GRE tunnels
+ * for participation in the FIB object graph.
+ */
+static const fib_node_vft_t gre_vft = {
+    .fnv_get = gre_tunnel_fib_node_get,
+    .fnv_last_lock = gre_tunnel_last_lock_gone,
+    .fnv_back_walk = gre_tunnel_back_walk,
+};
+
+/**
+ * Map a FIB link type to the GRE protocol value for its payload.
+ * An unrecognised link type asserts and falls back to GRE_PROTOCOL_ip4.
+ */
+static int
+gre_proto_from_fib_link (fib_link_t link)
+{
+    if (FIB_LINK_IP4 == link)
+        return (GRE_PROTOCOL_ip4);
+    if (FIB_LINK_IP6 == link)
+        return (GRE_PROTOCOL_ip6);
+    if (FIB_LINK_MPLS == link)
+        return (GRE_PROTOCOL_mpls_unicast);
+
+    ASSERT(0);
+    return (GRE_PROTOCOL_ip4);
+}
+
+/**
+ * Build the IPv4 + GRE encapsulation for a tunnel and link type.
+ *
+ * Returns a newly allocated, zero-initialised vector the size of
+ * ip4_and_gre_header_t. The GRE protocol is TEB for TEB tunnels, otherwise
+ * it is derived from the link type. The IPv4 length is left at zero here
+ * and the checksum is computed over that; both are fixed up later.
+ * NOTE(review): the caller presumably owns and frees the returned vector -
+ * confirm at the call site.
+ */
+static u8 *
+gre_rewrite (gre_tunnel_t * t,
+             fib_link_t link)
+{
+    ip4_and_gre_header_t * h0;
+    u8 * rewrite_data = 0;
+
+    vec_validate_init_empty (rewrite_data, sizeof (*h0) - 1, 0);
+
+    h0 = (ip4_and_gre_header_t *) rewrite_data;
+
+    /* the protocol field is written in network byte order on both paths */
+    if (t->teb)
+    {
+        h0->gre.protocol = clib_host_to_net_u16(GRE_PROTOCOL_teb);
+    }
+    else
+    {
+        h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link));
+    }
+
+    h0->ip4.ip_version_and_header_length = 0x45;
+    h0->ip4.ttl = 254;
+    h0->ip4.protocol = IP_PROTOCOL_GRE;
+    /* $$$ fixup ip4 header length and checksum after-the-fact */
+    h0->ip4.src_address.as_u32 = t->tunnel_src.as_u32;
+    h0->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32;
+    h0->ip4.checksum = ip4_header_checksum (&h0->ip4);
+
+    return (rewrite_data);
+}
+
+static int
+vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
+ u32 * sw_if_indexp)
{
gre_main_t * gm = &gre_main;
vnet_main_t * vnm = gm->vnet_main;
@@ -44,49 +240,45 @@ int vnet_gre_add_del_tunnel
gre_tunnel_t * t;
vnet_hw_interface_t * hi;
u32 hw_if_index, sw_if_index;
- u32 slot;
u32 outer_fib_index;
- uword * p;
- u64 key;
u8 address[6];
clib_error_t *error;
+ fib_link_t linkt;
+ u8 *rewrite;
- key = (u64)a->src.as_u32 << 32 | (u64)a->dst.as_u32;
- p = hash_get (gm->tunnel_by_key, key);
+ outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id);
- if (a->is_add) {
- /* check if same src/dst pair exists */
- if (p)
- return VNET_API_ERROR_INVALID_VALUE;
+ if (~0 == outer_fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
- p = hash_get (im->fib_index_by_table_id, a->outer_fib_id);
- if (! p)
- return VNET_API_ERROR_NO_SUCH_FIB;
+ t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id);
- outer_fib_index = p[0];
+ if (NULL != t)
+ return VNET_API_ERROR_INVALID_VALUE;
- pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- memset (t, 0, sizeof (*t));
+ pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+ fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
- if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) {
- vnet_interface_main_t * im = &vnm->interface_main;
+ if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) {
+ vnet_interface_main_t * im = &vnm->interface_main;
- hw_if_index = gm->free_gre_tunnel_hw_if_indices
+ hw_if_index = gm->free_gre_tunnel_hw_if_indices
[vec_len (gm->free_gre_tunnel_hw_if_indices)-1];
- _vec_len (gm->free_gre_tunnel_hw_if_indices) -= 1;
+ _vec_len (gm->free_gre_tunnel_hw_if_indices) -= 1;
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- hi->dev_instance = t - gm->tunnels;
- hi->hw_instance = hi->dev_instance;
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - gm->tunnels;
+ hi->hw_instance = hi->dev_instance;
- /* clear old stats of freed tunnel before reuse */
- sw_if_index = hi->sw_if_index;
- vnet_interface_counter_lock(im);
- vlib_zero_combined_counter
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock(im);
+ vlib_zero_combined_counter
(&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index);
- vlib_zero_combined_counter
+ vlib_zero_combined_counter
(&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index);
- vlib_zero_simple_counter
+ vlib_zero_simple_counter
(&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
vnet_interface_counter_unlock(im);
} else {
@@ -111,67 +303,186 @@ int vnet_gre_add_del_tunnel
return VNET_API_ERROR_INVALID_REGISTRATION;
}
} else {
- hw_if_index = vnet_register_interface
- (vnm, gre_device_class.index, t - gm->tunnels,
- gre_hw_interface_class.index,
- t - gm->tunnels);
+ hw_if_index = vnet_register_interface
+ (vnm, gre_device_class.index, t - gm->tunnels,
+ gre_hw_interface_class.index,
+ t - gm->tunnels);
}
hi = vnet_get_hw_interface (vnm, hw_if_index);
sw_if_index = hi->sw_if_index;
}
- t->hw_if_index = hw_if_index;
- t->outer_fib_index = outer_fib_index;
- t->sw_if_index = sw_if_index;
+ t->hw_if_index = hw_if_index;
+ t->outer_fib_index = outer_fib_index;
+ t->sw_if_index = sw_if_index;
- vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
- gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels;
+ vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
+ gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels;
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
- im->fib_index_by_sw_if_index[sw_if_index] = t->outer_fib_index;
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ im->fib_index_by_sw_if_index[sw_if_index] = t->outer_fib_index;
+ ip4_sw_interface_enable_disable(sw_if_index, 1);
- hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
- hi->per_packet_overhead_bytes =
+ hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
+ hi->per_packet_overhead_bytes =
/* preamble */ 8 + /* inter frame gap */ 12;
- /* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ /* Standard default gre MTU. */
+ hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+
+ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+ clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
+
+ gre_tunnel_db_add(t);
+
+ /*
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ const fib_prefix_t tun_dst_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = t->tunnel_dst,
+ }
+ };
+
+ t->fib_entry_index =
+ fib_table_entry_special_add(outer_fib_index,
+ &tun_dst_pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE,
+ ADJ_INDEX_INVALID);
+ t->sibling_index =
+ fib_entry_child_add(t->fib_entry_index,
+ FIB_NODE_TYPE_GRE_TUNNEL,
+ t - gm->tunnels);
+
+ /*
+ * create and update the midchain adj this tunnel sources.
+ * We could be smarter here and trigger this on an interface proto enable,
+ * like we do for MPLS.
+ */
+ for (linkt = FIB_LINK_IP4; linkt <= FIB_LINK_IP6; linkt++)
+ {
+ t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ linkt,
+ &zero_addr,
+ sw_if_index);
+
+ rewrite = gre_rewrite(t, linkt);
+ adj_nbr_midchain_update_rewrite(t->adj_index[linkt],
+ hi->tx_node_index,
+ rewrite);
+ vec_free(rewrite);
+ }
+ t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
- t->teb = a->teb;
- clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
- clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
+ t->teb = a->teb;
+ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+ clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
+ gre_tunnel_stack(t);
- hash_set (gm->tunnel_by_key, key, t - gm->tunnels);
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
- slot = vlib_node_add_named_next_with_slot
- (vnm->vlib_main, hi->tx_node_index, "ip4-lookup", GRE_OUTPUT_NEXT_LOOKUP);
+ return 0;
+}
- ASSERT (slot == GRE_OUTPUT_NEXT_LOOKUP);
+static int
+vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
+ u32 * sw_if_indexp)
+{
+ gre_main_t * gm = &gre_main;
+ vnet_main_t * vnm = gm->vnet_main;
+ gre_tunnel_t * t;
+ fib_link_t linkt;
+ u32 sw_if_index;
+
+ t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id);
- } else { /* !is_add => delete */
- /* tunnel needs to exist */
- if (! p)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (NULL == t)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- t = pool_elt_at_index (gm->tunnels, p[0]);
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */);
+ /* make sure tunnel is removed from l2 bd or xconnect */
+ set_int_l2_mode(gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
+ vec_add1 (gm->free_gre_tunnel_hw_if_indices, t->hw_if_index);
+ gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
+ ip4_sw_interface_enable_disable(sw_if_index, 0);
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */);
- /* make sure tunnel is removed from l2 bd or xconnect */
- set_int_l2_mode(gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
- vec_add1 (gm->free_gre_tunnel_hw_if_indices, t->hw_if_index);
- gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
+ fib_entry_child_remove(t->fib_entry_index,
+ t->sibling_index);
+ fib_table_entry_delete_index(t->fib_entry_index,
+ FIB_SOURCE_RR);
- hash_unset (gm->tunnel_by_key, key);
- pool_put (gm->tunnels, t);
+ FOR_EACH_FIB_LINK(linkt)
+ {
+ adj_unlock(t->adj_index[linkt]);
}
+ gre_tunnel_db_remove(t);
+ fib_node_deinit(&t->node);
+ pool_put (gm->tunnels, t);
+
if (sw_if_indexp)
*sw_if_indexp = sw_if_index;
return 0;
}
+/**
+ * Public entry point for GRE tunnel configuration.
+ *
+ * Dispatches on a->is_add: a non-zero value creates the tunnel described
+ * by the arguments, zero deletes it. The result of the selected operation
+ * is returned unchanged, and sw_if_indexp is passed through to it.
+ */
+int
+vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a,
+                         u32 * sw_if_indexp)
+{
+    if (!a->is_add)
+        return (vnet_gre_tunnel_delete(a, sw_if_indexp));
+
+    return (vnet_gre_tunnel_add(a, sw_if_indexp));
+}
+
+/**
+ * MPLS interface state-change callback for GRE tunnels.
+ *
+ * Registered on mpls_main.mpls_interface_state_change_callbacks by
+ * gre_interface_init(). Interfaces that are not GRE tunnels are ignored.
+ *
+ * On enable, the tunnel takes a lock on the MPLS-link adjacency for its
+ * interface and installs the GRE rewrite on it. On disable, that lock is
+ * released and the stored index reset to ADJ_INDEX_INVALID. In both cases
+ * the tunnel is re-stacked afterwards.
+ *
+ * @param sw_if_index  software interface whose MPLS state changed
+ * @param is_enable    non-zero when MPLS is being enabled, zero when disabled
+ */
+static void
+gre_sw_interface_mpls_state_change (u32 sw_if_index,
+                                    u32 is_enable)
+{
+    gre_main_t *gm = &gre_main;
+    vnet_hw_interface_t * hi;
+    gre_tunnel_t *t;
+    u8 *rewrite;
+
+    /*
+     * Valid indices are 0 .. vec_len - 1, so an index equal to the vector
+     * length is already out of range. Using '<' here would let the second
+     * operand read one element past the end of the vector.
+     */
+    if ((vec_len(gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
+        (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+        return;
+
+    t = pool_elt_at_index(gm->tunnels,
+                          gm->tunnel_index_by_sw_if_index[sw_if_index]);
+
+    if (is_enable)
+    {
+        hi = vnet_get_hw_interface (vnet_get_main(), t->hw_if_index);
+        t->adj_index[FIB_LINK_MPLS] =
+            adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                FIB_LINK_MPLS,
+                                &zero_addr,
+                                sw_if_index);
+
+        /* the rewrite vector is only needed while updating the adjacency */
+        rewrite = gre_rewrite(t, FIB_LINK_MPLS);
+        adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_MPLS],
+                                        hi->tx_node_index,
+                                        rewrite);
+        vec_free(rewrite);
+    }
+    else if (ADJ_INDEX_INVALID != t->adj_index[FIB_LINK_MPLS])
+    {
+        /* only release a lock that an earlier enable actually took */
+        adj_unlock(t->adj_index[FIB_LINK_MPLS]);
+        t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
+    }
+
+    gre_tunnel_stack(t);
+}
static clib_error_t *
create_gre_tunnel_command_fn (vlib_main_t * vm,
@@ -216,13 +527,15 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
return clib_error_return (0, "src and dst are identical");
memset (a, 0, sizeof (*a));
- a->is_add = is_add;
a->outer_fib_id = outer_fib_id;
a->teb = teb;
clib_memcpy(&a->src, &src, sizeof(src));
clib_memcpy(&a->dst, &dst, sizeof(dst));
- rv = vnet_gre_add_del_tunnel (a, &sw_if_index);
+ if (is_add)
+ rv = vnet_gre_tunnel_add(a, &sw_if_index);
+ else
+ rv = vnet_gre_tunnel_delete(a, &sw_if_index);
switch(rv)
{
@@ -255,14 +568,32 @@ show_gre_tunnel_command_fn (vlib_main_t * vm,
{
gre_main_t * gm = &gre_main;
gre_tunnel_t * t;
+ u32 ti = ~0;
if (pool_elts (gm->tunnels) == 0)
vlib_cli_output (vm, "No GRE tunnels configured...");
- pool_foreach (t, gm->tunnels,
- ({
- vlib_cli_output (vm, "%U", format_gre_tunnel, t);
- }));
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ti))
+ ;
+ else
+ break;
+ }
+
+ if (~0 == ti)
+ {
+ pool_foreach (t, gm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t, 0);
+ }));
+ }
+ else
+ {
+ t = pool_elt_at_index(gm->tunnels, ti);
+
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t, 1);
+ }
return 0;
}
@@ -275,6 +606,11 @@ VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
/* force inclusion from application's main.c */
clib_error_t *gre_interface_init (vlib_main_t *vm)
{
+ vec_add1(mpls_main.mpls_interface_state_change_callbacks,
+ gre_sw_interface_mpls_state_change);
+
+ fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft);
+
return 0;
}
VLIB_INIT_FUNCTION(gre_interface_init);
diff --git a/vnet/vnet/gre/node.c b/vnet/vnet/gre/node.c
index d5ea4b65ddb..b55f5511916 100644
--- a/vnet/vnet/gre/node.c
+++ b/vnet/vnet/gre/node.c
@@ -18,6 +18,7 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/gre/gre.h>
+#include <vnet/mpls/mpls.h>
#include <vppinfra/sparse_vec.h>
#define foreach_gre_input_next \
@@ -25,7 +26,8 @@ _(PUNT, "error-punt") \
_(DROP, "error-drop") \
_(ETHERNET_INPUT, "ethernet-input") \
_(IP4_INPUT, "ip4-input") \
-_(IP6_INPUT, "ip6-input")
+_(IP6_INPUT, "ip6-input") \
+_(MPLS_INPUT, "mpls-input")
typedef enum {
#define _(s,n) GRE_INPUT_NEXT_##s,
@@ -66,13 +68,17 @@ gre_input (vlib_main_t * vm,
vlib_frame_t * from_frame)
{
gre_main_t * gm = &gre_main;
+ mpls_main_t * mm = &mpls_main;
+ ip4_main_t * ip4m = &ip4_main;
gre_input_runtime_t * rt = (void *) node->runtime_data;
__attribute__((unused)) u32 n_left_from, next_index, * from, * to_next;
u64 cached_tunnel_key = (u64) ~0;
- u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index;
+ u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0;
u32 cached_tunnel_fib_index = 0, tunnel_fib_index;
u32 cpu_index = os_get_cpu_number();
+ u32 len;
+ vnet_interface_main_t *im = &gm->vnet_main->interface_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -141,7 +147,7 @@ gre_input (vlib_main_t * vm,
/* Index sparse array with network byte order. */
protocol0 = h0->protocol;
protocol1 = h1->protocol;
- sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1,
+ sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1,
&i0, &i1);
next0 = vec_elt(rt->next_by_protocol, i0);
next1 = vec_elt(rt->next_by_protocol, i1);
@@ -154,10 +160,10 @@ gre_input (vlib_main_t * vm,
version1 = clib_net_to_host_u16 (h1->flags_and_version);
verr1 = version1 & GRE_VERSION_MASK;
- b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
+ b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
: b0->error;
next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0;
- b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
+ b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION]
: b1->error;
next1 = verr1 ? GRE_INPUT_NEXT_DROP : next1;
@@ -176,7 +182,6 @@ gre_input (vlib_main_t * vm,
gre_tunnel_t * t;
uword * p;
- ip4_main_t * ip4m = &ip4_main;
p = hash_get (gm->tunnel_by_key, key);
if (!p)
{
@@ -199,19 +204,56 @@ gre_input (vlib_main_t * vm,
tunnel_sw_if_index = cached_tunnel_sw_if_index;
tunnel_fib_index = cached_tunnel_fib_index;
}
+ }
+ else if (PREDICT_TRUE(next0 == GRE_INPUT_NEXT_MPLS_INPUT))
+ {
+ u64 key = ((u64)(vnet_buffer(b0)->gre.dst) << 32) |
+ (u64)(vnet_buffer(b0)->gre.src);
+
+ if (cached_tunnel_key != key)
+ {
+ vnet_hw_interface_t * hi;
+ mpls_gre_tunnel_t * t;
+ uword * p;
- u32 len = vlib_buffer_length_in_chain (vm, b0);
- vnet_interface_main_t *im = &gm->vnet_main->interface_main;
- vlib_increment_combined_counter (im->combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- cpu_index,
- tunnel_sw_if_index,
- 1 /* packets */,
- len /* bytes */);
-
- vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index;
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+ p = hash_get (gm->tunnel_by_key, key);
+ if (!p)
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop0;
+ }
+ t = pool_elt_at_index (mm->gre_tunnels, p[0]);
+ hi = vnet_get_hw_interface (gm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+ tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index,
+ tunnel_sw_if_index);
+
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ cached_tunnel_fib_index = tunnel_fib_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ tunnel_fib_index = cached_tunnel_fib_index;
+ }
}
+ else
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ goto drop0;
+ }
+ len = vlib_buffer_length_in_chain (vm, b0);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ cpu_index,
+ tunnel_sw_if_index,
+ 1 /* packets */,
+ len /* bytes */);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
drop0:
if (PREDICT_FALSE(next1 == GRE_INPUT_NEXT_IP4_INPUT
@@ -227,7 +269,6 @@ drop0:
gre_tunnel_t * t;
uword * p;
- ip4_main_t * ip4m = &ip4_main;
p = hash_get (gm->tunnel_by_key, key);
if (!p)
{
@@ -250,23 +291,62 @@ drop0:
tunnel_sw_if_index = cached_tunnel_sw_if_index;
tunnel_fib_index = cached_tunnel_fib_index;
}
+ }
+ else if (PREDICT_TRUE(next1 == GRE_INPUT_NEXT_MPLS_INPUT))
+ {
+ u64 key = ((u64)(vnet_buffer(b1)->gre.dst) << 32) |
+ (u64)(vnet_buffer(b1)->gre.src);
- u32 len = vlib_buffer_length_in_chain (vm, b1);
- vnet_interface_main_t *im = &gm->vnet_main->interface_main;
- vlib_increment_combined_counter (im->combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- cpu_index,
- tunnel_sw_if_index,
- 1 /* packets */,
- len /* bytes */);
-
- vnet_buffer(b1)->sw_if_index[VLIB_TX] = tunnel_fib_index;
- vnet_buffer(b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+ if (cached_tunnel_key != key)
+ {
+ vnet_hw_interface_t * hi;
+ mpls_gre_tunnel_t * t;
+ uword * p;
+
+ ip4_main_t * ip4m = &ip4_main;
+ p = hash_get (gm->tunnel_by_key, key);
+ if (!p)
+ {
+ next1 = GRE_INPUT_NEXT_DROP;
+ b1->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop1;
+ }
+ t = pool_elt_at_index (mm->gre_tunnels, p[0]);
+ hi = vnet_get_hw_interface (gm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+ tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index,
+ tunnel_sw_if_index);
+
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ cached_tunnel_fib_index = tunnel_fib_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ tunnel_fib_index = cached_tunnel_fib_index;
+ }
}
+ else
+ {
+ next1 = GRE_INPUT_NEXT_DROP;
+ goto drop1;
+ }
+ len = vlib_buffer_length_in_chain (vm, b1);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ cpu_index,
+ tunnel_sw_if_index,
+ 1 /* packets */,
+ len /* bytes */);
+
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = tunnel_fib_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+
drop1:
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
- gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node,
b0, sizeof (*tr));
tr->tunnel_id = ~0;
tr->length = ip0->length;
@@ -274,9 +354,9 @@ drop1:
tr->dst.as_u32 = ip0->dst_address.as_u32;
}
- if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
{
- gre_rx_trace_t *tr = vlib_add_trace (vm, node,
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node,
b1, sizeof (*tr));
tr->tunnel_id = ~0;
tr->length = ip1->length;
@@ -336,6 +416,7 @@ drop1:
/* For IP payload we need to find source interface
so we can increase counters and help forward node to
pick right FIB */
+ /* RPF check for ip4/ip6 input */
if (PREDICT_FALSE(next0 == GRE_INPUT_NEXT_IP4_INPUT
|| next0 == GRE_INPUT_NEXT_IP6_INPUT
|| next0 == GRE_INPUT_NEXT_ETHERNET_INPUT))
@@ -349,7 +430,6 @@ drop1:
gre_tunnel_t * t;
uword * p;
- ip4_main_t * ip4m = &ip4_main;
p = hash_get (gm->tunnel_by_key, key);
if (!p)
{
@@ -372,26 +452,63 @@ drop1:
tunnel_sw_if_index = cached_tunnel_sw_if_index;
tunnel_fib_index = cached_tunnel_fib_index;
}
+ }
+ else if (PREDICT_TRUE(next0 == GRE_INPUT_NEXT_MPLS_INPUT))
+ {
+ u64 key = ((u64)(vnet_buffer(b0)->gre.dst) << 32) |
+ (u64)(vnet_buffer(b0)->gre.src);
- u32 len = vlib_buffer_length_in_chain (vm, b0);
- vnet_interface_main_t *im = &gm->vnet_main->interface_main;
- vlib_increment_combined_counter (im->combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- cpu_index,
- tunnel_sw_if_index,
- 1 /* packets */,
- len /* bytes */);
-
- vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index;
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
+ if (cached_tunnel_key != key)
+ {
+ vnet_hw_interface_t * hi;
+ mpls_gre_tunnel_t * t;
+ uword * p;
+
+ p = hash_get (gm->tunnel_by_key, key);
+ if (!p)
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ goto drop;
+ }
+ t = pool_elt_at_index (mm->gre_tunnels, p[0]);
+ hi = vnet_get_hw_interface (gm->vnet_main,
+ t->hw_if_index);
+ tunnel_sw_if_index = hi->sw_if_index;
+ tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index,
+ tunnel_sw_if_index);
+
+ cached_tunnel_sw_if_index = tunnel_sw_if_index;
+ cached_tunnel_fib_index = tunnel_fib_index;
+ }
+ else
+ {
+ tunnel_sw_if_index = cached_tunnel_sw_if_index;
+ tunnel_fib_index = cached_tunnel_fib_index;
+ }
+ }
+ else
+ {
+ next0 = GRE_INPUT_NEXT_DROP;
+ goto drop;
}
+ len = vlib_buffer_length_in_chain (vm, b0);
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ cpu_index,
+ tunnel_sw_if_index,
+ 1 /* packets */,
+ len /* bytes */);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index;
drop:
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
gre_rx_trace_t *tr = vlib_add_trace (vm, node,
b0, sizeof (*tr));
- tr->tunnel_id = ~0;
+ tr->tunnel_id = tunnel_sw_if_index;
tr->length = ip0->length;
tr->src.as_u32 = ip0->src_address.as_u32;
tr->dst.as_u32 = ip0->dst_address.as_u32;
@@ -509,7 +626,7 @@ static clib_error_t * gre_input_init (vlib_main_t * vm)
ASSERT(ip4_input);
ip6_input = vlib_get_node_by_name (vm, (u8 *)"ip6-input");
ASSERT(ip6_input);
- mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *)"mpls-gre-input");
+ mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *)"mpls-input");
ASSERT(mpls_unicast_input);
gre_register_input_protocol (vm, GRE_PROTOCOL_teb,
diff --git a/vnet/vnet/handoff.c b/vnet/vnet/handoff.c
index 67fc6417414..05eea0329b1 100644
--- a/vnet/vnet/handoff.c
+++ b/vnet/vnet/handoff.c
@@ -515,11 +515,11 @@ VLIB_REGISTER_NODE (handoff_dispatch_node) = {
.n_next_nodes = HANDOFF_DISPATCH_N_NEXT,
.next_nodes = {
- [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
- [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
- [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
- [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
- [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-gre-input",
+ [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
+ [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
+ [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
+ [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-input",
},
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/handoff.h b/vnet/vnet/handoff.h
index 00832635a5a..9320f5602b5 100644
--- a/vnet/vnet/handoff.h
+++ b/vnet/vnet/handoff.h
@@ -20,7 +20,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
-#include <vnet/mpls-gre/packet.h>
+#include <vnet/mpls/packet.h>
typedef enum
{
diff --git a/vnet/vnet/interface.c b/vnet/vnet/interface.c
index 772c3bca75f..595ed1432bc 100644
--- a/vnet/vnet/interface.c
+++ b/vnet/vnet/interface.c
@@ -449,8 +449,16 @@ vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index,
mc_serialize (vm->mc_main, &vnet_sw_interface_set_flags_msg, &s);
}
- error = call_elf_section_interface_callbacks
- (vnm, sw_if_index, flags, vnm->sw_interface_admin_up_down_functions);
+ /* Set the flags before invoking the registered clients, so that the
+ * state they query is consistent with the state being notified here. */
+ old_flags = si->flags;
+ si->flags &= ~mask;
+ si->flags |= flags;
+ if ((flags | old_flags) & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ error = call_elf_section_interface_callbacks
+ (vnm, sw_if_index, flags,
+ vnm->sw_interface_admin_up_down_functions);
+ si->flags = old_flags;
if (error)
goto done;
diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h
index 7738bb6edc7..9f032e987bb 100644
--- a/vnet/vnet/interface.h
+++ b/vnet/vnet/interface.h
@@ -459,7 +459,8 @@ typedef enum
VNET_INTERFACE_COUNTER_RX_MISS = 5,
VNET_INTERFACE_COUNTER_RX_ERROR = 6,
VNET_INTERFACE_COUNTER_TX_ERROR = 7,
- VNET_N_SIMPLE_INTERFACE_COUNTER = 8,
+ VNET_INTERFACE_COUNTER_MPLS = 8,
+ VNET_N_SIMPLE_INTERFACE_COUNTER = 9,
/* Combined counters. */
VNET_INTERFACE_COUNTER_RX = 0,
VNET_INTERFACE_COUNTER_TX = 1,
diff --git a/vnet/vnet/interface_cli.c b/vnet/vnet/interface_cli.c
index 7d828f54951..477716d4b97 100644
--- a/vnet/vnet/interface_cli.c
+++ b/vnet/vnet/interface_cli.c
@@ -45,6 +45,8 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vppinfra/bitmap.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
static int
compare_interface_names (void *a1, void *a2)
@@ -290,8 +292,8 @@ show_sw_interfaces (vlib_main_t * vm,
fib_index6 = vec_elt (im6->fib_index_by_sw_if_index,
si->sw_if_index);
- fib4 = vec_elt_at_index (im4->fibs, fib_index4);
- fib6 = vec_elt_at_index (im6->fibs, fib_index6);
+ fib4 = ip4_fib_get (fib_index4);
+ fib6 = ip6_fib_get (fib_index6);
if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
vlib_cli_output
diff --git a/vnet/vnet/interface_funcs.h b/vnet/vnet/interface_funcs.h
index 81a819a64bd..735d47ec192 100644
--- a/vnet/vnet/interface_funcs.h
+++ b/vnet/vnet/interface_funcs.h
@@ -105,6 +105,7 @@ clib_error_t *vnet_create_sw_interface (vnet_main_t * vnm,
void vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index);
void vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index);
+int vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index);
always_inline uword
vnet_sw_interface_get_flags (vnet_main_t * vnm, u32 sw_if_index)
diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h
index 4d73d6b1bf2..0d0eb6c9476 100644
--- a/vnet/vnet/ip/format.h
+++ b/vnet/vnet/ip/format.h
@@ -48,6 +48,12 @@ unformat_function_t unformat_ip_protocol;
format_function_t format_tcp_udp_port;
unformat_function_t unformat_tcp_udp_port;
+typedef enum format_ip_adjacency_flags_t_
+{
+ FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
+} format_ip_adjacency_flags_t;
+
format_function_t format_ip_adjacency;
format_function_t format_ip_adjacency_packet_data;
diff --git a/vnet/vnet/ip/ip4.h b/vnet/vnet/ip/ip4.h
index fc74e9d61ed..f9fe48687c1 100644
--- a/vnet/vnet/ip/ip4.h
+++ b/vnet/vnet/ip/ip4.h
@@ -47,10 +47,7 @@
typedef struct ip4_fib_t {
/* Hash table for each prefix length mapping. */
- uword * adj_index_by_dst_address[33];
-
- /* Temporary vectors for holding new/old values for hash_set. */
- uword * new_hash_values, * old_hash_values;
+ uword * fib_entry_by_dst_address[33];
/* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */
ip4_fib_mtrie_t mtrie;
@@ -62,7 +59,7 @@ typedef struct ip4_fib_t {
u32 index;
/* flow hash configuration */
- u32 flow_hash_config;
+ flow_hash_config_t flow_hash_config;
/* N-tuple classifier indices */
u32 fwd_classify_table_index;
@@ -72,22 +69,6 @@ typedef struct ip4_fib_t {
struct ip4_main_t;
-typedef void (ip4_add_del_route_function_t)
- (struct ip4_main_t * im,
- uword opaque,
- ip4_fib_t * fib,
- u32 flags,
- ip4_address_t * address,
- u32 address_length,
- void * old_result,
- void * new_result);
-
-typedef struct {
- ip4_add_del_route_function_t * function;
- uword required_flags;
- uword function_opaque;
-} ip4_add_del_route_callback_t;
-
typedef void (ip4_add_del_interface_address_function_t)
(struct ip4_main_t * im,
uword opaque,
@@ -115,23 +96,20 @@ typedef struct ip4_main_t {
ip_lookup_main_t lookup_main;
/** Vector of FIBs. */
- ip4_fib_t * fibs;
+ struct fib_table_t_ * fibs;
u32 fib_masks[33];
/** Table index indexed by software interface. */
u32 * fib_index_by_sw_if_index;
+ /* IP4 enabled count by software interface */
+ u8 * ip_enabled_by_sw_if_index;
+
/** Hash table mapping table id to fib index.
ID space is not necessarily dense; index space is dense. */
uword * fib_index_by_table_id;
- /** Vector of functions to call when routes are added/deleted. */
- ip4_add_del_route_callback_t * add_del_route_callbacks;
-
- /** Hash table mapping interface route rewrite adjacency index by sw if index. */
- uword * interface_route_adj_index_by_sw_if_index;
-
/** Functions to call when interface address changes. */
ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks;
@@ -159,11 +137,15 @@ typedef struct ip4_main_t {
u32 ip4_unicast_rx_feature_lookup;
/** Built-in unicast feature path index, see @ref ip_feature_init_cast() */
u32 ip4_unicast_rx_feature_source_and_port_range_check;
+ /** Built-in unicast feature path index, see @ref ip_feature_init_cast() */
+ u32 ip4_unicast_rx_feature_drop;
/** Built-in multicast feature path index */
u32 ip4_multicast_rx_feature_vpath;
/** Built-in multicast feature path index */
u32 ip4_multicast_rx_feature_lookup;
+ /** Built-in multicast feature path index */
+ u32 ip4_multicast_rx_feature_drop;
/** Built-in unicast feature path index, see @ref ip_feature_init_cast() */
u32 ip4_unicast_tx_feature_source_and_port_range_check;
@@ -235,30 +217,13 @@ extern vlib_node_registration_t ip4_lookup_node;
extern vlib_node_registration_t ip4_rewrite_node;
extern vlib_node_registration_t ip4_rewrite_local_node;
extern vlib_node_registration_t ip4_arp_node;
-
-u32 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, ip4_address_t * dst,
- u32 disable_default_route);
-
-always_inline u32
-ip4_fib_lookup_buffer (ip4_main_t * im, u32 fib_index, ip4_address_t * dst,
- vlib_buffer_t * b)
-{
- return ip4_fib_lookup_with_table (im, fib_index, dst,
- /* disable_default_route */ 0);
-}
-
-always_inline u32
-ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst)
-{
- u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- return ip4_fib_lookup_with_table (im, fib_index, dst,
- /* disable_default_route */ 0);
-}
+extern vlib_node_registration_t ip4_glean_node;
+extern vlib_node_registration_t ip4_midchain_node;
always_inline uword
-ip4_destination_matches_route (ip4_main_t * im,
- ip4_address_t * key,
- ip4_address_t * dest,
+ip4_destination_matches_route (const ip4_main_t * im,
+ const ip4_address_t * key,
+ const ip4_address_t * dest,
uword dest_length)
{ return 0 == ((key->data_u32 ^ dest->data_u32) & im->fib_masks[dest_length]); }
@@ -280,15 +245,26 @@ ip4_unaligned_destination_matches_route (ip4_main_t * im,
{ return 0 == ((clib_mem_unaligned (&key->data_u32, u32) ^ dest->data_u32) & im->fib_masks[dest_length]); }
always_inline int
-ip4_src_address_for_packet (ip4_main_t * im, vlib_buffer_t * p, ip4_address_t * src, u32 sw_if_index)
+ip4_src_address_for_packet (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ ip4_address_t * src)
{
- ip_lookup_main_t * lm = &im->lookup_main;
- ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index);
- if (ia == NULL)
- return -1;
- ip4_address_t * a = ip_interface_address_get_address (lm, ia);
- *src = a[0];
- return 0;
+ u32 if_add_index =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE(if_add_index != ~0)) {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index(lm->if_address_pool, if_add_index);
+ ip4_address_t *if_ip =
+ ip_interface_address_get_address(lm, if_add);
+ *src = *if_ip;
+ return 0;
+ }
+ else
+ {
+ ASSERT(0);
+ src->as_u32 = 0;
+ }
+ return (!0);
}
/* Find interface address which matches destination. */
@@ -315,126 +291,20 @@ ip4_interface_address_matching_destination (ip4_main_t * im, ip4_address_t * dst
return result;
}
+ip4_address_t *
+ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t ** result_ia);
+
clib_error_t *
ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
ip4_address_t * address, u32 address_length,
u32 is_del);
-int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2);
-
-/* Add/del a route to the FIB. */
-
-#define IP4_ROUTE_FLAG_ADD (0 << 0)
-#define IP4_ROUTE_FLAG_DEL (1 << 0)
-#define IP4_ROUTE_FLAG_TABLE_ID (0 << 1)
-#define IP4_ROUTE_FLAG_FIB_INDEX (1 << 1)
-#define IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2)
-#define IP4_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3)
-/* Not last add/del in group. Facilities batching requests into packets. */
-#define IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4)
-/* Dynamic route created via ARP reply. */
-#define IP4_ROUTE_FLAG_NEIGHBOR (1 << 5)
-
-typedef struct {
- /* IP4_ROUTE_FLAG_* */
- u32 flags;
-
- /* Either index of fib or table_id to hash and get fib.
- IP4_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */
- u32 table_index_or_table_id;
-
- /* Destination address (prefix) and length. */
- ip4_address_t dst_address;
- u32 dst_address_length;
-
- /* Adjacency to use for this destination. */
- u32 adj_index;
-
- /* If specified adjacencies to add and then
- use for this destination. add_adj/n_add_adj
- are override adj_index if specified. */
- ip_adjacency_t * add_adj;
- u32 n_add_adj;
-} ip4_add_del_route_args_t;
-
-/**
- * \brief Get or create an IPv4 fib.
- *
- * Get or create an IPv4 fib with the provided fib ID or index.
- * The fib ID is a possibly-sparse user-defined value while
- * the fib index defines the position of the fib in the fib vector.
- *
- * \param im
- * ip4_main pointer.
- * \param table_index_or_id
- * The table index if \c IP4_ROUTE_FLAG_FIB_INDEX bit is set in \p flags.
- * Otherwise, when set to \c ~0, an arbitrary and unused fib ID is picked
- * and can be retrieved with \c ret->table_id.
- * Otherwise, the fib ID to be used to retrieve or create the desired fib.
- * \param flags
- * Indicates whether \p table_index_or_id is the fib index or ID.
- * When the bit \c IP4_ROUTE_FLAG_FIB_INDEX is set, \p table_index_or_id
- * is considered as the fib index, and the fib ID otherwise.
- * \returns A pointer to the retrieved or created fib.
- *
- * \remark When getting a fib with the fib index, the fib MUST already exist.
- */
-ip4_fib_t *
-find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
- u32 table_index_or_id, u32 flags);
-
-void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * args);
-
-void ip4_add_del_route_next_hop (ip4_main_t * im,
- u32 flags,
- ip4_address_t * dst_address,
- u32 dst_address_length,
- ip4_address_t * next_hop,
- u32 next_hop_sw_if_index,
- u32 next_hop_weight, u32 adj_index,
- u32 explicit_fib_index);
-
-u32
-ip4_route_get_next_hop_adj (ip4_main_t * im,
- u32 fib_index,
- ip4_address_t *next_hop,
- u32 next_hop_sw_if_index,
- u32 explicit_fib_index);
-
-void *
-ip4_get_route (ip4_main_t * im,
- u32 fib_index_or_table_id,
- u32 flags,
- u8 * address,
- u32 address_length);
-
void
-ip4_foreach_matching_route (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip4_address_t * address,
- u32 address_length,
- ip4_address_t ** results,
- u8 ** result_lengths);
-
-void ip4_delete_matching_routes (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip4_address_t * address,
- u32 address_length);
-
-void ip4_maybe_remap_adjacencies (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags);
-
-void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
- ip_adjacency_t * adj,
- u32 sw_if_index,
- u32 if_address_index);
+ip4_sw_interface_enable_disable (u32 sw_if_index,
+ u32 is_enable);
-ip4_address_t *
-ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
- ip_interface_address_t ** result_ia);
+int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2);
/* Send an ARP request to see if given destination is reachable on given interface. */
clib_error_t *
@@ -458,7 +328,7 @@ void ip4_register_protocol (u32 protocol, u32 node_index);
serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
-int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config);
+int vnet_set_ip4_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config);
void ip4_mtrie_init (ip4_fib_mtrie_t * m);
@@ -468,7 +338,8 @@ int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
/* Compute flow hash. We'll use it to select which adjacency to use for this
flow. And other things. */
always_inline u32
-ip4_compute_flow_hash (ip4_header_t * ip, u32 flow_hash_config)
+ip4_compute_flow_hash (const ip4_header_t * ip,
+ flow_hash_config_t flow_hash_config)
{
tcp_header_t * tcp = (void *) (ip + 1);
u32 a, b, c, t1, t2;
diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c
index 751260a72ea..4c49d0e4916 100644
--- a/vnet/vnet/ip/ip4_forward.c
+++ b/vnet/vnet/ip/ip4_forward.c
@@ -39,668 +39,16 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
-/** for ethernet_header_t */
-#include <vnet/ethernet/ethernet.h>
-/** for ethernet_arp_header_t */
-#include <vnet/ethernet/arp_packet.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
#include <vnet/ppp/ppp.h>
-/** for srp_hw_interface_class */
-#include <vnet/srp/srp.h>
-/** for API error numbers */
-#include <vnet/api_errno.h>
-
-/** @file
- vnet ip4 forwarding
-*/
-
-/* This is really, really simple but stupid fib. */
-u32
-ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
- ip4_address_t * dst,
- u32 disable_default_route)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
- uword * p, * hash, key;
- i32 i, i_min, dst_address, ai;
-
- i_min = disable_default_route ? 1 : 0;
- dst_address = clib_mem_unaligned (&dst->data_u32, u32);
- for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
- {
- hash = fib->adj_index_by_dst_address[i];
- if (! hash)
- continue;
-
- key = dst_address & im->fib_masks[i];
- if ((p = hash_get (hash, key)) != 0)
- {
- ai = p[0];
- goto done;
- }
- }
-
- /* Nothing matches in table. */
- ai = lm->miss_adj_index;
-
- done:
- return ai;
-}
-
-/** @brief Create FIB from table ID and init all hashing.
- @param im - @ref ip4_main_t
- @param table_id - table ID
- @return fib - @ref ip4_fib_t
-*/
-static ip4_fib_t *
-create_fib_with_table_id (ip4_main_t * im, u32 table_id)
-{
- ip4_fib_t * fib;
- hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
- vec_add2 (im->fibs, fib, 1);
- fib->table_id = table_id;
- fib->index = fib - im->fibs;
- /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
- fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
- fib->fwd_classify_table_index = ~0;
- fib->rev_classify_table_index = ~0;
- ip4_mtrie_init (&fib->mtrie);
- return fib;
-}
-
-/** @brief Find existing or Create new FIB based on index
- @param im @ref ip4_main_t
- @param table_index_or_id - overloaded parameter referring
- to the table or a table's index in the FIB vector
- @param flags - used to check if table_index_or_id was a table or
- an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
- @return either the existing or a new ip4_fib_t entry
-*/
-ip4_fib_t *
-find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
- u32 table_index_or_id, u32 flags)
-{
- uword * p, fib_index;
-
- fib_index = table_index_or_id;
- /* If this isn't a FIB_INDEX ... */
- if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
- {
- /* If passed ~0 then request the next table available */
- if (table_index_or_id == ~0) {
- table_index_or_id = 0;
- while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
- table_index_or_id++;
- }
- /* Create the next table and return the ip4_fib_t associated with it */
- return create_fib_with_table_id (im, table_index_or_id);
- }
- /* A specific table_id was requested.. */
- p = hash_get (im->fib_index_by_table_id, table_index_or_id);
- /* ... and if it doesn't exist create it else grab its index */
- if (! p)
- return create_fib_with_table_id (im, table_index_or_id);
- fib_index = p[0];
- }
- /* Return the ip4_fib_t associated with this index */
- return vec_elt_at_index (im->fibs, fib_index);
-}
-
-static void
-ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
- ip4_fib_t * fib,
- u32 address_length)
-{
- hash_t * h;
- uword max_index;
-
- ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
- lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
-
- fib->adj_index_by_dst_address[address_length] =
- hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
-
- hash_set_flags (fib->adj_index_by_dst_address[address_length],
- HASH_FLAG_NO_AUTO_SHRINK);
-
- h = hash_header (fib->adj_index_by_dst_address[address_length]);
- max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
-
- /* Initialize new/old hash value vectors. */
- vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
- vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
-}
-
-static void
-ip4_fib_set_adj_index (ip4_main_t * im,
- ip4_fib_t * fib,
- u32 flags,
- u32 dst_address_u32,
- u32 dst_address_length,
- u32 adj_index)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- uword * hash;
-
- if (vec_bytes(fib->old_hash_values))
- memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
- if (vec_bytes(fib->new_hash_values))
- memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
- fib->new_hash_values[0] = adj_index;
-
- /* Make sure adj index is valid. */
- if (CLIB_DEBUG > 0)
- (void) ip_get_adjacency (lm, adj_index);
-
- hash = fib->adj_index_by_dst_address[dst_address_length];
-
- hash = _hash_set3 (hash, dst_address_u32,
- fib->new_hash_values,
- fib->old_hash_values);
-
- fib->adj_index_by_dst_address[dst_address_length] = hash;
-
- if (vec_len (im->add_del_route_callbacks) > 0)
- {
- ip4_add_del_route_callback_t * cb;
- ip4_address_t d;
- uword * p;
-
- d.data_u32 = dst_address_u32;
- vec_foreach (cb, im->add_del_route_callbacks)
- if ((flags & cb->required_flags) == cb->required_flags)
- cb->function (im, cb->function_opaque,
- fib, flags,
- &d, dst_address_length,
- fib->old_hash_values,
- fib->new_hash_values);
-
- p = hash_get (hash, dst_address_u32);
- /* hash_get should never return NULL here */
- if (p)
- clib_memcpy (p, fib->new_hash_values,
- vec_bytes (fib->new_hash_values));
- else
- ASSERT(0);
- }
-}
-
-void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- ip4_fib_t * fib;
- u32 dst_address, dst_address_length, adj_index, old_adj_index;
- uword * hash, is_del;
- ip4_add_del_route_callback_t * cb;
-
- /* Either create new adjacency or use given one depending on arguments. */
- if (a->n_add_adj > 0)
- {
- ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
- ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
- }
- else
- adj_index = a->adj_index;
-
- dst_address = a->dst_address.data_u32;
- dst_address_length = a->dst_address_length;
- fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
-
- ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
- dst_address &= im->fib_masks[dst_address_length];
-
- if (! fib->adj_index_by_dst_address[dst_address_length])
- ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
-
- hash = fib->adj_index_by_dst_address[dst_address_length];
-
- is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
-
- if (is_del)
- {
- fib->old_hash_values[0] = ~0;
- hash = _hash_unset (hash, dst_address, fib->old_hash_values);
- fib->adj_index_by_dst_address[dst_address_length] = hash;
-
- if (vec_len (im->add_del_route_callbacks) > 0
- && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
- {
- fib->new_hash_values[0] = ~0;
- vec_foreach (cb, im->add_del_route_callbacks)
- if ((a->flags & cb->required_flags) == cb->required_flags)
- cb->function (im, cb->function_opaque,
- fib, a->flags,
- &a->dst_address, dst_address_length,
- fib->old_hash_values,
- fib->new_hash_values);
- }
- }
- else
- ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
- adj_index);
-
- old_adj_index = fib->old_hash_values[0];
-
- /* Avoid spurious reference count increments */
- if (old_adj_index == adj_index
- && adj_index != ~0
- && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
- {
- ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
- if (adj->share_count > 0)
- adj->share_count --;
- }
-
- ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
- is_del ? old_adj_index : adj_index,
- is_del);
-
- /* Delete old adjacency index if present and changed. */
- if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
- && old_adj_index != ~0
- && old_adj_index != adj_index)
- ip_del_adjacency (lm, old_adj_index);
-}
-
-
-u32
-ip4_route_get_next_hop_adj (ip4_main_t * im,
- u32 fib_index,
- ip4_address_t *next_hop,
- u32 next_hop_sw_if_index,
- u32 explicit_fib_index)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- vnet_main_t * vnm = vnet_get_main();
- uword * nh_hash, * nh_result;
- int is_interface_next_hop;
- u32 nh_adj_index;
- ip4_fib_t * fib;
-
- fib = vec_elt_at_index (im->fibs, fib_index);
-
- is_interface_next_hop = next_hop->data_u32 == 0;
- if (is_interface_next_hop)
- {
- nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
- if (nh_result)
- nh_adj_index = *nh_result;
- else
- {
- ip_adjacency_t * adj;
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &nh_adj_index);
- ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
- ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
- hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
- }
- }
- else if (next_hop_sw_if_index == ~0)
- {
- /* next-hop is recursive. we always need a indirect adj
- * for recursive paths. Any LPM we perform now will give
- * us a valid adj, but without tracking the next-hop we
- * have no way to keep it valid.
- */
- ip_adjacency_t add_adj;
- memset (&add_adj, 0, sizeof(add_adj));
- add_adj.n_adj = 1;
- add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
- add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
- add_adj.explicit_fib_index = explicit_fib_index;
- ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
- }
- else
- {
- nh_hash = fib->adj_index_by_dst_address[32];
- nh_result = hash_get (nh_hash, next_hop->data_u32);
-
- /* Next hop must be known. */
- if (! nh_result)
- {
- ip_adjacency_t * adj;
-
- /* no /32 exists, get the longest prefix match */
- nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
- next_hop, 0);
- adj = ip_get_adjacency (lm, nh_adj_index);
- /* if ARP interface adjacency is present, we need to
- install ARP adjaceny for specific next hop */
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- adj->arp.next_hop.ip4.as_u32 == 0)
- {
- nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
- }
- }
- else
- {
- nh_adj_index = *nh_result;
- }
- }
-
- return (nh_adj_index);
-}
-
-void
-ip4_add_del_route_next_hop (ip4_main_t * im,
- u32 flags,
- ip4_address_t * dst_address,
- u32 dst_address_length,
- ip4_address_t * next_hop,
- u32 next_hop_sw_if_index,
- u32 next_hop_weight, u32 adj_index,
- u32 explicit_fib_index)
-{
- vnet_main_t * vnm = vnet_get_main();
- ip_lookup_main_t * lm = &im->lookup_main;
- u32 fib_index;
- ip4_fib_t * fib;
- u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
- u32 dst_adj_index, nh_adj_index;
- uword * dst_hash, * dst_result;
- ip_adjacency_t * dst_adj;
- ip_multipath_adjacency_t * old_mp, * new_mp;
- int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
- clib_error_t * error = 0;
-
- if (explicit_fib_index == (u32)~0)
- fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
- else
- fib_index = explicit_fib_index;
-
- fib = vec_elt_at_index (im->fibs, fib_index);
-
- /* Lookup next hop to be added or deleted. */
- if (adj_index == (u32)~0)
- {
- nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
- next_hop,
- next_hop_sw_if_index,
- explicit_fib_index);
- }
- else
- {
- nh_adj_index = adj_index;
- }
- ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
- dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
-
- dst_hash = fib->adj_index_by_dst_address[dst_address_length];
- dst_result = hash_get (dst_hash, dst_address_u32);
- if (dst_result)
- {
- dst_adj_index = dst_result[0];
- dst_adj = ip_get_adjacency (lm, dst_adj_index);
- }
- else
- {
- /* For deletes destination must be known. */
- if (is_del)
- {
- vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
- error = clib_error_return (0, "unknown destination %U/%d",
- format_ip4_address, dst_address,
- dst_address_length);
- goto done;
- }
-
- dst_adj_index = ~0;
- dst_adj = 0;
- }
-
- /* Ignore adds of X/32 with next hop of X. */
- if (! is_del
- && dst_address_length == 32
- && dst_address->data_u32 == next_hop->data_u32
- && adj_index != (u32)~0)
- {
- vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
- error = clib_error_return (0, "prefix matches next hop %U/%d",
- format_ip4_address, dst_address,
- dst_address_length);
- goto done;
- }
-
- /* Destination is not known and default weight is set so add route
- to existing non-multipath adjacency */
- if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
- {
- /* create / delete additional mapping of existing adjacency */
- ip4_add_del_route_args_t a;
-
- a.table_index_or_table_id = fib_index;
- a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
- | IP4_ROUTE_FLAG_FIB_INDEX
- | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
- | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
- | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
- a.dst_address = dst_address[0];
- a.dst_address_length = dst_address_length;
- a.adj_index = nh_adj_index;
- a.add_adj = 0;
- a.n_add_adj = 0;
-
- ip4_add_del_route (im, &a);
- goto done;
- }
-
- old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
-
- if (! ip_multipath_adjacency_add_del_next_hop
- (lm, is_del,
- old_mp_adj_index,
- nh_adj_index,
- next_hop_weight,
- &new_mp_adj_index))
- {
- vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
- error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
- format_ip4_address, next_hop);
- goto done;
- }
-
- old_mp = new_mp = 0;
- if (old_mp_adj_index != ~0)
- old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
- if (new_mp_adj_index != ~0)
- new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
-
- if (old_mp != new_mp)
- {
- ip4_add_del_route_args_t a;
- ip_adjacency_t * adj;
-
- a.table_index_or_table_id = fib_index;
- a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
- | IP4_ROUTE_FLAG_FIB_INDEX
- | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
- | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
- a.dst_address = dst_address[0];
- a.dst_address_length = dst_address_length;
- a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
- a.add_adj = 0;
- a.n_add_adj = 0;
-
- ip4_add_del_route (im, &a);
-
- adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
- if (adj->n_adj == 1)
- adj->share_count += is_del ? -1 : 1;
- }
-
- done:
- if (error)
- clib_error_report (error);
-}
-
-void *
-ip4_get_route (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- u8 * address,
- u32 address_length)
-{
- ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
- u32 dst_address = * (u32 *) address;
- uword * hash, * p;
-
- ASSERT (address_length < ARRAY_LEN (im->fib_masks));
- dst_address &= im->fib_masks[address_length];
-
- hash = fib->adj_index_by_dst_address[address_length];
- p = hash_get (hash, dst_address);
- return (void *) p;
-}
-
-void
-ip4_foreach_matching_route (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip4_address_t * address,
- u32 address_length,
- ip4_address_t ** results,
- u8 ** result_lengths)
-{
- ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
- u32 dst_address = address->data_u32;
- u32 this_length = address_length;
-
- if (*results)
- _vec_len (*results) = 0;
- if (*result_lengths)
- _vec_len (*result_lengths) = 0;
-
- while (this_length <= 32 && vec_len (results) == 0)
- {
- uword k, v;
- hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
- if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
- {
- ip4_address_t a;
- a.data_u32 = k;
- vec_add1 (*results, a);
- vec_add1 (*result_lengths, this_length);
- }
- }));
-
- this_length++;
- }
-}
-
-void ip4_maybe_remap_adjacencies (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags)
-{
- ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
- ip_lookup_main_t * lm = &im->lookup_main;
- u32 i, l;
- ip4_address_t a;
- ip4_add_del_route_callback_t * cb;
- static ip4_address_t * to_delete;
-
- if (lm->n_adjacency_remaps == 0)
- return;
-
- for (l = 0; l <= 32; l++)
- {
- hash_pair_t * p;
- uword * hash = fib->adj_index_by_dst_address[l];
-
- if (hash_elts (hash) == 0)
- continue;
-
- if (to_delete)
- _vec_len (to_delete) = 0;
-
- hash_foreach_pair (p, hash, ({
- u32 adj_index = p->value[0];
- u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
-
- if (m)
- {
- /* Record destination address from hash key. */
- a.data_u32 = p->key;
-
- /* New adjacency points to nothing: so delete prefix. */
- if (m == ~0)
- vec_add1 (to_delete, a);
- else
- {
- /* Remap to new adjacency. */
- clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
-
- /* Set new adjacency value. */
- fib->new_hash_values[0] = p->value[0] = m - 1;
-
- vec_foreach (cb, im->add_del_route_callbacks)
- if ((flags & cb->required_flags) == cb->required_flags)
- cb->function (im, cb->function_opaque,
- fib, flags | IP4_ROUTE_FLAG_ADD,
- &a, l,
- fib->old_hash_values,
- fib->new_hash_values);
- }
- }
- }));
-
- fib->new_hash_values[0] = ~0;
- for (i = 0; i < vec_len (to_delete); i++)
- {
- hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
- vec_foreach (cb, im->add_del_route_callbacks)
- if ((flags & cb->required_flags) == cb->required_flags)
- cb->function (im, cb->function_opaque,
- fib, flags | IP4_ROUTE_FLAG_DEL,
- &a, l,
- fib->old_hash_values,
- fib->new_hash_values);
- }
- }
-
- /* Also remap adjacencies in mtrie. */
- ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
-
- /* Reset mapping table. */
- vec_zero (lm->adjacency_remap_table);
-
- /* All remaps have been performed. */
- lm->n_adjacency_remaps = 0;
-}
-
-void ip4_delete_matching_routes (ip4_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip4_address_t * address,
- u32 address_length)
-{
- static ip4_address_t * matching_addresses;
- static u8 * matching_address_lengths;
- u32 l, i;
- ip4_add_del_route_args_t a;
-
- a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
- a.table_index_or_table_id = table_index_or_table_id;
- a.adj_index = ~0;
- a.add_adj = 0;
- a.n_add_adj = 0;
-
- for (l = address_length + 1; l <= 32; l++)
- {
- ip4_foreach_matching_route (im, table_index_or_table_id, flags,
- address,
- l,
- &matching_addresses,
- &matching_address_lengths);
- for (i = 0; i < vec_len (matching_addresses); i++)
- {
- a.dst_address = matching_addresses[i];
- a.dst_address_length = matching_address_lengths[i];
- ip4_add_del_route (im, &a);
- }
- }
-
- ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
-}
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vnet/api_errno.h> /* for API error numbers */
+#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/classify_dpo.h>
void
ip4_forward_next_trace (vlib_main_t * vm,
@@ -712,12 +60,10 @@ always_inline uword
ip4_lookup_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame,
- int lookup_for_responses_to_locally_received_packets,
- int is_indirect)
+ int lookup_for_responses_to_locally_received_packets)
{
ip4_main_t * im = &ip4_main;
- ip_lookup_main_t * lm = &im->lookup_main;
- vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
u32 n_left_from, n_left_to_next, * from, * to_next;
ip_lookup_next_t next;
u32 cpu_index = os_get_cpu_number();
@@ -732,217 +78,194 @@ ip4_lookup_inline (vlib_main_t * vm,
to_next, n_left_to_next);
while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t * p0, * p1;
- ip4_header_t * ip0, * ip1;
- __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
- ip_lookup_next_t next0, next1;
- ip_adjacency_t * adj0, * adj1;
- ip4_fib_mtrie_t * mtrie0, * mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- ip4_address_t * dst_addr0, *dst_addr1;
- __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
- __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
- u32 flow_hash_config0, flow_hash_config1;
+ {
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
+ ip_lookup_next_t next0, next1;
+ const load_balance_t * lb0, * lb1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_address_t * dst_addr0, *dst_addr1;
+ __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
+ __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
+ flow_hash_config_t flow_hash_config0, flow_hash_config1;
u32 hash_c0, hash_c1;
- u32 wrong_next;
+ u32 wrong_next;
+ const dpo_id_t *dpo0, *dpo1;
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
- CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
- CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
- }
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+ }
- pi0 = to_next[0] = from[0];
- pi1 = to_next[1] = from[1];
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
- p0 = vlib_get_buffer (vm, pi0);
- p1 = vlib_get_buffer (vm, pi1);
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
- ip0 = vlib_buffer_get_current (p0);
- ip1 = vlib_buffer_get_current (p1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
- if (is_indirect)
- {
- ip_adjacency_t * iadj0, * iadj1;
- iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
- iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
- dst_addr0 = &iadj0->indirect.next_hop.ip4;
- dst_addr1 = &iadj1->indirect.next_hop.ip4;
- }
- else
- {
- dst_addr0 = &ip0->dst_address;
- dst_addr1 = &ip1->dst_address;
- }
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
- if (! lookup_for_responses_to_locally_received_packets)
- {
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
- mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
-
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
- }
-
- tcp0 = (void *) (ip0 + 1);
- tcp1 = (void *) (ip1 + 1);
-
- is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
- || ip0->protocol == IP_PROTOCOL_UDP);
- is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
- || ip1->protocol == IP_PROTOCOL_UDP);
-
- if (! lookup_for_responses_to_locally_received_packets)
- {
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
- }
-
- if (! lookup_for_responses_to_locally_received_packets)
- {
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
- }
-
- if (! lookup_for_responses_to_locally_received_packets)
- {
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
- }
-
- if (lookup_for_responses_to_locally_received_packets)
- {
- adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
- adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
- }
- else
- {
- /* Handle default route. */
- leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
-
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
- }
-
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
- dst_addr0,
- /* no_default_route */ 0));
- ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
- dst_addr1,
- /* no_default_route */ 0));
- adj0 = ip_get_adjacency (lm, adj_index0);
- adj1 = ip_get_adjacency (lm, adj_index1);
-
- next0 = adj0->lookup_next_index;
- next1 = adj1->lookup_next_index;
-
- /* Use flow hash to compute multipath adjacency. */
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
+ }
+
+ tcp0 = (void *) (ip0 + 1);
+ tcp1 = (void *) (ip1 + 1);
+
+ is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+ is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
+ || ip1->protocol == IP_PROTOCOL_UDP);
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
+ }
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
+ }
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
+ }
+
+ if (lookup_for_responses_to_locally_received_packets)
+ {
+ lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
+ }
+ else
+ {
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ }
+
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+
+ /* Compute a flow hash only for a packet whose own load-balance (lb0 for p0, lb1 for p1) has more than one bucket. */
hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
- if (PREDICT_FALSE (adj0->n_adj > 1))
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
{
- flow_hash_config0 =
- vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
- hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
ip4_compute_flow_hash (ip0, flow_hash_config0);
}
- if (PREDICT_FALSE(adj1->n_adj > 1))
+ if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) /* p1 is gated on lb1, not lb0 */
{
- flow_hash_config1 =
- vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
- hash_c1 = vnet_buffer (p1)->ip.flow_hash =
+ flow_hash_config1 = lb1->lb_hash_config;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash =
ip4_compute_flow_hash (ip1, flow_hash_config1);
}
- ASSERT (adj0->n_adj > 0);
- ASSERT (adj1->n_adj > 0);
- ASSERT (is_pow2 (adj0->n_adj));
- ASSERT (is_pow2 (adj1->n_adj));
- adj_index0 += (hash_c0 & (adj0->n_adj - 1));
- adj_index1 += (hash_c1 & (adj1->n_adj - 1));
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
-
- if (is_indirect)
- {
- /* ARP for next-hop not packet's destination address */
- if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
- ip0->dst_address.as_u32 = dst_addr0->as_u32;
- if (adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP)
- ip1->dst_address.as_u32 = dst_addr1->as_u32;
- }
-
- vlib_increment_combined_counter
- (cm, cpu_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, p0)
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ dpo1 = load_balance_get_bucket_i(lb1,
+ (hash_c1 &
+ (lb1->lb_n_buckets_minus_1))); /* mask with lb1's own bucket count so the index stays within lb1 */
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0)
+ sizeof(ethernet_header_t));
- vlib_increment_combined_counter
- (cm, cpu_index, adj_index1, 1,
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index1, 1,
vlib_buffer_length_in_chain (vm, p1)
+ sizeof(ethernet_header_t));
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- wrong_next = (next0 != next) + 2*(next1 != next);
- if (PREDICT_FALSE (wrong_next != 0))
- {
- switch (wrong_next)
- {
- case 1:
- /* A B A */
- to_next[-2] = pi1;
- to_next -= 1;
- n_left_to_next += 1;
- vlib_set_next_frame_buffer (vm, node, next0, pi0);
- break;
-
- case 2:
- /* A A B */
- to_next -= 1;
- n_left_to_next += 1;
- vlib_set_next_frame_buffer (vm, node, next1, pi1);
- break;
-
- case 3:
- /* A B C */
- to_next -= 2;
- n_left_to_next += 2;
- vlib_set_next_frame_buffer (vm, node, next0, pi0);
- vlib_set_next_frame_buffer (vm, node, next1, pi1);
- if (next0 == next1)
- {
- /* A B B */
- vlib_put_next_frame (vm, node, next, n_left_to_next);
- next = next1;
- vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
- }
- }
- }
- }
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ wrong_next = (next0 != next) + 2*(next1 != next);
+ if (PREDICT_FALSE (wrong_next != 0))
+ {
+ switch (wrong_next)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ /* A B B */
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next1;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ }
+ }
+ }
+ }
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -950,12 +273,14 @@ ip4_lookup_inline (vlib_main_t * vm,
ip4_header_t * ip0;
__attribute__((unused)) tcp_header_t * tcp0;
ip_lookup_next_t next0;
- ip_adjacency_t * adj0;
+ const load_balance_t *lb0;
ip4_fib_mtrie_t * mtrie0;
ip4_fib_mtrie_leaf_t leaf0;
ip4_address_t * dst_addr0;
- __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
- u32 flow_hash_config0, hash_c0;
+ __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
+ flow_hash_config_t flow_hash_config0;
+ const dpo_id_t *dpo0;
+ u32 hash_c0;
pi0 = from[0];
to_next[0] = pi0;
@@ -964,16 +289,7 @@ ip4_lookup_inline (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (p0);
- if (is_indirect)
- {
- ip_adjacency_t * iadj0;
- iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
- dst_addr0 = &iadj0->indirect.next_hop.ip4;
- }
- else
- {
- dst_addr0 = &ip0->dst_address;
- }
+ dst_addr0 = &ip0->dst_address;
fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
@@ -981,7 +297,7 @@ ip4_lookup_inline (vlib_main_t * vm,
if (! lookup_for_responses_to_locally_received_packets)
{
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+ mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
@@ -1003,50 +319,39 @@ ip4_lookup_inline (vlib_main_t * vm,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
if (lookup_for_responses_to_locally_received_packets)
- adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
else
{
/* Handle default route. */
leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
}
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
- dst_addr0,
- /* no_default_route */ 0));
-
- adj0 = ip_get_adjacency (lm, adj_index0);
-
- next0 = adj0->lookup_next_index;
+ lb0 = load_balance_get (lbi0);
/* Use flow hash to compute multipath adjacency. */
hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
- if (PREDICT_FALSE(adj0->n_adj > 1))
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
{
- flow_hash_config0 =
- vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+ flow_hash_config0 = lb0->lb_hash_config;
hash_c0 = vnet_buffer (p0)->ip.flow_hash =
ip4_compute_flow_hash (ip0, flow_hash_config0);
}
- ASSERT (adj0->n_adj > 0);
- ASSERT (is_pow2 (adj0->n_adj));
- adj_index0 += (hash_c0 & (adj0->n_adj - 1));
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
- if (is_indirect)
- {
- /* ARP for next-hop not packet's destination address */
- if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
- ip0->dst_address.as_u32 = dst_addr0->as_u32;
- }
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vlib_increment_combined_counter
- (cm, cpu_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, p0)
- + sizeof(ethernet_header_t));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
from += 1;
to_next += 1;
@@ -1113,55 +418,135 @@ ip4_lookup (vlib_main_t * vm,
vlib_frame_t * frame)
{
return ip4_lookup_inline (vm, node, frame,
- /* lookup_for_responses_to_locally_received_packets */ 0,
- /* is_indirect */ 0);
+ /* lookup_for_responses_to_locally_received_packets */ 0);
}
-void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
- ip_adjacency_t * adj,
- u32 sw_if_index,
- u32 if_address_index)
+static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
+
+VLIB_REGISTER_NODE (ip4_lookup_node) = {
+ .function = ip4_lookup,
+ .name = "ip4-lookup",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_lookup_trace,
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = IP4_LOOKUP_NEXT_NODES,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
+
+always_inline uword
+ip4_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
- vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
- ip_lookup_next_t n;
- vnet_l3_packet_type_t packet_type;
- u32 node_index;
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number();
- if (hw->hw_class_index == ethernet_hw_interface_class.index
- || hw->hw_class_index == srp_hw_interface_class.index)
- {
- /*
- * We have a bit of a problem in this case. ip4-arp uses
- * the rewrite_header.next_index to hand pkts to the
- * indicated inteface output node. We can end up in
- * ip4_rewrite_local, too, which also pays attention to
- * rewrite_header.next index. Net result: a hack in
- * ip4_rewrite_local...
- */
- n = IP_LOOKUP_NEXT_ARP;
- node_index = ip4_arp_node.index;
- adj->if_address_index = if_address_index;
- adj->arp.next_hop.ip4.as_u32 = 0;
- ip46_address_reset(&adj->arp.next_hop);
- packet_type = VNET_L3_PACKET_TYPE_ARP;
- }
- else
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace(vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
{
- n = IP_LOOKUP_NEXT_REWRITE;
- node_index = ip4_rewrite_node.index;
- packet_type = VNET_L3_PACKET_TYPE_IP4;
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t * p0;
+ u32 pi0, lbi0, hc0;
+ const ip4_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+ hc0 = lb0->lb_hash_config;
+ vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ vnet_buffer(p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
}
- adj->lookup_next_index = n;
- vnet_rewrite_for_sw_interface
- (vnm,
- packet_type,
- sw_if_index,
- node_index,
- VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
+ return frame->n_vectors;
+}
+
+static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
+
+VLIB_REGISTER_NODE (ip4_load_balance_node) = {
+ .function = ip4_load_balance,
+ .name = "ip4-load-balance",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+
+ .format_trace = format_ip4_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
+
+/* get first interface address */
+ip4_address_t *
+ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_interface_address_t * ia = 0;
+ ip4_address_t * result = 0;
+
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip4_address_t * a = ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }));
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
}
static void
@@ -1169,115 +554,160 @@ ip4_add_interface_routes (u32 sw_if_index,
ip4_main_t * im, u32 fib_index,
ip_interface_address_t * a)
{
- vnet_main_t * vnm = vnet_get_main();
ip_lookup_main_t * lm = &im->lookup_main;
- ip_adjacency_t * adj;
ip4_address_t * address = ip_interface_address_get_address (lm, a);
- ip4_add_del_route_args_t x;
- vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
- u32 classify_table_index;
-
- /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
- x.table_index_or_table_id = fib_index;
- x.flags = (IP4_ROUTE_FLAG_ADD
- | IP4_ROUTE_FLAG_FIB_INDEX
- | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
- x.dst_address = address[0];
- x.dst_address_length = a->address_length;
- x.n_add_adj = 0;
- x.add_adj = 0;
+ fib_prefix_t pfx = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
a->neighbor_probe_adj_index = ~0;
- if (a->address_length < 32)
- {
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &x.adj_index);
- ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
- ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
- ip4_add_del_route (im, &x);
- a->neighbor_probe_adj_index = x.adj_index;
- }
-
- /* Add e.g. 1.1.1.1/32 as local to this host. */
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &x.adj_index);
-
- classify_table_index = ~0;
+
+ if (pfx.fp_len < 32)
+ {
+ fib_node_index_t fei;
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ NULL, /* No next-hop address */
+ sw_if_index,
+ ~0, // invalid FIB index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
+ }
+
+ pfx.fp_len = 32;
+
if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
- classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
- if (classify_table_index != (u32) ~0)
- {
- adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
- adj->classify.table_index = classify_table_index;
- }
- else
- adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
-
- adj->if_address_index = a - lm->if_address_pool;
- adj->rewrite_header.sw_if_index = sw_if_index;
- adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
- /*
- * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
- * fail an RPF-ish check, but still go thru the rewrite code...
- */
- adj->rewrite_header.data_bytes = 0;
+ {
+ u32 classify_table_index =
+ lm->classify_table_index_by_sw_if_index [sw_if_index];
+ if (classify_table_index != (u32) ~0)
+ {
+ dpo_id_t dpo = DPO_NULL;
+
+ dpo_set(&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create(FIB_PROTOCOL_IP4,
+ classify_table_index));
+
+ fib_table_entry_special_dpo_add(fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE,
+ &dpo);
+ dpo_reset(&dpo);
+ }
+ }
- ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
- x.dst_address_length = 32;
- ip4_add_del_route (im, &x);
+ fib_table_entry_update_one_path(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP4,
+ &pfx.fp_addr,
+ sw_if_index,
+ ~0, // invalid FIB index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
}
static void
-ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
+ip4_del_interface_routes (ip4_main_t * im,
+ u32 fib_index,
+ ip4_address_t * address,
+ u32 address_length)
{
- ip4_add_del_route_args_t x;
-
- /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
- x.table_index_or_table_id = fib_index;
- x.flags = (IP4_ROUTE_FLAG_DEL
- | IP4_ROUTE_FLAG_FIB_INDEX
- | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
- x.dst_address = address[0];
- x.dst_address_length = address_length;
- x.adj_index = ~0;
- x.n_add_adj = 0;
- x.add_adj = 0;
-
- if (address_length < 32)
- ip4_add_del_route (im, &x);
-
- x.dst_address_length = 32;
- ip4_add_del_route (im, &x);
-
- ip4_delete_matching_routes (im,
- fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX,
- address,
- address_length);
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+
+ if (pfx.fp_len < 32)
+ {
+ fib_table_entry_delete(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE);
+ }
+
+ pfx.fp_len = 32;
+ fib_table_entry_delete(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE);
}
-typedef struct {
- u32 sw_if_index;
- ip4_address_t address;
- u32 length;
-} ip4_interface_address_t;
+void
+ip4_sw_interface_enable_disable (u32 sw_if_index,
+ u32 is_enable)
+{
+ vlib_main_t * vm = vlib_get_main();
+ ip4_main_t * im = &ip4_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 ci, cast;
+ u32 lookup_feature_index;
-static clib_error_t *
-ip4_add_del_interface_address_internal (vlib_main_t * vm,
- u32 sw_if_index,
- ip4_address_t * new_address,
- u32 new_length,
- u32 redistribute,
- u32 insert_routes,
- u32 is_del);
+ vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ else
+ {
+ ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+
+ for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
+ {
+ ip_config_main_t * cm = &lm->feature_config_mains[cast];
+ vnet_config_main_t * vcm = &cm->config_main;
+
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ if (cast == VNET_IP_RX_UNICAST_FEAT)
+ lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
+ else
+ lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
+
+ if (is_enable)
+ ci = vnet_config_add_feature (vm, vcm,
+ ci,
+ lookup_feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ else
+ ci = vnet_config_del_feature (vm, vcm,
+ ci,
+ lookup_feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ cm->config_index_by_sw_if_index[sw_if_index] = ci;
+ }
+}
static clib_error_t *
ip4_add_del_interface_address_internal (vlib_main_t * vm,
u32 sw_if_index,
ip4_address_t * address,
u32 address_length,
- u32 redistribute,
- u32 insert_routes,
u32 is_del)
{
vnet_main_t * vnm = vnet_get_main();
@@ -1292,9 +722,15 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
vec_add1 (addr_fib, ip4_af);
- /* When adding an address check that it does not conflict with an existing address. */
+ /* FIXME-LATER
+ * there is no support for adj-fib handling in the presence of overlapping
+ * subnets on interfaces. Easy fix - disallow overlapping subnets, like
+ * most routers do.
+ */
if (! is_del)
{
+ /* When adding an address check that it does not conflict
+ with an existing address. */
ip_interface_address_t * ia;
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
@@ -1307,7 +743,7 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
format_ip4_address_and_length, address, address_length,
format_ip4_address_and_length, x, ia->address_length,
format_vnet_sw_if_index_name, vnm, sw_if_index);
- }));
+ }));
}
elts_before = pool_elts (lm->if_address_pool);
@@ -1322,18 +758,16 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
if (error)
goto done;
- if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
- {
- if (is_del)
- ip4_del_interface_routes (im, ip4_af.fib_index, address,
- address_length);
-
- else
- ip4_add_interface_routes (sw_if_index,
- im, ip4_af.fib_index,
- pool_elt_at_index
- (lm->if_address_pool, if_address_index));
- }
+ ip4_sw_interface_enable_disable(sw_if_index, !is_del);
+
+ if (is_del)
+ ip4_del_interface_routes (im, ip4_af.fib_index, address,
+ address_length);
+ else
+ ip4_add_interface_routes (sw_if_index,
+ im, ip4_af.fib_index,
+ pool_elt_at_index
+ (lm->if_address_pool, if_address_index));
/* If pool did not grow/shrink: add duplicate address. */
if (elts_before != pool_elts (lm->if_address_pool))
@@ -1358,48 +792,9 @@ ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
{
return ip4_add_del_interface_address_internal
(vm, sw_if_index, address, address_length,
- /* redistribute */ 1,
- /* insert_routes */ 1,
is_del);
}
-static clib_error_t *
-ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 flags)
-{
- ip4_main_t * im = &ip4_main;
- ip_interface_address_t * ia;
- ip4_address_t * a;
- u32 is_admin_up, fib_index;
-
- /* Fill in lookup tables with default table (0). */
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
-
- vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
-
- is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
-
- fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
-
- foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
- 0 /* honor unnumbered */,
- ({
- a = ip_interface_address_get_address (&im->lookup_main, ia);
- if (is_admin_up)
- ip4_add_interface_routes (sw_if_index,
- im, fib_index,
- ia);
- else
- ip4_del_interface_routes (im, fib_index,
- a, ia->address_length);
- }));
-
- return 0;
-}
-
-VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
-
/* Built-in ip4 unicast rx feature path definition */
VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
.node_name = "ip4-inacl",
@@ -1449,10 +844,17 @@ VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
.node_name = "ip4-lookup",
- .runs_before = 0, /* not before any other features */
+ .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
.feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
};
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
+ .node_name = "ip4-drop",
+ .runs_before = 0, /* not before any other features */
+ .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
+};
+
+
/* Built-in ip4 multicast rx feature path definition */
VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
.node_name = "vpath-input-ip4",
@@ -1462,10 +864,16 @@ VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
.node_name = "ip4-lookup-multicast",
- .runs_before = 0, /* not before any other features */
+ .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
.feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
};
+VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
+ .node_name = "ip4-drop",
+ .runs_before = 0, /* last feature */
+ .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
+};
+
static char * rx_feature_start_nodes[] =
{ "ip4-input", "ip4-input-no-checksum"};
@@ -1488,7 +896,6 @@ VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
.feature_index = &ip4_main.ip4_tx_feature_interface_output,
};
-
static clib_error_t *
ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
{
@@ -1520,7 +927,7 @@ ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
feature_start_nodes,
feature_start_len,
cast,
- 1 /* is_ip4 */)))
+ VNET_L3_PACKET_TYPE_IP4)))
return error;
}
@@ -1538,6 +945,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm,
u32 ci, cast;
u32 feature_index;
+ /* Fill in lookup tables with default table (0). */
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
{
ip_config_main_t * cm = &lm->feature_config_mains[cast];
@@ -1547,9 +957,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm,
ci = cm->config_index_by_sw_if_index[sw_if_index];
if (cast == VNET_IP_RX_UNICAST_FEAT)
- feature_index = im->ip4_unicast_rx_feature_lookup;
+ feature_index = im->ip4_unicast_rx_feature_drop;
else if (cast == VNET_IP_RX_MULTICAST_FEAT)
- feature_index = im->ip4_multicast_rx_feature_lookup;
+ feature_index = im->ip4_multicast_rx_feature_drop;
else
feature_index = im->ip4_tx_feature_interface_output;
@@ -1560,14 +970,16 @@ ip4_sw_interface_add_del (vnet_main_t * vnm,
/* config data */ 0,
/* # bytes of config data */ 0);
else
- ci = vnet_config_del_feature (vm, vcm,
- ci,
- feature_index,
- /* config data */ 0,
- /* # bytes of config data */ 0);
-
+ {
+ ci = vnet_config_del_feature (vm, vcm, ci,
+ feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
+ im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
+ }
cm->config_index_by_sw_if_index[sw_if_index] = ci;
- /*
+ /*
* note: do not update the tx feature count here.
*/
}
@@ -1577,44 +989,6 @@ ip4_sw_interface_add_del (vnet_main_t * vnm,
VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
-static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
-
-VLIB_REGISTER_NODE (ip4_lookup_node) = {
- .function = ip4_lookup,
- .name = "ip4-lookup",
- .vector_size = sizeof (u32),
-
- .format_trace = format_ip4_lookup_trace,
-
- .n_next_nodes = IP4_LOOKUP_N_NEXT,
- .next_nodes = IP4_LOOKUP_NEXT_NODES,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
-
-static uword
-ip4_indirect (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip4_lookup_inline (vm, node, frame,
- /* lookup_for_responses_to_locally_received_packets */ 0,
- /* is_indirect */ 1);
-}
-
-VLIB_REGISTER_NODE (ip4_indirect_node) = {
- .function = ip4_indirect,
- .name = "ip4-indirect",
- .vector_size = sizeof (u32),
- .sibling_of = "ip4-lookup",
- .format_trace = format_ip4_lookup_trace,
-
- .n_next_nodes = 0,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
-
-
/* Global IP4 main. */
ip4_main_t ip4_main;
@@ -1636,11 +1010,11 @@ ip4_lookup_init (vlib_main_t * vm)
im->fib_masks[i] = clib_host_to_net_u32 (m);
}
- /* Create FIB with index 0 and table id of 0. */
- find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
-
ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
+ /* Create FIB with index 0 and table id of 0. */
+ fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
+
{
pg_node_t * pn;
pn = pg_get_node (ip4_lookup_node.index);
@@ -1708,12 +1082,12 @@ static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
vnet_main_t * vnm = vnet_get_main();
- ip4_main_t * im = &ip4_main;
uword indent = format_get_indent (s);
s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
t->fib_index, t->adj_index, format_ip_adjacency,
- vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+ vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
format_ip4_header, t->packet_data);
@@ -1726,16 +1100,16 @@ static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
vnet_main_t * vnm = vnet_get_main();
- ip4_main_t * im = &ip4_main;
uword indent = format_get_indent (s);
s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
t->fib_index, t->adj_index, format_ip_adjacency,
- vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+ vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
format_ip_adjacency_packet_data,
- vnm, &im->lookup_main, t->adj_index,
+ vnm, t->adj_index,
t->packet_data, sizeof (t->packet_data));
return s;
}
@@ -1863,12 +1237,6 @@ ip4_punt (vlib_main_t * vm,
vlib_frame_t * frame)
{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
-static uword
-ip4_miss (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
-
VLIB_REGISTER_NODE (ip4_drop_node,static) = {
.function = ip4_drop,
.name = "ip4-drop",
@@ -1882,7 +1250,7 @@ VLIB_REGISTER_NODE (ip4_drop_node,static) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
VLIB_REGISTER_NODE (ip4_punt_node,static) = {
.function = ip4_punt,
@@ -1897,22 +1265,7 @@ VLIB_REGISTER_NODE (ip4_punt_node,static) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
-
-VLIB_REGISTER_NODE (ip4_miss_node,static) = {
- .function = ip4_miss,
- .name = "ip4-miss",
- .vector_size = sizeof (u32),
-
- .format_trace = format_ip4_forward_next_trace,
-
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
/* Compute TCP/UDP/ICMP4 checksum in software. */
u16
@@ -2009,26 +1362,27 @@ ip4_local (vlib_main_t * vm,
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t * p0, * p1;
- ip4_header_t * ip0, * ip1;
- udp_header_t * udp0, * udp1;
- ip4_fib_mtrie_t * mtrie0, * mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- ip_adjacency_t * adj0, * adj1;
- u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
- u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
- i32 len_diff0, len_diff1;
- u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
- u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
- u8 enqueue_code;
+ {
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ udp_header_t * udp0, * udp1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
+ u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
+ i32 len_diff0, len_diff1;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
+ u8 enqueue_code;
- pi0 = to_next[0] = from[0];
- pi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
p0 = vlib_get_buffer (vm, pi0);
p1 = vlib_get_buffer (vm, pi1);
@@ -2041,8 +1395,8 @@ ip4_local (vlib_main_t * vm,
fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
vnet_buffer(p1)->sw_if_index[VLIB_RX]);
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
- mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
@@ -2130,41 +1484,42 @@ ip4_local (vlib_main_t * vm,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
- vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
-
- vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
- &ip0->src_address,
- /* no_default_route */ 1));
- ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
- &ip1->src_address,
- /* no_default_route */ 1));
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
- adj0 = ip_get_adjacency (lm, adj_index0);
- adj1 = ip_get_adjacency (lm, adj_index1);
+ lb0 = load_balance_get(lbi0);
+ lb1 = load_balance_get(lbi1);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+ dpo1 = load_balance_get_bucket_i(lb1, 0);
/*
* Must have a route to source otherwise we drop the packet.
* ip4 broadcasts are accepted, e.g. to make dhcp client work
*/
error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
+ && dpo0->dpoi_type != DPO_ADJACENCY
+ && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
&& ip0->dst_address.as_u32 != 0xFFFFFFFF
? IP4_ERROR_SRC_LOOKUP_MISS
: error0);
+ error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS :
+ error0);
error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
- && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
- && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
- && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
- && ip0->dst_address.as_u32 != 0xFFFFFFFF
+ && dpo1->dpoi_type != DPO_ADJACENCY
+ && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
+ && ip1->dst_address.as_u32 != 0xFFFFFFFF
? IP4_ERROR_SRC_LOOKUP_MISS
: error1);
+ error1 = (dpo0->dpoi_type == DPO_RECEIVE ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS :
+ error1);
next0 = lm->local_next_by_ip_protocol[proto0];
next1 = lm->local_next_by_ip_protocol[proto1];
@@ -2220,11 +1575,12 @@ ip4_local (vlib_main_t * vm,
udp_header_t * udp0;
ip4_fib_mtrie_t * mtrie0;
ip4_fib_mtrie_leaf_t leaf0;
- ip_adjacency_t * adj0;
- u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
+ u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
i32 len_diff0;
u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
-
+ load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+
pi0 = to_next[0] = from[0];
from += 1;
n_left_from -= 1;
@@ -2238,7 +1594,7 @@ ip4_local (vlib_main_t * vm,
fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
vnet_buffer(p0)->sw_if_index[VLIB_RX]);
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
@@ -2296,24 +1652,30 @@ ip4_local (vlib_main_t * vm,
: error0);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
- &ip0->src_address,
- /* no_default_route */ 1));
+ lb0 = load_balance_get(lbi0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
- adj0 = ip_get_adjacency (lm, adj_index0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
+ dpo0->dpoi_index;
/* Must have a route to source otherwise we drop the packet. */
error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
+ && dpo0->dpoi_type != DPO_ADJACENCY
+ && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
+ && dpo0->dpoi_type != DPO_RECEIVE
&& ip0->dst_address.as_u32 != 0xFFFFFFFF
? IP4_ERROR_SRC_LOOKUP_MISS
: error0);
+ /* Packet originated from a local address => spoofing */
+ error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS :
+ error0);
next0 = lm->local_next_by_ip_protocol[proto0];
@@ -2356,7 +1718,7 @@ VLIB_REGISTER_NODE (ip4_local_node,static) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
void ip4_register_protocol (u32 protocol, u32 node_index)
{
@@ -2394,10 +1756,11 @@ VLIB_CLI_COMMAND (show_ip_local, static) = {
.short_help = "Show ip local protocol table",
};
-static uword
-ip4_arp (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+always_inline uword
+ip4_arp_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_glean)
{
vnet_main_t * vnm = vnet_get_main();
ip4_main_t * im = &ip4_main;
@@ -2441,12 +1804,11 @@ ip4_arp (vlib_main_t * vm,
while (n_left_from > 0 && n_left_to_next_drop > 0)
{
+ u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
+ ip_adjacency_t * adj0;
vlib_buffer_t * p0;
ip4_header_t * ip0;
- ethernet_header_t * eh0;
- u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
uword bm0;
- ip_adjacency_t * adj0;
pi0 = from[0];
@@ -2456,35 +1818,10 @@ ip4_arp (vlib_main_t * vm,
adj0 = ip_get_adjacency (lm, adj_index0);
ip0 = vlib_buffer_get_current (p0);
- /* If packet destination is not local, send ARP to next hop */
- if (adj0->arp.next_hop.ip4.as_u32)
- ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
-
- /*
- * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
- * rewrite to this packet, we need to skip it here.
- * Note, to distinguish from src IP addr *.8.6.*, we
- * check for a bcast eth dest instead of IPv4 version.
- */
- eh0 = (ethernet_header_t*)ip0;
- if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
- {
- u32 vlan_num = 0;
- u16 * etype = &eh0->type;
- while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q
- || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad
- {
- vlan_num += 1;
- etype += 2; //vlan tag also 16 bits, same as etype
- }
- if (*etype == clib_host_to_net_u16 (0x0806)) //arp
- {
- vlib_buffer_advance (
- p0, sizeof(ethernet_header_t) + (4*vlan_num));
- ip0 = vlib_buffer_get_current (p0);
- }
- }
-
+ /*
+ * this is the Glean case, so we are ARPing for the
+ * packet's destination
+ */
a0 = hash_seeds[0];
b0 = hash_seeds[1];
c0 = hash_seeds[2];
@@ -2492,7 +1829,14 @@ ip4_arp (vlib_main_t * vm,
sw_if_index0 = adj0->rewrite_header.sw_if_index;
vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
- a0 ^= ip0->dst_address.data_u32;
+ if (is_glean)
+ {
+ a0 ^= ip0->dst_address.data_u32;
+ }
+ else
+ {
+ a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
+ }
b0 ^= sw_if_index0;
hash_v3_finalize32 (a0, b0, c0);
@@ -2522,10 +1866,11 @@ ip4_arp (vlib_main_t * vm,
* Can happen if the control-plane is programming tables
* with traffic flowing; at least that's today's lame excuse.
*/
- if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
- {
- p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
- }
+ if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
+ (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
+ {
+ p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+ }
else
/* Send ARP request. */
{
@@ -2545,15 +1890,32 @@ ip4_arp (vlib_main_t * vm,
clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
sizeof (h0->ip4_over_ethernet[0].ethernet));
- if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
- //No source address available
- p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
- vlib_buffer_free(vm, &bi0, 1);
- continue;
+ if (is_glean)
+ {
+ /* The interface's source address is stashed in the Glean Adj */
+ h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
+
+ /* Copy in destination address we are requesting. This is the
+ * glean case, so it's the packet's destination.*/
+ h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
}
+ else
+ {
+ /* Src IP address in ARP header. */
+ if (ip4_src_address_for_packet(lm, sw_if_index0,
+ &h0->ip4_over_ethernet[0].ip4))
+ {
+ /* No source address available */
+ p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free(vm, &bi0, 1);
+ continue;
+ }
- /* Copy in destination address we are requesting. */
- h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
+ /* Copy in destination address we are requesting from the
+ incomplete adj */
+ h0->ip4_over_ethernet[1].ip4.data_u32 =
+ adj0->sub_type.nbr.next_hop.ip4.as_u32;
+ }
vlib_buffer_copy_trace_flag (vm, p0, bi0);
b0 = vlib_get_buffer (vm, bi0);
@@ -2571,6 +1933,22 @@ ip4_arp (vlib_main_t * vm,
return frame->n_vectors;
}
+static uword
+ip4_arp (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (ip4_arp_inline(vm, node, frame, /* is_glean */ 0));
+}
+
+static uword
+ip4_glean (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (ip4_arp_inline(vm, node, frame, /* is_glean */ 1));
+}
+
static char * ip4_arp_error_strings[] = {
[IP4_ARP_ERROR_DROP] = "address overflow drops",
[IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
@@ -2596,6 +1974,22 @@ VLIB_REGISTER_NODE (ip4_arp_node) = {
},
};
+VLIB_REGISTER_NODE (ip4_glean_node) = {
+ .function = ip4_glean,
+ .name = "ip4-glean",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_errors = ARRAY_LEN (ip4_arp_error_strings),
+ .error_strings = ip4_arp_error_strings,
+
+ .n_next_nodes = IP4_ARP_N_NEXT,
+ .next_nodes = {
+ [IP4_ARP_NEXT_DROP] = "error-drop",
+ },
+};
+
#define foreach_notrace_ip4_arp_error \
_(DROP) \
_(REQUEST_SENT) \
@@ -2720,7 +2114,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
u32 next0_override, next1_override;
u32 tx_sw_if_index0, tx_sw_if_index1;
-
+
if (rewrite_for_locally_received_packets)
next0_override = next1_override = 0;
@@ -2818,21 +2212,9 @@ ip4_rewrite_inline (vlib_main_t * vm,
if (rewrite_for_locally_received_packets)
{
- /*
- * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
- * we end up here with a local adjacency in hand
- * The local adj rewrite data is 0xfefe on purpose.
- * Bad engineer, no donut for you.
- */
- if (PREDICT_FALSE(adj0->lookup_next_index
- == IP_LOOKUP_NEXT_LOCAL))
- error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
if (PREDICT_FALSE(adj0->lookup_next_index
== IP_LOOKUP_NEXT_ARP))
next0_override = IP4_REWRITE_NEXT_ARP;
- if (PREDICT_FALSE(adj1->lookup_next_index
- == IP_LOOKUP_NEXT_LOCAL))
- error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
if (PREDICT_FALSE(adj1->lookup_next_index
== IP_LOOKUP_NEXT_ARP))
next1_override = IP4_REWRITE_NEXT_ARP;
@@ -2869,14 +2251,14 @@ ip4_rewrite_inline (vlib_main_t * vm,
*/
if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
vlib_increment_combined_counter
- (&lm->adjacency_counters,
+ (&adjacency_counters,
cpu_index, adj_index0,
/* packet increment */ 0,
/* byte increment */ rw_len0-sizeof(ethernet_header_t));
if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
vlib_increment_combined_counter
- (&lm->adjacency_counters,
+ (&adjacency_counters,
cpu_index, adj_index1,
/* packet increment */ 0,
/* byte increment */ rw_len1-sizeof(ethernet_header_t));
@@ -2945,7 +2327,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
u32 next0_override;
u32 tx_sw_if_index0;
-
+
if (rewrite_for_locally_received_packets)
next0_override = 0;
@@ -3000,15 +2382,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
if (rewrite_for_locally_received_packets)
{
- /*
- * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
- * we end up here with a local adjacency in hand
- * The local adj rewrite data is 0xfefe on purpose.
- * Bad engineer, no donut for you.
- */
- if (PREDICT_FALSE(adj0->lookup_next_index
- == IP_LOOKUP_NEXT_LOCAL))
- error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
/*
* We have to override the next_index in ARP adjacencies,
* because they're set up for ip4-arp, not this node...
@@ -3028,7 +2401,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
vlib_increment_combined_counter
- (&lm->adjacency_counters,
+ (&adjacency_counters,
cpu_index, adj_index0,
/* packet increment */ 0,
/* byte increment */ rw_len0-sizeof(ethernet_header_t));
@@ -3172,6 +2545,15 @@ ip4_rewrite_local (vlib_main_t * vm,
/* rewrite_for_locally_received_packets */ 1);
}
+static uword
+ip4_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_rewrite_inline (vm, node, frame,
+ /* rewrite_for_locally_received_packets */ 0);
+}
+
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
.function = ip4_rewrite_transit,
.name = "ip4-rewrite-transit",
@@ -3187,7 +2569,23 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
+
+VLIB_REGISTER_NODE (ip4_midchain_node) = {
+ .function = ip4_midchain,
+ .name = "ip4-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [IP4_REWRITE_NEXT_DROP] = "error-drop",
+ [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
.function = ip4_rewrite_local,
@@ -3201,7 +2599,7 @@ VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
.n_next_nodes = 0,
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
static clib_error_t *
add_del_interface_table (vlib_main_t * vm,
@@ -3232,13 +2630,18 @@ add_del_interface_table (vlib_main_t * vm,
{
ip4_main_t * im = &ip4_main;
- ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
-
- if (fib)
- {
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
- im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
- }
+ u32 fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ table_id);
+
+ //
+ // FIXME-LATER
+ // changing an interface's table has consequences for any connecteds
+ // and adj-fibs already installed.
+ //
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
}
done:
@@ -3272,8 +2675,7 @@ ip4_lookup_multicast (vlib_main_t * vm,
vlib_frame_t * frame)
{
ip4_main_t * im = &ip4_main;
- ip_lookup_main_t * lm = &im->lookup_main;
- vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
u32 n_left_from, n_left_to_next, * from, * to_next;
ip_lookup_next_t next;
u32 cpu_index = os_get_cpu_number();
@@ -3290,12 +2692,12 @@ ip4_lookup_multicast (vlib_main_t * vm,
while (n_left_from >= 4 && n_left_to_next >= 2)
{
vlib_buffer_t * p0, * p1;
- u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
+ u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
ip_lookup_next_t next0, next1;
ip4_header_t * ip0, * ip1;
- ip_adjacency_t * adj0, * adj1;
u32 fib_index0, fib_index1;
- u32 flow_hash_config0, flow_hash_config1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t * lb0, * lb1;
/* Prefetch next iteration. */
{
@@ -3327,46 +2729,44 @@ ip4_lookup_multicast (vlib_main_t * vm,
fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
- adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
- &ip0->dst_address, p0);
- adj_index1 = ip4_fib_lookup_buffer (im, fib_index1,
- &ip1->dst_address, p1);
-
- adj0 = ip_get_adjacency (lm, adj_index0);
- adj1 = ip_get_adjacency (lm, adj_index1);
-
- next0 = adj0->lookup_next_index;
- next1 = adj1->lookup_next_index;
+ lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
+ &ip0->dst_address);
+ lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
+ &ip1->dst_address);
- flow_hash_config0 =
- vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
- flow_hash_config1 =
- vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
- (ip0, flow_hash_config0);
+ (ip0, lb0->lb_hash_config);
vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
- (ip1, flow_hash_config1);
+ (ip1, lb1->lb_hash_config);
- ASSERT (adj0->n_adj > 0);
- ASSERT (adj1->n_adj > 0);
- ASSERT (is_pow2 (adj0->n_adj));
- ASSERT (is_pow2 (adj1->n_adj));
- adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
- adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1)));
+ dpo1 = load_balance_get_bucket_i(lb1,
+ (vnet_buffer (p1)->ip.flow_hash &
+ (lb1->lb_n_buckets_minus_1))); /* mask with lb1's own bucket count, not lb0's */
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
if (1) /* $$$$$$ HACK FIXME */
vlib_increment_combined_counter
- (cm, cpu_index, adj_index0, 1,
+ (cm, cpu_index, lb_index0, 1,
vlib_buffer_length_in_chain (vm, p0));
if (1) /* $$$$$$ HACK FIXME */
vlib_increment_combined_counter
- (cm, cpu_index, adj_index1, 1,
+ (cm, cpu_index, lb_index1, 1,
vlib_buffer_length_in_chain (vm, p1));
from += 2;
@@ -3415,11 +2815,11 @@ ip4_lookup_multicast (vlib_main_t * vm,
{
vlib_buffer_t * p0;
ip4_header_t * ip0;
- u32 pi0, adj_index0;
+ u32 pi0, lb_index0;
ip_lookup_next_t next0;
- ip_adjacency_t * adj0;
u32 fib_index0;
- u32 flow_hash_config0;
+ const dpo_id_t *dpo0;
+ const load_balance_t * lb0;
pi0 = from[0];
to_next[0] = pi0;
@@ -3433,28 +2833,27 @@ ip4_lookup_multicast (vlib_main_t * vm,
fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
- adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
- &ip0->dst_address, p0);
-
- adj0 = ip_get_adjacency (lm, adj_index0);
+ lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
+ &ip0->dst_address);
- next0 = adj0->lookup_next_index;
+ lb0 = load_balance_get (lb_index0);
- flow_hash_config0 =
- vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
- vnet_buffer (p0)->ip.flow_hash =
- ip4_compute_flow_hash (ip0, flow_hash_config0);
+ vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
+ (ip0, lb0->lb_hash_config);
- ASSERT (adj0->n_adj > 0);
- ASSERT (is_pow2 (adj0->n_adj));
- adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1)));
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
if (1) /* $$$$$$ HACK FIXME */
vlib_increment_combined_counter
- (cm, cpu_index, adj_index0, 1,
+ (cm, cpu_index, lb_index0, 1,
vlib_buffer_length_in_chain (vm, p0));
from += 1;
@@ -3494,7 +2893,7 @@ VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
.n_next_nodes = 0,
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
.function = ip4_drop,
@@ -3511,12 +2910,11 @@ VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
{
- ip4_main_t * im = &ip4_main;
ip4_fib_mtrie_t * mtrie0;
ip4_fib_mtrie_leaf_t leaf0;
- u32 adj_index0;
+ u32 lbi0;
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
@@ -3527,11 +2925,9 @@ int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
/* Handle default route. */
leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
- a,
- /* no_default_route */ 0);
+ return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
}
static clib_error_t *
@@ -3595,7 +2991,7 @@ int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
if (p == 0)
return VNET_API_ERROR_NO_SUCH_FIB;
- fib = vec_elt_at_index (im4->fibs, p[0]);
+ fib = ip4_fib_get (p[0]);
fib->flow_hash_config = flow_hash_config;
return 0;
@@ -3719,44 +3115,3 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
.function = set_ip_classify_command_fn,
};
-
-#define TEST_CODE 1
-#if TEST_CODE > 0
-
-static clib_error_t *
-set_interface_output_feature_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vnet_main_t * vnm = vnet_get_main();
- u32 sw_if_index = ~0;
- int is_add = 1;
- ip4_main_t * im = &ip4_main;
- ip_lookup_main_t * lm = &im->lookup_main;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (input, "del"))
- is_add = 0;
- else
- break;
- }
-
- if (sw_if_index == ~0)
- return clib_error_return (0, "unknown interface `%U'",
- format_unformat_error, input);
-
- lm->tx_sw_if_has_ip_output_features =
- clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
-
- return 0;
-}
-
-VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
- .path = "set interface output feature",
- .function = set_interface_output_feature_command_fn,
- .short_help = "set interface output feature <intfc>",
-};
-#endif /* TEST_CODE */
diff --git a/vnet/vnet/ip/ip4_mtrie.c b/vnet/vnet/ip/ip4_mtrie.c
index 006610a0f4e..364182415ba 100644
--- a/vnet/vnet/ip/ip4_mtrie.c
+++ b/vnet/vnet/ip/ip4_mtrie.c
@@ -38,6 +38,7 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_entry.h>
static void
ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, uword prefix_len)
@@ -401,21 +402,27 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
unset_leaf (m, &a, root_ply, 0);
/* Find next less specific route and insert into mtrie. */
- for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 1; i--)
+ for (i = dst_address_length - 1; i >= 1; i--)
{
uword * p;
+ index_t lbi;
ip4_address_t key;
- if (! fib->adj_index_by_dst_address[i])
+ if (! fib->fib_entry_by_dst_address[i])
continue;
key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
- p = hash_get (fib->adj_index_by_dst_address[i], key.as_u32);
+ p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
if (p)
{
+ lbi = fib_entry_contribute_ip_forwarding(p[0])->dpoi_index;
+ if (INDEX_INVALID == lbi)
+ continue;
+
a.dst_address = key;
+ a.adj_index = lbi;
a.dst_address_length = i;
- a.adj_index = p[0];
+
set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
break;
}
@@ -424,65 +431,6 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
}
}
-always_inline uword
-maybe_remap_leaf (ip_lookup_main_t * lm, ip4_fib_mtrie_leaf_t * p)
-{
- ip4_fib_mtrie_leaf_t l = p[0];
- uword was_remapped_to_empty_leaf = 0;
- if (ip4_fib_mtrie_leaf_is_terminal (l))
- {
- u32 adj_index = ip4_fib_mtrie_leaf_get_adj_index (l);
- u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
- if (m)
- {
- was_remapped_to_empty_leaf = m == ~0;
-
- /*
- * The intent of the original form - which dates to 2013 or
- * earlier - is not obvious. Here's the original:
- *
- * if (was_remapped_to_empty_leaf)
- * p[0] = (was_remapped_to_empty_leaf
- * ? IP4_FIB_MTRIE_LEAF_EMPTY
- * : ip4_fib_mtrie_leaf_set_adj_index (m - 1));
- *
- * Notice the outer "if (was_remapped_to_empty_leaf)"
- * means that p[0] is always set to IP4_FIB_MTRIE_LEAF_EMPTY,
- * and is otherwise left intact.
- *
- * It seems unlikely that the adjacency mapping scheme
- * works in detail. Coverity correctly complains that the
- * else-case of the original ternary expression is dead code.
- */
- if (was_remapped_to_empty_leaf)
- p[0] = IP4_FIB_MTRIE_LEAF_EMPTY;
- }
- }
- return was_remapped_to_empty_leaf;
-}
-
-static void maybe_remap_ply (ip_lookup_main_t * lm, ip4_fib_mtrie_ply_t * ply)
-{
- u32 n_remapped_to_empty = 0;
- u32 i;
- for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
- n_remapped_to_empty += maybe_remap_leaf (lm, &ply->leaves[i]);
- if (n_remapped_to_empty > 0)
- {
- ASSERT (n_remapped_to_empty <= ply->n_non_empty_leafs);
- ply->n_non_empty_leafs -= n_remapped_to_empty;
- if (ply->n_non_empty_leafs == 0)
- os_panic ();
- }
-}
-
-void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m)
-{
- ip4_fib_mtrie_ply_t * ply;
- pool_foreach (ply, m->ply_pool, maybe_remap_ply (lm, ply));
- maybe_remap_leaf (lm, &m->default_leaf);
-}
-
/* Returns number of bytes of memory used by mtrie. */
static uword mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
{
diff --git a/vnet/vnet/ip/ip4_mtrie.h b/vnet/vnet/ip/ip4_mtrie.h
index 31de41e14fa..c49937d6814 100644
--- a/vnet/vnet/ip/ip4_mtrie.h
+++ b/vnet/vnet/ip/ip4_mtrie.h
@@ -51,7 +51,7 @@
1 => empty (adjacency index of zero is special miss adjacency). */
typedef u32 ip4_fib_mtrie_leaf_t;
-#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*IP_LOOKUP_MISS_ADJ_INDEX)
+#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0)
always_inline u32 ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n)
@@ -115,6 +115,9 @@ typedef struct {
- 1 * sizeof (i32)];
} ip4_fib_mtrie_ply_t;
+_Static_assert(0 == sizeof(ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES,
+ "IP4 Mtrie ply cache line");
+
typedef struct {
/* Pool of plies. Index zero is root ply. */
ip4_fib_mtrie_ply_t * ply_pool;
@@ -136,15 +139,13 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t * f,
/* Returns adjacency index. */
u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst);
-void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m);
-
format_function_t format_ip4_fib_mtrie;
/* Lookup step. Processes 1 byte of 4 byte ip4 address. */
always_inline ip4_fib_mtrie_leaf_t
ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m,
ip4_fib_mtrie_leaf_t current_leaf,
- ip4_address_t * dst_address,
+ const ip4_address_t * dst_address,
u32 dst_address_byte_index)
{
ip4_fib_mtrie_leaf_t next_leaf;
diff --git a/vnet/vnet/ip/ip4_source_and_port_range_check.c b/vnet/vnet/ip/ip4_source_and_port_range_check.c
index ebfa767d8f0..8a469baa804 100644
--- a/vnet/vnet/ip/ip4_source_and_port_range_check.c
+++ b/vnet/vnet/ip/ip4_source_and_port_range_check.c
@@ -14,7 +14,19 @@
*/
#include <vnet/ip/ip.h>
#include <vnet/ip/ip_source_and_port_range_check.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+/**
+ * @brief The pool of range check DPOs
+ */
+static protocol_port_range_dpo_t *ppr_dpo_pool;
+
+/**
+ * @brief Dynamically registered DPO type
+ */
+static dpo_type_t ppr_dpo_type;
vlib_node_registration_t ip4_source_port_and_range_check_rx;
vlib_node_registration_t ip4_source_port_and_range_check_tx;
@@ -73,23 +85,20 @@ typedef enum
static inline u32
-check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next)
+check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
+ u16 dst_port, u32 next)
{
- protocol_port_range_t *range;
+ const protocol_port_range_t *range;
u16x8vec_t key;
u16x8vec_t diff1;
u16x8vec_t diff2;
u16x8vec_t sum, sum_equal_diff2;
u16 sum_nonzero, sum_equal, winner_mask;
int i;
- u8 *rwh;
- if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR || dst_port == 0)
+ if (NULL == ppr_dpo || dst_port == 0)
return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
- rwh = (u8 *) (&adj->rewrite_header);
- range = (protocol_port_range_t *) rwh;
-
/* Make the obvious screw-case work. A variant also works w/ no MMX */
if (PREDICT_FALSE (dst_port == 65535))
{
@@ -100,20 +109,20 @@ check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next)
i++)
{
for (j = 0; j < 8; j++)
- if (range->low.as_u16[j] == 65535)
+ if (ppr_dpo->blocks[i].low.as_u16[j] == 65535)
return next;
- range++;
}
return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
}
key.as_u16x8 = u16x8_splat (dst_port);
- for (i = 0; i < VLIB_BUFFER_PRE_DATA_SIZE / sizeof (protocol_port_range_t);
- i++)
+ for (i = 0; i < ppr_dpo->n_used_blocks; i++)
{
- diff1.as_u16x8 = u16x8_sub_saturate (range->low.as_u16x8, key.as_u16x8);
- diff2.as_u16x8 = u16x8_sub_saturate (range->hi.as_u16x8, key.as_u16x8);
+ diff1.as_u16x8 =
+ u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8);
+ diff2.as_u16x8 =
+ u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8);
sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8);
sum_equal_diff2.as_u16x8 =
u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8);
@@ -127,6 +136,12 @@ check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next)
return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
}
+always_inline protocol_port_range_dpo_t *
+protocol_port_range_dpo_get (index_t index)
+{
+ return (pool_elt_at_index (ppr_dpo_pool, index));
+}
+
always_inline uword
ip4_source_and_port_range_check_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -154,264 +169,263 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm,
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip0, *ip1;
- ip4_fib_mtrie_t *mtrie0, *mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- ip_source_and_port_range_check_config_t *c0, *c1;
- ip_adjacency_t *adj0 = 0, *adj1 = 0;
- u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0;
- u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1;
- udp_header_t *udp0, *udp1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
- CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
- }
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- fib_index0 =
- vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]);
- fib_index1 =
- vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (b1)->sw_if_index[VLIB_RX]);
-
- ip0 = vlib_buffer_get_current (b0);
- ip1 = vlib_buffer_get_current (b1);
-
- if (is_tx)
- {
- c0 = vnet_get_config_data (&tx_cm->config_main,
- &b0->current_config_index,
- &next0, sizeof (c0[0]));
- c1 = vnet_get_config_data (&tx_cm->config_main,
- &b1->current_config_index,
- &next1, sizeof (c1[0]));
- }
- else
- {
- c0 = vnet_get_config_data (&rx_cm->config_main,
- &b0->current_config_index,
- &next0, sizeof (c0[0]));
- c1 = vnet_get_config_data (&rx_cm->config_main,
- &b1->current_config_index,
- &next1, sizeof (c1[0]));
- }
-
- /* we can't use the default VRF here... */
- for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
- {
- ASSERT (c0->fib_index[i] && c1->fib_index[i]);
- }
-
-
- if (is_tx)
- {
- if (ip0->protocol == IP_PROTOCOL_UDP)
- fib_index0 =
- c0->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN];
- if (ip0->protocol == IP_PROTOCOL_TCP)
- fib_index0 =
- c0->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN];
- }
- else
- {
- if (ip0->protocol == IP_PROTOCOL_UDP)
- fib_index0 =
- c0->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT];
- if (ip0->protocol == IP_PROTOCOL_TCP)
- fib_index0 =
- c0->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT];
- }
-
- if (PREDICT_TRUE (fib_index0 != ~0))
- {
-
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
-
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 0);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 1);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 3);
-
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
-
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
- &ip0->src_address,
- 0
- /* use dflt rt */
- ));
- adj0 = ip_get_adjacency (lm, adj_index0);
- }
-
- if (is_tx)
- {
- if (ip1->protocol == IP_PROTOCOL_UDP)
- fib_index1 =
- c1->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN];
- if (ip1->protocol == IP_PROTOCOL_TCP)
- fib_index1 =
- c1->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN];
- }
- else
- {
- if (ip1->protocol == IP_PROTOCOL_UDP)
- fib_index1 =
- c1->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT];
- if (ip1->protocol == IP_PROTOCOL_TCP)
- fib_index1 =
- c1->fib_index
- [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT];
- }
-
- if (PREDICT_TRUE (fib_index1 != ~0))
- {
-
- mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
-
- leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 0);
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 1);
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 2);
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 3);
-
- adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
-
- ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
- &ip1->src_address,
- 0));
- adj1 = ip_get_adjacency (lm, adj_index1);
- }
-
- pass0 = 0;
- pass0 |= adj0 == 0;
- pass0 |= ip4_address_is_multicast (&ip0->src_address);
- pass0 |=
- ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
- pass0 |= (ip0->protocol != IP_PROTOCOL_UDP)
- && (ip0->protocol != IP_PROTOCOL_TCP);
-
- pass1 = 0;
- pass1 |= adj1 == 0;
- pass1 |= ip4_address_is_multicast (&ip1->src_address);
- pass1 |=
- ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
- pass1 |= (ip1->protocol != IP_PROTOCOL_UDP)
- && (ip1->protocol != IP_PROTOCOL_TCP);
-
- save_next0 = next0;
- udp0 = ip4_next_header (ip0);
- save_next1 = next1;
- udp1 = ip4_next_header (ip1);
-
- if (PREDICT_TRUE (pass0 == 0))
- {
- good_packets++;
- next0 = check_adj_port_range_x1
- (adj0, clib_net_to_host_u16 (udp0->dst_port), next0);
- good_packets -= (save_next0 != next0);
- b0->error = error_node->errors
- [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL];
- }
-
- if (PREDICT_TRUE (pass1 == 0))
- {
- good_packets++;
- next1 = check_adj_port_range_x1
- (adj1, clib_net_to_host_u16 (udp1->dst_port), next1);
- good_packets -= (save_next1 != next1);
- b1->error = error_node->errors
- [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL];
- }
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- ip4_source_and_port_range_check_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->pass = next0 == save_next0;
- t->bypass = pass0;
- t->fib_index = fib_index0;
- t->src_addr.as_u32 = ip0->src_address.as_u32;
- t->port = (pass0 == 0) ?
- clib_net_to_host_u16 (udp0->dst_port) : 0;
- t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP;
- }
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b1->flags & VLIB_BUFFER_IS_TRACED)))
- {
- ip4_source_and_port_range_check_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- t->pass = next1 == save_next1;
- t->bypass = pass1;
- t->fib_index = fib_index1;
- t->src_addr.as_u32 = ip1->src_address.as_u32;
- t->port = (pass1 == 0) ?
- clib_net_to_host_u16 (udp1->dst_port) : 0;
- t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* { */
+ /* vlib_buffer_t *b0, *b1; */
+ /* ip4_header_t *ip0, *ip1; */
+ /* ip4_fib_mtrie_t *mtrie0, *mtrie1; */
+ /* ip4_fib_mtrie_leaf_t leaf0, leaf1; */
+ /* ip_source_and_port_range_check_config_t *c0, *c1; */
+ /* ip_adjacency_t *adj0 = 0, *adj1 = 0; */
+ /* u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; */
+ /* u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; */
+ /* udp_header_t *udp0, *udp1; */
+
+ /* /\* Prefetch next iteration. *\/ */
+ /* { */
+ /* vlib_buffer_t *p2, *p3; */
+
+ /* p2 = vlib_get_buffer (vm, from[2]); */
+ /* p3 = vlib_get_buffer (vm, from[3]); */
+
+ /* vlib_prefetch_buffer_header (p2, LOAD); */
+ /* vlib_prefetch_buffer_header (p3, LOAD); */
+
+ /* CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); */
+ /* CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); */
+ /* } */
+
+ /* bi0 = to_next[0] = from[0]; */
+ /* bi1 = to_next[1] = from[1]; */
+ /* from += 2; */
+ /* to_next += 2; */
+ /* n_left_from -= 2; */
+ /* n_left_to_next -= 2; */
+
+ /* b0 = vlib_get_buffer (vm, bi0); */
+ /* b1 = vlib_get_buffer (vm, bi1); */
+
+ /* fib_index0 = */
+ /* vec_elt (im->fib_index_by_sw_if_index, */
+ /* vnet_buffer (b0)->sw_if_index[VLIB_RX]); */
+ /* fib_index1 = */
+ /* vec_elt (im->fib_index_by_sw_if_index, */
+ /* vnet_buffer (b1)->sw_if_index[VLIB_RX]); */
+
+ /* ip0 = vlib_buffer_get_current (b0); */
+ /* ip1 = vlib_buffer_get_current (b1); */
+
+ /* if (is_tx) */
+ /* { */
+ /* c0 = vnet_get_config_data (&tx_cm->config_main, */
+ /* &b0->current_config_index, */
+ /* &next0, sizeof (c0[0])); */
+ /* c1 = vnet_get_config_data (&tx_cm->config_main, */
+ /* &b1->current_config_index, */
+ /* &next1, sizeof (c1[0])); */
+ /* } */
+ /* else */
+ /* { */
+ /* c0 = vnet_get_config_data (&rx_cm->config_main, */
+ /* &b0->current_config_index, */
+ /* &next0, sizeof (c0[0])); */
+ /* c1 = vnet_get_config_data (&rx_cm->config_main, */
+ /* &b1->current_config_index, */
+ /* &next1, sizeof (c1[0])); */
+ /* } */
+
+ /* /\* we can't use the default VRF here... *\/ */
+ /* for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) */
+ /* { */
+ /* ASSERT (c0->fib_index[i] && c1->fib_index[i]); */
+ /* } */
+
+
+ /* if (is_tx) */
+ /* { */
+ /* if (ip0->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */
+ /* if (ip0->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */
+ /* } */
+ /* else */
+ /* { */
+ /* if (ip0->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */
+ /* if (ip0->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (fib_index0 != ~0)) */
+ /* { */
+
+ /* mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; */
+
+ /* leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 0); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 1); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 2); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 3); */
+
+ /* adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); */
+
+ /* ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, */
+ /* &ip0->src_address, */
+ /* 0 */
+ /* /\* use dflt rt *\/ */
+ /* )); */
+ /* adj0 = ip_get_adjacency (lm, adj_index0); */
+ /* } */
+
+ /* if (is_tx) */
+ /* { */
+ /* if (ip1->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */
+ /* if (ip1->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */
+ /* } */
+ /* else */
+ /* { */
+ /* if (ip1->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */
+ /* if (ip1->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (fib_index1 != ~0)) */
+ /* { */
+
+ /* mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; */
+
+ /* leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 0); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 1); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 2); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 3); */
+
+ /* adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); */
+
+ /* ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, */
+ /* &ip1->src_address, */
+ /* 0)); */
+ /* adj1 = ip_get_adjacency (lm, adj_index1); */
+ /* } */
+
+ /* pass0 = 0; */
+ /* pass0 |= adj0 == 0; */
+ /* pass0 |= ip4_address_is_multicast (&ip0->src_address); */
+ /* pass0 |= */
+ /* ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */
+ /* pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) */
+ /* && (ip0->protocol != IP_PROTOCOL_TCP); */
+
+ /* pass1 = 0; */
+ /* pass1 |= adj1 == 0; */
+ /* pass1 |= ip4_address_is_multicast (&ip1->src_address); */
+ /* pass1 |= */
+ /* ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */
+ /* pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) */
+ /* && (ip1->protocol != IP_PROTOCOL_TCP); */
+
+ /* save_next0 = next0; */
+ /* udp0 = ip4_next_header (ip0); */
+ /* save_next1 = next1; */
+ /* udp1 = ip4_next_header (ip1); */
+
+ /* if (PREDICT_TRUE (pass0 == 0)) */
+ /* { */
+ /* good_packets++; */
+ /* next0 = check_adj_port_range_x1 */
+ /* (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); */
+ /* good_packets -= (save_next0 != next0); */
+ /* b0->error = error_node->errors */
+ /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (pass1 == 0)) */
+ /* { */
+ /* good_packets++; */
+ /* next1 = check_adj_port_range_x1 */
+ /* (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); */
+ /* good_packets -= (save_next1 != next1); */
+ /* b1->error = error_node->errors */
+ /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */
+ /* } */
+
+ /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */
+ /* && (b0->flags & VLIB_BUFFER_IS_TRACED))) */
+ /* { */
+ /* ip4_source_and_port_range_check_trace_t *t = */
+ /* vlib_add_trace (vm, node, b0, sizeof (*t)); */
+ /* t->pass = next0 == save_next0; */
+ /* t->bypass = pass0; */
+ /* t->fib_index = fib_index0; */
+ /* t->src_addr.as_u32 = ip0->src_address.as_u32; */
+ /* t->port = (pass0 == 0) ? */
+ /* clib_net_to_host_u16 (udp0->dst_port) : 0; */
+ /* t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; */
+ /* } */
+
+ /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */
+ /* && (b1->flags & VLIB_BUFFER_IS_TRACED))) */
+ /* { */
+ /* ip4_source_and_port_range_check_trace_t *t = */
+ /* vlib_add_trace (vm, node, b1, sizeof (*t)); */
+ /* t->pass = next1 == save_next1; */
+ /* t->bypass = pass1; */
+ /* t->fib_index = fib_index1; */
+ /* t->src_addr.as_u32 = ip1->src_address.as_u32; */
+ /* t->port = (pass1 == 0) ? */
+ /* clib_net_to_host_u16 (udp1->dst_port) : 0; */
+ /* t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; */
+ /* } */
+
+ /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */
+ /* to_next, n_left_to_next, */
+ /* bi0, bi1, next0, next1); */
+ /* } */
while (n_left_from > 0 && n_left_to_next > 0)
{
vlib_buffer_t *b0;
ip4_header_t *ip0;
- ip4_fib_mtrie_t *mtrie0;
- ip4_fib_mtrie_leaf_t leaf0;
ip_source_and_port_range_check_config_t *c0;
- ip_adjacency_t *adj0 = 0;
- u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0;
+ u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0;
udp_header_t *udp0;
+ const protocol_port_range_dpo_t *ppr_dpo0 = NULL;
+ const dpo_id_t *dpo;
bi0 = from[0];
to_next[0] = bi0;
@@ -476,35 +490,25 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm,
if (fib_index0 != ~0)
{
+ lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
+ &ip0->src_address);
- mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
-
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 0);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 1);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 3);
+ dpo =
+ load_balance_get_bucket_i (load_balance_get (lb_index0), 0);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
-
- ASSERT (adj_index0 == ip4_fib_lookup_with_table
- (im, fib_index0,
- &ip0->src_address, 0 /* use default route */ ));
- adj0 = ip_get_adjacency (lm, adj_index0);
+ if (ppr_dpo_type == dpo->dpoi_type)
+ {
+ ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index);
+ }
+ /*
+ * else the lookup hit an entry that was not inserted
+ * by this range checker, which is the default route
+ */
}
/*
* $$$ which (src,dst) categories should we always pass?
*/
pass0 = 0;
- pass0 |= adj0 == 0;
pass0 |= ip4_address_is_multicast (&ip0->src_address);
pass0 |=
ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
@@ -518,7 +522,7 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm,
{
good_packets++;
next0 = check_adj_port_range_x1
- (adj0, clib_net_to_host_u16 (udp0->dst_port), next0);
+ (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0);
good_packets -= (save_next0 != next0);
b0->error = error_node->errors
[IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL];
@@ -558,6 +562,7 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm,
IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
good_packets);
return frame->n_vectors;
+ return 0;
}
static uword
@@ -786,209 +791,299 @@ VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command,
/* *INDENT-ON* */
+/*
+ * Format callback for a protocol-port-range DPO.
+ *
+ * Consumes a DPO index and an (unused) indent from args, then appends
+ * "allow " followed by a comma separated list of every non-zero port
+ * range found in the DPO's used blocks. The stored high value is printed
+ * minus one, so it appears to be held as an exclusive bound.
+ * Returns the (possibly reallocated) format vector s.
+ */
static u8 *
-format_source_and_port_rc_adjacency (u8 * s, va_list * args)
+format_ppr_dpo (u8 * s, va_list * args)
{
- CLIB_UNUSED (vnet_main_t * vnm) = va_arg (*args, vnet_main_t *);
- ip_lookup_main_t *lm = va_arg (*args, ip_lookup_main_t *);
- u32 adj_index = va_arg (*args, u32);
- ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
- source_range_check_main_t *srm = &source_range_check_main;
- u8 *rwh = (u8 *) (&adj->rewrite_header);
- protocol_port_range_t *range;
+ /* args points at the caller's va_list, so dereference it for va_arg */
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ protocol_port_range_dpo_t *ppr_dpo;
int i, j;
int printed = 0;
- range = (protocol_port_range_t *) rwh;
+ ppr_dpo = protocol_port_range_dpo_get (index);
s = format (s, "allow ");
- for (i = 0; i < srm->ranges_per_adjacency; i++)
+ for (i = 0; i < ppr_dpo->n_used_blocks; i++)
{
for (j = 0; j < 8; j++)
{
- if (range->low.as_u16[j])
+ if (ppr_dpo->blocks[i].low.as_u16[j])
{
if (printed)
s = format (s, ", ");
- if (range->hi.as_u16[j] > (range->low.as_u16[j] + 1))
- s = format (s, "%d-%d", (u32) range->low.as_u16[j],
- (u32) range->hi.as_u16[j] - 1);
+ if (ppr_dpo->blocks[i].hi.as_u16[j] >
+ (ppr_dpo->blocks[i].low.as_u16[j] + 1))
+ s =
+ format (s, "%d-%d", (u32) ppr_dpo->blocks[i].low.as_u16[j],
+ (u32) ppr_dpo->blocks[i].hi.as_u16[j] - 1);
else
- s = format (s, "%d", range->low.as_u16[j]);
+ s = format (s, "%d", ppr_dpo->blocks[i].low.as_u16[j]);
printed = 1;
}
}
- range++;
}
return s;
}
+/*
+ * Lock callback registered in ppr_vft. It is a no-op.
+ * NOTE(review): no reference count is taken here, so nothing visible in
+ * this change ever returns a range DPO to ppr_dpo_pool - confirm that is
+ * intended.
+ */
+static void
+ppr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/*
+ * Unlock callback registered in ppr_vft. It is a no-op, mirroring
+ * ppr_dpo_lock.
+ */
+static void
+ppr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+/* Virtual function table handed to dpo_register_new_type for this DPO */
+const static dpo_vft_t ppr_vft = {
+ .dv_lock = ppr_dpo_lock,
+ .dv_unlock = ppr_dpo_unlock,
+ .dv_format = format_ppr_dpo,
+};
+
+/* NULL terminated list of IPv4 graph node names associated with the DPO */
+const static char *const ppr_ip4_nodes[] = {
+ "ip4-source-and-port-range-check-rx",
+ NULL,
+};
+
+/* Per-protocol node lists; only the IPv4 slot is populated */
+const static char *const *const ppr_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = ppr_ip4_nodes,
+};
+
+/*
+ * Init function for the source and port range checker.
+ * Records the vlib and vnet main pointers in source_range_check_main and
+ * registers the protocol-port-range DPO type, storing the returned type
+ * in ppr_dpo_type. Always returns 0 (no error).
+ */
clib_error_t *
ip4_source_and_port_range_check_init (vlib_main_t * vm)
{
source_range_check_main_t *srm = &source_range_check_main;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
srm->vlib_main = vm;
srm->vnet_main = vnet_get_main ();
- srm->ranges_per_adjacency =
- VLIB_BUFFER_PRE_DATA_SIZE / (2 * sizeof (u16x8));
- srm->special_adjacency_format_function_index =
- vnet_register_special_adjacency_format_function (lm,
- format_source_and_port_rc_adjacency);
- ASSERT (srm->special_adjacency_format_function_index);
+ ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes);
return 0;
}
VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init);
-int
-add_port_range_adjacency (ip4_address_t * address,
- u32 length,
- u32 adj_index,
- u16 * low_ports, u16 * high_ports, u32 fib_index)
+/*
+ * Allocate a protocol-port-range DPO.
+ * Takes a CLIB_CACHE_LINE_BYTES aligned element from ppr_dpo_pool, zeroes
+ * it and marks all N_PORT_RANGES_PER_DPO ranges as free.
+ * Returns a pointer to the new pool element.
+ */
+protocol_port_range_dpo_t *
+protocol_port_range_dpo_alloc (void)
{
- ip_adjacency_t *adj;
- int i, j, k;
- source_range_check_main_t *srm = &source_range_check_main;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
- protocol_port_range_t *range;
- u8 *rwh;
+ protocol_port_range_dpo_t *ppr_dpo;
- adj = ip_get_adjacency (lm, adj_index);
- /* $$$$ fixme: add ports if address + mask match */
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_ICMP_ERROR)
- return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ pool_get_aligned (ppr_dpo_pool, ppr_dpo, CLIB_CACHE_LINE_BYTES);
+ memset (ppr_dpo, 0, sizeof (*ppr_dpo));
- ip_adjacency_t template_adj;
- ip4_add_del_route_args_t a;
+ ppr_dpo->n_free_ranges = N_PORT_RANGES_PER_DPO;
- memset (&template_adj, 0, sizeof (template_adj));
+ return (ppr_dpo);
+}
- template_adj.lookup_next_index = IP_LOOKUP_NEXT_ICMP_ERROR;
- template_adj.if_address_index = ~0;
- template_adj.special_adjacency_format_function_index =
- srm->special_adjacency_format_function_index;
- rwh = (u8 *) (&template_adj.rewrite_header);
+/*
+ * Add a set of port ranges to the range-check state of a prefix.
+ *
+ * Looks up the exact prefix in the given FIB. If there is no entry, or the
+ * entry has no FIB_SOURCE_SPECIAL DPO, a fresh range DPO is allocated;
+ * otherwise the ranges are added to the DPO found in bucket 0 of the
+ * entry's load-balance. Each new range is written to the next slot whose
+ * low port is zero, and the FIB entry is then added or updated.
+ *
+ * Returns 0 on success, or
+ * VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY when the DPO has fewer
+ * free slots than vec_len (low_ports).
+ *
+ * NOTE(review): presumably low_ports and high_ports are vectors of equal
+ * length - confirm against the callers.
+ */
+static int
+add_port_range_adjacency (u32 fib_index,
+ ip4_address_t * address,
+ u32 length, u16 * low_ports, u16 * high_ports)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ dpo_id_t dpop = DPO_NULL;
+ int i, j, k;
+ /* set when ppr_dpo was allocated here and is not yet known to the FIB */
+ int is_new_dpo = 0;
- range = (protocol_port_range_t *) rwh;
+ fib_node_index_t fei;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = length,
+ .fp_addr = {
+ .ip4 = *address,
+ },
+ };
+
+ /*
+ * check to see if we have already sourced this prefix
+ */
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * this is a first time add for this prefix.
+ */
+ ppr_dpo = protocol_port_range_dpo_alloc ();
+ is_new_dpo = 1;
+ }
+ else
+ {
+ /*
+ * the prefix is already there.
+ * check it was sourced by us, and if so get the range DPO from it.
+ */
+ dpo_id_t dpo = DPO_NULL;
+ const dpo_id_t *bucket;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * there is existing state. we'll want to add the new ranges to it
+ */
+ bucket =
+ load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ /*
+ * there is no PPR state associated with this prefix,
+ * so we'll need a new DPO
+ */
+ ppr_dpo = protocol_port_range_dpo_alloc ();
+ is_new_dpo = 1;
+ }
+ }
- if (vec_len (low_ports) > 8 * srm->ranges_per_adjacency)
+ if (vec_len (low_ports) > ppr_dpo->n_free_ranges)
+ {
+ /* do not leak a DPO that was allocated above and never installed */
+ if (is_new_dpo)
+ pool_put (ppr_dpo_pool, ppr_dpo);
return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY;
+ }
j = k = 0;
for (i = 0; i < vec_len (low_ports); i++)
{
- for (; j < srm->ranges_per_adjacency; j++)
+ for (; j < N_BLOCKS_PER_DPO; j++)
{
for (; k < 8; k++)
{
- if (range->low.as_u16[k] == 0)
+ if (ppr_dpo->blocks[j].low.as_u16[k] == 0)
{
- range->low.as_u16[k] = low_ports[i];
- range->hi.as_u16[k] = high_ports[i];
- k++;
- if (k == 7)
- {
- k = 0;
- j++;
- }
- goto doublebreak2;
+ ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i];
+ ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i];
+ /* keep the free-slot count in step with the capacity check */
+ ppr_dpo->n_free_ranges--;
+ goto doublebreak;
}
}
- k = 0;
- range++;
+ /* this block is full: restart at the first slot of the next one */
+ k = 0;
}
- j = 0;
- /* Too many ports specified... */
- return VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY;
-
- doublebreak2:;
+ doublebreak:;
}
+ /*
+ * j is the last block written. Never index past the final block, and
+ * never shrink the count when new ranges landed in an earlier block
+ * than ones already in use.
+ */
+ if (j >= N_BLOCKS_PER_DPO)
+ j = N_BLOCKS_PER_DPO - 1;
+ if (ppr_dpo->n_used_blocks < j + 1)
+ ppr_dpo->n_used_blocks = j + 1;
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX;
- a.table_index_or_table_id = fib_index;
- a.dst_address = address[0];
- a.dst_address_length = length;
- a.add_adj = &template_adj;
- a.n_add_adj = 1;
+ /*
+ * add or update the entry in the FIB
+ */
+ dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool));
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_NONE, &dpop);
+ }
+ else
+ {
+ fib_table_entry_special_dpo_update (fei,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_NONE, &dpop);
+ }
- ip4_add_del_route (im, &a);
return 0;
}
-int
-remove_port_range_adjacency (ip4_address_t * address,
- u32 length,
- u32 adj_index,
- u16 * low_ports, u16 * high_ports, u32 fib_index)
+/*
+ * Remove a set of port ranges from the range-check state of a prefix.
+ *
+ * Looks up the exact prefix in the given FIB and fetches the range DPO
+ * from bucket 0 of the load-balance sourced by FIB_SOURCE_SPECIAL. Every
+ * slot whose (low, high) pair matches an entry of low_ports/high_ports is
+ * zeroed, the number of free slots is recounted, and the FIB entry is
+ * removed once every slot is free.
+ *
+ * Returns 0 on success, or VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE when
+ * the prefix is absent or has no FIB_SOURCE_SPECIAL DPO.
+ */
+static int
+remove_port_range_adjacency (u32 fib_index,
+ ip4_address_t * address,
+ u32 length, u16 * low_ports, u16 * high_ports)
{
- ip_adjacency_t *adj;
+ protocol_port_range_dpo_t *ppr_dpo;
+ fib_node_index_t fei;
int i, j, k;
- source_range_check_main_t *srm = &source_range_check_main;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
- protocol_port_range_t *range;
- u8 *rwh;
- adj = ip_get_adjacency (lm, adj_index);
- if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR) /* _ICMP_ERROR is a dummy placeholder */
- return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = length,
+ .fp_addr = {
+ .ip4 = *address,
+ },
+ };
+
+ /*
+ * check to see if we have sourced this prefix
+ */
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
- rwh = (u8 *) (&adj->rewrite_header);
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * not one of ours
+ */
+ return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ }
+ else
+ {
+ /*
+ * the prefix is already there.
+ * check it was sourced by us
+ */
+ dpo_id_t dpo = DPO_NULL;
+ const dpo_id_t *bucket;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * there is existing state. we'll remove the requested ranges from it
+ */
+ bucket =
+ load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ /*
+ * not one of ours
+ */
+ return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ }
+ }
for (i = 0; i < vec_len (low_ports); i++)
{
- range = (protocol_port_range_t *) rwh;
- for (j = 0; j < srm->ranges_per_adjacency; j++)
+ for (j = 0; j < N_BLOCKS_PER_DPO; j++)
{
for (k = 0; k < 8; k++)
{
- if (low_ports[i] == range->low.as_u16[k] &&
- high_ports[i] == range->hi.as_u16[k])
+ if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] &&
+ high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k])
{
- range->low.as_u16[k] = range->hi.as_u16[k] = 0;
+ ppr_dpo->blocks[j].low.as_u16[k] =
+ ppr_dpo->blocks[j].hi.as_u16[k] = 0;
goto doublebreak;
}
}
- range++;
}
doublebreak:;
}
- range = (protocol_port_range_t *) rwh;
+ ppr_dpo->n_free_ranges = 0;
+
/* Have we deleted all ranges yet? */
- for (i = 0; i < srm->ranges_per_adjacency; i++)
+ for (i = 0; i < N_BLOCKS_PER_DPO; i++)
{
for (j = 0; j < 8; j++)
{
- if (range->low.as_u16[i] != 0)
- goto still_occupied;
+ /* i walks the blocks, j walks the 8 slots inside a block */
+ if (ppr_dpo->blocks[i].low.as_u16[j] == 0)
+ ppr_dpo->n_free_ranges++;
}
- range++;
}
- /* Yes, lose the adjacency... */
- {
- ip4_add_del_route_args_t a;
-
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL;
- a.table_index_or_table_id = fib_index;
- a.dst_address = address[0];
- a.dst_address_length = length;
- a.adj_index = adj_index;
- ip4_add_del_route (im, &a);
- }
-
-still_occupied:
- ;
+
+ if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges)
+ {
+ /* Yes, lose the adjacency... */
+ fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL);
+ }
+ else
+ {
+ /*
+ * compact the ranges down to a contiguous block
+ */
+ // FIXME. TODO.
+ }
+
return 0;
}
@@ -1010,35 +1105,19 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address,
u16 * low_ports,
u16 * high_ports, int is_add)
{
-
- ip4_main_t *im = &ip4_main;
- // ip_lookup_main_t * lm = &im->lookup_main;
- uword *p;
u32 fib_index;
- u32 adj_index;
-
- p = hash_get (im->fib_index_by_table_id, vrf_id);
- if (!p)
- {
- ip4_fib_t *f;
- f = find_ip4_fib_by_table_index_or_id (im, vrf_id, 0 /* flags */ );
- fib_index = f->index;
- }
- else
- fib_index = p[0];
- adj_index = ip4_fib_lookup_with_table
- (im, fib_index, address, 0 /* disable_default_route */ );
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id);
if (is_add == 0)
{
- remove_port_range_adjacency (address, length, adj_index, low_ports,
- high_ports, fib_index);
+ remove_port_range_adjacency (fib_index, address, length,
+ low_ports, high_ports);
}
else
{
- add_port_range_adjacency (address, length, adj_index, low_ports,
- high_ports, fib_index);
+ add_port_range_adjacency (fib_index, address, length,
+ low_ports, high_ports);
}
return 0;
@@ -1159,24 +1238,20 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- source_range_check_main_t *srm = &source_range_check_main;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
- protocol_port_range_t *range;
+ protocol_port_range_dpo_t *ppr_dpo;
u32 fib_index;
- ip4_address_t addr;
u8 addr_set = 0;
u32 vrf_id = ~0;
int rv, i, j;
- u32 adj_index;
- ip_adjacency_t *adj;
u32 port = 0;
- u8 *rwh;
- uword *p;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ };
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "%U", unformat_ip4_address, &addr))
+ if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4))
addr_set = 1;
else if (unformat (input, "vrf %d", &vrf_id))
;
@@ -1192,51 +1267,58 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
if (vrf_id == ~0)
return clib_error_return (0, "VRF ID required, not specified");
- p = hash_get (im->fib_index_by_table_id, vrf_id);
- if (p == 0)
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ if (~0 == fib_index)
return clib_error_return (0, "VRF %d not found", vrf_id);
- fib_index = p[0];
- adj_index = ip4_fib_lookup_with_table
- (im, fib_index, &addr, 0 /* disable_default_route */ );
+ /*
+ * find the longest prefix match on the address requested,
+ * check it was sourced by us
+ */
+ dpo_id_t dpo = DPO_NULL;
+ const dpo_id_t *bucket;
- adj = ip_get_adjacency (lm, adj_index);
-
- if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR)
+ if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx),
+ FIB_SOURCE_SPECIAL, &dpo))
{
- vlib_cli_output (vm, "%U: src address drop", format_ip4_address, &addr);
+ /*
+ * not one of ours
+ */
+ vlib_cli_output (vm, "%U: src address drop", format_ip4_address,
+ &pfx.fp_addr.ip4);
return 0;
}
+ bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+
if (port)
{
- rv = check_adj_port_range_x1 (adj, (u16) port, 1234);
+ rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234);
if (rv == 1234)
vlib_cli_output (vm, "%U port %d PASS", format_ip4_address,
- &addr, port);
+ &pfx.fp_addr.ip4, port);
else
vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address,
- &addr, port);
+ &pfx.fp_addr.ip4, port);
return 0;
}
else
{
u8 *s;
- rwh = (u8 *) (&adj->rewrite_header);
-
- s = format (0, "%U: ", format_ip4_address, &addr);
- range = (protocol_port_range_t *) rwh;
+ s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4);
- for (i = 0; i < srm->ranges_per_adjacency; i++)
+ for (i = 0; i < N_BLOCKS_PER_DPO; i++)
{
for (j = 0; j < 8; j++)
{
- if (range->low.as_u16[j])
- s = format (s, "%d - %d ", (u32) range->low.as_u16[j],
- (u32) range->hi.as_u16[j]);
+ if (ppr_dpo->blocks[i].low.as_u16[j])
+ s = format (s, "%d - %d ",
+ (u32) ppr_dpo->blocks[i].low.as_u16[j],
+ (u32) ppr_dpo->blocks[i].hi.as_u16[j]);
}
- range++;
}
vlib_cli_output (vm, "%s", s);
vec_free (s);
diff --git a/vnet/vnet/ip/ip4_source_check.c b/vnet/vnet/ip/ip4_source_check.c
index 1f8e7214ff1..2323ac291aa 100644
--- a/vnet/vnet/ip/ip4_source_check.c
+++ b/vnet/vnet/ip/ip4_source_check.c
@@ -38,6 +38,8 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
typedef struct {
u8 packet_data[64];
@@ -110,9 +112,12 @@ ip4_source_check_inline (vlib_main_t * vm,
ip4_fib_mtrie_t * mtrie0, * mtrie1;
ip4_fib_mtrie_leaf_t leaf0, leaf1;
ip4_source_check_config_t * c0, * c1;
- ip_adjacency_t * adj0, * adj1;
- u32 pi0, next0, pass0, adj_index0;
- u32 pi1, next1, pass1, adj_index1;
+ const load_balance_t * lb0, * lb1;
+ u32 pi0, next0, pass0, lb_index0;
+ u32 pi1, next1, pass1, lb_index1;
+ const ip_adjacency_t *adj0, *adj1;
+ const dpo_id_t *dpo0, *dpo1;
+ u32 ii0, ii1;
/* Prefetch next iteration. */
{
@@ -150,8 +155,8 @@ ip4_source_check_inline (vlib_main_t * vm,
&next1,
sizeof (c1[0]));
- mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie;
- mtrie1 = &vec_elt_at_index (im->fibs, c1->fib_index)->mtrie;
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+ mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
@@ -167,29 +172,70 @@ ip4_source_check_inline (vlib_main_t * vm,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index,
- &ip0->src_address,
- c0->no_default_route));
- ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, c1->fib_index,
- &ip1->src_address,
- c1->no_default_route));
-
- adj0 = ip_get_adjacency (lm, adj_index0);
- adj1 = ip_get_adjacency (lm, adj_index1);
+ lb0 = load_balance_get(lb_index0);
+ lb1 = load_balance_get(lb_index1);
/* Pass multicast. */
pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF);
pass1 = ip4_address_is_multicast (&ip1->src_address) || ip1->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF);
- pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE
- && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY
- || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index));
- pass1 |= (adj1->lookup_next_index == IP_LOOKUP_NEXT_REWRITE
- && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY
- || vnet_buffer (p1)->sw_if_index[VLIB_RX] == adj1->rewrite_header.sw_if_index));
+ if (PREDICT_TRUE(1 == lb0->lb_n_buckets))
+ {
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+ if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY))
+ {
+ pass0 |= (source_check_type ==
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+ adj0 = adj_get(dpo0->dpoi_index);
+ pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] ==
+ adj0->rewrite_header.sw_if_index);
+ }
+ }
+ else
+ {
+ for (ii0 = 0; ii0 < lb0->lb_n_buckets && !pass0; ii0++)
+ {
+ dpo0 = load_balance_get_bucket_i(lb0, ii0);
+ if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY))
+ {
+ pass0 |= (source_check_type ==
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+ adj0 = adj_get(dpo0->dpoi_index);
+ pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] ==
+ adj0->rewrite_header.sw_if_index);
+ }
+ }
+ }
+ if (PREDICT_TRUE(1 == lb1->lb_n_buckets))
+ {
+ dpo1 = load_balance_get_bucket_i(lb1, 0);
+ if (PREDICT_TRUE(dpo1->dpoi_type == DPO_ADJACENCY))
+ {
+ pass1 |= (source_check_type ==
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+ adj1 = adj_get(dpo1->dpoi_index);
+ pass1 |= (vnet_buffer (p1)->sw_if_index[VLIB_RX] ==
+ adj1->rewrite_header.sw_if_index);
+ }
+ }
+ else
+ {
+ for (ii1 = 0; ii1 < lb1->lb_n_buckets && !pass1; ii1++)
+ {
+ dpo1 = load_balance_get_bucket_i(lb1, ii1);
+ if (PREDICT_TRUE(dpo1->dpoi_type == DPO_ADJACENCY))
+ {
+ pass1 |= (source_check_type ==
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+ adj1 = adj_get(dpo1->dpoi_index);
+ pass1 |= (vnet_buffer (p1)->sw_if_index[VLIB_RX] ==
+ adj1->rewrite_header.sw_if_index);
+ }
+ }
+ }
next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP);
@@ -210,7 +256,10 @@ ip4_source_check_inline (vlib_main_t * vm,
ip4_fib_mtrie_leaf_t leaf0;
ip4_source_check_config_t * c0;
ip_adjacency_t * adj0;
- u32 pi0, next0, pass0, adj_index0;
+ u32 pi0, next0, pass0, lb_index0;
+ const load_balance_t * lb0;
+ const dpo_id_t *dpo0;
+ u32 ii0;
pi0 = from[0];
to_next[0] = pi0;
@@ -227,7 +276,7 @@ ip4_source_check_inline (vlib_main_t * vm,
&next0,
sizeof (c0[0]));
- mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie;
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
@@ -239,19 +288,40 @@ ip4_source_check_inline (vlib_main_t * vm,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
- adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index,
- &ip0->src_address,
- c0->no_default_route));
- adj0 = ip_get_adjacency (lm, adj_index0);
+ lb0 = load_balance_get(lb_index0);
/* Pass multicast. */
pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF);
- pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE
- && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY
- || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index));
+ if (PREDICT_TRUE(1 == lb0->lb_n_buckets))
+ {
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+ if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY))
+ {
+ pass0 |= (source_check_type ==
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+ adj0 = adj_get(dpo0->dpoi_index);
+ pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] ==
+ adj0->rewrite_header.sw_if_index);
+ }
+ }
+ else
+ {
+ for (ii0 = 0; ii0 < lb0->lb_n_buckets && !pass0; ii0++)
+ {
+ dpo0 = load_balance_get_bucket_i(lb0, ii0);
+ if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY))
+ {
+ pass0 |= (source_check_type ==
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+ adj0 = adj_get(dpo0->dpoi_index);
+ pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] ==
+ adj0->rewrite_header.sw_if_index);
+ }
+ }
+ }
next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
diff --git a/vnet/vnet/ip/ip4_test.c b/vnet/vnet/ip/ip4_test.c
index ff088e78f3e..b76a719fe13 100644
--- a/vnet/vnet/ip/ip4_test.c
+++ b/vnet/vnet/ip/ip4_test.c
@@ -142,7 +142,7 @@ thrash (vlib_main_t * vm,
}
/* Find or create FIB table 11 */
- fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
+ fib = ip4_fib_find_or_create_fib_by_table_id (table_id);
for (i = tm->test_interfaces_created; i < ninterfaces; i++)
{
@@ -164,6 +164,7 @@ thrash (vlib_main_t * vm,
hw = vnet_get_hw_interface (vnm, hw_if_index);
vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index);
im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index;
+ ip4_sw_interface_enable_disable(sw_if_index, 1);
}
tm->test_interfaces_created = ninterfaces;
diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h
index f5f3de84676..36be64948c9 100644
--- a/vnet/vnet/ip/ip6.h
+++ b/vnet/vnet/ip/ip6.h
@@ -71,27 +71,11 @@ typedef struct {
u32 index;
/* flow hash configuration */
- u32 flow_hash_config;
+ flow_hash_config_t flow_hash_config;
} ip6_fib_t;
struct ip6_main_t;
-typedef void (ip6_add_del_route_function_t)
- (struct ip6_main_t * im,
- uword opaque,
- ip6_fib_t * fib,
- u32 flags,
- ip6_address_t * address,
- u32 address_length,
- void * old_result,
- void * new_result);
-
-typedef struct {
- ip6_add_del_route_function_t * function;
- uword required_flags;
- uword function_opaque;
-} ip6_add_del_route_callback_t;
-
typedef void (ip6_add_del_interface_address_function_t)
(struct ip6_main_t * im,
uword opaque,
@@ -106,31 +90,63 @@ typedef struct {
uword function_opaque;
} ip6_add_del_interface_address_callback_t;
-typedef struct ip6_main_t {
- BVT(clib_bihash) ip6_lookup_table;
+/**
+ * Enumeration of the FIB table instance types
+ */
+typedef enum ip6_fib_table_instance_type_t_ {
+ /**
+ * This table stores the routes that are used to forward traffic.
+ * The key is the prefix, the result the adjacency to forward on.
+ */
+ IP6_FIB_TABLE_FWDING,
+ /**
+ * The table that stores ALL routes learned by the DP.
+ * Some of these routes may not be ready to install in forwarding
+ * at a given time.
+ * The key in this table is the prefix, the result is the fib_entry_t
+ */
+ IP6_FIB_TABLE_NON_FWDING,
+} ip6_fib_table_instance_type_t;
+
+#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1)
- ip_lookup_main_t lookup_main;
+/**
+ * A representation of a single IP6 table
+ */
+typedef struct ip6_fib_table_instance_t_ {
+ /* The hash table */
+ BVT(clib_bihash) ip6_hash;
/* bitmap / refcounts / vector of mask widths to search */
uword * non_empty_dst_address_length_bitmap;
u8 * prefix_lengths_in_search_order;
i32 dst_address_length_refcounts[129];
+} ip6_fib_table_instance_t;
+
+typedef struct ip6_main_t {
+ /**
+ * The two FIB tables; fwding and non-fwding
+ */
+ ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES];
+
+ ip_lookup_main_t lookup_main;
- /* Vector of FIBs. */
- ip6_fib_t * fibs;
+ /* Pool of FIBs. */
+ struct fib_table_t_ * fibs;
+ /* Network byte order subnet masks for each prefix length */
ip6_address_t fib_masks[129];
/* Table index indexed by software interface. */
u32 * fib_index_by_sw_if_index;
+ /* IP6 enabled count by software interface */
+ u8 * ip_enabled_by_sw_if_index;
+
/* Hash table mapping table id to fib index.
ID space is not necessarily dense; index space is dense. */
uword * fib_index_by_table_id;
- /* Vector of functions to call when routes are added/deleted. */
- ip6_add_del_route_callback_t * add_del_route_callbacks;
-
/* Hash table mapping interface rewrite adjacency index by sw if index. */
uword * interface_route_adj_index_by_sw_if_index;
@@ -156,8 +172,10 @@ typedef struct ip6_main_t {
u32 ip6_unicast_rx_feature_l2tp_decap;
u32 ip6_unicast_rx_feature_vpath;
u32 ip6_unicast_rx_feature_lookup;
+ u32 ip6_unicast_rx_feature_drop;
/* Built-in multicast feature path indices */
+ u32 ip6_multicast_rx_feature_drop;
u32 ip6_multicast_rx_feature_vpath;
u32 ip6_multicast_rx_feature_lookup;
@@ -226,6 +244,8 @@ extern vlib_node_registration_t ip6_input_node;
extern vlib_node_registration_t ip6_rewrite_node;
extern vlib_node_registration_t ip6_rewrite_local_node;
extern vlib_node_registration_t ip6_discover_neighbor_node;
+extern vlib_node_registration_t ip6_glean_node;
+extern vlib_node_registration_t ip6_midchain_node;
extern vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node;
@@ -242,40 +262,10 @@ typedef union {
} up_down_event;
} ip6_icmp_neighbor_discovery_event_data_t;
-u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst);
-u32 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index,
- ip6_address_t * dst);
-
-/**
- * \brief Get or create an IPv6 fib.
- *
- * Get or create an IPv6 fib with the provided fib ID or index.
- * The fib ID is a possibly-sparse user-defined value while
- * the fib index defines the position of the fib in the fib vector.
- *
- * \param im
- * ip6_main pointer.
- * \param table_index_or_id
- * The table index if \c IP6_ROUTE_FLAG_FIB_INDEX bit is set in \p flags.
- * Otherwise, when set to \c ~0, an arbitrary and unused fib ID is picked
- * and can be retrieved with \c ret->table_id.
- * Otherwise, it is the fib ID to be used to retrieve or create the desired fib.
- * \param flags
- * Indicates whether \p table_index_or_id is the fib index or ID.
- * When the bit \c IP6_ROUTE_FLAG_FIB_INDEX is set, \p table_index_or_id
- * is considered as the fib index, and the fib ID otherwise.
- * \return A pointer to the retrieved or created fib.
- *
- * \remark When getting a fib with the fib index, the fib MUST already exist.
- */
-ip6_fib_t * find_ip6_fib_by_table_index_or_id (ip6_main_t * im,
- u32 table_index_or_id,
- u32 flags);
-
always_inline uword
-ip6_destination_matches_route (ip6_main_t * im,
- ip6_address_t * key,
- ip6_address_t * dest,
+ip6_destination_matches_route (const ip6_main_t * im,
+ const ip6_address_t * key,
+ const ip6_address_t * dest,
uword dest_length)
{
int i;
@@ -313,25 +303,26 @@ ip6_unaligned_destination_matches_route (ip6_main_t * im,
}
always_inline int
-ip6_src_address_for_packet (ip6_main_t * im, vlib_buffer_t * p, ip6_address_t * src, u32 sw_if_index)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index);
- if (ia == NULL)
- return -1;
- ip6_address_t * a = ip_interface_address_get_address (lm, ia);
- *src = a[0];
- return 0;
-}
-
-always_inline u32
-ip6_src_lookup_for_packet (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
+ip6_src_address_for_packet (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ ip6_address_t * src)
{
- if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0)
- vnet_buffer (b)->ip.adj_index[VLIB_RX]
- = ip6_fib_lookup (im, vnet_buffer (b)->sw_if_index[VLIB_RX],
- &i->src_address);
- return vnet_buffer (b)->ip.adj_index[VLIB_RX];
+ u32 if_add_index =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE(if_add_index != ~0)) {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index(lm->if_address_pool, if_add_index);
+ ip6_address_t *if_ip =
+ ip_interface_address_get_address(lm, if_add);
+ *src = *if_ip;
+ return (0);
+ }
+ else
+ {
+ src->as_u64[0] = 0;
+ src->as_u64[1] = 0;
+ }
+ return (!0);
}
/* Find interface address which matches destination. */
@@ -362,95 +353,12 @@ clib_error_t *
ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
ip6_address_t * address, u32 address_length,
u32 is_del);
+void
+ip6_sw_interface_enable_disable (u32 sw_if_index,
+ u32 is_enable);
int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2);
-/* Add/del a route to the FIB. */
-
-#define IP6_ROUTE_FLAG_ADD (0 << 0)
-#define IP6_ROUTE_FLAG_DEL (1 << 0)
-#define IP6_ROUTE_FLAG_TABLE_ID (0 << 1)
-#define IP6_ROUTE_FLAG_FIB_INDEX (1 << 1)
-#define IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2)
-#define IP6_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3)
-#define IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4)
-/* Dynamic route created via neighbor discovery. */
-#define IP6_ROUTE_FLAG_NEIGHBOR (1 << 5)
-
-typedef struct {
- /* IP6_ROUTE_FLAG_* */
- u32 flags;
-
- /* Either index of fib or table_id to hash and get fib.
- IP6_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */
- u32 table_index_or_table_id;
-
- /* Destination address (prefix) and length. */
- ip6_address_t dst_address;
- u32 dst_address_length;
-
- /* Adjacency to use for this destination. */
- u32 adj_index;
-
- /* If specified adjacencies to add and then
- use for this destination. add_adj/n_add_adj
- are override adj_index if specified. */
- ip_adjacency_t * add_adj;
- u32 n_add_adj;
-} ip6_add_del_route_args_t;
-
-void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * args);
-
-void ip6_add_del_route_next_hop (ip6_main_t * im,
- u32 flags,
- ip6_address_t * dst_address,
- u32 dst_address_length,
- ip6_address_t * next_hop,
- u32 next_hop_sw_if_index,
- u32 next_hop_weight, u32 adj_index,
- u32 explicit_fib_index);
-
-u32
-ip6_route_get_next_hop_adj (ip6_main_t * im,
- u32 fib_index,
- ip6_address_t *next_hop,
- u32 next_hop_sw_if_index,
- u32 explicit_fib_index);
-
-u32
-ip6_get_route (ip6_main_t * im,
- u32 fib_index_or_table_id,
- u32 flags,
- ip6_address_t * address,
- u32 address_length);
-
-void
-ip6_foreach_matching_route (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip6_address_t * address,
- u32 address_length,
- ip6_address_t ** results,
- u8 ** result_length);
-
-void ip6_delete_matching_routes (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip6_address_t * address,
- u32 address_length);
-
-void ip6_maybe_remap_adjacencies (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags);
-
-void ip6_adjacency_set_interface_route (vnet_main_t * vnm,
- ip_adjacency_t * adj,
- u32 sw_if_index,
- u32 if_address_index);
-
-u32
-vnet_ip6_neighbor_glean_add(u32 fib_index, void * next_hop_arg);
-
clib_error_t *
ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index);
@@ -481,8 +389,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
ip6_address_t * a,
u8 * link_layer_address,
uword n_bytes_link_layer_address);
-void
-vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index);
void
ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
@@ -492,7 +398,8 @@ void
ip6_ethernet_mac_address_from_link_local_address (u8 *mac,
ip6_address_t *ip);
-int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config);
+int vnet_set_ip6_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
int
ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index,
@@ -560,7 +467,8 @@ extern vlib_node_registration_t ip6_lookup_node;
/* Compute flow hash. We'll use it to select which Sponge to use for this
flow. And other things. */
always_inline u32
-ip6_compute_flow_hash (ip6_header_t * ip, u32 flow_hash_config)
+ip6_compute_flow_hash (const ip6_header_t * ip,
+ flow_hash_config_t flow_hash_config)
{
tcp_header_t * tcp = (void *) (ip + 1);
u64 a, b, c;
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c
index c977960285d..f7514dc3cbf 100644
--- a/vnet/vnet/ip/ip6_forward.c
+++ b/vnet/vnet/ip/ip6_forward.c
@@ -42,668 +42,13 @@
#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
#include <vppinfra/cache.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/classify_dpo.h>
#include <vppinfra/bihash_template.c>
-static void compute_prefix_lengths_in_search_order (ip6_main_t * im)
-{
- int i;
- vec_reset_length (im->prefix_lengths_in_search_order);
- /* Note: bitmap reversed so this is in fact a longest prefix match */
- clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap,
- ({
- int dst_address_length = 128 - i;
- vec_add1 (im->prefix_lengths_in_search_order, dst_address_length);
- }));
-}
-
-u32
-ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- int i, len;
- int rv;
- BVT(clib_bihash_kv) kv, value;
- u64 fib;
-
- len = vec_len (im->prefix_lengths_in_search_order);
-
- kv.key[0] = dst->as_u64[0];
- kv.key[1] = dst->as_u64[1];
- fib = ((u64)((fib_index))<<32);
-
- for (i = 0; i < len; i++)
- {
- int dst_address_length = im->prefix_lengths_in_search_order[i];
- ip6_address_t * mask = &im->fib_masks[dst_address_length];
-
- ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
- //As lengths are decreasing, masks are increasingly specific.
- kv.key[0] &= mask->as_u64[0];
- kv.key[1] &= mask->as_u64[1];
- kv.key[2] = fib | dst_address_length;
-
- rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
- if (rv == 0)
- return value.value;
- }
-
- return lm->miss_adj_index;
-}
-
-u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst)
-{
- u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- return ip6_fib_lookup_with_table (im, fib_index, dst);
-}
-
-void
-vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- ip6_add_del_route_args_t a;
- ip_adjacency_t * adj;
-
- memset(&a, 0x0, sizeof(ip6_add_del_route_args_t));
-
- a.table_index_or_table_id = fib_index;
- a.flags = (IP6_ROUTE_FLAG_ADD
- | IP6_ROUTE_FLAG_FIB_INDEX
- | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
- | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
-
- /* Add ff02::1:ff00:0/104 via local route for all tables.
- This is required for neighbor discovery to work. */
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a.adj_index);
- adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
- adj->if_address_index = ~0;
- adj->rewrite_header.data_bytes = 0;
-
- ip6_set_solicited_node_multicast_address (&a.dst_address, 0);
-
- a.dst_address_length = 104;
- ip6_add_del_route (im, &a);
-
- /* Add all-routers multicast address via local route for all tables */
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a.adj_index);
- adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
- adj->if_address_index = ~0;
- adj->rewrite_header.data_bytes = 0;
-
- ip6_set_reserved_multicast_address (&a.dst_address,
- IP6_MULTICAST_SCOPE_link_local,
- IP6_MULTICAST_GROUP_ID_all_routers);
-
- a.dst_address_length = 128;
- ip6_add_del_route (im, &a);
-
- /* Add all-nodes multicast address via local route for all tables */
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a.adj_index);
- adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
- adj->if_address_index = ~0;
- adj->rewrite_header.data_bytes = 0;
-
- ip6_set_reserved_multicast_address (&a.dst_address,
- IP6_MULTICAST_SCOPE_link_local,
- IP6_MULTICAST_GROUP_ID_all_hosts);
-
- a.dst_address_length = 128;
- ip6_add_del_route (im, &a);
-
- /* Add all-mldv2 multicast address via local route for all tables */
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a.adj_index);
- adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
- adj->if_address_index = ~0;
- adj->rewrite_header.data_bytes = 0;
-
- ip6_set_reserved_multicast_address (&a.dst_address,
- IP6_MULTICAST_SCOPE_link_local,
- IP6_MULTICAST_GROUP_ID_mldv2_routers);
-
- a.dst_address_length = 128;
- ip6_add_del_route (im, &a);
-}
-
-static ip6_fib_t *
-create_fib_with_table_id (ip6_main_t * im, u32 table_id)
-{
- ip6_fib_t * fib;
- hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
- vec_add2 (im->fibs, fib, 1);
- fib->table_id = table_id;
- fib->index = fib - im->fibs;
- fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
- vnet_ip6_fib_init (im, fib->index);
- return fib;
-}
-
-ip6_fib_t *
-find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags)
-{
- uword * p, fib_index;
-
- fib_index = table_index_or_id;
- if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX))
- {
- if (table_index_or_id == ~0) {
- table_index_or_id = 0;
- while (hash_get (im->fib_index_by_table_id, table_index_or_id)) {
- table_index_or_id++;
- }
- return create_fib_with_table_id (im, table_index_or_id);
- }
-
- p = hash_get (im->fib_index_by_table_id, table_index_or_id);
- if (! p)
- return create_fib_with_table_id (im, table_index_or_id);
- fib_index = p[0];
- }
- return vec_elt_at_index (im->fibs, fib_index);
-}
-
-void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- ip6_fib_t * fib;
- ip6_address_t dst_address;
- u32 dst_address_length, adj_index;
- uword is_del;
- u32 old_adj_index = ~0;
- BVT(clib_bihash_kv) kv, value;
-
- vlib_smp_unsafe_warning();
-
- is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0;
-
- /* Either create new adjacency or use given one depending on arguments. */
- if (a->n_add_adj > 0)
- {
- ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
- ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
- }
- else
- adj_index = a->adj_index;
-
- dst_address = a->dst_address;
- dst_address_length = a->dst_address_length;
- fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id,
- a->flags);
-
- ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
- ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]);
-
- /* refcount accounting */
- if (is_del)
- {
- ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0);
- if (--im->dst_address_length_refcounts[dst_address_length] == 0)
- {
- im->non_empty_dst_address_length_bitmap =
- clib_bitmap_set (im->non_empty_dst_address_length_bitmap,
- 128 - dst_address_length, 0);
- compute_prefix_lengths_in_search_order (im);
- }
- }
- else
- {
- im->dst_address_length_refcounts[dst_address_length]++;
-
- im->non_empty_dst_address_length_bitmap =
- clib_bitmap_set (im->non_empty_dst_address_length_bitmap,
- 128 - dst_address_length, 1);
- compute_prefix_lengths_in_search_order (im);
- }
-
- kv.key[0] = dst_address.as_u64[0];
- kv.key[1] = dst_address.as_u64[1];
- kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
-
- if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
- old_adj_index = value.value;
-
- if (is_del)
- BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */);
- else
- {
- /* Make sure adj index is valid. */
- if (CLIB_DEBUG > 0)
- (void) ip_get_adjacency (lm, adj_index);
-
- kv.value = adj_index;
-
- BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */);
- }
-
- /* Avoid spurious reference count increments */
- if (old_adj_index == adj_index
- && adj_index != ~0
- && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
- {
- ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
- if (adj->share_count > 0)
- adj->share_count --;
- }
-
- /* Delete old adjacency index if present and changed. */
- {
- if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
- && old_adj_index != ~0
- && old_adj_index != adj_index)
- ip_del_adjacency (lm, old_adj_index);
- }
-}
-
-u32
-ip6_route_get_next_hop_adj (ip6_main_t * im,
- u32 fib_index,
- ip6_address_t *next_hop,
- u32 next_hop_sw_if_index,
- u32 explicit_fib_index)
-{
- ip_lookup_main_t * lm = &im->lookup_main;
- vnet_main_t * vnm = vnet_get_main();
- int is_interface_next_hop;
- uword * nh_result;
- u32 nh_adj_index;
- ip6_fib_t * fib;
-
- fib = vec_elt_at_index (im->fibs, fib_index);
-
- is_interface_next_hop = ip6_address_is_zero (next_hop);
-
- if (is_interface_next_hop)
- {
- nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index,
- next_hop_sw_if_index);
- if (nh_result)
- nh_adj_index = *nh_result;
- else
- {
- ip_adjacency_t * adj;
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &nh_adj_index);
- ip6_adjacency_set_interface_route (vnm, adj,
- next_hop_sw_if_index, ~0);
- ip_call_add_del_adjacency_callbacks
- (lm, next_hop_sw_if_index, /* is_del */ 0);
- hash_set (im->interface_route_adj_index_by_sw_if_index,
- next_hop_sw_if_index, nh_adj_index);
- }
- }
- else if (next_hop_sw_if_index == ~0)
- {
- /* next-hop is recursive. we always need a indirect adj
- * for recursive paths. Any LPM we perform now will give
- * us a valid adj, but without tracking the next-hop we
- * have no way to keep it valid.
- */
- ip_adjacency_t add_adj;
- memset (&add_adj, 0, sizeof(add_adj));
- add_adj.n_adj = 1;
- add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
- add_adj.indirect.next_hop.ip6.as_u64[0] = next_hop->as_u64[0];
- add_adj.indirect.next_hop.ip6.as_u64[1] = next_hop->as_u64[1];
- add_adj.explicit_fib_index = explicit_fib_index;
- ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
- }
- else
- {
- BVT(clib_bihash_kv) kv, value;
-
- /* Look for the interface /128 route */
- kv.key[0] = next_hop->as_u64[0];
- kv.key[1] = next_hop->as_u64[1];
- kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
-after_nd:
- if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
- {
- ip_adjacency_t * adj;
- nh_adj_index = ip6_fib_lookup_with_table (im, fib_index, next_hop);
- adj = ip_get_adjacency (lm, nh_adj_index);
- /* if ND interface adjacencty is present, we need to
- install ND adjaceny for specific next hop */
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- adj->arp.next_hop.ip6.as_u64[0] == 0 &&
- adj->arp.next_hop.ip6.as_u64[1] == 0)
- {
- nh_adj_index = vnet_ip6_neighbor_glean_add(fib_index, next_hop);
- }
- else if (next_hop->as_u8[0] == 0xfe)
- {
- //Next hop is link-local. No indirect in this case.
- //Let's add it as a possible neighbor on this interface
- ip6_address_t null_addr= {};
- ip6_add_del_route_next_hop (im, IP6_ROUTE_FLAG_ADD,
- next_hop, 128,
- &null_addr, next_hop_sw_if_index,
- 1, ~0, fib_index);
- goto after_nd;
- }
- }
- else
- {
- nh_adj_index = value.value;
- }
- }
-
- return (nh_adj_index);
-}
-
-void
-ip6_add_del_route_next_hop (ip6_main_t * im,
- u32 flags,
- ip6_address_t * dst_address,
- u32 dst_address_length,
- ip6_address_t * next_hop,
- u32 next_hop_sw_if_index,
- u32 next_hop_weight, u32 adj_index,
- u32 explicit_fib_index)
-{
- vnet_main_t * vnm = vnet_get_main();
- ip_lookup_main_t * lm = &im->lookup_main;
- u32 fib_index;
- ip6_fib_t * fib;
- ip6_address_t masked_dst_address;
- u32 old_mp_adj_index, new_mp_adj_index;
- u32 dst_adj_index, nh_adj_index;
- int rv;
- ip_adjacency_t * dst_adj;
- ip_multipath_adjacency_t * old_mp, * new_mp;
- int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0;
- clib_error_t * error = 0;
- BVT(clib_bihash_kv) kv, value;
-
- vlib_smp_unsafe_warning();
-
- if (explicit_fib_index == (u32)~0)
- fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
- else
- fib_index = explicit_fib_index;
-
- fib = vec_elt_at_index (im->fibs, fib_index);
-
- /* Lookup next hop to be added or deleted. */
- if (adj_index == (u32)~0)
- {
- nh_adj_index = ip6_route_get_next_hop_adj(im, fib_index,
- next_hop,
- next_hop_sw_if_index,
- explicit_fib_index);
- }
- else
- {
- /* Look for the interface /128 route */
- kv.key[0] = next_hop->as_u64[0];
- kv.key[1] = next_hop->as_u64[1];
- kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
-
- if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
- {
- vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
- error = clib_error_return (0, "next-hop %U/128 not in FIB",
- format_ip6_address, next_hop);
- goto done;
- }
-
- nh_adj_index = value.value;
- }
-
- ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
- masked_dst_address = dst_address[0];
- ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]);
-
- kv.key[0] = masked_dst_address.as_u64[0];
- kv.key[1] = masked_dst_address.as_u64[1];
- kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
-
- rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value);
-
- if (rv == 0)
- {
- dst_adj_index = value.value;
- dst_adj = ip_get_adjacency (lm, dst_adj_index);
- }
- else
- {
- /* For deletes destination must be known. */
- if (is_del)
- {
- vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
- error = clib_error_return (0, "unknown destination %U/%d",
- format_ip6_address, dst_address,
- dst_address_length);
- goto done;
- }
-
- dst_adj_index = ~0;
- dst_adj = 0;
- }
-
- /* Ignore adds of X/128 with next hop of X. */
- if (! is_del
- && dst_address_length == 128
- && ip6_address_is_equal (dst_address, next_hop))
- {
- vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
- error = clib_error_return (0, "prefix matches next hop %U/%d",
- format_ip6_address, dst_address,
- dst_address_length);
- goto done;
- }
-
- /* Destination is not known and default weight is set so add route
- to existing non-multipath adjacency */
- if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
- {
- /* create / delete additional mapping of existing adjacency */
- ip6_add_del_route_args_t a;
-
- a.table_index_or_table_id = fib_index;
- a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD)
- | IP6_ROUTE_FLAG_FIB_INDEX
- | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
- | (flags & (IP6_ROUTE_FLAG_NO_REDISTRIBUTE
- | IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
- a.dst_address = dst_address[0];
- a.dst_address_length = dst_address_length;
- a.adj_index = nh_adj_index;
- a.add_adj = 0;
- a.n_add_adj = 0;
-
- ip6_add_del_route (im, &a);
- goto done;
- }
-
- old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
-
- if (! ip_multipath_adjacency_add_del_next_hop
- (lm, is_del,
- dst_adj ? dst_adj->heap_handle : ~0,
- nh_adj_index,
- next_hop_weight,
- &new_mp_adj_index))
- {
- vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
- error = clib_error_return
- (0, "requested deleting next-hop %U not found in multi-path",
- format_ip6_address, next_hop);
- goto done;
- }
-
- old_mp = new_mp = 0;
- if (old_mp_adj_index != ~0)
- old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
- if (new_mp_adj_index != ~0)
- new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
-
- if (old_mp != new_mp)
- {
- ip6_add_del_route_args_t a;
- ip_adjacency_t * adj;
-
- a.table_index_or_table_id = fib_index;
- a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD)
- | IP6_ROUTE_FLAG_FIB_INDEX
- | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
- | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE));
- a.dst_address = dst_address[0];
- a.dst_address_length = dst_address_length;
- a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
- a.add_adj = 0;
- a.n_add_adj = 0;
-
- ip6_add_del_route (im, &a);
-
- adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
- if (adj->n_adj == 1)
- adj->share_count += is_del ? -1 : 1;
- }
-
- done:
- if (error)
- clib_error_report (error);
-}
-
-u32
-ip6_get_route (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip6_address_t * address,
- u32 address_length)
-{
- ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
- ip6_address_t masked_address;
- BVT(clib_bihash_kv) kv, value;
-
- ASSERT (address_length < ARRAY_LEN (im->fib_masks));
- clib_memcpy (&masked_address, address, sizeof (masked_address));
- ip6_address_mask (&masked_address, &im->fib_masks[address_length]);
-
- kv.key[0] = masked_address.as_u64[0];
- kv.key[1] = masked_address.as_u64[1];
- kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length;
-
- if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
- return (value.value);
- return 0;
-}
-
-void
-ip6_foreach_matching_route (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip6_address_t * dst_address,
- u32 address_length,
- ip6_address_t ** results,
- u8 ** result_lengths)
-{
- ip6_fib_t * fib =
- find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
- BVT(clib_bihash) * h = &im->ip6_lookup_table;
- BVT(clib_bihash_value) * v;
- clib_bihash_bucket_t * b;
- int i, j, k;
-
- if (*results)
- _vec_len (*results) = 0;
- if (*result_lengths)
- _vec_len (*result_lengths) = 0;
-
- /* Walk the table looking for routes which match the supplied address */
- for (i = 0; i < h->nbuckets; i++)
- {
- b = &h->buckets [i];
- if (b->offset == 0)
- continue;
-
- v = BV(clib_bihash_get_value) (h, b->offset);
- for (j = 0; j < (1<<b->log2_pages); j++)
- {
- for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
- {
- if (BV(clib_bihash_is_free)(&v->kvp[k]))
- continue;
-
- if ((v->kvp[k].key[2]
- == (((u64)((fib - im->fibs))<<32) | address_length))
- && ip6_destination_matches_route
- (im, dst_address, (ip6_address_t *) &v->kvp[k],
- address_length))
- {
- ip6_address_t * a;
-
- a = (ip6_address_t *)(&v->kvp[k]);
-
- vec_add1 (*results, a[0]);
- vec_add1 (*result_lengths, address_length);
- }
- }
- v++;
- }
- }
-}
-
-void ip6_maybe_remap_adjacencies (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags)
-{
-#if SOONE
- ip6_fib_t * fib
- = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
-#endif
- ip_lookup_main_t * lm = &im->lookup_main;
-
- if (lm->n_adjacency_remaps == 0)
- return;
-
- clib_warning ("unimplemented, please report to vpp-dev@cisco.com");
-
- /* All remaps have been performed. */
- lm->n_adjacency_remaps = 0;
-}
-
-void ip6_delete_matching_routes (ip6_main_t * im,
- u32 table_index_or_table_id,
- u32 flags,
- ip6_address_t * address,
- u32 address_length)
-{
- /* $$$$ static may be OK - this should happen only on thread 0 */
- static ip6_address_t * matching_addresses;
- static u8 * matching_address_lengths;
- u32 l, i;
- ip6_add_del_route_args_t a;
-
- vlib_smp_unsafe_warning();
-
- a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
- a.table_index_or_table_id = table_index_or_table_id;
- a.adj_index = ~0;
- a.add_adj = 0;
- a.n_add_adj = 0;
-
- for (l = address_length + 1; l <= 128; l++)
- {
- ip6_foreach_matching_route (im, table_index_or_table_id, flags,
- address,
- l,
- &matching_addresses,
- &matching_address_lengths);
- for (i = 0; i < vec_len (matching_addresses); i++)
- {
- a.dst_address = matching_addresses[i];
- a.dst_address_length = matching_address_lengths[i];
- ip6_add_del_route (im, &a);
- }
- }
-
- ip6_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
-}
-
void
ip6_forward_next_trace (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -713,12 +58,10 @@ ip6_forward_next_trace (vlib_main_t * vm,
always_inline uword
ip6_lookup_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- int is_indirect)
+ vlib_frame_t * frame)
{
ip6_main_t * im = &ip6_main;
- ip_lookup_main_t * lm = &im->lookup_main;
- vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
u32 n_left_from, n_left_to_next, * from, * to_next;
ip_lookup_next_t next;
u32 cpu_index = os_get_cpu_number();
@@ -735,13 +78,14 @@ ip6_lookup_inline (vlib_main_t * vm,
while (n_left_from >= 4 && n_left_to_next >= 2)
{
vlib_buffer_t * p0, * p1;
- u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
+ u32 pi0, pi1, lbi0, lbi1, wrong_next;
ip_lookup_next_t next0, next1;
ip6_header_t * ip0, * ip1;
- ip_adjacency_t * adj0, * adj1;
ip6_address_t * dst_addr0, * dst_addr1;
u32 fib_index0, fib_index1;
u32 flow_hash_config0, flow_hash_config1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
/* Prefetch next iteration. */
{
@@ -765,19 +109,8 @@ ip6_lookup_inline (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (p0);
ip1 = vlib_buffer_get_current (p1);
- if (PREDICT_FALSE(is_indirect))
- {
- ip_adjacency_t * iadj0, * iadj1;
- iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
- iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
- dst_addr0 = &iadj0->indirect.next_hop.ip6;
- dst_addr1 = &iadj1->indirect.next_hop.ip6;
- }
- else
- {
- dst_addr0 = &ip0->dst_address;
- dst_addr1 = &ip1->dst_address;
- }
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
@@ -787,69 +120,60 @@ ip6_lookup_inline (vlib_main_t * vm,
fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
- adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0);
- adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, dst_addr1);
+ lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+ lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
- adj0 = ip_get_adjacency (lm, adj_index0);
- adj1 = ip_get_adjacency (lm, adj_index1);
-
- if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
- {
- adj_index0 = ip6_fib_lookup_with_table
- (im, adj0->explicit_fib_index, dst_addr0);
- adj0 = ip_get_adjacency (lm, adj_index0);
- }
- if (PREDICT_FALSE (adj1->explicit_fib_index != ~0))
- {
- adj_index1 = ip6_fib_lookup_with_table
- (im, adj1->explicit_fib_index, dst_addr1);
- adj1 = ip_get_adjacency (lm, adj_index1);
- }
-
- next0 = adj0->lookup_next_index;
- next1 = adj1->lookup_next_index;
-
- /* Only process the HBH Option Header if explicitly configured to do so */
- next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled &&
- adj_index0 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index;
- next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled &&
- adj_index1 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj1->lookup_next_index;
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
vnet_buffer (p0)->ip.flow_hash =
vnet_buffer(p1)->ip.flow_hash = 0;
- if (PREDICT_FALSE(adj0->n_adj > 1))
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
{
- flow_hash_config0 =
- vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
+ flow_hash_config0 = lb0->lb_hash_config;
vnet_buffer (p0)->ip.flow_hash =
ip6_compute_flow_hash (ip0, flow_hash_config0);
}
-
- if (PREDICT_FALSE(adj1->n_adj > 1))
+ if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
{
- flow_hash_config1 =
- vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
-
+ flow_hash_config1 = lb1->lb_hash_config;
vnet_buffer (p1)->ip.flow_hash =
ip6_compute_flow_hash (ip1, flow_hash_config1);
}
- ASSERT (adj0->n_adj > 0);
- ASSERT (adj1->n_adj > 0);
- ASSERT (is_pow2 (adj0->n_adj));
- ASSERT (is_pow2 (adj1->n_adj));
- adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
- adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ lb0->lb_n_buckets_minus_1));
+ dpo1 = load_balance_get_bucket_i(lb1,
+ (vnet_buffer (p1)->ip.flow_hash &
+ lb1->lb_n_buckets_minus_1));
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
+ im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
+ next0;
+ next1 = ((ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
+ im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
+ next1;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
vlib_increment_combined_counter
- (cm, cpu_index, adj_index0, 1,
+ (cm, cpu_index, lbi0, 1,
vlib_buffer_length_in_chain (vm, p0));
vlib_increment_combined_counter
- (cm, cpu_index, adj_index1, 1,
+ (cm, cpu_index, lbi1, 1,
vlib_buffer_length_in_chain (vm, p1));
from += 2;
@@ -898,11 +222,12 @@ ip6_lookup_inline (vlib_main_t * vm,
{
vlib_buffer_t * p0;
ip6_header_t * ip0;
- u32 pi0, adj_index0;
+ u32 pi0, lbi0;
ip_lookup_next_t next0;
- ip_adjacency_t * adj0;
+ load_balance_t * lb0;
ip6_address_t * dst_addr0;
u32 fib_index0, flow_hash_config0;
+ const dpo_id_t *dpo0;
pi0 = from[0];
to_next[0] = pi0;
@@ -911,57 +236,44 @@ ip6_lookup_inline (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (p0);
- if (PREDICT_FALSE(is_indirect))
- {
- ip_adjacency_t * iadj0;
- iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
- dst_addr0 = &iadj0->indirect.next_hop.ip6;
- }
- else
- {
- dst_addr0 = &ip0->dst_address;
- }
+ dst_addr0 = &ip0->dst_address;
fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
flow_hash_config0 =
- vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
+ ip6_fib_get (fib_index0)->flow_hash_config;
- adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0);
+ lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
- adj0 = ip_get_adjacency (lm, adj_index0);
-
- if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
- {
- adj_index0 = ip6_fib_lookup_with_table
- (im, adj0->explicit_fib_index, dst_addr0);
- adj0 = ip_get_adjacency (lm, adj_index0);
- }
-
- /* Only process the HBH Option Header if explicitly configured to do so */
- next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled &&
- adj_index0 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index;
+ lb0 = load_balance_get (lbi0);
vnet_buffer (p0)->ip.flow_hash = 0;
- if (PREDICT_FALSE(adj0->n_adj > 1))
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
{
- flow_hash_config0 =
- vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
+ flow_hash_config0 = lb0->lb_hash_config;
vnet_buffer (p0)->ip.flow_hash =
ip6_compute_flow_hash (ip0, flow_hash_config0);
}
- ASSERT (adj0->n_adj > 0);
- ASSERT (is_pow2 (adj0->n_adj));
- adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ lb0->lb_n_buckets_minus_1));
+ next0 = dpo0->dpoi_next_node;
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
+ im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
+ next0;
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
vlib_increment_combined_counter
- (cm, cpu_index, adj_index0, 1,
+ (cm, cpu_index, lbi0, 1,
vlib_buffer_length_in_chain (vm, p0));
from += 1;
@@ -986,163 +298,171 @@ ip6_lookup_inline (vlib_main_t * vm,
}
if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip6_forward_next_trace(vm, node, frame, VLIB_TX);
+ ip6_forward_next_trace(vm, node, frame, VLIB_TX);
return frame->n_vectors;
}
-void ip6_adjacency_set_interface_route (vnet_main_t * vnm,
- ip_adjacency_t * adj,
- u32 sw_if_index,
- u32 if_address_index)
-{
- vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
- ip_lookup_next_t n;
- u32 node_index;
-
- if (hw->hw_class_index == ethernet_hw_interface_class.index
- || hw->hw_class_index == srp_hw_interface_class.index)
- {
- n = IP_LOOKUP_NEXT_ARP;
- node_index = ip6_discover_neighbor_node.index;
- adj->if_address_index = if_address_index;
- adj->arp.next_hop.ip6.as_u64[0] = 0;
- adj->arp.next_hop.ip6.as_u64[1] = 0;
- }
- else
- {
- n = IP_LOOKUP_NEXT_REWRITE;
- node_index = ip6_rewrite_node.index;
- }
-
- adj->lookup_next_index = n;
- adj->explicit_fib_index = ~0;
-
- vnet_rewrite_for_sw_interface
- (vnm,
- VNET_L3_PACKET_TYPE_IP6,
- sw_if_index,
- node_index,
- VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
-}
-
static void
ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
ip6_main_t * im, u32 fib_index,
ip_interface_address_t * a)
{
ip_lookup_main_t * lm = &im->lookup_main;
- ip_adjacency_t * adj;
ip6_address_t * address = ip_interface_address_get_address (lm, a);
- ip6_add_del_route_args_t x;
- vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
- u32 classify_table_index;
-
- /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
- x.table_index_or_table_id = fib_index;
- x.flags = (IP6_ROUTE_FLAG_ADD
- | IP6_ROUTE_FLAG_FIB_INDEX
- | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
- x.dst_address = address[0];
- x.dst_address_length = a->address_length;
- x.n_add_adj = 0;
- x.add_adj = 0;
+ fib_prefix_t pfx = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *address,
+ };
a->neighbor_probe_adj_index = ~0;
if (a->address_length < 128)
- {
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &x.adj_index);
- ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
- ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
- ip6_add_del_route (im, &x);
- a->neighbor_probe_adj_index = x.adj_index;
- }
-
- /* Add e.g. ::1/128 as local to this host. */
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &x.adj_index);
+ {
+ fib_node_index_t fei;
+
+ fei = fib_table_entry_update_one_path(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP6,
+ NULL, /* No next-hop address */
+ sw_if_index,
+ ~0, // invalid FIB index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
+ }
- classify_table_index = ~0;
+ pfx.fp_len = 128;
if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
- classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
- if (classify_table_index != (u32) ~0)
+ {
+ u32 classify_table_index =
+ lm->classify_table_index_by_sw_if_index [sw_if_index];
+ if (classify_table_index != (u32) ~0)
+ {
+ dpo_id_t dpo = DPO_NULL;
+
+ dpo_set(&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create(FIB_PROTOCOL_IP6,
+ classify_table_index));
+
+ fib_table_entry_special_dpo_add(fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE,
+ &dpo);
+ dpo_reset(&dpo);
+ }
+ }
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL),
+ FIB_PROTOCOL_IP6,
+ &pfx.fp_addr,
+ sw_if_index,
+ ~0, // invalid FIB index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+static void
+ip6_del_interface_routes (ip6_main_t * im,
+ u32 fib_index,
+ ip6_address_t * address,
+ u32 address_length)
+{
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *address,
+ };
+
+ if (pfx.fp_len < 128)
{
- adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
- adj->classify.table_index = classify_table_index;
+ fib_table_entry_delete(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE);
+
}
- else
- adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
-
- adj->if_address_index = a - lm->if_address_pool;
- adj->rewrite_header.sw_if_index = sw_if_index;
- adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
- adj->rewrite_header.data_bytes = 0;
- ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
- x.dst_address_length = 128;
- ip6_add_del_route (im, &x);
+
+ pfx.fp_len = 128;
+ fib_table_entry_delete(fib_index,
+ &pfx,
+ FIB_SOURCE_INTERFACE);
}
-static void
-ip6_del_interface_routes (ip6_main_t * im, u32 fib_index,
- ip6_address_t * address, u32 address_length)
+void
+ip6_sw_interface_enable_disable (u32 sw_if_index,
+ u32 is_enable)
{
- ip6_add_del_route_args_t x;
-
- /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
- x.table_index_or_table_id = fib_index;
- x.flags = (IP6_ROUTE_FLAG_DEL
- | IP6_ROUTE_FLAG_FIB_INDEX
- | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
- x.dst_address = address[0];
- x.dst_address_length = address_length;
- x.adj_index = ~0;
- x.n_add_adj = 0;
- x.add_adj = 0;
-
- if (address_length < 128)
+ vlib_main_t * vm = vlib_get_main();
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 ci, cast;
+ u32 lookup_feature_index;
+
+ vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
{
- /* Don't wipe out fe80::0/64 */
- if (address_length != 64 ||
- address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL))
- ip6_add_del_route (im, &x);
+ if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ else
+ {
+ ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
}
- x.dst_address_length = 128;
- ip6_add_del_route (im, &x);
+ for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
+ {
+ ip_config_main_t * cm = &lm->feature_config_mains[cast];
+ vnet_config_main_t * vcm = &cm->config_main;
- ip6_delete_matching_routes (im,
- fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX,
- address,
- address_length);
-}
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
-typedef struct {
- u32 sw_if_index;
- ip6_address_t address;
- u32 length;
-} ip6_interface_address_t;
+ if (cast == VNET_IP_RX_UNICAST_FEAT)
+ lookup_feature_index = im->ip6_unicast_rx_feature_lookup;
+ else
+ lookup_feature_index = im->ip6_multicast_rx_feature_lookup;
-static clib_error_t *
-ip6_add_del_interface_address_internal (vlib_main_t * vm,
- u32 sw_if_index,
- ip6_address_t * new_address,
- u32 new_length,
- u32 redistribute,
- u32 insert_routes,
- u32 is_del);
+ if (is_enable)
+ ci = vnet_config_add_feature (vm, vcm,
+ ci,
+ lookup_feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ else
+ ci = vnet_config_del_feature (vm, vcm,
+ ci,
+ lookup_feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
-static clib_error_t *
-ip6_add_del_interface_address_internal (vlib_main_t * vm,
- u32 sw_if_index,
- ip6_address_t * address,
- u32 address_length,
- u32 redistribute,
- u32 insert_routes,
- u32 is_del)
+ cm->config_index_by_sw_if_index[sw_if_index] = ci;
+ }
+}
+
+clib_error_t *
+ip6_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 is_del)
{
vnet_main_t * vnm = vnet_get_main();
ip6_main_t * im = &ip6_main;
@@ -1174,17 +494,13 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm,
goto done;
}
- if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
- {
- if (is_del)
- ip6_del_interface_routes (im, ip6_af.fib_index, address,
- address_length);
-
- else
- ip6_add_interface_routes (vnm, sw_if_index,
- im, ip6_af.fib_index,
- pool_elt_at_index (lm->if_address_pool, if_address_index));
- }
+ if (is_del)
+ ip6_del_interface_routes (im, ip6_af.fib_index, address,
+ address_length);
+ else
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, ip6_af.fib_index,
+ pool_elt_at_index (lm->if_address_pool, if_address_index));
{
ip6_add_del_interface_address_callback_t * cb;
@@ -1201,18 +517,6 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm,
}
clib_error_t *
-ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
- ip6_address_t * address, u32 address_length,
- u32 is_del)
-{
- return ip6_add_del_interface_address_internal
- (vm, sw_if_index, address, address_length,
- /* redistribute */ 1,
- /* insert_routes */ 1,
- is_del);
-}
-
-clib_error_t *
ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
u32 sw_if_index,
u32 flags)
@@ -1282,10 +586,16 @@ VNET_IP6_UNICAST_FEATURE_INIT (ip6_vpath, static) = {
VNET_IP6_UNICAST_FEATURE_INIT (ip6_lookup, static) = {
.node_name = "ip6-lookup",
- .runs_before = 0, /* not before any other features */
+ .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0},
.feature_index = &ip6_main.ip6_unicast_rx_feature_lookup,
};
+VNET_IP6_UNICAST_FEATURE_INIT (ip6_drop, static) = {
+ .node_name = "ip6-drop",
+ .runs_before = 0, /*last feature*/
+ .feature_index = &ip6_main.ip6_unicast_rx_feature_drop,
+};
+
/* Built-in ip6 multicast rx feature path definition (none now) */
VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = {
.node_name = "vpath-input-ip6",
@@ -1295,10 +605,16 @@ VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = {
VNET_IP6_MULTICAST_FEATURE_INIT (ip6_lookup, static) = {
.node_name = "ip6-lookup",
- .runs_before = 0, /* not before any other features */
+ .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0},
.feature_index = &ip6_main.ip6_multicast_rx_feature_lookup,
};
+VNET_IP6_MULTICAST_FEATURE_INIT (ip6_drop_mc, static) = {
+ .node_name = "ip6-drop",
+ .runs_before = 0, /* last feature */
+ .feature_index = &ip6_main.ip6_multicast_rx_feature_drop,
+};
+
static char * rx_feature_start_nodes[] =
{"ip6-input"};
@@ -1343,7 +659,7 @@ ip6_feature_init (vlib_main_t * vm, ip6_main_t * im)
feature_start_nodes,
feature_start_len,
cast,
- 0 /* is_ip4 */)))
+ VNET_L3_PACKET_TYPE_IP6)))
return error;
}
return 0;
@@ -1369,9 +685,9 @@ ip6_sw_interface_add_del (vnet_main_t * vnm,
ci = cm->config_index_by_sw_if_index[sw_if_index];
if (cast == VNET_IP_RX_UNICAST_FEAT)
- feature_index = im->ip6_unicast_rx_feature_lookup;
+ feature_index = im->ip6_unicast_rx_feature_drop;
else if (cast == VNET_IP_RX_MULTICAST_FEAT)
- feature_index = im->ip6_multicast_rx_feature_lookup;
+ feature_index = im->ip6_multicast_rx_feature_drop;
else
feature_index = im->ip6_tx_feature_interface_output;
@@ -1382,12 +698,14 @@ ip6_sw_interface_add_del (vnet_main_t * vnm,
/* config data */ 0,
/* # bytes of config data */ 0);
else
- ci = vnet_config_del_feature (vm, vcm,
- ci,
- feature_index,
- /* config data */ 0,
- /* # bytes of config data */ 0);
-
+ {
+ ci = vnet_config_del_feature (vm, vcm, ci,
+ feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
+ im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
+ }
cm->config_index_by_sw_if_index[sw_if_index] = ci;
/*
* note: do not update the tx feature count here.
@@ -1403,7 +721,7 @@ ip6_lookup (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 0);
+ return ip6_lookup_inline (vm, node, frame);
}
static u8 * format_ip6_lookup_trace (u8 * s, va_list * args);
@@ -1419,27 +737,97 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = {
.next_nodes = IP6_LOOKUP_NEXT_NODES,
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup)
-static uword
-ip6_indirect (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+always_inline uword
+ip6_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
- return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 1);
-}
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace(vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t * p0;
+ u32 pi0, lbi0, hc0;
+ const ip6_header_t *ip0;
+ const dpo_id_t *dpo0;
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+ hc0 = lb0->lb_hash_config;
+ vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0);
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ vnet_buffer(p0)->ip.flow_hash &
+ (lb0->lb_n_buckets - 1));
-VLIB_REGISTER_NODE (ip6_indirect_node) = {
- .function = ip6_indirect,
- .name = "ip6-indirect",
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip6_load_balance_node) = {
+ .function = ip6_load_balance,
+ .name = "ip6-load-balance",
.vector_size = sizeof (u32),
.sibling_of = "ip6-lookup",
.format_trace = format_ip6_lookup_trace,
.n_next_nodes = 0,
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_indirect_node, ip6_indirect);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance)
typedef struct {
/* Adjacency taken. */
@@ -1469,13 +857,10 @@ static u8 * format_ip6_lookup_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
- vnet_main_t * vnm = vnet_get_main();
- ip6_main_t * im = &ip6_main;
uword indent = format_get_indent (s);
- s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
- t->fib_index, t->adj_index, format_ip_adjacency,
- vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+ s = format (s, "fib %d dpo-idx %d : flow hash: 0x%08x",
+ t->fib_index, t->adj_index, t->flow_hash);
s = format(s, "\n%U%U",
format_white_space, indent,
format_ip6_header, t->packet_data);
@@ -1489,16 +874,16 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
vnet_main_t * vnm = vnet_get_main();
- ip6_main_t * im = &ip6_main;
uword indent = format_get_indent (s);
s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
t->fib_index, t->adj_index, format_ip_adjacency,
- vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+ vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
format_ip_adjacency_packet_data,
- vnm, &im->lookup_main, t->adj_index,
+ vnm, t->adj_index,
t->packet_data, sizeof (t->packet_data));
return s;
}
@@ -1628,12 +1013,6 @@ ip6_punt (vlib_main_t * vm,
vlib_frame_t * frame)
{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); }
-static uword
-ip6_miss (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); }
-
VLIB_REGISTER_NODE (ip6_drop_node,static) = {
.function = ip6_drop,
.name = "ip6-drop",
@@ -1647,7 +1026,7 @@ VLIB_REGISTER_NODE (ip6_drop_node,static) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop)
VLIB_REGISTER_NODE (ip6_punt_node,static) = {
.function = ip6_punt,
@@ -1662,22 +1041,7 @@ VLIB_REGISTER_NODE (ip6_punt_node,static) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt);
-
-VLIB_REGISTER_NODE (ip6_miss_node,static) = {
- .function = ip6_miss,
- .name = "ip6-miss",
- .vector_size = sizeof (u32),
-
- .format_trace = format_ip6_forward_next_trace,
-
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_miss_node, ip6_miss);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt)
VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
.function = ip6_drop,
@@ -1931,17 +1295,21 @@ ip6_local (vlib_main_t * vm,
/* Drop packets from unroutable hosts. */
/* If this is a neighbor solicitation (ICMP), skip source RPF check */
- if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast(&ip0->src_address))
{
u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
- error0 = (lm->miss_adj_index == src_adj_index0
+ error0 = (ADJ_INDEX_INVALID == src_adj_index0
? IP6_ERROR_SRC_LOOKUP_MISS
: error0);
}
- if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP)
+ if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type1 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast(&ip1->src_address))
{
u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
- error1 = (lm->miss_adj_index == src_adj_index1
+ error1 = (ADJ_INDEX_INVALID == src_adj_index1
? IP6_ERROR_SRC_LOOKUP_MISS
: error1);
}
@@ -2018,10 +1386,12 @@ ip6_local (vlib_main_t * vm,
: error0);
/* If this is a neighbor solicitation (ICMP), skip source RPF check */
- if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast(&ip0->src_address))
{
u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
- error0 = (lm->miss_adj_index == src_adj_index0
+ error0 = (ADJ_INDEX_INVALID == src_adj_index0
? IP6_ERROR_SRC_LOOKUP_MISS
: error0);
}
@@ -2057,7 +1427,7 @@ VLIB_REGISTER_NODE (ip6_local_node,static) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local)
void ip6_register_protocol (u32 protocol, u32 node_index)
{
@@ -2082,9 +1452,10 @@ typedef enum {
} ip6_discover_neighbor_error_t;
static uword
-ip6_discover_neighbor (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+ip6_discover_neighbor_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_glean)
{
vnet_main_t * vnm = vnet_get_main();
ip6_main_t * im = &ip6_main;
@@ -2144,11 +1515,11 @@ ip6_discover_neighbor (vlib_main_t * vm,
adj0 = ip_get_adjacency (lm, adj_index0);
- if (adj0->arp.next_hop.ip6.as_u64[0] ||
- adj0->arp.next_hop.ip6.as_u64[1]) {
- ip0->dst_address.as_u64[0] = adj0->arp.next_hop.ip6.as_u64[0];
- ip0->dst_address.as_u64[1] = adj0->arp.next_hop.ip6.as_u64[1];
- }
+ if (!is_glean)
+ {
+ ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
+ ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
+ }
a0 = hash_seeds[0];
b0 = hash_seeds[1];
@@ -2209,13 +1580,15 @@ ip6_discover_neighbor (vlib_main_t * vm,
* Choose source address based on destination lookup
* adjacency.
*/
- if (ip6_src_address_for_packet (im, p0, &h0->ip.src_address,
- sw_if_index0)) {
- //There is no address on the interface
+ if (ip6_src_address_for_packet (lm,
+ sw_if_index0,
+ &h0->ip.src_address))
+ {
+ /* There is no address on the interface */
p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
vlib_buffer_free(vm, &bi0, 1);
continue;
- }
+ }
/*
* Destination address is a solicited node multicast address.
@@ -2262,6 +1635,22 @@ ip6_discover_neighbor (vlib_main_t * vm,
return frame->n_vectors;
}
+static uword
+ip6_discover_neighbor (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (ip6_discover_neighbor_inline(vm, node, frame, 0));
+}
+
+static uword
+ip6_glean (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (ip6_discover_neighbor_inline(vm, node, frame, 1));
+}
+
static char * ip6_discover_neighbor_error_strings[] = {
[IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
[IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]
@@ -2287,6 +1676,23 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
},
};
+VLIB_REGISTER_NODE (ip6_glean_node) = {
+ .function = ip6_glean,
+ .name = "ip6-glean",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes = {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+
clib_error_t *
ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
{
@@ -2474,31 +1880,17 @@ ip6_rewrite_inline (vlib_main_t * vm,
adj0 = ip_get_adjacency (lm, adj_index0);
adj1 = ip_get_adjacency (lm, adj_index1);
- if (rewrite_for_locally_received_packets)
- {
- /*
- * If someone sends e.g. an icmp6 w/ src = dst = interface addr,
- * we end up here with a local adjacency in hand
- */
- if (PREDICT_FALSE(adj0->lookup_next_index
- == IP_LOOKUP_NEXT_LOCAL))
- error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
- if (PREDICT_FALSE(adj1->lookup_next_index
- == IP_LOOKUP_NEXT_LOCAL))
- error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
- }
-
rw_len0 = adj0[0].rewrite_header.data_bytes;
rw_len1 = adj1[0].rewrite_header.data_bytes;
vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
- vlib_increment_combined_counter (&lm->adjacency_counters,
+ vlib_increment_combined_counter (&adjacency_counters,
cpu_index,
adj_index0,
/* packet increment */ 0,
/* byte increment */ rw_len0);
- vlib_increment_combined_counter (&lm->adjacency_counters,
+ vlib_increment_combined_counter (&adjacency_counters,
cpu_index,
adj_index1,
/* packet increment */ 0,
@@ -2621,13 +2013,6 @@ ip6_rewrite_inline (vlib_main_t * vm,
}
}
- if (rewrite_for_locally_received_packets)
- {
- if (PREDICT_FALSE(adj0->lookup_next_index
- == IP_LOOKUP_NEXT_LOCAL))
- error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
- }
-
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
@@ -2635,7 +2020,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
rw_len0 = adj0[0].rewrite_header.data_bytes;
vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
- vlib_increment_combined_counter (&lm->adjacency_counters,
+ vlib_increment_combined_counter (&adjacency_counters,
cpu_index,
adj_index0,
/* packet increment */ 0,
@@ -2712,6 +2097,29 @@ ip6_rewrite_local (vlib_main_t * vm,
/* rewrite_for_locally_received_packets */ 1);
}
+static uword
+ip6_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip6_rewrite_inline (vm, node, frame,
+ /* rewrite_for_locally_received_packets */ 0);
+}
+
+VLIB_REGISTER_NODE (ip6_midchain_node) = {
+ .function = ip6_midchain,
+ .name = "ip6-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .next_nodes = {
+ [IP6_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
+
VLIB_REGISTER_NODE (ip6_rewrite_node) = {
.function = ip6_rewrite_transit,
.name = "ip6-rewrite",
@@ -3207,12 +2615,17 @@ ip6_lookup_init (vlib_main_t * vm)
if (im->lookup_table_size == 0)
im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
- BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table",
+ BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
+ "ip6 FIB fwding table",
im->lookup_table_nbuckets,
im->lookup_table_size);
-
+ BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+ "ip6 FIB non-fwding table",
+ im->lookup_table_nbuckets,
+ im->lookup_table_size);
+
/* Create FIB with index 0 and table id of 0. */
- find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID);
+ fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0);
{
pg_node_t * pn;
@@ -3282,17 +2695,14 @@ add_del_ip6_interface_table (vlib_main_t * vm,
}
{
- ip6_main_t * im = &ip6_main;
- ip6_fib_t * fib =
- find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID);
+ u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6,
+ table_id);
- if (fib)
- {
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
- im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
- }
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
}
+
done:
return error;
}
@@ -3368,7 +2778,7 @@ int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
if (p == 0)
return -1;
- fib = vec_elt_at_index (im6->fibs, p[0]);
+ fib = ip6_fib_get (p[0]);
fib->flow_hash_config = flow_hash_config;
return 1;
diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c
index 2a037033d13..d927d279bff 100644
--- a/vnet/vnet/ip/ip6_hop_by_hop.c
+++ b/vnet/vnet/ip/ip6_hop_by_hop.c
@@ -24,6 +24,7 @@
#include <vppinfra/elog.h>
#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/fib/ip6_fib.h>
char *ppc_state[] = { "None", "Encap", "Decap" };
@@ -935,48 +936,22 @@ ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width, u32 vrf_id,
ip_lookup_main_t *lm = &im->lookup_main;
ip_adjacency_t *adj;
u32 fib_index;
- u32 len, adj_index;
- int i, rv;
- uword *p;
- BVT (clib_bihash_kv) kv, value;
+ u32 adj_index;
if ((is_add + is_pop + is_none) != 1)
return VNET_API_ERROR_INVALID_VALUE_2;
/* Go find the adjacency we're supposed to tickle */
- p = hash_get (im->fib_index_by_table_id, vrf_id);
+ fib_index = ip6_fib_index_from_table_id (vrf_id);
- if (p == 0)
+ if (~0 == fib_index)
return VNET_API_ERROR_NO_SUCH_FIB;
- fib_index = p[0];
+ adj_index = ip6_fib_table_fwding_lookup (im, fib_index, addr);
- len = vec_len (im->prefix_lengths_in_search_order);
-
- for (i = 0; i < len; i++)
- {
- int dst_address_length = im->prefix_lengths_in_search_order[i];
- ip6_address_t *mask = &im->fib_masks[dst_address_length];
-
- if (dst_address_length != mask_width)
- continue;
-
- kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
- kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
- kv.key[2] = ((u64) ((fib_index)) << 32) | dst_address_length;
-
- rv =
- BV (clib_bihash_search_inline_2) (&im->ip6_lookup_table, &kv, &value);
- if (rv == 0)
- goto found;
-
- }
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
-found:
+ ASSERT (!"Not an ADJ");
/* Got it, modify as directed... */
- adj_index = value.value;
adj = ip_get_adjacency (lm, adj_index);
/* Restore original lookup-next action */
@@ -1015,7 +990,7 @@ ip6_set_ioam_destination_command_fn (vlib_main_t * vm,
int is_pop = 0;
int is_none = 0;
u32 vrf_id = 0;
- int rv;
+ // int rv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -1038,19 +1013,23 @@ ip6_set_ioam_destination_command_fn (vlib_main_t * vm,
if (mask_width == ~0)
return clib_error_return (0, "<address>/<mask-width> required");
- rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id,
- is_add, is_pop, is_none);
+ /* rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id, */
+ /* is_add, is_pop, is_none); */
- switch (rv)
- {
- case 0:
- break;
- default:
- return clib_error_return (0, "ip6_ioam_set_destination returned %d",
- rv);
- }
+ /* switch (rv) */
+ /* { */
+ /* case 0: */
+ /* break; */
+ /* default: */
+ /* return clib_error_return (0, "ip6_ioam_set_destination returned %d", */
+ /* rv); */
+ /* } */
- return 0;
+ /* return 0; */
+
+ return clib_error_return (0,
+ "ip6_ioam_set_destination Currnetly Disabled due to FIB2.0",
+ 1);
}
/* *INDENT-OFF* */
diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c
index a35f58a3039..11df776e1fc 100644
--- a/vnet/vnet/ip/ip6_neighbor.c
+++ b/vnet/vnet/ip/ip6_neighbor.c
@@ -19,6 +19,9 @@
#include <vnet/ethernet/ethernet.h>
#include <vppinfra/mhash.h>
#include <vppinfra/md5.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
#if DPDK==1
#include <vnet/devices/dpdk/dpdk.h>
@@ -38,9 +41,9 @@ typedef struct {
u8 link_layer_address[8];
u16 flags;
#define IP6_NEIGHBOR_FLAG_STATIC (1 << 0)
-#define IP6_NEIGHBOR_FLAG_GLEAN (2 << 0)
+#define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0)
u64 cpu_time_last_updated;
- u32 *adjacencies;
+ adj_index_t adj_index;
} ip6_neighbor_t;
/* advertised prefix option */
@@ -121,9 +124,9 @@ typedef struct {
u32 seed;
u64 randomizer;
int ref_count;
- u32 all_nodes_adj_index;
- u32 all_routers_adj_index;
- u32 all_mldv2_routers_adj_index;
+ adj_index_t all_nodes_adj_index;
+ adj_index_t all_routers_adj_index;
+ adj_index_t all_mldv2_routers_adj_index;
/* timing information */
#define DEF_MAX_RADV_INTERVAL 200
@@ -217,8 +220,8 @@ static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va)
if (! n)
return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags", "Link layer", "Interface");
- if (n->flags & IP6_NEIGHBOR_FLAG_GLEAN)
- flags = format(flags, "G");
+ if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC)
+ flags = format(flags, "D");
if (n->flags & IP6_NEIGHBOR_FLAG_STATIC)
flags = format(flags, "S");
@@ -330,6 +333,52 @@ static void set_unset_ip6_neighbor_rpc
}
#endif
+static void
+ip6_nd_mk_complete (ip6_neighbor_t * nbr)
+{
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = nbr->key.ip6_address,
+ },
+ };
+ ip6_main_t *im;
+ u32 fib_index;
+ /* use the FIB index recorded for the neighbour's interface */
+ im = &ip6_main;
+ fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index];
+
+ /*
+ * only once please: while the neighbour has no adj yet
+ * (ADJ_INDEX_INVALID), add-or-lock one carrying its link-layer address
+ * as the rewrite and source a /128 FIB entry (FIB_SOURCE_ADJ) for the
+ * neighbour's address via it. When an adj is already held, only its
+ * rewrite is updated.
+ */
+ if (ADJ_INDEX_INVALID == nbr->adj_index)
+ {
+ nbr->adj_index =
+ adj_nbr_add_or_lock_w_rewrite(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ &pfx.fp_addr,
+ nbr->key.sw_if_index,
+ nbr->link_layer_address);
+ ASSERT(ADJ_INDEX_INVALID != nbr->adj_index);
+
+ fib_table_entry_update_one_path(fib_index,
+ &pfx,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &pfx.fp_addr,
+ nbr->key.sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ else
+ {
+ adj_nbr_update_rewrite(nbr->adj_index,
+ nbr->link_layer_address);
+ }
+}
+
int
vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
u32 sw_if_index,
@@ -338,17 +387,12 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
uword n_bytes_link_layer_address,
int is_static)
{
- vnet_main_t * vnm = vnet_get_main();
ip6_neighbor_main_t * nm = &ip6_neighbor_main;
ip6_neighbor_key_t k;
ip6_neighbor_t * n = 0;
- ip6_main_t * im = &ip6_main;
- ip_lookup_main_t * lm = &im->lookup_main;
int make_new_nd_cache_entry=1;
uword * p;
u32 next_index;
- u32 adj_index;
- ip_adjacency_t *existing_adj;
pending_resolution_t * pr, * mc;
#if DPDK > 0
@@ -376,77 +420,26 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
make_new_nd_cache_entry = 0;
}
- /* Note: always install the route. It might have been deleted */
- ip6_add_del_route_args_t args;
- ip_adjacency_t adj;
-
- memset (&adj, 0, sizeof(adj));
- adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
- adj.explicit_fib_index = ~0;
-
- vnet_rewrite_for_sw_interface
- (vnm,
- VNET_L3_PACKET_TYPE_IP6,
- sw_if_index,
- ip6_rewrite_node.index,
- link_layer_address,
- &adj.rewrite_header,
- sizeof (adj.rewrite_data));
-
- /* result of this lookup should be next-hop adjacency */
- adj_index = ip6_fib_lookup_with_table (im, im->fib_index_by_sw_if_index[sw_if_index], a);
- existing_adj = ip_get_adjacency(lm, adj_index);
-
- if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- existing_adj->arp.next_hop.ip6.as_u64[0] == a->as_u64[0] &&
- existing_adj->arp.next_hop.ip6.as_u64[1] == a->as_u64[1])
- {
- u32 * ai;
- u32 * adjs = 0;
-
- if (n)
- adjs = vec_dup(n->adjacencies);
- else
- clib_warning ("ip6 neighbor n not set");
-
- /* Update all adj assigned to this arp entry */
- vec_foreach(ai, adjs)
- {
- int i;
- ip_adjacency_t * uadj = ip_get_adjacency(lm, *ai);
- for (i = 0; i < uadj->n_adj; i++)
- if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- uadj[i].arp.next_hop.ip6.as_u64[0] == a->as_u64[0] &&
- uadj[i].arp.next_hop.ip6.as_u64[1] == a->as_u64[1])
- ip_update_adjacency (lm, *ai + i, &adj);
- }
- vec_free(adjs);
- }
- else
- {
- /* create new adj */
- args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index];
- args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR;
- args.dst_address = a[0];
- args.dst_address_length = 128;
- args.adj_index = ~0;
- args.add_adj = &adj;
- args.n_add_adj = 1;
- ip6_add_del_route (im, &args);
- }
-
if (make_new_nd_cache_entry) {
pool_get (nm->neighbor_pool, n);
mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
/* old value */ 0);
n->key = k;
+ n->adj_index = ADJ_INDEX_INVALID;
}
/* Update time stamp and ethernet address. */
- clib_memcpy (n->link_layer_address, link_layer_address, n_bytes_link_layer_address);
+ clib_memcpy (n->link_layer_address,
+ link_layer_address,
+ n_bytes_link_layer_address);
+
n->cpu_time_last_updated = clib_cpu_time_now ();
if (is_static)
n->flags |= IP6_NEIGHBOR_FLAG_STATIC;
+ else
+ n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC;
+
+ ip6_nd_mk_complete(n);
/* Customer(s) waiting for this address to be resolved? */
p = mhash_get (&nm->pending_resolutions_by_address, a);
@@ -499,6 +492,40 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
return 0;
}
+static void
+ip6_nd_mk_incomplete (ip6_neighbor_t *nbr)
+{
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = nbr->key.ip6_address,
+ },
+ };
+ u32 fib_index;
+ ip6_main_t *im;
+
+ im = &ip6_main;
+ fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index];
+
+ /*
+ * revert the adj this ND entry sourced to incomplete
+ * (done by updating its rewrite with NULL)
+ */
+ adj_nbr_update_rewrite(nbr->adj_index,
+ NULL);
+
+ /*
+ * remove the /128 FIB entry the ND entry sourced (FIB_SOURCE_ADJ)
+ */
+ fib_table_entry_delete(fib_index, &pfx, FIB_SOURCE_ADJ);
+
+ /*
+ * Unlock the adj now that the ND entry is no longer a source,
+ * and mark the neighbour as holding no adj
+ */
+ adj_unlock(nbr->adj_index);
+ nbr->adj_index = ADJ_INDEX_INVALID;
+}
+
int
vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
u32 sw_if_index,
@@ -509,8 +536,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
ip6_neighbor_main_t * nm = &ip6_neighbor_main;
ip6_neighbor_key_t k;
ip6_neighbor_t * n;
- ip6_main_t * im = &ip6_main;
- ip6_add_del_route_args_t args;
uword * p;
int rv = 0;
@@ -537,73 +562,16 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
}
n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+
+ ip6_nd_mk_incomplete(n);
mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
pool_put (nm->neighbor_pool, n);
- args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index];
- args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_DEL
- | IP6_ROUTE_FLAG_NEIGHBOR;
- args.dst_address = a[0];
- args.dst_address_length = 128;
- args.adj_index = ~0;
- args.add_adj = NULL;
- args.n_add_adj = 0;
- ip6_add_del_route (im, &args);
out:
vlib_worker_thread_barrier_release(vm);
return rv;
}
-
-u32
-vnet_ip6_neighbor_glean_add(u32 fib_index, void * next_hop_arg)
-{
- ip6_neighbor_main_t * nm = &ip6_neighbor_main;
- ip6_main_t * im = &ip6_main;
- ip_lookup_main_t * lm = &im->lookup_main;
- ip6_address_t * next_hop = next_hop_arg;
- ip_adjacency_t add_adj, *adj;
- ip6_add_del_route_args_t args;
- ip6_neighbor_t * n;
- ip6_neighbor_key_t k;
- u32 adj_index;
-
- adj_index = ip6_fib_lookup_with_table(im, fib_index, next_hop);
- adj = ip_get_adjacency(lm, adj_index);
-
- if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP)
- return ~0;
-
- if (adj->arp.next_hop.ip6.as_u64[0] ||
- adj->arp.next_hop.ip6.as_u64[1])
- return adj_index;
-
- k.sw_if_index = adj->rewrite_header.sw_if_index;
- k.ip6_address = *next_hop;
- k.pad = 0;
- if (mhash_get (&nm->neighbor_index_by_key, &k))
- return adj_index;
-
- pool_get (nm->neighbor_pool, n);
- mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0);
- n->key = k;
- n->cpu_time_last_updated = clib_cpu_time_now ();
- n->flags = IP6_NEIGHBOR_FLAG_GLEAN;
-
- memset(&args, 0, sizeof(args));
- memcpy(&add_adj, adj, sizeof(add_adj));
- add_adj.arp.next_hop.ip6 = *next_hop; /* install neighbor /128 route */
- args.table_index_or_table_id = fib_index;
- args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR;
- args.dst_address = *next_hop;
- args.dst_address_length = 128;
- args.adj_index = ~0;
- args.add_adj = &add_adj;
- args.n_add_adj = 1;
- ip6_add_del_route (im, &args);
- return ip6_fib_lookup_with_table (im, fib_index, next_hop);
-}
-
#if DPDK > 0
static void ip6_neighbor_set_unset_rpc_callback
( ip6_neighbor_set_unset_rpc_args_t * a)
@@ -728,7 +696,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
{
vnet_main_t * vnm = vnet_get_main();
ip6_main_t * im = &ip6_main;
- ip_lookup_main_t * lm = &im->lookup_main;
uword n_packets = frame->n_vectors;
u32 * from, * to_next;
u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent;
@@ -787,17 +754,25 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
if (!ip6_sadd_unspecified && !ip6_sadd_link_local)
{
u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
- ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0);
- /* Allow all realistic-looking rewrite adjacencies to pass */
- ni0 = adj0->lookup_next_index;
- is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) &&
- (ni0 < IP6_LOOKUP_N_NEXT);
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0);
- error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
- || ! is_rewrite0)
- ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK
- : error0);
+ /* Allow all realistic-looking rewrite adjacencies to pass */
+ ni0 = adj0->lookup_next_index;
+ is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) &&
+ (ni0 < IP6_LOOKUP_N_NEXT);
+
+ error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
+ || ! is_rewrite0)
+ ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
}
o0 = (void *) (h0 + 1);
@@ -820,21 +795,28 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
if (is_solicitation && error0 == ICMP6_ERROR_NONE)
{
- /* Check that target address is one that we know about. */
- ip_interface_address_t * ia0;
- ip6_address_fib_t ip6_af0;
- void * oldheap;
-
- ip6_addr_fib_init (&ip6_af0, &h0->target_address,
- vec_elt (im->fib_index_by_sw_if_index,
- sw_if_index0));
-
- /* Gross kludge, "thank you" MJ, don't even ask */
- oldheap = clib_mem_set_heap (clib_per_cpu_mheaps[0]);
- ia0 = ip_get_interface_address (lm, &ip6_af0);
- clib_mem_set_heap (oldheap);
- error0 = ia0 == 0 ?
- ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN : error0;
+ /* Check that target address is local to this router. */
+ fib_node_index_t fei;
+ u32 fib_index;
+
+ fib_index = ip6_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ if (~0 == fib_index)
+ {
+ error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ else
+ {
+ fei = ip6_fib_table_lookup_exact_match(fib_index,
+ &h0->target_address,
+ 128);
+
+ if (FIB_NODE_INDEX_INVALID == fei ||
+ !(FIB_ENTRY_FLAG_LOCAL & fib_entry_get_flags(fei)))
+ {
+ error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ }
}
if (is_solicitation)
@@ -1052,13 +1034,20 @@ icmp6_router_solicitation(vlib_main_t * vm,
if (!is_unspecified && !is_link_local)
{
u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
- ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0);
- error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
- || (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
- && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE))
- ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK
- : error0);
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main,
+ src_adj_index0);
+
+ error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0
+ ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
}
/* check for source LL option and process */
@@ -1472,8 +1461,7 @@ icmp6_router_advertisement(vlib_main_t * vm,
/* check for MTU or prefix options or .. */
u8 * opt_hdr = (u8 *)(h0 + 1);
- while( options_len0 > 0 &&
- opt_hdr < p0->data + p0->current_data)
+ while( options_len0 > 0)
{
icmp6_neighbor_discovery_option_header_t *o0 = ( icmp6_neighbor_discovery_option_header_t *)opt_hdr;
int opt_len = o0->n_data_u64s << 3;
@@ -1606,11 +1594,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
u32 sw_if_index,
u32 is_add)
{
- ip6_main_t * im = &ip6_main;
ip6_neighbor_main_t * nm = &ip6_neighbor_main;
- ip_lookup_main_t * lm = &im->lookup_main;
ip6_radv_t * a= 0;
- u32 ri = ~0;;
+ u32 ri = ~0;
vnet_sw_interface_t * sw_if0;
ethernet_interface_t * eth_if0 = 0;
@@ -1636,9 +1622,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
ip6_mldp_group_t *m;
/* remove adjacencies */
- ip_del_adjacency (lm, a->all_nodes_adj_index);
- ip_del_adjacency (lm, a->all_routers_adj_index);
- ip_del_adjacency (lm, a->all_mldv2_routers_adj_index);
+ adj_unlock(a->all_nodes_adj_index);
+ adj_unlock(a->all_routers_adj_index);
+ adj_unlock(a->all_mldv2_routers_adj_index);
/* clean up prefix_pool */
pool_foreach (p, a->adv_prefixes_pool, ({
@@ -1672,6 +1658,7 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
pool_put (nm->if_radv_pool, a);
nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0;
ri = ~0;
+ ip6_sw_interface_enable_disable(sw_if_index, 0);
}
}
else
@@ -1680,6 +1667,7 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
{
vnet_hw_interface_t * hw_if0;
+ ip6_sw_interface_enable_disable(sw_if_index, 1);
hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
pool_get (nm->if_radv_pool, a);
@@ -1702,10 +1690,11 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS;
a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS;
a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME;
- a->seed = (u32) (clib_cpu_time_now() & 0xFFFFFFFF);
+ a->seed = random_default_seed();
/* for generating random interface ids */
- a->randomizer = random_u64 (&a->seed);
+ a->randomizer = 0x1119194911191949;
+ a->randomizer = random_u64 ((u32 *)&a->randomizer);
a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS ;
a->initial_adverts_sent = a->initial_adverts_count-1;
@@ -1727,66 +1716,34 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
mhash_init (&a->address_to_mldp_index, sizeof (uword), sizeof (ip6_address_t));
{
- ip_adjacency_t *adj;
u8 link_layer_address[6] =
{0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_hosts};
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a->all_nodes_adj_index);
-
- adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
- adj->if_address_index = ~0;
-
- vnet_rewrite_for_sw_interface
- (vnm,
- VNET_L3_PACKET_TYPE_IP6,
- sw_if_index,
- ip6_rewrite_node.index,
- link_layer_address,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
+ a->all_nodes_adj_index = adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ sw_if_index,
+ link_layer_address);
}
{
- ip_adjacency_t *adj;
u8 link_layer_address[6] =
{0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_routers};
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a->all_routers_adj_index);
-
- adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
- adj->if_address_index = ~0;
-
- vnet_rewrite_for_sw_interface
- (vnm,
- VNET_L3_PACKET_TYPE_IP6,
- sw_if_index,
- ip6_rewrite_node.index,
- link_layer_address,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
+ a->all_routers_adj_index = adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ sw_if_index,
+ link_layer_address);
}
{
- ip_adjacency_t *adj;
u8 link_layer_address[6] =
{0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_mldv2_routers};
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &a->all_mldv2_routers_adj_index);
-
- adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
- adj->if_address_index = ~0;
-
- vnet_rewrite_for_sw_interface
- (vnm,
- VNET_L3_PACKET_TYPE_IP6,
- sw_if_index,
- ip6_rewrite_node.index,
- link_layer_address,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
+ a->all_mldv2_routers_adj_index =
+ adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6,
+ FIB_LINK_IP6,
+ sw_if_index,
+ link_layer_address);
}
/* add multicast groups we will always be reporting */
@@ -2969,7 +2926,8 @@ enable_ip6_interface(vlib_main_t * vm,
/* essentially "enables" ipv6 on this interface */
error = ip6_add_del_interface_address (vm, sw_if_index,
- &link_local_address, 64 /* address width */,
+ &link_local_address,
+ 128 /* address width */,
0 /* is_del */);
if(error)
@@ -3255,87 +3213,10 @@ clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit)
return 0;
}
-
-static void
-ip6_neighbor_entry_del_adj(ip6_neighbor_t *n, u32 adj_index)
-{
- int done = 0;
- int i;
- while (!done)
- {
- vec_foreach_index(i, n->adjacencies)
- if (vec_elt(n->adjacencies, i) == adj_index)
- {
- vec_del1(n->adjacencies, i);
- continue;
- }
- done = 1;
- }
-}
-
-static void
-ip6_neighbor_entry_add_adj(ip6_neighbor_t *n, u32 adj_index)
-{
- int i;
- vec_foreach_index(i, n->adjacencies)
- if (vec_elt(n->adjacencies, i) == adj_index)
- return;
- vec_add1(n->adjacencies, adj_index);
-}
-
-static void
-ip6_neighbor_add_del_adj_cb (struct ip_lookup_main_t * lm,
- u32 adj_index,
- ip_adjacency_t * adj,
- u32 is_del)
-{
- ip6_neighbor_main_t * nm = &ip6_neighbor_main;
- ip6_neighbor_key_t k;
- ip6_neighbor_t *n = 0;
- uword * p;
- u32 ai;
-
- for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++)
- {
- adj = ip_get_adjacency (lm, ai);
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1]))
- {
- k.sw_if_index = adj->rewrite_header.sw_if_index;
- k.ip6_address.as_u64[0] = adj->arp.next_hop.ip6.as_u64[0];
- k.ip6_address.as_u64[1] = adj->arp.next_hop.ip6.as_u64[1];
- k.pad = 0;
- p = mhash_get (&nm->neighbor_index_by_key, &k);
- if (p)
- n = pool_elt_at_index (nm->neighbor_pool, p[0]);
- }
- else
- continue;
-
- if (is_del)
- {
- if (!n)
- clib_warning("Adjacency contains unknown ND next hop %U (del)",
- format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP6);
- else
- ip6_neighbor_entry_del_adj(n, adj->heap_handle);
- }
- else /* add */
- {
- if (!n)
- clib_warning("Adjacency contains unknown ND next hop %U (add)",
- format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP6);
- else
- ip6_neighbor_entry_add_adj(n, adj->heap_handle);
- }
- }
-}
-
static clib_error_t * ip6_neighbor_init (vlib_main_t * vm)
{
ip6_neighbor_main_t * nm = &ip6_neighbor_main;
ip6_main_t * im = &ip6_main;
- ip_lookup_main_t * lm = &im->lookup_main;
mhash_init (&nm->neighbor_index_by_key,
/* value size */ sizeof (uword),
@@ -3375,8 +3256,6 @@ static clib_error_t * ip6_neighbor_init (vlib_main_t * vm)
(im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */);
#endif
- ip_register_add_del_adjacency_callback(lm, ip6_neighbor_add_del_adj_cb);
-
return 0;
}
@@ -3593,5 +3472,3 @@ int vnet_ip6_nd_term (vlib_main_t * vm,
return 0;
}
-
-
diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h
index c83e5764803..29fa4a4e128 100644
--- a/vnet/vnet/ip/ip6_packet.h
+++ b/vnet/vnet/ip/ip6_packet.h
@@ -70,6 +70,8 @@ typedef CLIB_PACKED (union {
#define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0)
#define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0])
#define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0)
+#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1)))
+#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0))
always_inline void
ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address,
@@ -303,6 +305,22 @@ ip6_next_header (ip6_header_t * i)
{ return (void *) (i + 1); }
always_inline void
+ip6_copy_header (ip6_header_t * dst,
+ const ip6_header_t *src)
+{
+ dst->ip_version_traffic_class_and_flow_label =
+ src->ip_version_traffic_class_and_flow_label;
+ dst->payload_length = src->payload_length;
+ dst->protocol = src->protocol;
+ dst->hop_limit = src->hop_limit;
+ /* The header is copied member by member; each address below is copied
+ * as two uword elements.
+ * NOTE(review): two uwords span the full 16-byte address only where
+ * uword is 64 bits wide -- confirm for any 32-bit build. */
+ dst->src_address.as_uword[0] = src->src_address.as_uword[0];
+ dst->src_address.as_uword[1] = src->src_address.as_uword[1];
+ dst->dst_address.as_uword[0] = src->dst_address.as_uword[0];
+ dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
+}
+
+always_inline void
ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
{
{
diff --git a/vnet/vnet/ip/ip_feature_registration.c b/vnet/vnet/ip/ip_feature_registration.c
index 9505a09e20d..b96f81bd58d 100644
--- a/vnet/vnet/ip/ip_feature_registration.c
+++ b/vnet/vnet/ip/ip_feature_registration.c
@@ -15,6 +15,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
/** \file
@@ -131,7 +132,7 @@ ip_feature_init_cast (vlib_main_t * vm,
vnet_config_main_t * vcm,
char **feature_start_nodes,
int num_feature_start_nodes,
- vnet_cast_t cast, int is_ip4)
+ vnet_cast_t cast, vnet_l3_packet_type_t proto)
{
uword *index_by_name;
uword *reg_by_index;
@@ -155,33 +156,43 @@ ip_feature_init_cast (vlib_main_t * vm,
u8 **keys_to_delete = 0;
ip4_main_t *im4 = &ip4_main;
ip6_main_t *im6 = &ip6_main;
+ mpls_main_t *mm = &mpls_main;
index_by_name = hash_create_string (0, sizeof (uword));
reg_by_index = hash_create (0, sizeof (uword));
if (cast == VNET_IP_RX_UNICAST_FEAT)
{
- if (is_ip4)
+ if (proto == VNET_L3_PACKET_TYPE_IP4)
first_reg = im4->next_uc_feature;
- else
+ else if (proto == VNET_L3_PACKET_TYPE_IP6)
first_reg = im6->next_uc_feature;
+ else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST)
+ first_reg = mm->next_feature;
+ else
+ return clib_error_return (0,
+ "protocol %d cast %d unsupport for features",
+ proto, cast);
}
else if (cast == VNET_IP_RX_MULTICAST_FEAT)
{
- if (is_ip4)
+ if (proto == VNET_L3_PACKET_TYPE_IP4)
first_reg = im4->next_mc_feature;
- else
+ else if (proto == VNET_L3_PACKET_TYPE_IP6)
first_reg = im6->next_mc_feature;
+ else
+ return clib_error_return (0,
+ "protocol %d cast %d unsupport for features",
+ proto, cast);
}
else if (cast == VNET_IP_TX_FEAT)
{
- if (is_ip4)
+ if (proto == VNET_L3_PACKET_TYPE_IP4)
first_reg = im4->next_tx_feature;
else
first_reg = im6->next_tx_feature;
}
-
this_reg = first_reg;
/* pass 1, collect feature node names, construct a before b pairs */
@@ -281,8 +292,7 @@ again:
/* see if we got a partial order... */
if (vec_len (result) != n_features)
return clib_error_return
- (0, "ip%s_feature_init_cast (cast=%d), no partial order!",
- is_ip4 ? "4" : "6", cast);
+ (0, "%d feature_init_cast (cast=%d), no partial order!", proto, cast);
/*
* We win.
@@ -308,10 +318,12 @@ again:
feature_nodes, vec_len (feature_nodes));
/* Save a copy for show command */
- if (is_ip4)
+ if (proto == VNET_L3_PACKET_TYPE_IP4)
im4->feature_nodes[cast] = feature_nodes;
- else
+ else if (proto == VNET_L3_PACKET_TYPE_IP6)
im6->feature_nodes[cast] = feature_nodes;
+ else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST)
+ mm->feature_nodes = feature_nodes;
/* Finally, clean up all the shit we allocated */
/* *INDENT-OFF* */
diff --git a/vnet/vnet/ip/ip_feature_registration.h b/vnet/vnet/ip/ip_feature_registration.h
index 2d9a15bcf2c..95ee78ad8fe 100644
--- a/vnet/vnet/ip/ip_feature_registration.h
+++ b/vnet/vnet/ip/ip_feature_registration.h
@@ -39,7 +39,8 @@ clib_error_t *ip_feature_init_cast (vlib_main_t * vm,
vnet_config_main_t * vcm,
char **feature_start_nodes,
int num_feature_start_nodes,
- vnet_cast_t cast, int is_ip4);
+ vnet_cast_t cast,
+ vnet_l3_packet_type_t proto);
#endif /* included_ip_feature_registration_h */
diff --git a/vnet/vnet/ip/ip_source_and_port_range_check.h b/vnet/vnet/ip/ip_source_and_port_range_check.h
index 5b49aabd849..fefe5ff1fd9 100644
--- a/vnet/vnet/ip/ip_source_and_port_range_check.h
+++ b/vnet/vnet/ip/ip_source_and_port_range_check.h
@@ -19,9 +19,6 @@
typedef struct
{
- u32 ranges_per_adjacency;
- u32 special_adjacency_format_function_index;
-
/* convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
@@ -60,6 +57,69 @@ typedef struct
u16x8vec_t hi;
} protocol_port_range_t;
+/**
+ * @brief The number of supported ranges per-data path object.
+ * If more ranges are required, bump this number.
+ */
+#define N_PORT_RANGES_PER_DPO 64
+#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2)
+#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK)
+
+/**
+ * @brief
+ * The object that is in the data-path to perform the check.
+ *
+ * Some trade-offs here; memory vs performance.
+ *
+ * performance:
+ * the principal factor is d-cache line misses/hits.
+ * so we want the data layout to minimise the d-cache misses. This
+ * means not following dependent reads. i.e. not doing
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t *ranges; // vector of ranges.
+ * }
+ *
+ * so to read ranges[0] we would first d-cache miss on the address
+ * of the object of type B, for which we would need to wait before we
+ * can get the address of B->ranges.
+ * So this layout is better:
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t ranges[N];
+ * }
+ *
+ * memory:
+ * the latter layout above is more memory hungry. And N needs to be:
+ * 1 - sized for the maximum required
+ * 2 - fixed, so that objects of type B can be pool allocated and so
+ * 'get'-able using an index.
+ * An option over fixed might be to allocate a contiguous chunk from
+ * the pool (like we used to do for multi-path adjs).
+ */
+typedef struct protocol_port_range_dpo_t_
+{
+ /**
+ * The number of blocks from the 'blocks' array below
+ * that have ranges configured. We keep this count so that in the data-path
+ * we can limit the loop to be only over the blocks we need
+ */
+ u16 n_used_blocks;
+
+ /**
+ * The total number of free ranges from all blocks.
+ * Used to prevent overrun of the ranges available.
+ */
+ u16 n_free_ranges;
+
+ /**
+ * the fixed size array of range blocks (N_BLOCKS_PER_DPO entries)
+ */
+ protocol_port_range_t blocks[N_BLOCKS_PER_DPO];
+} protocol_port_range_dpo_t;
+
int ip4_source_and_port_range_check_add_del (ip4_address_t * address,
u32 length,
u32 vrf_id,
diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c
index 47138071639..a695ef765a0 100644
--- a/vnet/vnet/ip/lookup.c
+++ b/vnet/vnet/ip/lookup.c
@@ -37,728 +37,16 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <vppinfra/math.h> /* for fabs */
#include <vnet/ip/ip.h>
-#include <vnet/ip/adj_alloc.h>
-
-static void
-ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index);
-
-always_inline void
-ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj)
-{
- if (CLIB_DEBUG > 0)
- {
- u32 save_handle = adj->heap_handle;;
- u32 save_n_adj = adj->n_adj;
-
- memset (adj, 0xfe, n_adj * sizeof (adj[0]));
-
- adj->heap_handle = save_handle;
- adj->n_adj = save_n_adj;
- }
-}
-
-static void
-ip_share_adjacency(ip_lookup_main_t * lm, u32 adj_index)
-{
- ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index);
- uword * p;
- u32 old_ai;
- uword signature = vnet_ip_adjacency_signature (adj);
-
- p = hash_get (lm->adj_index_by_signature, signature);
- /* Hash collision? */
- if (p)
- {
- /* Save the adj index, p[0] will be toast after the unset! */
- old_ai = p[0];
- hash_unset (lm->adj_index_by_signature, signature);
- hash_set (lm->adj_index_by_signature, signature, adj_index);
- adj->next_adj_with_signature = old_ai;
- }
- else
- {
- adj->next_adj_with_signature = 0;
- hash_set (lm->adj_index_by_signature, signature, adj_index);
- }
-}
-
-static void
-ip_unshare_adjacency(ip_lookup_main_t * lm, u32 adj_index)
-{
- ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index);
- uword signature;
- uword * p;
- u32 this_ai;
- ip_adjacency_t * this_adj, * prev_adj = 0;
-
- signature = vnet_ip_adjacency_signature (adj);
- p = hash_get (lm->adj_index_by_signature, signature);
- if (p == 0)
- return;
-
- this_ai = p[0];
- /* At the top of the signature chain (likely)? */
- if (this_ai == adj_index)
- {
- if (adj->next_adj_with_signature == 0)
- {
- hash_unset (lm->adj_index_by_signature, signature);
- return;
- }
- else
- {
- this_adj = ip_get_adjacency (lm, adj->next_adj_with_signature);
- hash_unset (lm->adj_index_by_signature, signature);
- hash_set (lm->adj_index_by_signature, signature,
- this_adj->heap_handle);
- }
- }
- else /* walk signature chain */
- {
- this_adj = ip_get_adjacency (lm, this_ai);
- while (this_adj != adj)
- {
- prev_adj = this_adj;
- this_adj = ip_get_adjacency
- (lm, this_adj->next_adj_with_signature);
- /*
- * This can happen when creating the first multipath adj of a set
- * We end up looking at the miss adjacency (handle==0).
- */
- if (this_adj->heap_handle == 0)
- return;
- }
- prev_adj->next_adj_with_signature = this_adj->next_adj_with_signature;
- }
-}
-
-int ip_register_adjacency(vlib_main_t *vm,
- u8 is_ip4,
- ip_adj_register_t *reg)
-{
- ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main;
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) ((is_ip4)?"ip4-lookup":"ip6-lookup"));
- vlib_node_t *next_node = vlib_get_node_by_name(vm, (u8 *) reg->node_name);
- *reg->next_index = vlib_node_add_next (vm, node->index, next_node->index);
- vec_validate(lm->registered_adjacencies, *reg->next_index);
- lm->registered_adjacencies[*reg->next_index] = *reg;
- return 0;
-}
-
-int ip_init_registered_adjacencies(u8 is_ip4)
-{
- vlib_main_t *vm = vlib_get_main();
- ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main;
- ip_adj_register_t *reg = lm->registered_adjacencies;
- lm->registered_adjacencies = 0; //Init vector
- int rv;
- while (reg) {
- if((rv = ip_register_adjacency(vm, is_ip4, reg)))
- return rv;
- reg = reg->next;
- }
- return 0;
-}
-
-/* Create new block of given number of contiguous adjacencies. */
-ip_adjacency_t *
-ip_add_adjacency (ip_lookup_main_t * lm,
- ip_adjacency_t * copy_adj,
- u32 n_adj,
- u32 * adj_index_return)
-{
- ip_adjacency_t * adj;
- u32 ai, i, handle;
-
- /* See if we know enough to attempt to share an existing adjacency */
- if (copy_adj && n_adj == 1)
- {
- uword signature;
- uword * p;
-
- switch (copy_adj->lookup_next_index)
- {
- case IP_LOOKUP_NEXT_DROP:
- if (lm->drop_adj_index)
- {
- adj = ip_get_adjacency (lm, lm->drop_adj_index);
- *adj_index_return = lm->drop_adj_index;
- return (adj);
- }
- break;
-
- case IP_LOOKUP_NEXT_LOCAL:
- if (lm->local_adj_index)
- {
- adj = ip_get_adjacency (lm, lm->local_adj_index);
- *adj_index_return = lm->local_adj_index;
- return (adj);
- }
- default:
- break;
- }
-
- signature = vnet_ip_adjacency_signature (copy_adj);
- p = hash_get (lm->adj_index_by_signature, signature);
- if (p)
- {
- adj = vec_elt_at_index (lm->adjacency_heap, p[0]);
- while (1)
- {
- if (vnet_ip_adjacency_share_compare (adj, copy_adj))
- {
- adj->share_count++;
- *adj_index_return = p[0];
- return adj;
- }
- if (adj->next_adj_with_signature == 0)
- break;
- adj = vec_elt_at_index (lm->adjacency_heap,
- adj->next_adj_with_signature);
- }
- }
- }
-
- lm->adjacency_heap = aa_alloc (lm->adjacency_heap, &adj, n_adj);
- handle = ai = adj->heap_handle;
-
- ip_poison_adjacencies (adj, n_adj);
-
- /* Validate adjacency counters. */
- vlib_validate_combined_counter (&lm->adjacency_counters, ai + n_adj - 1);
-
- for (i = 0; i < n_adj; i++)
- {
- /* Make sure certain fields are always initialized. */
- adj[i].rewrite_header.sw_if_index = ~0;
- adj[i].explicit_fib_index = ~0;
- adj[i].mcast_group_index = ~0;
- adj[i].classify.table_index = ~0;
- adj[i].saved_lookup_next_index = 0;
- adj[i].special_adjacency_format_function_index = 0;
-
- if (copy_adj)
- adj[i] = copy_adj[i];
-
- adj[i].heap_handle = handle;
- adj[i].n_adj = n_adj;
- adj[i].share_count = 0;
- adj[i].next_adj_with_signature = 0;
-
- /* Zero possibly stale counters for re-used adjacencies. */
- vlib_zero_combined_counter (&lm->adjacency_counters, ai + i);
- }
-
- /* Set up to share the adj later */
- if (copy_adj && n_adj == 1)
- ip_share_adjacency(lm, ai);
-
- *adj_index_return = ai;
- return adj;
-}
-
-void
-ip_update_adjacency (ip_lookup_main_t * lm,
- u32 adj_index,
- ip_adjacency_t * copy_adj)
-{
- ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index);
-
- ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1);
- ip_unshare_adjacency(lm, adj_index);
-
- /* temporary redirect to drop while updating rewrite data */
- adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
- CLIB_MEMORY_BARRIER();
-
- clib_memcpy (&adj->rewrite_header, &copy_adj->rewrite_header,
- VLIB_BUFFER_PRE_DATA_SIZE);
- adj->lookup_next_index = copy_adj->lookup_next_index;
- ip_share_adjacency(lm, adj_index);
- ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
-}
-
-static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency)
-{
- ip_adjacency_t * adj;
-
- ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1);
-
- adj = ip_get_adjacency (lm, adj_index);
-
- /* Special-case miss, local, drop adjs */
- if (adj_index < 3)
- return;
-
- if (adj->n_adj == 1)
- {
- if (adj->share_count > 0)
- {
- adj->share_count --;
- return;
- }
-
- ip_unshare_adjacency(lm, adj_index);
- }
-
- if (delete_multipath_adjacency)
- ip_multipath_del_adjacency (lm, adj_index);
-
- ip_poison_adjacencies (adj, adj->n_adj);
-
- aa_free (lm->adjacency_heap, adj);
-}
-
-void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index)
-{ ip_del_adjacency2 (lm, adj_index, /* delete_multipath_adjacency */ 1); }
-
-static int
-next_hop_sort_by_weight (ip_multipath_next_hop_t * n1,
- ip_multipath_next_hop_t * n2)
-{
- int cmp = (int) n1->weight - (int) n2->weight;
- return (cmp == 0
- ? (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index
- : (cmp > 0 ? +1 : -1));
-}
-
-/* Given next hop vector is over-written with normalized one with sorted weights and
- with weights corresponding to the number of adjacencies for each next hop.
- Returns number of adjacencies in block. */
-static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm,
- ip_multipath_next_hop_t * raw_next_hops,
- ip_multipath_next_hop_t ** normalized_next_hops)
-{
- ip_multipath_next_hop_t * nhs;
- uword n_nhs, n_adj, n_adj_left, i;
- f64 sum_weight, norm, error;
-
- n_nhs = vec_len (raw_next_hops);
- ASSERT (n_nhs > 0);
- if (n_nhs == 0)
- return 0;
-
- /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
- nhs = *normalized_next_hops;
- vec_validate (nhs, 2*n_nhs - 1);
-
- /* Fast path: 1 next hop in block. */
- n_adj = n_nhs;
- if (n_nhs == 1)
- {
- nhs[0] = raw_next_hops[0];
- nhs[0].weight = 1;
- _vec_len (nhs) = 1;
- goto done;
- }
-
- else if (n_nhs == 2)
- {
- int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
-
- /* Fast sort. */
- nhs[0] = raw_next_hops[cmp];
- nhs[1] = raw_next_hops[cmp ^ 1];
-
- /* Fast path: equal cost multipath with 2 next hops. */
- if (nhs[0].weight == nhs[1].weight)
- {
- nhs[0].weight = nhs[1].weight = 1;
- _vec_len (nhs) = 2;
- goto done;
- }
- }
- else
- {
- clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
- qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
- }
-
- /* Find total weight to normalize weights. */
- sum_weight = 0;
- for (i = 0; i < n_nhs; i++)
- sum_weight += nhs[i].weight;
-
- /* In the unlikely case that all weights are given as 0, set them all to 1. */
- if (sum_weight == 0)
- {
- for (i = 0; i < n_nhs; i++)
- nhs[i].weight = 1;
- sum_weight = n_nhs;
- }
-
- /* Save copies of all next hop weights to avoid being overwritten in loop below. */
- for (i = 0; i < n_nhs; i++)
- nhs[n_nhs + i].weight = nhs[i].weight;
-
- /* Try larger and larger power of 2 sized adjacency blocks until we
- find one where traffic flows to within 1% of specified weights. */
- for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
- {
- error = 0;
-
- norm = n_adj / sum_weight;
- n_adj_left = n_adj;
- for (i = 0; i < n_nhs; i++)
- {
- f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */
- word n = flt_round_nearest (nf);
-
- n = n > n_adj_left ? n_adj_left : n;
- n_adj_left -= n;
- error += fabs (nf - n);
- nhs[i].weight = n;
- }
-
- nhs[0].weight += n_adj_left;
-
- /* Less than 5% average error per adjacency with this size adjacency block? */
- if (error <= lm->multipath_next_hop_error_tolerance*n_adj)
- {
- /* Truncate any next hops with zero weight. */
- _vec_len (nhs) = i;
- break;
- }
- }
-
- done:
- /* Save vector for next call. */
- *normalized_next_hops = nhs;
- return n_adj;
-}
-
-always_inline uword
-ip_next_hop_hash_key_from_handle (uword handle)
-{ return 1 + 2*handle; }
-
-always_inline uword
-ip_next_hop_hash_key_is_heap_handle (uword k)
-{ return k & 1; }
-
-always_inline uword
-ip_next_hop_hash_key_get_heap_handle (uword k)
-{
- ASSERT (ip_next_hop_hash_key_is_heap_handle (k));
- return k / 2;
-}
-
-static u32
-ip_multipath_adjacency_get (ip_lookup_main_t * lm,
- ip_multipath_next_hop_t * raw_next_hops,
- uword create_if_non_existent)
-{
- uword * p;
- u32 i, j, n_adj, adj_index, adj_heap_handle;
- ip_adjacency_t * adj, * copy_adj;
- ip_multipath_next_hop_t * nh, * nhs;
- ip_multipath_adjacency_t * madj;
-
- n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized);
- nhs = lm->next_hop_hash_lookup_key_normalized;
-
- /* Basic sanity. */
- ASSERT (n_adj >= vec_len (raw_next_hops));
-
- /* Use normalized next hops to see if we've seen a block equivalent to this one before. */
- p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs);
- if (p)
- return p[0];
-
- if (! create_if_non_existent)
- return 0;
-
- adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index);
- adj_heap_handle = adj[0].heap_handle;
-
- /* Fill in adjacencies in block based on corresponding next hop adjacencies. */
- i = 0;
- vec_foreach (nh, nhs)
- {
- copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index);
- for (j = 0; j < nh->weight; j++)
- {
- adj[i] = copy_adj[0];
- adj[i].heap_handle = adj_heap_handle;
- adj[i].n_adj = n_adj;
- i++;
- }
- }
-
- /* All adjacencies should have been initialized. */
- ASSERT (i == n_adj);
-
- vec_validate (lm->multipath_adjacencies, adj_heap_handle);
- madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle);
-
- madj->adj_index = adj_index;
- madj->n_adj_in_block = n_adj;
- madj->reference_count = 0; /* caller will set to one. */
-
- madj->normalized_next_hops.count = vec_len (nhs);
- madj->normalized_next_hops.heap_offset
- = heap_alloc (lm->next_hop_heap, vec_len (nhs),
- madj->normalized_next_hops.heap_handle);
- clib_memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset,
- nhs, vec_bytes (nhs));
-
- hash_set (lm->multipath_adjacency_by_next_hops,
- ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle),
- madj - lm->multipath_adjacencies);
-
- madj->unnormalized_next_hops.count = vec_len (raw_next_hops);
- madj->unnormalized_next_hops.heap_offset
- = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops),
- madj->unnormalized_next_hops.heap_handle);
- clib_memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset,
- raw_next_hops, vec_bytes (raw_next_hops));
-
- ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
-
- return adj_heap_handle;
-}
-
-/* Returns 0 for next hop not found. */
-u32
-ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
- u32 is_del,
- u32 old_mp_adj_index,
- u32 next_hop_adj_index,
- u32 next_hop_weight,
- u32 * new_mp_adj_index)
-{
- ip_multipath_adjacency_t * mp_old, * mp_new;
- ip_multipath_next_hop_t * nh, * nhs, * hash_nhs;
- u32 n_nhs, i_nh;
-
- mp_new = mp_old = 0;
- n_nhs = 0;
- i_nh = 0;
- nhs = 0;
-
- /* If old adj is not multipath, we need to "convert" it by calling this
- * function recursively */
- if (old_mp_adj_index != ~0 && !ip_adjacency_is_multipath(lm, old_mp_adj_index))
- {
- ip_multipath_adjacency_add_del_next_hop(lm, /* is_del */ 0,
- /* old_mp_adj_index */ ~0,
- /* nh_adj_index */ old_mp_adj_index,
- /* weight * */ 1,
- &old_mp_adj_index);
- }
-
- /* If old multipath adjacency is valid, find requested next hop. */
- if (old_mp_adj_index < vec_len (lm->multipath_adjacencies)
- && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0)
- {
- mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
-
- nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset);
- n_nhs = mp_old->unnormalized_next_hops.count;
-
- /* Linear search: ok since n_next_hops is small. */
- for (i_nh = 0; i_nh < n_nhs; i_nh++)
- if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index)
- break;
-
- /* Given next hop not found. */
- if (i_nh >= n_nhs && is_del)
- return 0;
- }
-
- hash_nhs = lm->next_hop_hash_lookup_key;
- if (hash_nhs)
- _vec_len (hash_nhs) = 0;
-
- if (is_del)
- {
- if (n_nhs > 1)
- {
- /* Prepare lookup key for multipath with target next hop deleted. */
- if (i_nh > 0)
- vec_add (hash_nhs, nhs + 0, i_nh);
- if (i_nh + 1 < n_nhs)
- vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1));
- }
- }
- else /* it's an add. */
- {
- /* If next hop is already there with the same weight, we have nothing to do. */
- if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight)
- {
- new_mp_adj_index[0] = ~0;
- goto done;
- }
-
- /* Copy old next hops to lookup key vector. */
- if (n_nhs > 0)
- vec_add (hash_nhs, nhs, n_nhs);
-
- if (i_nh < n_nhs)
- {
- /* Change weight of existing next hop. */
- nh = vec_elt_at_index (hash_nhs, i_nh);
- }
- else
- {
- /* Add a new next hop. */
- vec_add2 (hash_nhs, nh, 1);
- nh->next_hop_adj_index = next_hop_adj_index;
- }
-
- /* Set weight for added or old next hop. */
- nh->weight = next_hop_weight;
- }
-
- if (vec_len (hash_nhs) > 0)
- {
- u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs,
- /* create_if_non_existent */ 1);
- if (tmp != ~0)
- mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp);
-
- /* Fetch again since pool may have moved. */
- if (mp_old)
- mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
- }
-
- new_mp_adj_index[0] = mp_new ? mp_new - lm->multipath_adjacencies : ~0;
-
- if (mp_new != mp_old)
- {
- if (mp_old)
- {
- ASSERT (mp_old->reference_count > 0);
- mp_old->reference_count -= 1;
- }
- if (mp_new)
- mp_new->reference_count += 1;
- }
-
- if (mp_old && mp_old->reference_count == 0)
- ip_multipath_adjacency_free (lm, mp_old);
-
- done:
- /* Save key vector next call. */
- lm->next_hop_hash_lookup_key = hash_nhs;
-
- return 1;
-}
-
-static void
-ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index)
-{
- ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index);
- ip_multipath_adjacency_t * madj, * new_madj;
- ip_multipath_next_hop_t * nhs, * hash_nhs;
- u32 i, n_nhs, madj_index, new_madj_index;
-
- if (adj->heap_handle >= vec_len (lm->multipath_adjacencies))
- return;
-
- vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1);
-
- for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++)
- {
- madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index);
- if (madj->n_adj_in_block == 0)
- continue;
-
- nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset);
- n_nhs = madj->unnormalized_next_hops.count;
- for (i = 0; i < n_nhs; i++)
- if (nhs[i].next_hop_adj_index == del_adj_index)
- break;
-
- /* del_adj_index not found in unnormalized_next_hops? We're done. */
- if (i >= n_nhs)
- continue;
-
- new_madj = 0;
- if (n_nhs > 1)
- {
- hash_nhs = lm->next_hop_hash_lookup_key;
- if (hash_nhs)
- _vec_len (hash_nhs) = 0;
- if (i > 0)
- vec_add (hash_nhs, nhs + 0, i);
- if (i + 1 < n_nhs)
- vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1));
-
- new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1);
-
- lm->next_hop_hash_lookup_key = hash_nhs;
-
- if (new_madj_index == madj_index)
- continue;
-
- new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index);
- }
-
- lm->adjacency_remap_table[madj->adj_index] = new_madj ? 1 + new_madj->adj_index : ~0;
- lm->n_adjacency_remaps += 1;
- ip_multipath_adjacency_free (lm, madj);
- }
-}
-
-void
-ip_multipath_adjacency_free (ip_lookup_main_t * lm,
- ip_multipath_adjacency_t * a)
-{
- hash_unset (lm->multipath_adjacency_by_next_hops,
- ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle));
- heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle);
- heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle);
-
- ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0);
- memset (a, 0, sizeof (a[0]));
-}
-
-always_inline ip_multipath_next_hop_t *
-ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k,
- uword * n_next_hops)
-{
- ip_multipath_next_hop_t * nhs;
- uword n_nhs;
- if (ip_next_hop_hash_key_is_heap_handle (k))
- {
- uword handle = ip_next_hop_hash_key_get_heap_handle (k);
- nhs = heap_elt_with_handle (lm->next_hop_heap, handle);
- n_nhs = heap_len (lm->next_hop_heap, handle);
- }
- else
- {
- nhs = uword_to_pointer (k, ip_multipath_next_hop_t *);
- n_nhs = vec_len (nhs);
- }
- *n_next_hops = n_nhs;
- return nhs;
-}
-
-static uword
-ip_next_hop_hash_key_sum (hash_t * h, uword key0)
-{
- ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
- ip_multipath_next_hop_t * k0;
- uword n0;
-
- k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
- return hash_memory (k0, n0 * sizeof (k0[0]), /* seed */ n0);
-}
-
-static uword
-ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1)
-{
- ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
- ip_multipath_next_hop_t * k0, * k1;
- uword n0, n1;
-
- k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
- k1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1);
-
- return n0 == n1 && ! memcmp (k0, k1, n0 * sizeof (k0[0]));
-}
+#include <vnet/adj/adj_alloc.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
clib_error_t *
ip_interface_address_add_del (ip_lookup_main_t * lm,
@@ -869,52 +157,16 @@ ip_interface_address_add_del (ip_lookup_main_t * lm,
void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
{
- ip_adjacency_t * adj;
- ip_adjacency_t template_adj;
-
/* ensure that adjacency is cacheline aligned and sized */
ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0);
ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == CLIB_CACHE_LINE_BYTES);
- lm->adj_index_by_signature = hash_create (0, sizeof (uword));
- memset (&template_adj, 0, sizeof (template_adj));
-
/* Preallocate three "special" adjacencies */
- lm->adjacency_heap = aa_bootstrap (0, 3 /* n=1 free items */);
-
- /* Hand-craft special miss adjacency to use when nothing matches in the
- routing table. Same for drop adjacency. */
- adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1,
- &lm->miss_adj_index);
- adj->lookup_next_index = IP_LOOKUP_NEXT_MISS;
- ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX);
-
- /* Make the "drop" adj sharable */
- template_adj.lookup_next_index = IP_LOOKUP_NEXT_DROP;
- adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1,
- &lm->drop_adj_index);
-
- /* Make the "local" adj sharable */
- template_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
- template_adj.if_address_index = ~0;
- adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1,
- &lm->local_adj_index);
+ lm->adjacency_heap = adj_heap;
if (! lm->fib_result_n_bytes)
lm->fib_result_n_bytes = sizeof (uword);
- lm->multipath_adjacency_by_next_hops
- = hash_create2 (/* elts */ 0,
- /* user */ pointer_to_uword (lm),
- /* value_bytes */ sizeof (uword),
- ip_next_hop_hash_key_sum,
- ip_next_hop_hash_key_equal,
- /* format pair/arg */
- 0, 0);
-
- /* 1% max error tolerance for multipath. */
- lm->multipath_next_hop_error_tolerance = .01;
-
lm->is_ip6 = is_ip6;
if (is_ip6)
{
@@ -944,14 +196,12 @@ void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP;
lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_BUILTIN_PROTOCOL_ICMP;
}
-
- ip_init_registered_adjacencies(!is_ip6);
}
u8 * format_ip_flow_hash_config (u8 * s, va_list * args)
{
- u32 flow_hash_config = va_arg (*args, u32);
-
+ flow_hash_config_t flow_hash_config = va_arg (*args, u32);
+
#define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n);
foreach_flow_hash_bit;
#undef _
@@ -961,31 +211,20 @@ u8 * format_ip_flow_hash_config (u8 * s, va_list * args)
u8 * format_ip_lookup_next (u8 * s, va_list * args)
{
- ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
- ip_lookup_next_t n = va_arg (*args, u32);
- ip_adj_register_t *reg;
-
+ ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t);
char * t = 0;
switch (n)
{
default:
- vec_validate(lm->registered_adjacencies, n);
- reg = vec_elt_at_index(lm->registered_adjacencies, n);
- if (reg->node_name) {
- s = format (s, "%s:", reg->node_name);
- }
+ s = format (s, "unknown %d", n);
return s;
- case IP_LOOKUP_NEXT_MISS: t = "miss"; break;
case IP_LOOKUP_NEXT_DROP: t = "drop"; break;
case IP_LOOKUP_NEXT_PUNT: t = "punt"; break;
- case IP_LOOKUP_NEXT_LOCAL: t = "local"; break;
case IP_LOOKUP_NEXT_ARP: t = "arp"; break;
- case IP_LOOKUP_NEXT_CLASSIFY: t = "classify"; break;
- case IP_LOOKUP_NEXT_MAP: t = "map"; break;
- case IP_LOOKUP_NEXT_MAP_T: t = "map-t"; break;
- case IP_LOOKUP_NEXT_INDIRECT: t="indirect"; break;
+ case IP_LOOKUP_NEXT_MIDCHAIN: t="midchain"; break;
+ case IP_LOOKUP_NEXT_GLEAN: t="glean"; break;
case IP_LOOKUP_NEXT_REWRITE:
break;
}
@@ -996,120 +235,13 @@ u8 * format_ip_lookup_next (u8 * s, va_list * args)
return s;
}
-static u8 * format_ip_interface_address (u8 * s, va_list * args)
-{
- ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
- u32 if_address_index = va_arg (*args, u32);
- ip_interface_address_t * ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
- void * a = ip_interface_address_get_address (lm, ia);
-
- if (lm->is_ip6)
- return format (s, "%U", format_ip6_address_and_length, a, ia->address_length);
- else
- return format (s, "%U", format_ip4_address_and_length, a, ia->address_length);
-}
-
-u32 vnet_register_special_adjacency_format_function
-(ip_lookup_main_t * lm, format_function_t * fp)
-{
- u32 rv;
- /*
- * Initialize the format function registration vector
- * Index 0 must be invalid, to avoid finding and fixing trivial bugs
- * all over the place
- */
- if (vec_len (lm->special_adjacency_format_functions) == 0)
- {
- vec_add1 (lm->special_adjacency_format_functions,
- (format_function_t *) 0);
- }
-
- rv = vec_len (lm->special_adjacency_format_functions);
- vec_add1 (lm->special_adjacency_format_functions, fp);
- return rv;
-}
-
-/** @brief Pretty print helper function for formatting specific adjacencies.
- @param s - input string to format
- @param args - other args passed to format function such as:
- - vnet_main_t
- - ip_lookup_main_t
- - adj_index
-*/
-u8 * format_ip_adjacency (u8 * s, va_list * args)
-{
- vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
- ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
- u32 adj_index = va_arg (*args, u32);
- ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
- ip_adj_register_t *reg;
-
- if (adj->lookup_next_index < vec_len (lm->registered_adjacencies))
- {
- reg = vec_elt_at_index(lm->registered_adjacencies,
- adj->lookup_next_index);
- if (reg->fn)
- {
- s = format(s, " %U", reg->fn, lm, adj);
- goto format_done;
- }
- }
-
- switch (adj->lookup_next_index)
- {
- case IP_LOOKUP_NEXT_REWRITE:
- s = format (s, "%U",
- format_vnet_rewrite,
- vnm->vlib_main, &adj->rewrite_header,
- sizeof (adj->rewrite_data));
- break;
-
- case IP_LOOKUP_NEXT_ARP:
- if (adj->if_address_index != ~0)
- s = format (s, " %U", format_ip_interface_address, lm,
- adj->if_address_index);
- if (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1])
- s = format (s, " via %U", format_ip46_address,
- &adj->arp.next_hop, IP46_TYPE_ANY);
- break;
- case IP_LOOKUP_NEXT_LOCAL:
- if (adj->if_address_index != ~0)
- s = format (s, " %U", format_ip_interface_address, lm,
- adj->if_address_index);
- break;
-
- case IP_LOOKUP_NEXT_CLASSIFY:
- s = format (s, " table %d", adj->classify.table_index);
- break;
- case IP_LOOKUP_NEXT_INDIRECT:
- s = format (s, " via %U", format_ip46_address,
- &adj->indirect.next_hop, IP46_TYPE_ANY);
- break;
-
- default:
- s = format (s, " unknown %d", adj->lookup_next_index);
- break;
- }
-
- format_done:
- if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0)
- s = format (s, " lookup fib index %d", adj->explicit_fib_index);
- if (adj->share_count > 0)
- s = format (s, " shared %d", adj->share_count + 1);
- if (adj->next_adj_with_signature)
- s = format (s, " next_adj_with_signature %d", adj->next_adj_with_signature);
-
- return s;
-}
-
u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args)
{
vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
- ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
u32 adj_index = va_arg (*args, u32);
u8 * packet_data = va_arg (*args, u8 *);
u32 n_packet_data_bytes = va_arg (*args, u32);
- ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
+ ip_adjacency_t * adj = adj_get(adj_index);
switch (adj->lookup_next_index)
{
@@ -1126,119 +258,90 @@ u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args)
return s;
}
-static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args)
+static uword unformat_dpo (unformat_input_t * input, va_list * args)
{
- ip_lookup_next_t * result = va_arg (*args, ip_lookup_next_t *);
- ip_lookup_next_t n;
+ dpo_id_t *dpo = va_arg (*args, dpo_id_t *);
+ fib_protocol_t fp = va_arg (*args, int);
+ dpo_proto_t proto;
- if (unformat (input, "drop"))
- n = IP_LOOKUP_NEXT_DROP;
+ proto = fib_proto_to_dpo(fp);
+ if (unformat (input, "drop"))
+ dpo_copy(dpo, drop_dpo_get(proto));
else if (unformat (input, "punt"))
- n = IP_LOOKUP_NEXT_PUNT;
-
+ dpo_copy(dpo, punt_dpo_get(proto));
else if (unformat (input, "local"))
- n = IP_LOOKUP_NEXT_LOCAL;
-
- else if (unformat (input, "arp"))
- n = IP_LOOKUP_NEXT_ARP;
-
+ receive_dpo_add_or_lock(proto, ~0, NULL, dpo);
else if (unformat (input, "classify"))
- n = IP_LOOKUP_NEXT_CLASSIFY;
+ {
+ u32 classify_table_index;
+
+ if (!unformat (input, "%d", &classify_table_index))
+ {
+ clib_warning ("classify adj must specify table index");
+ return 0;
+ }
+ dpo_set(dpo, DPO_CLASSIFY, proto,
+ classify_dpo_create(fp, classify_table_index));
+ }
else
return 0;
-
- *result = n;
+
return 1;
}
-static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args)
-{
- vlib_main_t * vm = va_arg (*args, vlib_main_t *);
- ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *);
- u32 node_index = va_arg (*args, u32);
- vnet_main_t * vnm = vnet_get_main();
- u32 sw_if_index, is_ip6;
- ip46_address_t a46;
- ip_lookup_next_t next;
+const ip46_address_t zero_addr = {
+ .as_u64 = {
+ 0, 0
+ },
+};
- is_ip6 = node_index == ip6_rewrite_node.index;
- adj->rewrite_header.node_index = node_index;
- adj->explicit_fib_index = ~0;
+u32
+fib_table_id_find_fib_index (fib_protocol_t proto,
+ u32 table_id)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ uword * p;
- if (unformat (input, "arp %U %U",
- unformat_vnet_sw_interface, vnm, &sw_if_index,
- unformat_ip46_address, &a46, is_ip6?IP46_TYPE_IP6:IP46_TYPE_IP4))
+ switch (proto)
{
- ip_lookup_main_t * lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main;
- ip_adjacency_t * a_adj;
- u32 adj_index;
-
- if (is_ip6)
- adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6);
- else
- adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4);
-
- a_adj = ip_get_adjacency (lm, adj_index);
-
- if (a_adj->rewrite_header.sw_if_index != sw_if_index)
- return 0;
-
- if (is_ip6)
- ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index);
- else
- ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index);
+ case FIB_PROTOCOL_IP4:
+ p = hash_get(im4->fib_index_by_table_id, table_id);
+ break;
+ case FIB_PROTOCOL_IP6:
+ p = hash_get(im6->fib_index_by_table_id, table_id);
+ break;
+ default:
+ p = NULL;
+ break;
}
-
- else if (unformat_user (input, unformat_ip_lookup_next, &next))
+ if (NULL != p)
{
- adj->lookup_next_index = next;
- adj->if_address_index = ~0;
- if (next == IP_LOOKUP_NEXT_LOCAL)
- (void) unformat (input, "%d", &adj->if_address_index);
- else if (next == IP_LOOKUP_NEXT_CLASSIFY)
- {
- if (!unformat (input, "%d", &adj->classify.table_index))
- {
- clib_warning ("classify adj must specify table index");
- return 0;
- }
- }
- else if (next == IP_LOOKUP_NEXT_DROP)
- {
- adj->rewrite_header.node_index = 0;
- }
+ return (p[0]);
}
-
- else if (unformat_user (input,
- unformat_vnet_rewrite,
- vm, &adj->rewrite_header, sizeof (adj->rewrite_data)))
- adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
-
- else
- return 0;
-
- return 1;
+ return (~0);
}
clib_error_t *
-vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd)
+vnet_ip_route_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input,
+ vlib_cli_command_t * cmd)
{
- vnet_main_t * vnm = vnet_get_main();
- clib_error_t * error = 0;
- u32 table_id, is_del;
- u32 weight, * weights = 0;
- u32 * table_ids = 0;
- u32 sw_if_index, * sw_if_indices = 0;
- ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0;
- ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0;
- u32 dst_address_length, * dst_address_lengths = 0;
- ip_adjacency_t parse_adj, * add_adj = 0;
unformat_input_t _line_input, * line_input = &_line_input;
+ fib_route_path_t *rpaths = NULL, rpath;
+ dpo_id_t dpo = DPO_NULL, *dpos = NULL;
+ fib_prefix_t *prefixs = NULL, pfx;
+ clib_error_t * error = NULL;
+ mpls_label_t out_label;
+ u32 table_id, is_del;
+ vnet_main_t * vnm;
+ u32 fib_index;
f64 count;
- u32 outer_table_id;
+ int i;
+ vnm = vnet_get_main();
is_del = 0;
table_id = 0;
count = 1;
@@ -1247,410 +350,311 @@ vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_com
if (! unformat_user (main_input, unformat_line_input, line_input))
return 0;
- memset(&parse_adj, 0, sizeof (parse_adj));
-
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
+ memset(&rpath, 0, sizeof(rpath));
+ memset(&pfx, 0, sizeof(pfx));
+
if (unformat (line_input, "table %d", &table_id))
;
else if (unformat (line_input, "del"))
is_del = 1;
else if (unformat (line_input, "add"))
is_del = 0;
+ else if (unformat (line_input, "resolve-via-host"))
+ {
+ if (vec_len(rpaths) == 0)
+ {
+ error = clib_error_return(0 , "Paths then flags");
+ goto done;
+ }
+ rpaths[vec_len(rpaths)-1].frp_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+ }
+ else if (unformat (line_input, "resolve-via-attached"))
+ {
+ if (vec_len(rpaths) == 0)
+ {
+ error = clib_error_return(0 , "Paths then flags");
+ goto done;
+ }
+ rpaths[vec_len(rpaths)-1].frp_flags |=
+ FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+ }
+ else if (unformat (line_input, "out-label %U",
+ unformat_mpls_unicast_label, &out_label))
+ {
+ if (vec_len(rpaths) == 0)
+ {
+ error = clib_error_return(0 , "Paths then labels");
+ goto done;
+ }
+ rpaths[vec_len(rpaths)-1].frp_label = out_label;
+ }
else if (unformat (line_input, "count %f", &count))
;
else if (unformat (line_input, "%U/%d",
- unformat_ip4_address, &ip4_addr,
- &dst_address_length))
- {
- vec_add1 (ip4_dst_addresses, ip4_addr);
- vec_add1 (dst_address_lengths, dst_address_length);
- }
-
+ unformat_ip4_address,
+ &pfx.fp_addr.ip4,
+ &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ vec_add1(prefixs, pfx);
+ }
else if (unformat (line_input, "%U/%d",
- unformat_ip6_address, &ip6_addr,
- &dst_address_length))
- {
- vec_add1 (ip6_dst_addresses, ip6_addr);
- vec_add1 (dst_address_lengths, dst_address_length);
- }
-
+ unformat_ip6_address,
+ &pfx.fp_addr.ip6,
+ &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ vec_add1(prefixs, pfx);
+ }
else if (unformat (line_input, "via %U %U weight %u",
- unformat_ip4_address, &ip4_addr,
- unformat_vnet_sw_interface, vnm, &sw_if_index,
- &weight))
- {
- vec_add1 (ip4_via_next_hops, ip4_addr);
- vec_add1 (sw_if_indices, sw_if_index);
- vec_add1 (weights, weight);
- vec_add1 (table_ids, (u32)~0);
- }
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index,
+ &rpath.frp_weight))
+ {
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1(rpaths, rpath);
+ }
else if (unformat (line_input, "via %U %U weight %u",
- unformat_ip6_address, &ip6_addr,
- unformat_vnet_sw_interface, vnm, &sw_if_index,
- &weight))
- {
- vec_add1 (ip6_via_next_hops, ip6_addr);
- vec_add1 (sw_if_indices, sw_if_index);
- vec_add1 (weights, weight);
- vec_add1 (table_ids, (u32)~0);
- }
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index,
+ &rpath.frp_weight))
+ {
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1(rpaths, rpath);
+ }
else if (unformat (line_input, "via %U %U",
- unformat_ip4_address, &ip4_addr,
- unformat_vnet_sw_interface, vnm, &sw_if_index))
- {
- vec_add1 (ip4_via_next_hops, ip4_addr);
- vec_add1 (sw_if_indices, sw_if_index);
- vec_add1 (weights, 1);
- vec_add1 (table_ids, (u32)~0);
- }
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_weight = 1;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1(rpaths, rpath);
+ }
else if (unformat (line_input, "via %U %U",
- unformat_ip6_address, &ip6_addr,
- unformat_vnet_sw_interface, vnm, &sw_if_index))
- {
- vec_add1 (ip6_via_next_hops, ip6_addr);
- vec_add1 (sw_if_indices, sw_if_index);
- vec_add1 (weights, 1);
- vec_add1 (table_ids, (u32)~0);
- }
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_weight = 1;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1(rpaths, rpath);
+ }
else if (unformat (line_input, "via %U",
- unformat_ip4_address, &ip4_addr))
- {
- vec_add1 (ip4_via_next_hops, ip4_addr);
- vec_add1 (sw_if_indices, (u32)~0);
- vec_add1 (weights, 1);
- vec_add1 (table_ids, table_id);
- }
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4))
+ {
+ /*
+ * the recursive next-hops are by default in the same table
+ * as the prefix
+ */
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1(rpaths, rpath);
+ }
else if (unformat (line_input, "via %U",
- unformat_ip6_address, &ip6_addr))
- {
- vec_add1 (ip6_via_next_hops, ip6_addr);
- vec_add1 (sw_if_indices, (u32)~0);
- vec_add1 (weights, 1);
- vec_add1 (table_ids, (u32)table_id);
- }
-
- else if (vec_len (ip4_dst_addresses) > 0
- && unformat (line_input, "via %U",
- unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index))
- vec_add1 (add_adj, parse_adj);
-
- else if (vec_len (ip6_dst_addresses) > 0
- && unformat (line_input, "via %U",
- unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index))
- vec_add1 (add_adj, parse_adj);
- else if (unformat (line_input, "lookup in table %d", &outer_table_id))
- {
- uword * p;
-
- if (vec_len (ip4_dst_addresses) > 0)
- p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id);
- else
- p = hash_get (ip6_main.fib_index_by_table_id, outer_table_id);
-
- if (p == 0)
- {
- error = clib_error_return (0, "Nonexistent outer table id %d",
- outer_table_id);
- goto done;
- }
-
- parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
- parse_adj.explicit_fib_index = p[0];
- vec_add1 (add_adj, parse_adj);
- }
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "lookup in table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ rpath.frp_proto = pfx.fp_proto;
+ vec_add1(rpaths, rpath);
+ }
+ else if (vec_len (prefixs) > 0 &&
+ unformat (line_input, "via %U",
+ unformat_dpo, &dpo, prefixs[0].fp_proto))
+ {
+ rpath.frp_label = MPLS_LABEL_INVALID;
+ vec_add1 (dpos, dpo);
+ }
else
- {
+ {
error = unformat_parse_error (line_input);
goto done;
- }
+ }
}
unformat_free (line_input);
- if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0)
- {
+ if (vec_len (prefixs) == 0)
+ {
error = clib_error_return (0, "expected ip4/ip6 destination address/length.");
goto done;
}
- if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0)
- {
- error = clib_error_return (0, "mixed ip4/ip6 address/length.");
- goto done;
- }
-
- if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0)
- {
- error = clib_error_return (0, "ip4 destinations with ip6 next hops.");
- goto done;
- }
-
- if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0)
- {
- error = clib_error_return (0, "ip6 destinations with ip4 next hops.");
- goto done;
- }
-
- if (! is_del && vec_len (add_adj) + vec_len (weights) == 0)
+ if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0)
{
- error = clib_error_return (0, "no next hops or adjacencies to add.");
+ error = clib_error_return (0, "expected paths.");
goto done;
}
+ if (~0 == table_id)
{
- int i;
- ip4_main_t * im4 = &ip4_main;
- ip6_main_t * im6 = &ip6_main;
+ /*
+ * if no table_id is passed we will manipulate the default
+ */
+ fib_index = 0;
+ }
+ else
+ {
+ fib_index = fib_table_id_find_fib_index(prefixs[0].fp_proto,
+ table_id);
- for (i = 0; i < vec_len (ip4_dst_addresses); i++)
+ if (~0 == fib_index)
{
- ip4_add_del_route_args_t a;
-
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id;
- a.dst_address = ip4_dst_addresses[i];
- a.dst_address_length = dst_address_lengths[i];
- a.adj_index = ~0;
-
- if (is_del)
- {
- if (vec_len (ip4_via_next_hops) == 0)
- {
- uword * dst_hash, * dst_result;
- u32 dst_address_u32;
- ip4_fib_t * fib;
-
- fib = find_ip4_fib_by_table_index_or_id (im4, table_id,
- 0 /* by table id */);
-
- a.flags |= IP4_ROUTE_FLAG_DEL;
- dst_address_u32 = a.dst_address.as_u32
- & im4->fib_masks[a.dst_address_length];
-
- dst_hash =
- fib->adj_index_by_dst_address[a.dst_address_length];
- dst_result = hash_get (dst_hash, dst_address_u32);
- if (dst_result)
- a.adj_index = dst_result[0];
- else
- {
- clib_warning ("%U/%d not in FIB",
- format_ip4_address, &a.dst_address,
- a.dst_address_length);
- continue;
- }
-
- ip4_add_del_route (im4, &a);
- ip4_maybe_remap_adjacencies (im4, table_id,
- IP4_ROUTE_FLAG_TABLE_ID);
- }
- else
- {
- u32 i, j, n, f, incr;
- ip4_address_t dst = a.dst_address;
- f64 t[2];
- n = count;
- t[0] = vlib_time_now (vm);
- incr = 1<<(32 - a.dst_address_length);
- for (i = 0; i < n; i++)
- {
- f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
- a.dst_address = dst;
- for (j = 0; j < vec_len (ip4_via_next_hops); j++)
- {
- if (table_ids[j] != (u32)~0)
- {
- uword * p = hash_get (im4->fib_index_by_table_id,
- table_ids[j]);
- if (p == 0)
- {
- clib_warning ("no such FIB table %d",
- table_ids[j]);
- continue;
- }
- table_ids[j] = p[0];
- }
-
- ip4_add_del_route_next_hop (im4,
- IP4_ROUTE_FLAG_DEL | f,
- &a.dst_address,
- a.dst_address_length,
- &ip4_via_next_hops[j],
- sw_if_indices[j],
- weights[j], (u32)~0,
- table_ids[j] /* fib index */);
- }
- dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
- }
- t[1] = vlib_time_now (vm);
- if (count > 1)
- vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
- }
- }
- else
- {
- if (vec_len (add_adj) > 0)
- {
- a.flags |= IP4_ROUTE_FLAG_ADD;
- a.add_adj = add_adj;
- a.n_add_adj = vec_len (add_adj);
-
- ip4_add_del_route (im4, &a);
- }
- else if (vec_len (ip4_via_next_hops) > 0)
- {
- u32 i, j, n, f, incr;
- ip4_address_t dst = a.dst_address;
- f64 t[2];
- n = count;
- t[0] = vlib_time_now (vm);
- incr = 1<<(32 - a.dst_address_length);
- for (i = 0; i < n; i++)
- {
- f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
- a.dst_address = dst;
- for (j = 0; j < vec_len (ip4_via_next_hops); j++)
- {
- if (table_ids[j] != (u32)~0)
- {
- uword * p = hash_get (im4->fib_index_by_table_id,
- table_ids[j]);
- if (p == 0)
- {
- clib_warning ("no such FIB table %d",
- table_ids[j]);
- continue;
- }
- table_ids[j] = p[0];
- }
- ip4_add_del_route_next_hop (im4,
- IP4_ROUTE_FLAG_ADD | f,
- &a.dst_address,
- a.dst_address_length,
- &ip4_via_next_hops[j],
- sw_if_indices[j],
- weights[j], (u32)~0,
- table_ids[j] /* fib index */);
- }
- dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
- }
- t[1] = vlib_time_now (vm);
- if (count > 1)
- vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
- }
- }
+ error = clib_error_return (0,
+ "Nonexistent table id %d",
+ table_id);
+ goto done;
}
+ }
- for (i = 0; i < vec_len (ip6_dst_addresses); i++)
+ for (i = 0; i < vec_len (prefixs); i++)
+ {
+ if (is_del && 0 == vec_len (rpaths))
{
- ip6_add_del_route_args_t a;
-
-
- memset (&a, 0, sizeof (a));
- a.flags = IP6_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id;
- a.dst_address = ip6_dst_addresses[i];
- a.dst_address_length = dst_address_lengths[i];
- a.adj_index = ~0;
-
- if (is_del)
+ fib_table_entry_delete(fib_index,
+ &prefixs[i],
+ FIB_SOURCE_CLI);
+ }
+ else if (!is_del && 1 == vec_len (dpos))
+ {
+ fib_table_entry_special_dpo_add(fib_index,
+ &prefixs[i],
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpos[0]);
+ dpo_reset(&dpos[0]);
+ }
+ else if (vec_len (dpos) > 0)
+ {
+ error = clib_error_return(0 , "Load-balancing over multiple special adjacencies is unsupported");
+ goto done;
+ }
+ else if (0 < vec_len (rpaths))
+ {
+ u32 k, j, n, incr;
+ ip46_address_t dst = prefixs[i].fp_addr;
+ f64 t[2];
+ n = count;
+ t[0] = vlib_time_now (vm);
+ incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) -
+ prefixs[i].fp_len);
+
+ for (k = 0; k < n; k++)
{
- if (vec_len (ip6_via_next_hops) == 0)
+ for (j = 0; j < vec_len (rpaths); j++)
{
- BVT(clib_bihash_kv) kv, value;
- ip6_address_t dst_address;
- ip6_fib_t * fib;
-
- fib = find_ip6_fib_by_table_index_or_id (im6, table_id,
- 0 /* by table id */);
-
- a.flags |= IP4_ROUTE_FLAG_DEL;
-
- dst_address = ip6_dst_addresses[i];
-
- ip6_address_mask (&dst_address,
- &im6->fib_masks[dst_address_length]);
-
- kv.key[0] = dst_address.as_u64[0];
- kv.key[1] = dst_address.as_u64[1];
- kv.key[2] = ((u64)(fib - im6->fibs)<<32)
- | a.dst_address_length;
-
- if (BV(clib_bihash_search)(&im6->ip6_lookup_table,
- &kv, &value) == 0)
- a.adj_index = value.value;
- else
- {
- clib_warning ("%U/%d not in FIB",
- format_ip6_address, &a.dst_address,
- a.dst_address_length);
- continue;
- }
-
- a.flags |= IP6_ROUTE_FLAG_DEL;
- ip6_add_del_route (im6, &a);
- ip6_maybe_remap_adjacencies (im6, table_id,
- IP6_ROUTE_FLAG_TABLE_ID);
+ /*
+ * the CLI parsing stored table IDs; swap them to FIB indices
+ */
+ rpaths[i].frp_fib_index =
+ fib_table_id_find_fib_index(prefixs[i].fp_proto,
+ rpaths[i].frp_fib_index);
+
+ fib_prefix_t rpfx = {
+ .fp_len = prefixs[i].fp_len,
+ .fp_proto = prefixs[i].fp_proto,
+ .fp_addr = dst,
+ };
+
+ if (is_del)
+ fib_table_entry_path_remove2(fib_index,
+ &rpfx,
+ FIB_SOURCE_CLI,
+ &rpaths[j]);
+ else
+ fib_table_entry_path_add2(fib_index,
+ &rpfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ &rpaths[j]);
}
- else
- {
- u32 i;
- for (i = 0; i < vec_len (ip6_via_next_hops); i++)
- {
- ip6_add_del_route_next_hop (im6,
- IP6_ROUTE_FLAG_DEL,
- &a.dst_address,
- a.dst_address_length,
- &ip6_via_next_hops[i],
- sw_if_indices[i],
- weights[i], (u32)~0,
- table_ids[i] /* fib index */);
- }
- }
- }
- else
- {
- if (vec_len (add_adj) > 0)
+
+ if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto)
{
- a.flags |= IP6_ROUTE_FLAG_ADD;
- a.add_adj = add_adj;
- a.n_add_adj = vec_len (add_adj);
-
- ip6_add_del_route (im6, &a);
+ dst.ip4.as_u32 =
+ clib_host_to_net_u32(incr +
+ clib_net_to_host_u32 (dst.ip4.as_u32));
}
- else if (vec_len (ip6_via_next_hops) > 0)
+ else
{
- u32 i;
- for (i = 0; i < vec_len (ip6_via_next_hops); i++)
- {
- ip6_add_del_route_next_hop (im6,
- IP6_ROUTE_FLAG_ADD,
- &a.dst_address,
- a.dst_address_length,
- &ip6_via_next_hops[i],
- sw_if_indices[i],
- weights[i], (u32)~0,
- table_ids[i]);
- }
+ int bucket = (incr < 64 ? 0 : 1);
+ dst.ip6.as_u64[bucket] =
+ clib_host_to_net_u64(incr +
+ clib_net_to_host_u64 (
+ dst.ip6.as_u64[bucket]));
+
}
}
+ t[1] = vlib_time_now (vm);
+ if (count > 1)
+ vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
+ }
+ else
+ {
+ error = clib_error_return(0 , "Don't understand what you want...");
+ goto done;
}
}
+
done:
- vec_free (add_adj);
- vec_free (weights);
- vec_free (dst_address_lengths);
- vec_free (ip4_dst_addresses);
- vec_free (ip6_dst_addresses);
- vec_free (ip4_via_next_hops);
- vec_free (ip6_via_next_hops);
+ vec_free (dpos);
+ vec_free (prefixs);
+ vec_free (rpaths);
return error;
}
@@ -1708,14 +712,14 @@ VLIB_CLI_COMMAND (ip_route_command, static) = {
.is_mp_safe = 1,
};
-/*
+/*
* The next two routines address a longstanding script hemorrhoid.
* Probing a v4 or v6 neighbor needs to appear to be synchronous,
* or dependent route-adds will simply fail.
*/
static clib_error_t *
ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index,
- int retry_count)
+ int retry_count)
{
vnet_main_t * vnm = vnet_get_main();
clib_error_t * e;
@@ -1727,7 +731,7 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index,
ASSERT (vlib_in_process_context(vm));
if (retry_count > 0)
- vnet_register_ip6_neighbor_resolution_event
+ vnet_register_ip6_neighbor_resolution_event
(vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
1 /* event */, 0 /* data */);
@@ -1735,17 +739,17 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index,
{
/* The interface may be down, etc. */
e = ip6_probe_neighbor (vm, a, sw_if_index);
-
+
if (e)
- return e;
-
+ return e;
+
vlib_process_wait_for_event_or_clock (vm, 1.0);
event_type = vlib_process_get_events (vm, &event_data);
- switch (event_type)
- {
- case 1: /* resolved... */
- vlib_cli_output (vm, "Resolved %U",
- format_ip6_address, a);
+ switch (event_type)
+ {
+ case 1: /* resolved... */
+ vlib_cli_output (vm, "Resolved %U",
+ format_ip6_address, a);
resolved = 1;
goto done;
@@ -1883,526 +887,3 @@ VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = {
.short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]",
.is_mp_safe = 1,
};
-
-typedef CLIB_PACKED (struct {
- ip4_address_t address;
-
- u32 address_length : 6;
-
- u32 index : 26;
-}) ip4_route_t;
-
-static int
-ip4_route_cmp (void * a1, void * a2)
-{
- ip4_route_t * r1 = a1;
- ip4_route_t * r2 = a2;
-
- int cmp = ip4_address_compare (&r1->address, &r2->address);
- return cmp ? cmp : ((int) r1->address_length - (int) r2->address_length);
-}
-
-static clib_error_t *
-ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t * vnm = vnet_get_main();
- ip4_main_t * im4 = &ip4_main;
- ip4_route_t * routes, * r;
- ip4_fib_t * fib;
- ip_lookup_main_t * lm = &im4->lookup_main;
- uword * results, i;
- int verbose, matching, mtrie, include_empty_fibs;
- ip4_address_t matching_address;
- u8 clear = 0;
- int table_id = -1;
-
- routes = 0;
- results = 0;
- verbose = 1;
- include_empty_fibs = 0;
- matching = 0;
- mtrie = 0;
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "brief") || unformat (input, "summary")
- || unformat (input, "sum"))
- verbose = 0;
-
- else if (unformat (input, "mtrie"))
- mtrie = 1;
-
- else if (unformat (input, "include-empty"))
- include_empty_fibs = 1;
-
- else if (unformat (input, "%U", unformat_ip4_address, &matching_address))
- matching = 1;
-
- else if (unformat (input, "clear"))
- clear = 1;
-
- else if (unformat (input, "table %d", &table_id))
- ;
- else
- break;
- }
-
- vec_foreach (fib, im4->fibs)
- {
- int fib_not_empty;
-
- fib_not_empty = 0;
- for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
- {
- uword * hash = fib->adj_index_by_dst_address[i];
- uword n_elts = hash_elts (hash);
- if (n_elts)
- {
- fib_not_empty = 1;
- break;
- }
- }
-
- if (fib_not_empty == 0 && include_empty_fibs == 0)
- continue;
-
- if (table_id >= 0 && table_id != (int)fib->table_id)
- continue;
-
- if (include_empty_fibs)
- vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
- fib->table_id, fib - im4->fibs,
- format_ip_flow_hash_config, fib->flow_hash_config);
-
- /* Show summary? */
- if (! verbose)
- {
- if (include_empty_fibs == 0)
- vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
- fib->table_id, fib - im4->fibs,
- format_ip_flow_hash_config, fib->flow_hash_config);
- vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
- for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
- {
- uword * hash = fib->adj_index_by_dst_address[i];
- uword n_elts = hash_elts (hash);
- if (n_elts > 0)
- vlib_cli_output (vm, "%20d%16d", i, n_elts);
- }
- continue;
- }
-
- if (routes)
- _vec_len (routes) = 0;
- if (results)
- _vec_len (results) = 0;
-
- for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
- {
- uword * hash = fib->adj_index_by_dst_address[i];
- hash_pair_t * p;
- ip4_route_t x;
-
- x.address_length = i;
-
- if (matching)
- {
- x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i];
- p = hash_get_pair (hash, x.address.as_u32);
- if (p)
- {
- if (lm->fib_result_n_words > 1)
- {
- x.index = vec_len (results);
- vec_add (results, p->value, lm->fib_result_n_words);
- }
- else
- x.index = p->value[0];
- vec_add1 (routes, x);
- }
- }
- else
- {
- hash_foreach_pair (p, hash, ({
- x.address.data_u32 = p->key;
- if (lm->fib_result_n_words > 1)
- {
- x.index = vec_len (results);
- vec_add (results, p->value, lm->fib_result_n_words);
- }
- else
- x.index = p->value[0];
-
- vec_add1 (routes, x);
- }));
- }
- }
-
- vec_sort_with_function (routes, ip4_route_cmp);
- if (vec_len(routes)) {
- if (include_empty_fibs == 0)
- vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
- fib->table_id, fib - im4->fibs,
- format_ip_flow_hash_config, fib->flow_hash_config);
- if (mtrie)
- vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
- vlib_cli_output (vm, "%=20s%=16s%=16s%=16s",
- "Destination", "Packets", "Bytes", "Adjacency");
- }
- vec_foreach (r, routes)
- {
- vlib_counter_t c, sum;
- uword i, j, n_left, n_nhs, adj_index, * result = 0;
- ip_adjacency_t * adj;
- ip_multipath_next_hop_t * nhs, tmp_nhs[1];
-
- adj_index = r->index;
- if (lm->fib_result_n_words > 1)
- {
- result = vec_elt_at_index (results, adj_index);
- adj_index = result[0];
- }
-
- adj = ip_get_adjacency (lm, adj_index);
- if (adj->n_adj == 1)
- {
- nhs = &tmp_nhs[0];
- nhs[0].next_hop_adj_index = ~0; /* not used */
- nhs[0].weight = 1;
- n_nhs = 1;
- }
- else
- {
- ip_multipath_adjacency_t * madj;
- madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
- nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
- n_nhs = madj->normalized_next_hops.count;
- }
-
- n_left = nhs[0].weight;
- vlib_counter_zero (&sum);
- for (i = j = 0; i < adj->n_adj; i++)
- {
- n_left -= 1;
- vlib_get_combined_counter (&lm->adjacency_counters,
- adj_index + i, &c);
- if (clear)
- vlib_zero_combined_counter (&lm->adjacency_counters,
- adj_index + i);
- vlib_counter_add (&sum, &c);
- if (n_left == 0)
- {
- u8 * msg = 0;
- uword indent;
-
- if (j == 0)
- msg = format (msg, "%-20U",
- format_ip4_address_and_length,
- r->address.data, r->address_length);
- else
- msg = format (msg, "%U", format_white_space, 20);
-
- msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
-
- indent = vec_len (msg);
- msg = format (msg, "weight %d, index %d",
- nhs[j].weight, adj_index + i);
-
- if (ip_adjacency_is_multipath(lm, adj_index))
- msg = format (msg, ", multipath");
-
- msg = format (msg, "\n%U%U",
- format_white_space, indent,
- format_ip_adjacency,
- vnm, lm, adj_index + i);
-
- vlib_cli_output (vm, "%v", msg);
- vec_free (msg);
-
- if (result && lm->format_fib_result)
- vlib_cli_output (vm, "%20s%U", "",
- lm->format_fib_result, vm, lm, result,
- i + 1 - nhs[j].weight,
- nhs[j].weight);
-
- j++;
- if (j < n_nhs)
- {
- n_left = nhs[j].weight;
- vlib_counter_zero (&sum);
- }
- }
- }
- }
- }
-
- vec_free (routes);
- vec_free (results);
-
- return 0;
-}
-
-/*?
- * Show FIB/route entries
- *
- * @cliexpar
- * @cliexstart{show ip fib}
- * Display the IPv4 FIB.
- * This command will run for a long time when the FIBs comprise millions of entries.
- * vpp# sh ip fib
- * Table 0
- * Destination Packets Bytes Adjacency
- * 6.0.0.0/8 0 0 weight 1, index 3
- * arp fake-eth0 6.0.0.1/8
- * 6.0.0.1/32 0 0 weight 1, index 4
- * local 6.0.0.1/8
- *
- * And so forth. Use 'show ip fib summary' for a summary:
- *
- * vpp# sh ip fib summary
- * Table 0
- * Prefix length Count
- * 8 1
- * 32 4
- * @cliexend
- ?*/
-VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
- .path = "show ip fib",
- .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]",
- .function = ip4_show_fib,
-};
-
-typedef struct {
- ip6_address_t address;
-
- u32 address_length;
-
- u32 index;
-} ip6_route_t;
-
-typedef struct {
- u32 fib_index;
- ip6_route_t ** routep;
-} add_routes_in_fib_arg_t;
-
-static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg)
-{
- add_routes_in_fib_arg_t * ap = arg;
-
- if (kvp->key[2]>>32 == ap->fib_index)
- {
- ip6_address_t *addr;
- ip6_route_t * r;
- addr = (ip6_address_t *) kvp;
- vec_add2 (*ap->routep, r, 1);
- r->address = addr[0];
- r->address_length = kvp->key[2] & 0xFF;
- r->index = kvp->value;
- }
-}
-
-typedef struct {
- u32 fib_index;
- u64 count_by_prefix_length[129];
-} count_routes_in_fib_at_prefix_length_arg_t;
-
-static void count_routes_in_fib_at_prefix_length
-(BVT(clib_bihash_kv) * kvp, void *arg)
-{
- count_routes_in_fib_at_prefix_length_arg_t * ap = arg;
- int mask_width;
-
- if ((kvp->key[2]>>32) != ap->fib_index)
- return;
-
- mask_width = kvp->key[2] & 0xFF;
-
- ap->count_by_prefix_length[mask_width]++;
-}
-
-static int
-ip6_route_cmp (void * a1, void * a2)
-{
- ip6_route_t * r1 = a1;
- ip6_route_t * r2 = a2;
-
- int cmp = ip6_address_compare (&r1->address, &r2->address);
- return cmp ? cmp : ((int) r1->address_length - (int) r2->address_length);
-}
-
-static clib_error_t *
-ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t * vnm = vnet_get_main();
- ip6_main_t * im6 = &ip6_main;
- ip6_route_t * routes, * r;
- ip6_fib_t * fib;
- ip_lookup_main_t * lm = &im6->lookup_main;
- uword * results;
- int verbose;
- BVT(clib_bihash) * h = &im6->ip6_lookup_table;
- __attribute__((unused)) u8 clear = 0;
- add_routes_in_fib_arg_t _a, *a=&_a;
- count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
-
- routes = 0;
- results = 0;
- verbose = 1;
- if (unformat (input, "brief") || unformat (input, "summary")
- || unformat (input, "sum"))
- verbose = 0;
-
- if (unformat (input, "clear"))
- clear = 1;
-
- vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap",
- im6->lookup_table_nbuckets, im6->lookup_table_size>>20);
- vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/);
- vlib_cli_output (vm, " ");
-
- vec_foreach (fib, im6->fibs)
- {
- vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U",
- fib->table_id, fib - im6->fibs,
- format_ip_flow_hash_config, fib->flow_hash_config);
-
- /* Show summary? */
- if (! verbose)
- {
- int len;
- vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
-
- memset (ca, 0, sizeof(*ca));
- ca->fib_index = fib - im6->fibs;
-
- BV(clib_bihash_foreach_key_value_pair)
- (h, count_routes_in_fib_at_prefix_length, ca);
-
- for (len = 128; len >= 0; len--)
- {
- if (ca->count_by_prefix_length[len])
- vlib_cli_output (vm, "%=20d%=16lld",
- len, ca->count_by_prefix_length[len]);
- }
- continue;
- }
-
- if (routes)
- _vec_len (routes) = 0;
- if (results)
- _vec_len (results) = 0;
-
- a->fib_index = fib - im6->fibs;
- a->routep = &routes;
-
- BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a);
-
- vec_sort_with_function (routes, ip6_route_cmp);
-
- vlib_cli_output (vm, "%=45s%=16s%=16s%=16s",
- "Destination", "Packets", "Bytes", "Adjacency");
- vec_foreach (r, routes)
- {
- vlib_counter_t c, sum;
- uword i, j, n_left, n_nhs, adj_index, * result = 0;
- ip_adjacency_t * adj;
- ip_multipath_next_hop_t * nhs, tmp_nhs[1];
-
- adj_index = r->index;
- if (lm->fib_result_n_words > 1)
- {
- result = vec_elt_at_index (results, adj_index);
- adj_index = result[0];
- }
-
- adj = ip_get_adjacency (lm, adj_index);
- if (adj->n_adj == 1)
- {
- nhs = &tmp_nhs[0];
- nhs[0].next_hop_adj_index = ~0; /* not used */
- nhs[0].weight = 1;
- n_nhs = 1;
- }
- else
- {
- ip_multipath_adjacency_t * madj;
- madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
- nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
- n_nhs = madj->normalized_next_hops.count;
- }
-
- n_left = nhs[0].weight;
- vlib_counter_zero (&sum);
- for (i = j = 0; i < adj->n_adj; i++)
- {
- n_left -= 1;
- vlib_get_combined_counter (&lm->adjacency_counters,
- adj_index + i, &c);
- if (clear)
- vlib_zero_combined_counter (&lm->adjacency_counters,
- adj_index + i);
- vlib_counter_add (&sum, &c);
- if (n_left == 0)
- {
- u8 * msg = 0;
- uword indent;
-
- if (j == 0)
- msg = format (msg, "%-45U",
- format_ip6_address_and_length,
- r->address.as_u8, r->address_length);
- else
- msg = format (msg, "%U", format_white_space, 20);
-
- msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
-
- indent = vec_len (msg);
- msg = format (msg, "weight %d, index %d",
- nhs[j].weight, adj_index + i);
-
- if (ip_adjacency_is_multipath(lm, adj_index + i))
- msg = format (msg, ", multipath");
-
- msg = format (msg, "\n%U%U",
- format_white_space, indent,
- format_ip_adjacency,
- vnm, lm, adj_index + i);
-
- vlib_cli_output (vm, "%v", msg);
- vec_free (msg);
-
- j++;
- if (j < n_nhs)
- {
- n_left = nhs[j].weight;
- vlib_counter_zero (&sum);
- }
- }
- }
-
- if (result && lm->format_fib_result)
- vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0);
- }
- vlib_cli_output (vm, " ");
- }
-
- vec_free (routes);
- vec_free (results);
-
- return 0;
-}
-
-/*?
- * Show FIB6/route entries
- *
- * @cliexpar
- * @cliexstart{show ip fib}
- * Display the IPv6 FIB.
- * This command will run for a long time when the FIBs comprise millions of entries.
- * See 'show ip fib'
- * @cliexend
- ?*/
-VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
- .path = "show ip6 fib",
- .short_help = "show ip6 fib [summary] [clear]",
- .function = ip6_show_fib,
-};
diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h
index dcc9d25fed0..c8dcc141430 100644
--- a/vnet/vnet/ip/lookup.h
+++ b/vnet/vnet/ip/lookup.h
@@ -45,7 +45,6 @@
* - Callbacks on route add.
* - Callbacks on interface address change.
*/
-
#ifndef included_ip_lookup_h
#define included_ip_lookup_h
@@ -53,12 +52,11 @@
#include <vlib/buffer.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/dpo/dpo.h>
/** @brief Common (IP4/IP6) next index stored in adjacency. */
typedef enum {
- /** Packet does not match any route in table. */
- IP_LOOKUP_NEXT_MISS,
-
/** Adjacency to drop this packet. */
IP_LOOKUP_NEXT_DROP,
/** Adjacency to punt this packet. */
@@ -67,27 +65,26 @@ typedef enum {
/** This packet is for one of our own IP addresses. */
IP_LOOKUP_NEXT_LOCAL,
- /** This packet matches an "interface route" and packets
+ /** This packet matches an "incomplete adjacency" and packets
need to be passed to ARP to find rewrite string for
this destination. */
IP_LOOKUP_NEXT_ARP,
+ /** This packet matches an "interface route" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_GLEAN,
+
/** This packet is to be rewritten and forwarded to the next
processing node. This is typically the output interface but
might be another node for further output processing. */
IP_LOOKUP_NEXT_REWRITE,
- /** This packet needs to be classified */
- IP_LOOKUP_NEXT_CLASSIFY,
-
- /** This packet needs to go to MAP - RFC7596, RFC7597 */
- IP_LOOKUP_NEXT_MAP,
+ /** This packet follows a load-balance */
+ IP_LOOKUP_NEXT_LOAD_BALANCE,
- /** This packet needs to go to MAP with Translation - RFC7599 */
- IP_LOOKUP_NEXT_MAP_T,
-
- /** This packets needs to go to indirect next hop */
- IP_LOOKUP_NEXT_INDIRECT,
+ /** This packet follows a mid-chain adjacency */
+ IP_LOOKUP_NEXT_MIDCHAIN,
/** This packets needs to go to ICMP error */
IP_LOOKUP_NEXT_ICMP_ERROR,
@@ -100,7 +97,7 @@ typedef enum {
} ip4_lookup_next_t;
typedef enum {
- /** Hop-by-hop header handling */
+ /* Hop-by-hop header handling */
IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
@@ -108,30 +105,26 @@ typedef enum {
} ip6_lookup_next_t;
#define IP4_LOOKUP_NEXT_NODES { \
- [IP_LOOKUP_NEXT_MISS] = "ip4-miss", \
[IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
[IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
[IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
[IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
[IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \
- [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", \
- [IP_LOOKUP_NEXT_MAP] = "ip4-map", \
- [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", \
- [IP_LOOKUP_NEXT_INDIRECT] = "ip4-indirect", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
+ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \
[IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
}
#define IP6_LOOKUP_NEXT_NODES { \
- [IP_LOOKUP_NEXT_MISS] = "ip6-miss", \
[IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
[IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
[IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
[IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \
[IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
- [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", \
- [IP_LOOKUP_NEXT_MAP] = "ip6-map", \
- [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", \
- [IP_LOOKUP_NEXT_INDIRECT] = "ip6-indirect", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \
+ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance", \
[IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
[IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
[IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
@@ -157,20 +150,20 @@ _(dport, IP_FLOW_HASH_DST_PORT) \
_(proto, IP_FLOW_HASH_PROTO) \
_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
+/**
+ * A flow hash configuration is a mask of the flow hash options
+ */
+typedef u32 flow_hash_config_t;
+
#define IP_ADJACENCY_OPAQUE_SZ 16
/** @brief IP unicast adjacency.
@note cache aligned.
*/
typedef struct {
CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
- /** Handle for this adjacency in adjacency heap. */
+ /* Handle for this adjacency in adjacency heap. */
u32 heap_handle;
- STRUCT_MARK(signature_start);
-
- /** Interface address index for this local/arp adjacency. */
- u32 if_address_index;
-
/** Number of adjecencies in block. Greater than 1 means multipath;
otherwise equal to 1. */
u16 n_adj;
@@ -181,27 +174,63 @@ typedef struct {
u16 lookup_next_index_as_int;
};
+ /** Interface address index for this local/arp adjacency. */
+ u32 if_address_index;
+
/** Force re-lookup in a different FIB. ~0 => normal behavior */
- i16 explicit_fib_index;
u16 mcast_group_index;
/** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
u16 saved_lookup_next_index;
+ /*
+ * link/ether-type
+ */
+ u8 ia_link;
+ u8 ia_nh_proto;
+
union {
- /** IP_LOOKUP_NEXT_ARP only */
- struct {
- ip46_address_t next_hop;
- } arp;
- /** IP_LOOKUP_NEXT_CLASSIFY only */
- struct {
- u16 table_index;
- } classify;
- /** IP_LOOKUP_NEXT_INDIRECT only */
- struct {
- ip46_address_t next_hop;
- } indirect;
- u8 opaque[IP_ADJACENCY_OPAQUE_SZ];
+ union {
+ /**
+ * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
+ *
+ * neighbour adjacency sub-type;
+ */
+ struct {
+ ip46_address_t next_hop;
+ } nbr;
+ /**
+ * IP_LOOKUP_NEXT_MIDCHAIN
+ *
+ * A nbr adj that is also recursive. Think tunnels.
+ * A nbr adj can transition to be of type MIDCHAIN
+ * so be sure to leave the two structs with the next_hop
+ * fields aligned.
+ */
+ struct {
+ /**
+ * The recursive next-hop
+ */
+ ip46_address_t next_hop;
+ /**
+ * The node index of the tunnel's post rewrite/TX function.
+ */
+ u32 tx_function_node;
+ /**
+ * The next DPO to use
+ */
+ dpo_id_t next_dpo;
+ } midchain;
+ /**
+ * IP_LOOKUP_NEXT_GLEAN
+ *
+ * Glean the address to ARP for from the packet's destination
+ */
+ struct {
+ ip46_address_t receive_addr;
+ } glean;
+ } sub_type;
+ u16 opaque[IP_ADJACENCY_OPAQUE_SZ];
};
/** @brief Special format function for this adjacency.
@@ -210,63 +239,32 @@ typedef struct {
* the first cache line reads "full" on the free space gas gauge.
*/
u32 special_adjacency_format_function_index; /* 0 is invalid */
- STRUCT_MARK(signature_end);
-
- /** Number of FIB entries sharing this adjacency */
- u32 share_count;
- /** Use this adjacency instead */
- u32 next_adj_with_signature;
CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
- /** Rewrite in second/third cache lines */
+ /* Rewrite in second/third cache lines */
vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+
+ /*
+ * members not accessed in the data plane are relegated to the
+ * remaining cachelines
+ */
+ fib_node_t ia_node;
} ip_adjacency_t;
-static inline uword
-vnet_ip_adjacency_signature (ip_adjacency_t * adj)
-{
- uword signature = 0xfeedfaceULL;
-
- /* Skip heap handle, sum everything up to but not including share_count */
- signature = hash_memory
- (STRUCT_MARK_PTR(adj, signature_start),
- STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
- - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start),
- signature);
-
- /* and the rewrite */
- signature = hash_memory (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE,
- signature);
- return signature;
-}
+_Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0),
+ "IP adjacency cachline 0 is not offset");
+_Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES),
+ "IP adjacency cachline 1 is more than one cachline size offset");
-static inline int
-vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2)
-{
- if (memcmp (STRUCT_MARK_PTR(a1, signature_start),
- STRUCT_MARK_PTR(a2, signature_start),
- STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
- - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start)))
- return 0;
- if (memcmp (&a1->rewrite_header, &a2->rewrite_header,
- VLIB_BUFFER_PRE_DATA_SIZE))
- return 0;
- return 1;
-}
+/* An all zeros address */
+extern const ip46_address_t zero_addr;
/* Index into adjacency table. */
typedef u32 ip_adjacency_index_t;
typedef struct {
- /* Directly connected next-hop adjacency index. */
- u32 next_hop_adj_index;
-
- /* Path weight for this adjacency. */
- u32 weight;
-} ip_multipath_next_hop_t;
-
-typedef struct {
/* Adjacency index of first index in block. */
u32 adj_index;
@@ -276,11 +274,7 @@ typedef struct {
/* Number of prefixes that point to this adjacency. */
u32 reference_count;
- /* Normalized next hops are used as hash keys: they are sorted by weight
- and weights are chosen so they add up to 1 << log2_n_adj_in_block (with
- zero-weighted next hops being deleted).
- Unnormalized next hops are saved so that control plane has a record of exactly
- what the RIB told it. */
+ /* Normalized next hops are saved for stats/display purposes */
struct {
/* Number of hops in the multipath. */
u32 count;
@@ -290,7 +284,7 @@ typedef struct {
/* Heap handle used to for example free block when we're done with it. */
u32 heap_handle;
- } normalized_next_hops, unnormalized_next_hops;
+ } normalized_next_hops;
} ip_multipath_adjacency_t;
/* IP multicast adjacency. */
@@ -397,20 +391,11 @@ typedef struct ip_adj_register_struct {
} ip_adj_register_t;
typedef struct ip_lookup_main_t {
- /** Adjacency heap. */
+ /* Adjacency heap. */
ip_adjacency_t * adjacency_heap;
- /** Adjacency packet/byte counters indexed by adjacency index. */
- vlib_combined_counter_main_t adjacency_counters;
-
- /** Heap of (next hop, weight) blocks. Sorted by next hop. */
- ip_multipath_next_hop_t * next_hop_heap;
-
- /** Indexed by heap_handle from ip_adjacency_t. */
- ip_multipath_adjacency_t * multipath_adjacencies;
-
- /** Adjacency by signature hash */
- uword * adj_index_by_signature;
+ /** load-balance packet/byte counters indexed by LB index. */
+ vlib_combined_counter_main_t load_balance_counters;
/** any-tx-feature-enabled interface bitmap */
uword * tx_sw_if_has_ip_output_features;
@@ -418,29 +403,6 @@ typedef struct ip_lookup_main_t {
/** count of enabled features, per sw_if_index, to maintain bitmap */
i16 * tx_feature_count_by_sw_if_index;
- /** Temporary vectors for looking up next hops in hash. */
- ip_multipath_next_hop_t * next_hop_hash_lookup_key;
- ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized;
-
- /** Hash table mapping normalized next hops and weights
- to multipath adjacency index. */
- uword * multipath_adjacency_by_next_hops;
-
- u32 * adjacency_remap_table;
- u32 n_adjacency_remaps;
-
- /** If average error per adjacency is less than this threshold adjacency block
- size is accepted. */
- f64 multipath_next_hop_error_tolerance;
-
- /** Adjacency index for routing table misses, local punts, and drops. */
- u32 miss_adj_index, drop_adj_index, local_adj_index;
-
- /** Miss adjacency is always first in adjacency table. */
-#define IP_LOOKUP_MISS_ADJ_INDEX 0
-
- ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks;
-
/** Pool of addresses that are assigned to interfaces. */
ip_interface_address_t * if_address_pool;
@@ -501,54 +463,6 @@ do { \
CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \
} while (0)
-/* Adds a next node to ip4 or ip6 lookup node which can be then used in adjacencies.
- * @param vlib_main pointer
- * @param lm ip4_main.lookup_main or ip6_main.lookup_main
- * @param reg registration structure
- * @param next_node_index Returned index to be used in adjacencies.
- * @return 0 on success. -1 on failure.
- */
-int ip_register_adjacency(vlib_main_t *vm, u8 is_ip4,
- ip_adj_register_t *reg);
-
-/*
- * Construction helpers to add IP adjacency at init.
- */
-#define VNET_IP_REGISTER_ADJACENCY(ip,x,...) \
- __VA_ARGS__ ip_adj_register_t ip##adj_##x; \
-static void __vnet_##ip##_register_adjacency_##x (void) \
- __attribute__((__constructor__)) ; \
-static void __vnet_##ip##_register_adjacency_##x (void) \
-{ \
- ip_lookup_main_t *lm = &ip##_main.lookup_main; \
- ip##adj_##x.next = lm->registered_adjacencies; \
- lm->registered_adjacencies = &ip##adj_##x; \
-} \
-__VA_ARGS__ ip_adj_register_t ip##adj_##x
-
-#define VNET_IP4_REGISTER_ADJACENCY(x,...) \
- VNET_IP_REGISTER_ADJACENCY(ip4, x, __VA_ARGS__)
-
-#define VNET_IP6_REGISTER_ADJACENCY(x,...) \
- VNET_IP_REGISTER_ADJACENCY(ip6, x, __VA_ARGS__)
-
-static inline void
-ip_register_add_del_adjacency_callback(ip_lookup_main_t * lm,
- ip_add_del_adjacency_callback_t cb)
-{
- vec_add1(lm->add_del_adjacency_callbacks, cb);
-}
-
-always_inline void
-ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del)
-{
- ip_adjacency_t * adj;
- uword i;
- adj = ip_get_adjacency (lm, adj_index);
- for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++)
- lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del);
-}
-
/* Create new block of given number of contiguous adjacencies. */
ip_adjacency_t *
ip_add_adjacency (ip_lookup_main_t * lm,
@@ -556,38 +470,6 @@ ip_add_adjacency (ip_lookup_main_t * lm,
u32 n_adj,
u32 * adj_index_result);
-void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index);
-void
-ip_update_adjacency (ip_lookup_main_t * lm,
- u32 adj_index,
- ip_adjacency_t * copy_adj);
-
-static inline int
-ip_adjacency_is_multipath(ip_lookup_main_t * lm, u32 adj_index)
-{
- if (!vec_len(lm->multipath_adjacencies))
- return 0;
-
- if (vec_len(lm->multipath_adjacencies) < adj_index - 1)
- return 0;
-
-
- return (lm->multipath_adjacencies[adj_index].adj_index == adj_index &&
- lm->multipath_adjacencies[adj_index].n_adj_in_block > 0);
-}
-
-void
-ip_multipath_adjacency_free (ip_lookup_main_t * lm,
- ip_multipath_adjacency_t * a);
-
-u32
-ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
- u32 is_del,
- u32 old_mp_adj_index,
- u32 next_hop_adj_index,
- u32 next_hop_weight,
- u32 * new_mp_adj_index);
-
clib_error_t *
ip_interface_address_add_del (ip_lookup_main_t * lm,
u32 sw_if_index,
@@ -596,6 +478,9 @@ ip_interface_address_add_del (ip_lookup_main_t * lm,
u32 is_del,
u32 * result_index);
+u8 *
+format_ip_flow_hash_config (u8 * s, va_list * args);
+
always_inline ip_interface_address_t *
ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
{
@@ -603,28 +488,14 @@ ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
}
+u32
+fib_table_id_find_fib_index (fib_protocol_t proto,
+ u32 table_id);
+
always_inline void *
ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a)
{ return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); }
-always_inline ip_interface_address_t *
-ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index)
-{
- ip_adjacency_t * adj;
- u32 if_address_index;
-
- adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]);
-
- ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP
- || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL);
- if_address_index = adj->if_address_index;
- if_address_index = (if_address_index == ~0 ?
- vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index)
- : if_address_index);
-
- return (if_address_index != ~0)?pool_elt_at_index (lm->if_address_pool, if_address_index):NULL;
-}
-
#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
do { \
vnet_main_t *_vnm = vnet_get_main(); \
@@ -653,7 +524,5 @@ do { \
} while (0)
void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
-u32 vnet_register_special_adjacency_format_function
-(ip_lookup_main_t * lm, format_function_t * fp);
#endif /* included_ip_lookup_h */
diff --git a/vnet/vnet/ip/ping.c b/vnet/vnet/ip/ping.c
index b5842a69c50..3bc4da882d5 100644
--- a/vnet/vnet/ip/ping.c
+++ b/vnet/vnet/ip/ping.c
@@ -14,6 +14,9 @@
*/
#include <vnet/ip/ping.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
u8 *
format_icmp4_input_trace (u8 * s, va_list * va)
@@ -278,7 +281,14 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */
fib_index0 = 0;
- adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, pa6);
+ adj_index0 = fib_entry_get_adj(ip6_fib_table_lookup(fib_index0, pa6, 128));
+
+ if (ADJ_INDEX_INVALID == adj_index0)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_INTERFACE;
+ }
+
sw_if_index0 =
adj_index_to_sw_if_index (vm, lm, ip6_lookup_next_nodes, adj_index0,
sw_if_index, verbose);
@@ -362,7 +372,15 @@ send_ip4_ping (vlib_main_t * vm,
vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */
fib_index0 = 0;
- adj_index0 = ip4_fib_lookup_with_table (im, fib_index0, pa4, 0);
+ adj_index0 = fib_entry_get_adj(ip4_fib_table_lookup(
+ ip4_fib_get(fib_index0), pa4, 32));
+
+ if (ADJ_INDEX_INVALID == adj_index0)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_INTERFACE;
+ }
+
sw_if_index0 =
adj_index_to_sw_if_index (vm, lm, ip4_lookup_next_nodes, adj_index0,
sw_if_index, verbose);
diff --git a/vnet/vnet/ip/udp.h b/vnet/vnet/ip/udp.h
index 1cf525c6093..1845fa74a46 100644
--- a/vnet/vnet/ip/udp.h
+++ b/vnet/vnet/ip/udp.h
@@ -115,14 +115,13 @@ void udp_register_dst_port (vlib_main_t * vm,
u32 node_index, u8 is_ip4);
always_inline void
-ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
+ip_udp_fixup_one (vlib_main_t * vm,
+ vlib_buffer_t * b0,
u8 is_ip4)
{
u16 new_l0;
udp_header_t * udp0;
- vlib_buffer_advance (b0, - ec_len);
-
if (is_ip4)
{
ip4_header_t * ip0;
@@ -131,9 +130,6 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
ip0 = vlib_buffer_get_current(b0);
- /* Apply the encap string. */
- clib_memcpy(ip0, ec0, ec_len);
-
/* fix the <bleep>ing outer-IP checksum */
sum0 = ip0->checksum;
/* old_l0 always 0, see the rewrite setup */
@@ -157,9 +153,6 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
ip0 = vlib_buffer_get_current(b0);
- /* Apply the encap string. */
- clib_memcpy(ip0, ec0, ec_len);
-
new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
- sizeof (*ip0));
ip0->payload_length = new_l0;
@@ -175,6 +168,33 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
udp0->checksum = 0xffff;
}
}
+always_inline void
+ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
+ u8 is_ip4)
+{
+ vlib_buffer_advance (b0, - ec_len);
+
+ if (is_ip4)
+ {
+ ip4_header_t * ip0;
+
+ ip0 = vlib_buffer_get_current(b0);
+
+ /* Apply the encap string. */
+ clib_memcpy(ip0, ec0, ec_len);
+ ip_udp_fixup_one(vm, b0, 1);
+ }
+ else
+ {
+ ip6_header_t * ip0;
+
+ ip0 = vlib_buffer_get_current(b0);
+
+ /* Apply the encap string. */
+ clib_memcpy(ip0, ec0, ec_len);
+ ip_udp_fixup_one(vm, b0, 0);
+ }
+}
always_inline void
ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
diff --git a/vnet/vnet/ipsec-gre/ipsec_gre.c b/vnet/vnet/ipsec-gre/ipsec_gre.c
index 3d1b54fc7f9..cf0f391fede 100644
--- a/vnet/vnet/ipsec-gre/ipsec_gre.c
+++ b/vnet/vnet/ipsec-gre/ipsec_gre.c
@@ -25,18 +25,6 @@
ipsec_gre_main_t ipsec_gre_main;
/**
- * @brief IPv4 and GRE header.
- *
-*/
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct
-{
- ip4_header_t ip4;
- gre_header_t gre;
-}) ip4_and_gre_header_t;
-/* *INDENT-OFF* */
-
-/**
* @brief IPv4 and GRE header union.
*
*/
diff --git a/vnet/vnet/lisp-cp/control.c b/vnet/vnet/lisp-cp/control.c
index 16d7bfa0e1f..5de30d5bb64 100644
--- a/vnet/vnet/lisp-cp/control.c
+++ b/vnet/vnet/lisp-cp/control.c
@@ -18,6 +18,8 @@
#include <vnet/lisp-cp/packets.h>
#include <vnet/lisp-cp/lisp_msg_serdes.h>
#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
typedef struct
{
@@ -74,37 +76,36 @@ ip_interface_get_first_ip_address (lisp_cp_main_t * lcm, u32 sw_if_index,
return 1;
}
-static u32
-ip_fib_lookup_with_table (lisp_cp_main_t * lcm, u32 fib_index,
- ip_address_t * dst)
+/**
+ * convert from a LISP address to a FIB prefix
+ */
+void
+ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
{
- if (ip_addr_version (dst) == IP4)
- return ip4_fib_lookup_with_table (lcm->im4, fib_index, &ip_addr_v4 (dst),
- 0);
+ if (addr->version == IP4)
+ {
+ prefix->fp_len = 32;
+ prefix->fp_proto = FIB_PROTOCOL_IP4;
+ memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad));
+ memcpy (&prefix->fp_addr.ip4, &addr->ip, sizeof (prefix->fp_addr.ip4));
+ }
else
- return ip6_fib_lookup_with_table (lcm->im6, fib_index, &ip_addr_v6 (dst));
+ {
+ prefix->fp_len = 128;
+ prefix->fp_proto = FIB_PROTOCOL_IP6;
+ memcpy (&prefix->fp_addr.ip6, &addr->ip, sizeof (prefix->fp_addr.ip6));
+ }
}
-u32
-ip_fib_get_egress_iface_for_dst_with_lm (lisp_cp_main_t * lcm,
- ip_address_t * dst,
- ip_lookup_main_t * lm)
+/**
+ * convert from a LISP prefix to a FIB prefix
+ */
+void
+ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix,
+ fib_prefix_t * fib_prefix)
{
- u32 adj_index;
- ip_adjacency_t *adj;
-
- adj_index = ip_fib_lookup_with_table (lcm, 0, dst);
- adj = ip_get_adjacency (lm, adj_index);
-
- if (adj == 0)
- return ~0;
-
- /* we only want outgoing routes */
- if (adj->lookup_next_index != IP_LOOKUP_NEXT_ARP
- && adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)
- return ~0;
-
- return adj->rewrite_header.sw_if_index;
+ ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix);
+ fib_prefix->fp_len = ip_prefix->len;
}
/**
@@ -114,12 +115,14 @@ ip_fib_get_egress_iface_for_dst_with_lm (lisp_cp_main_t * lcm,
u32
ip_fib_get_egress_iface_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst)
{
- ip_lookup_main_t *lm;
+ fib_node_index_t fei;
+ fib_prefix_t prefix;
+
+ ip_address_to_fib_prefix (dst, &prefix);
- lm = ip_addr_version (dst) == IP4 ?
- &lcm->im4->lookup_main : &lcm->im6->lookup_main;
+ fei = fib_table_lookup (0, &prefix);
- return ip_fib_get_egress_iface_for_dst_with_lm (lcm, dst, lm);
+ return (fib_entry_get_resolving_interface (fei));
}
/**
@@ -140,7 +143,7 @@ ip_fib_get_first_egress_ip_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst,
ipver = ip_addr_version (dst);
lm = (ipver == IP4) ? &lcm->im4->lookup_main : &lcm->im6->lookup_main;
- si = ip_fib_get_egress_iface_for_dst_with_lm (lcm, dst, lm);
+ si = ip_fib_get_egress_iface_for_dst (lcm, dst);
if ((u32) ~ 0 == si)
return 0;
@@ -2871,28 +2874,14 @@ lisp_get_vni_from_buffer_ip (lisp_cp_main_t * lcm, vlib_buffer_t * b,
u8 version)
{
uword *vnip;
- u32 vni = ~0, table_id = ~0, fib_index;
+ u32 vni = ~0, table_id = ~0;
- if (version == IP4)
- {
- ip4_fib_t *fib;
- ip4_main_t *im4 = &ip4_main;
- fib_index = vec_elt (im4->fib_index_by_sw_if_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX]);
- fib = find_ip4_fib_by_table_index_or_id (im4, fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX);
- table_id = fib->table_id;
- }
- else
- {
- ip6_fib_t *fib;
- ip6_main_t *im6 = &ip6_main;
- fib_index = vec_elt (im6->fib_index_by_sw_if_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX]);
- fib = find_ip6_fib_by_table_index_or_id (im6, fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
- table_id = fib->table_id;
- }
+ table_id =
+ fib_table_get_table_id_for_sw_if_index (vnet_buffer (b)->sw_if_index
+ [VLIB_RX],
+ (version ==
+ IP4 ? FIB_PROTOCOL_IP4 :
+ FIB_PROTOCOL_IP6));
vnip = hash_get (lcm->vni_by_table_id, table_id);
if (vnip)
@@ -2979,8 +2968,9 @@ get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b,
}
static uword
-lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+lisp_cp_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int overlay)
{
u32 *from, *to_next_drop, di, si;
lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
@@ -3010,6 +3000,7 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
b0 = vlib_get_buffer (vm, pi0);
b0->error = node->errors[LISP_CP_LOOKUP_ERROR_DROP];
+ vnet_buffer (b0)->lisp.overlay_afi = overlay;
/* src/dst eid pair */
get_src_and_dst_eids_from_buffer (lcm, b0, &src, &dst);
@@ -3070,10 +3061,45 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
+static uword
+lisp_cp_lookup_ip4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP));
+}
+
+static uword
+lisp_cp_lookup_ip6 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP6));
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
+ .function = lisp_cp_lookup_ip4,
+ .name = "lisp-cp-lookup-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_cp_lookup_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = LISP_CP_LOOKUP_N_ERROR,
+ .error_strings = lisp_cp_lookup_error_strings,
+
+ .n_next_nodes = LISP_CP_LOOKUP_N_NEXT,
+
+ .next_nodes = {
+ [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop",
+ [LISP_CP_LOOKUP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [LISP_CP_LOOKUP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ },
+};
+/* *INDENT-ON* */
+
/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (lisp_cp_lookup_node) = {
- .function = lisp_cp_lookup,
- .name = "lisp-cp-lookup",
+VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = {
+ .function = lisp_cp_lookup_ip6,
+ .name = "lisp-cp-lookup-ip6",
.vector_size = sizeof (u32),
.format_trace = format_lisp_cp_lookup_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
diff --git a/vnet/vnet/lisp-cp/control.h b/vnet/vnet/lisp-cp/control.h
index 76590b2c36b..02efd046170 100644
--- a/vnet/vnet/lisp-cp/control.h
+++ b/vnet/vnet/lisp-cp/control.h
@@ -149,7 +149,8 @@ typedef struct
lisp_cp_main_t lisp_control_main;
extern vlib_node_registration_t lisp_cp_input_node;
-extern vlib_node_registration_t lisp_cp_lookup_node;
+extern vlib_node_registration_t lisp_cp_lookup_ip4_node;
+extern vlib_node_registration_t lisp_cp_lookup_ip6_node;
clib_error_t *lisp_cp_init ();
diff --git a/vnet/vnet/lisp-cp/lisp_cp_dpo.c b/vnet/vnet/lisp-cp/lisp_cp_dpo.c
new file mode 100644
index 00000000000..0bb8098d6fc
--- /dev/null
+++ b/vnet/vnet/lisp-cp/lisp_cp_dpo.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-cp/control.h>
+
+index_t
+lisp_cp_dpo_get (fib_protocol_t proto)
+{
+ /*
+ * there are only two instances of this DPO type.
+ * we can use the protocol as the index
+ */
+ return (proto);
+}
+
+static u8*
+format_lisp_cp_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+
+ return (format(s, "lisp-cp-punt-%U",
+ format_fib_protocol, index));
+}
+
+static void
+lisp_cp_dpo_lock (dpo_id_t *dpo)
+{
+}
+
+static void
+lisp_cp_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+const static dpo_vft_t lisp_cp_vft = {
+ .dv_lock = lisp_cp_dpo_lock,
+ .dv_unlock = lisp_cp_dpo_unlock,
+ .dv_format = format_lisp_cp_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a LISP-CP
+ * object.
+ *
+ * this means that these graph nodes are ones from which a LISP-CP is the
+ * parent object in the DPO-graph.
+ */
+const static char* const lisp_cp_ip4_nodes[] =
+{
+ "lisp-cp-lookup-ip4",
+ NULL,
+};
+const static char* const lisp_cp_ip6_nodes[] =
+{
+ "lisp-cp-lookup-ip6",
+ NULL,
+};
+
+const static char* const * const lisp_cp_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lisp_cp_ip4_nodes,
+ [DPO_PROTO_IP6] = lisp_cp_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+clib_error_t *
+lisp_cp_dpo_module_init (vlib_main_t * vm)
+{
+ /*
+ * register the LISP-CP DPO type with its virtual function table
+ * and the per-protocol node lists (lisp_cp_nodes) defined above.
+ */
+ dpo_register(DPO_LISP_CP, &lisp_cp_vft, lisp_cp_nodes);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION(lisp_cp_dpo_module_init);
diff --git a/vnet/vnet/lisp-cp/lisp_cp_dpo.h b/vnet/vnet/lisp-cp/lisp_cp_dpo.h
new file mode 100644
index 00000000000..ea97711a8de
--- /dev/null
+++ b/vnet/vnet/lisp-cp/lisp_cp_dpo.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LISP_CP_DPO_H__
+#define __LISP_CP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of punt to the LISP control plane.
+ */
+typedef struct lisp_cp_dpo_t
+{
+ /**
+ * The transport payload type.
+ */
+ fib_protocol_t lcd_proto;
+} lisp_cp_dpo_t;
+
+extern index_t lisp_cp_dpo_get(fib_protocol_t proto);
+
+extern clib_error_t *lisp_cp_dpo_module_init(vlib_main_t * vm); /* VLIB init hook; prototype matches the definition in lisp_cp_dpo.c */
+
+#endif
diff --git a/vnet/vnet/lisp-cp/lisp_types.c b/vnet/vnet/lisp-cp/lisp_types.c
index b4fb1d91bfc..a2edb487cca 100644
--- a/vnet/vnet/lisp-cp/lisp_types.c
+++ b/vnet/vnet/lisp-cp/lisp_types.c
@@ -147,6 +147,8 @@ uword
unformat_ip_address (unformat_input_t * input, va_list * args)
{
ip_address_t *a = va_arg (*args, ip_address_t *);
+
+ memset (a, 0, sizeof (*a));
if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a)))
ip_addr_version (a) = IP4;
else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a)))
@@ -331,8 +333,32 @@ unformat_negative_mapping_action (unformat_input_t * input, va_list * args)
return 1;
}
+u8 *
+format_negative_mapping_action (u8 * s, va_list * args)
+{
+ lisp_action_e action = va_arg (*args, lisp_action_e);
+
+ switch (action)
+ {
+ case LISP_NO_ACTION:
+ s = format (s, "no-action");
+ break;
+ case LISP_FORWARD_NATIVE:
+ s = format (s, "natively-forward");
+ break;
+ case LISP_SEND_MAP_REQUEST:
+ s = format (s, "send-map-request");
+ break;
+ case LISP_DROP:
+ default:
+ s = format (s, "drop");
+ break;
+ }
+ return (s);
+}
+
u16
-ip_address_size (ip_address_t * a)
+ip_address_size (const ip_address_t * a)
{
switch (ip_addr_version (a))
{
@@ -653,7 +679,7 @@ gid_address_free (gid_address_t * a)
}
int
-ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2)
+ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2)
{
int res = 0;
if (ip_addr_version (ip1) != ip_addr_version (ip2))
@@ -670,19 +696,19 @@ ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2)
}
void
-ip_address_copy (ip_address_t * dst, ip_address_t * src)
+ip_address_copy (ip_address_t * dst, const ip_address_t * src)
{
clib_memcpy (dst, src, sizeof (ip_address_t));
}
void
-ip_address_copy_addr (void *dst, ip_address_t * src)
+ip_address_copy_addr (void *dst, const ip_address_t * src)
{
clib_memcpy (dst, src, ip_address_size (src));
}
void
-ip_address_set (ip_address_t * dst, void *src, u8 version)
+ip_address_set (ip_address_t * dst, const void *src, u8 version)
{
clib_memcpy (dst, src, ip_version_to_size (version));
ip_addr_version (dst) = version;
diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h
index cb1b277b530..cd1d1b9a642 100644
--- a/vnet/vnet/lisp-cp/lisp_types.h
+++ b/vnet/vnet/lisp-cp/lisp_types.h
@@ -42,10 +42,10 @@ typedef CLIB_PACKED(struct ip_address
#define ip_addr_v6(_a) (_a)->ip.v6
#define ip_addr_version(_a) (_a)->version
-int ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2);
-void ip_address_copy (ip_address_t * dst, ip_address_t * src);
-void ip_address_copy_addr (void *dst, ip_address_t * src);
-void ip_address_set (ip_address_t * dst, void *src, u8 version);
+int ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2);
+void ip_address_copy (ip_address_t * dst, const ip_address_t * src);
+void ip_address_copy_addr (void *dst, const ip_address_t * src);
+void ip_address_set (ip_address_t * dst, const void *src, u8 version);
/* *INDENT-OFF* */
typedef CLIB_PACKED(struct ip_prefix
@@ -63,6 +63,11 @@ typedef CLIB_PACKED(struct ip_prefix
void ip_prefix_normalize (ip_prefix_t * a);
+extern void ip_address_to_fib_prefix (const ip_address_t * addr,
+ fib_prefix_t * prefix);
+extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp,
+ fib_prefix_t * fibp);
+
typedef enum
{
/* NOTE: ip addresses are left out on purpose. Use max masked ip-prefixes
@@ -107,6 +112,7 @@ typedef fid_address_t dp_address_t;
#define fid_addr_ippref(_a) (_a)->ippref
#define fid_addr_mac(_a) (_a)->mac
#define fid_addr_type(_a) (_a)->type
+u8 *format_fid_address (u8 * s, va_list * args);
typedef struct
{
@@ -293,6 +299,7 @@ typedef struct
uword
unformat_negative_mapping_action (unformat_input_t * input, va_list * args);
+u8 *format_negative_mapping_action (u8 *, va_list * args);
typedef struct locator_pair
{
diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c
index abfdfdb89f2..52db1eb3628 100644
--- a/vnet/vnet/lisp-gpe/interface.c
+++ b/vnet/vnet/lisp-gpe/interface.c
@@ -26,6 +26,10 @@
#include <vnet/ip/udp.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
#define foreach_lisp_gpe_tx_next \
_(DROP, "error-drop") \
@@ -56,147 +60,6 @@ format_lisp_gpe_tx_trace (u8 * s, va_list * args)
return s;
}
-always_inline void
-get_one_tunnel_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
- lisp_gpe_tunnel_t ** t0, u8 is_v4)
-{
- u32 adj_index0, tunnel_index0;
- ip_adjacency_t *adj0;
-
- /* Get adjacency and from it the tunnel_index */
- adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
-
- if (is_v4)
- adj0 = ip_get_adjacency (lgm->lm4, adj_index0);
- else
- adj0 = ip_get_adjacency (lgm->lm6, adj_index0);
-
- tunnel_index0 = adj0->if_address_index;
- t0[0] = pool_elt_at_index (lgm->tunnels, tunnel_index0);
-
- ASSERT (t0[0] != 0);
-}
-
-always_inline void
-encap_one_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
- lisp_gpe_tunnel_t * t0, u32 * next0)
-{
- ASSERT (sizeof (ip4_udp_lisp_gpe_header_t) == 36);
- ASSERT (sizeof (ip6_udp_lisp_gpe_header_t) == 56);
-
- lisp_gpe_sub_tunnel_t *st0;
- u32 *sti0;
-
- sti0 = vec_elt_at_index (t0->sub_tunnels_lbv,
- vnet_buffer (b0)->ip.flow_hash %
- t0->sub_tunnels_lbv_count);
- st0 = vec_elt_at_index (t0->sub_tunnels, sti0[0]);
- if (st0->is_ip4)
- {
- ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1);
- next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
- }
- else
- {
- ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0);
- next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
- }
-
- /* Reset to look up tunnel partner in the configured FIB */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
-}
-
-always_inline void
-get_two_tunnels_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
- vlib_buffer_t * b1, lisp_gpe_tunnel_t ** t0,
- lisp_gpe_tunnel_t ** t1, u8 is_v4)
-{
- u32 adj_index0, adj_index1, tunnel_index0, tunnel_index1;
- ip_adjacency_t *adj0, *adj1;
-
- /* Get adjacency and from it the tunnel_index */
- adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
-
- if (is_v4)
- {
- adj0 = ip_get_adjacency (lgm->lm4, adj_index0);
- adj1 = ip_get_adjacency (lgm->lm4, adj_index1);
- }
- else
- {
- adj0 = ip_get_adjacency (lgm->lm6, adj_index0);
- adj1 = ip_get_adjacency (lgm->lm6, adj_index1);
- }
-
- tunnel_index0 = adj0->if_address_index;
- tunnel_index1 = adj1->if_address_index;
-
- t0[0] = pool_elt_at_index (lgm->tunnels, tunnel_index0);
- t1[0] = pool_elt_at_index (lgm->tunnels, tunnel_index1);
-
- ASSERT (t0[0] != 0);
- ASSERT (t1[0] != 0);
-}
-
-always_inline void
-encap_two_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
- vlib_buffer_t * b1, lisp_gpe_tunnel_t * t0,
- lisp_gpe_tunnel_t * t1, u32 * next0, u32 * next1)
-{
- ASSERT (sizeof (ip4_udp_lisp_gpe_header_t) == 36);
- ASSERT (sizeof (ip6_udp_lisp_gpe_header_t) == 56);
-
- lisp_gpe_sub_tunnel_t *st0, *st1;
- u32 *sti0, *sti1;
- sti0 = vec_elt_at_index (t0->sub_tunnels_lbv,
- vnet_buffer (b0)->ip.flow_hash %
- t0->sub_tunnels_lbv_count);
- sti1 =
- vec_elt_at_index (t1->sub_tunnels_lbv,
- vnet_buffer (b1)->ip.flow_hash %
- t1->sub_tunnels_lbv_count);
- st0 = vec_elt_at_index (t0->sub_tunnels, sti0[0]);
- st1 = vec_elt_at_index (t1->sub_tunnels, sti1[0]);
-
- if (PREDICT_TRUE (st0->is_ip4 == st1->is_ip4))
- {
- if (st0->is_ip4)
- {
- ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1);
- ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 36, 1);
- next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
- }
- else
- {
- ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0);
- ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 56, 0);
- next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
- }
- }
- else
- {
- if (st0->is_ip4)
- {
- ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1);
- ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 56, 1);
- next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
- next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
- }
- else
- {
- ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 1);
- ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 36, 1);
- next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
- next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
- }
- }
-
- /* Reset to look up tunnel partner in the configured FIB */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
-}
-
#define is_v4_packet(_h) ((*(u8*) _h) & 0xF0) == 0x40
/**
@@ -233,81 +96,12 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
- u32 next0, next1;
- lisp_gpe_tunnel_t *t0 = 0, *t1 = 0;
- u8 is_v4_eid0, is_v4_eid1;
-
- next0 = next1 = LISP_GPE_TX_NEXT_IP4_LOOKUP;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- is_v4_eid0 = is_v4_packet (vlib_buffer_get_current (b0));
- is_v4_eid1 = is_v4_packet (vlib_buffer_get_current (b1));
-
- if (PREDICT_TRUE (is_v4_eid0 == is_v4_eid1))
- {
- get_two_tunnels_inline (lgm, b0, b1, &t0, &t1,
- is_v4_eid0 ? 1 : 0);
- }
- else
- {
- get_one_tunnel_inline (lgm, b0, &t0, is_v4_eid0 ? 1 : 0);
- get_one_tunnel_inline (lgm, b1, &t1, is_v4_eid1 ? 1 : 0);
- }
-
- encap_two_inline (lgm, b0, b1, t0, t1, &next0, &next1);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
- sizeof (*tr));
- tr->tunnel_index = t0 - lgm->tunnels;
- }
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b1,
- sizeof (*tr));
- tr->tunnel_index = t1 - lgm->tunnels;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, next0,
- next1);
- }
-
while (n_left_from > 0 && n_left_to_next > 0)
{
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t *adj0;
+ const dpo_id_t *dpo0;
vlib_buffer_t *b0;
- u32 bi0, next0 = LISP_GPE_TX_NEXT_IP4_LOOKUP;
- lisp_gpe_tunnel_t *t0 = 0;
u8 is_v4_0;
bi0 = from[0];
@@ -319,16 +113,23 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
b0 = vlib_get_buffer (vm, bi0);
+ /* Fixup the checksum and len fields in the LISP tunnel encap
+ * that was applied at the midchain node */
is_v4_0 = is_v4_packet (vlib_buffer_get_current (b0));
- get_one_tunnel_inline (lgm, b0, &t0, is_v4_0 ? 1 : 0);
+ ip_udp_fixup_one (lgm->vlib_main, b0, is_v4_0);
- encap_one_inline (lgm, b0, t0, &next0);
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
sizeof (*tr));
- tr->tunnel_index = t0 - lgm->tunnels;
+ tr->tunnel_index = adj_index0;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -348,7 +149,7 @@ format_lisp_gpe_name (u8 * s, va_list * args)
}
/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (lisp_gpe_device_class,static) = {
+VNET_DEVICE_CLASS (lisp_gpe_device_class) = {
.name = "LISP_GPE",
.format_device_name = format_lisp_gpe_name,
.format_tx_trace = format_lisp_gpe_tx_trace,
@@ -394,133 +195,51 @@ VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
};
/* *INDENT-ON* */
-int
-add_del_ip_prefix_route (ip_prefix_t * dst_prefix, u32 table_id,
- ip_adjacency_t * add_adj, u8 is_add, u32 * adj_index)
+static void
+add_del_lisp_gpe_default_route (u32 table_id, fib_protocol_t proto, u8 is_add)
{
- uword *p;
+ fib_prefix_t prefix = {
+ .fp_proto = proto,
+ };
+ u32 fib_index;
- if (ip_prefix_version (dst_prefix) == IP4)
+ if (is_add)
{
- ip4_main_t *im4 = &ip4_main;
- ip4_add_del_route_args_t a;
- ip4_address_t addr = ip_prefix_v4 (dst_prefix);
-
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id;
- a.adj_index = ~0;
- a.dst_address_length = ip_prefix_len (dst_prefix);
- a.dst_address = addr;
- a.flags |= is_add ? IP4_ROUTE_FLAG_ADD : IP4_ROUTE_FLAG_DEL;
- a.add_adj = add_adj;
- a.n_add_adj = is_add ? 1 : 0;
-
- ip4_add_del_route (im4, &a);
-
- if (is_add)
- {
- p = ip4_get_route (im4, table_id, 0, addr.as_u8,
- ip_prefix_len (dst_prefix));
- if (p == 0)
- {
- clib_warning ("Failed to insert route for eid %U!",
- format_ip4_address_and_length, addr.as_u8,
- ip_prefix_len (dst_prefix));
- return -1;
- }
- adj_index[0] = p[0];
- }
+ /*
+ * Add a default route that results in a control plane punt DPO
+ */
+ dpo_id_t cp_punt = DPO_NULL;
+
+ dpo_set (&cp_punt, DPO_LISP_CP, fib_proto_to_dpo (proto), proto);
+
+ fib_index =
+ fib_table_find_or_create_and_lock (prefix.fp_proto, table_id);
+ fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &cp_punt);
+ dpo_unlock (&cp_punt);
}
else
{
- ip6_main_t *im6 = &ip6_main;
- ip6_add_del_route_args_t a;
- ip6_address_t addr = ip_prefix_v6 (dst_prefix);
-
- memset (&a, 0, sizeof (a));
- a.flags = IP6_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id;
- a.adj_index = ~0;
- a.dst_address_length = ip_prefix_len (dst_prefix);
- a.dst_address = addr;
- a.flags |= is_add ? IP6_ROUTE_FLAG_ADD : IP6_ROUTE_FLAG_DEL;
- a.add_adj = add_adj;
- a.n_add_adj = is_add ? 1 : 0;
-
- ip6_add_del_route (im6, &a);
-
- if (is_add)
- {
- adj_index[0] = ip6_get_route (im6, table_id, 0, &addr,
- ip_prefix_len (dst_prefix));
- if (adj_index[0] == 0)
- {
- clib_warning ("Failed to insert route for eid %U!",
- format_ip6_address_and_length, addr.as_u8,
- ip_prefix_len (dst_prefix));
- return -1;
- }
- }
+ fib_index = fib_table_find (prefix.fp_proto, table_id);
+ fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP);
+ fib_table_unlock (fib_index, prefix.fp_proto);
}
- return 0;
}
-static void
-add_del_lisp_gpe_default_route (u32 table_id, u8 is_v4, u8 is_add)
+void
+lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id)
{
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
- ip_adjacency_t adj;
- ip_prefix_t prefix;
- u32 adj_index = 0;
-
- /* setup adjacency */
- memset (&adj, 0, sizeof (adj));
-
- adj.n_adj = 1;
- adj.explicit_fib_index = ~0;
- adj.lookup_next_index = is_v4 ? lgm->ip4_lookup_next_lgpe_ip4_lookup :
- lgm->ip6_lookup_next_lgpe_ip6_lookup;
- /* default route has tunnel_index ~0 */
- adj.rewrite_header.sw_if_index = ~0;
-
- /* set prefix to 0/0 */
- memset (&prefix, 0, sizeof (prefix));
- ip_prefix_version (&prefix) = is_v4 ? IP4 : IP6;
-
- /* add/delete route for prefix */
- add_del_ip_prefix_route (&prefix, table_id, &adj, is_add, &adj_index);
-}
+ fib_node_index_t fib_index;
-static void
-lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id, u8 is_ip4)
-{
- if (is_ip4)
- {
- ip4_main_t *im4 = &ip4_main;
- ip4_fib_t *fib;
- fib = find_ip4_fib_by_table_index_or_id (im4, table_id,
- IP4_ROUTE_FLAG_TABLE_ID);
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
+ vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ ip4_sw_interface_enable_disable (sw_if_index, 1);
- /* fib's created if it doesn't exist */
- ASSERT (fib != 0);
-
- vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
- im4->fib_index_by_sw_if_index[sw_if_index] = fib->index;
- }
- else
- {
- ip6_main_t *im6 = &ip6_main;
- ip6_fib_t *fib;
- fib = find_ip6_fib_by_table_index_or_id (im6, table_id,
- IP6_ROUTE_FLAG_TABLE_ID);
-
- /* fib's created if it doesn't exist */
- ASSERT (fib != 0);
-
- vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
- im6->fib_index_by_sw_if_index[sw_if_index] = fib->index;
- }
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id);
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ ip6_sw_interface_enable_disable (sw_if_index, 1);
}
#define foreach_l2_lisp_gpe_tx_next \
@@ -605,71 +324,71 @@ l2_flow_hash (vlib_buffer_t * b0)
return (u32) c;
}
-always_inline void
-l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0,
- u32 * next0)
-{
- lisp_gpe_tunnel_t *t0;
-
- t0 = pool_elt_at_index (lgm->tunnels, ti0);
- ASSERT (0 != t0);
-
- if (PREDICT_TRUE (LISP_NO_ACTION == t0->action))
- {
- /* compute 'flow' hash */
- if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1))
- vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
- encap_one_inline (lgm, b0, t0, next0);
- }
- else
- {
- l2_process_tunnel_action (b0, t0->action, next0);
- }
-}
-
-always_inline void
-l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1,
- u32 ti0, u32 ti1, u32 * next0, u32 * next1)
-{
- lisp_gpe_tunnel_t *t0, *t1;
-
- t0 = pool_elt_at_index (lgm->tunnels, ti0);
- t1 = pool_elt_at_index (lgm->tunnels, ti1);
-
- ASSERT (0 != t0 && 0 != t1);
-
- if (PREDICT_TRUE (LISP_NO_ACTION == t0->action
- && LISP_NO_ACTION == t1->action))
- {
- if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1))
- vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
- if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1))
- vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1);
- encap_two_inline (lgm, b0, b1, t0, t1, next0, next1);
- }
- else
- {
- if (LISP_NO_ACTION == t0->action)
- {
- if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1))
- vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
- encap_one_inline (lgm, b0, t0, next0);
- l2_process_tunnel_action (b1, t1->action, next1);
- }
- else if (LISP_NO_ACTION == t1->action)
- {
- if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1))
- vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1);
- encap_one_inline (lgm, b1, t1, next1);
- l2_process_tunnel_action (b0, t0->action, next0);
- }
- else
- {
- l2_process_tunnel_action (b0, t0->action, next0);
- l2_process_tunnel_action (b1, t1->action, next1);
- }
- }
-}
+/* always_inline void */
+/* l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, */
+/* u32 * next0) */
+/* { */
+/* lisp_gpe_tunnel_t *t0; */
+
+/* t0 = pool_elt_at_index (lgm->tunnels, ti0); */
+/* ASSERT (0 != t0); */
+
+/* if (PREDICT_TRUE (LISP_NO_ACTION == t0->action)) */
+/* { */
+/* /\* compute 'flow' hash *\/ */
+/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */
+/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */
+/* encap_one_inline (lgm, b0, t0, next0); */
+/* } */
+/* else */
+/* { */
+/* l2_process_tunnel_action (b0, t0->action, next0); */
+/* } */
+/* } */
+
+/* always_inline void */
+/* l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, */
+/* u32 ti0, u32 ti1, u32 * next0, u32 * next1) */
+/* { */
+/* lisp_gpe_tunnel_t *t0, *t1; */
+
+/* t0 = pool_elt_at_index (lgm->tunnels, ti0); */
+/* t1 = pool_elt_at_index (lgm->tunnels, ti1); */
+
+/* ASSERT (0 != t0 && 0 != t1); */
+
+/* if (PREDICT_TRUE (LISP_NO_ACTION == t0->action */
+/* && LISP_NO_ACTION == t1->action)) */
+/* { */
+/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */
+/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */
+/* if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) */
+/* vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); */
+/* encap_two_inline (lgm, b0, b1, t0, t1, next0, next1); */
+/* } */
+/* else */
+/* { */
+/* if (LISP_NO_ACTION == t0->action) */
+/* { */
+/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */
+/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */
+/* encap_one_inline (lgm, b0, t0, next0); */
+/* l2_process_tunnel_action (b1, t1->action, next1); */
+/* } */
+/* else if (LISP_NO_ACTION == t1->action) */
+/* { */
+/* if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) */
+/* vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); */
+/* encap_one_inline (lgm, b1, t1, next1); */
+/* l2_process_tunnel_action (b0, t0->action, next0); */
+/* } */
+/* else */
+/* { */
+/* l2_process_tunnel_action (b0, t0->action, next0); */
+/* l2_process_tunnel_action (b1, t1->action, next1); */
+/* } */
+/* } */
+/* } */
/**
* @brief LISP-GPE interface TX (encap) function for L2 overlays.
@@ -710,9 +429,9 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
- u32 next0, next1, ti0, ti1;
+ u32 next0, next1;
lisp_gpe_tunnel_t *t0 = 0, *t1 = 0;
- ethernet_header_t *e0, *e1;
+ // ethernet_header_t *e0, *e1;
next0 = next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP;
@@ -742,49 +461,49 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- e0 = vlib_buffer_get_current (b0);
- e1 = vlib_buffer_get_current (b1);
+ /* e0 = vlib_buffer_get_current (b0); */
+ /* e1 = vlib_buffer_get_current (b1); */
/* lookup dst + src mac */
- ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index,
- e0->src_address, e0->dst_address);
- ti1 = lisp_l2_fib_lookup (lgm, vnet_buffer (b1)->l2.bd_index,
- e1->src_address, e1->dst_address);
-
- if (PREDICT_TRUE ((u32) ~ 0 != ti0) && (u32) ~ 0 != ti1)
- {
- /* process both tunnels */
- l2_process_two (lgm, b0, b1, ti0, ti1, &next0, &next1);
- }
- else
- {
- if ((u32) ~ 0 != ti0)
- {
- /* process tunnel for b0 */
- l2_process_one (lgm, b0, ti0, &next0);
-
- /* no tunnel found for b1, send to control plane */
- next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP;
- vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC;
- }
- else if ((u32) ~ 0 != ti1)
- {
- /* process tunnel for b1 */
- l2_process_one (lgm, b1, ti1, &next1);
-
- /* no tunnel found b0, send to control plane */
- next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP;
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC;
- }
- else
- {
- /* no tunnels found */
- next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP;
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC;
- next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP;
- vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC;
- }
- }
+ /* ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, */
+ /* e0->src_address, e0->dst_address); */
+ /* ti1 = lisp_l2_fib_lookup (lgm, vnet_buffer (b1)->l2.bd_index, */
+ /* e1->src_address, e1->dst_address); */
+
+ /* if (PREDICT_TRUE ((u32) ~ 0 != ti0) && (u32) ~ 0 != ti1) */
+ /* { */
+ /* /\* process both tunnels *\/ */
+ /* l2_process_two (lgm, b0, b1, ti0, ti1, &next0, &next1); */
+ /* } */
+ /* else */
+ /* { */
+ /* if ((u32) ~ 0 != ti0) */
+ /* { */
+ /* /\* process tunnel for b0 *\/ */
+ /* l2_process_one (lgm, b0, ti0, &next0); */
+
+ /* /\* no tunnel found for b1, send to control plane *\/ */
+ /* next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */
+ /* vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; */
+ /* } */
+ /* else if ((u32) ~ 0 != ti1) */
+ /* { */
+ /* /\* process tunnel for b1 *\/ */
+ /* l2_process_one (lgm, b1, ti1, &next1); */
+
+ /* /\* no tunnel found b0, send to control plane *\/ */
+ /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */
+ /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */
+ /* } */
+ /* else */
+ /* { */
+ /* /\* no tunnels found *\/ */
+ /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */
+ /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */
+ /* next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */
+ /* vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; */
+ /* } */
+ /* } */
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -824,16 +543,16 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index,
e0->src_address, e0->dst_address);
- if (PREDICT_TRUE ((u32) ~ 0 != ti0))
- {
- l2_process_one (lgm, b0, ti0, &next0);
- }
- else
- {
- /* no tunnel found send to control plane */
- next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP;
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC;
- }
+ /* if (PREDICT_TRUE ((u32) ~ 0 != ti0)) */
+ /* { */
+ /* l2_process_one (lgm, b0, ti0, &next0); */
+ /* } */
+ /* else */
+ /* { */
+ /* /\* no tunnel found send to control plane *\/ */
+ /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */
+ /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */
+ /* } */
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -973,7 +692,6 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm,
vnet_main_t *vnm = lgm->vnet_main;
tunnel_lookup_t *l3_ifaces = &lgm->l3_ifaces;
vnet_hw_interface_t *hi;
- u32 lookup_next_index4, lookup_next_index6;
uword *hip, *si;
hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, a->table_id);
@@ -997,30 +715,10 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm,
hi = create_lisp_gpe_iface (lgm, a->vni, a->table_id,
&lisp_gpe_device_class, l3_ifaces);
- /* set ingress arc from lgpe_ipX_lookup */
- lookup_next_index4 = vlib_node_add_next (lgm->vlib_main,
- lgpe_ip4_lookup_node.index,
- hi->output_node_index);
- lookup_next_index6 = vlib_node_add_next (lgm->vlib_main,
- lgpe_ip6_lookup_node.index,
- hi->output_node_index);
- hash_set (lgm->lgpe_ip4_lookup_next_index_by_table_id, a->table_id,
- lookup_next_index4);
- hash_set (lgm->lgpe_ip6_lookup_next_index_by_table_id, a->table_id,
- lookup_next_index6);
-
- /* insert default routes that point to lgpe-ipx-lookup */
- add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 1, 1);
- add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 0, 1);
-
- /* set egress arcs */
-#define _(sym,str) vlib_node_add_named_next_with_slot (vnm->vlib_main, \
- hi->tx_node_index, str, LISP_GPE_TX_NEXT_##sym);
- foreach_lisp_gpe_tx_next
-#undef _
- /* set interface in appropriate v4 and v6 FIBs */
- lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id, 1);
- lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id, 0);
+ /* insert default routes that point to lisp-cp lookup */
+ lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id);
+ add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP4, 1);
+ add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP6, 1);
/* enable interface */
vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
@@ -1037,11 +735,15 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm,
return -1;
}
+ hi = vnet_get_hw_interface (vnm, hip[0]);
+
remove_lisp_gpe_iface (lgm, hip[0], a->table_id, &lgm->l3_ifaces);
/* unset default routes */
- add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 1, 0);
- add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 0, 0);
+ ip4_sw_interface_enable_disable (hi->sw_if_index, 0);
+ ip6_sw_interface_enable_disable (hi->sw_if_index, 0);
+ add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP4, 0);
+ add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP6, 0);
}
return 0;
diff --git a/vnet/vnet/lisp-gpe/ip_forward.c b/vnet/vnet/lisp-gpe/ip_forward.c
index bd9951acefa..8a24ec0322c 100644
--- a/vnet/vnet/lisp-gpe/ip_forward.c
+++ b/vnet/vnet/lisp-gpe/ip_forward.c
@@ -12,1492 +12,257 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/**
- * @file
- * @brief LISP-GPE overlay IP forwarding logic and lookup data structures.
- *
- * Provides an implementation of a Source/Dest (SD) IP FIB that leverages the
- * existing destination only FIB. Lookups are done in two stages, first the
- * destination FIB looks up a packet's destination address and then if a
- * an SD entry is hit, the destination adjacency will point to the second
- * stage, the source FIB, where the packet's source is looked up. Note that a
- * miss in the source FIB does not result in an overall SD lookup retry with
- * a less specific entry from the destination FIB.
- */
-#include <vnet/lisp-gpe/lisp_gpe.h>
-
-/** Sets adj index for destination address in IP4 FIB. Similar to the function
- * in ip4_forward but this one avoids calling route callbacks */
-static void
-ip4_sd_fib_set_adj_index (lisp_gpe_main_t * lgm, ip4_fib_t * fib, u32 flags,
- u32 dst_address_u32, u32 dst_address_length,
- u32 adj_index)
-{
- ip_lookup_main_t *lm = lgm->lm4;
- uword *hash;
-
- if (vec_bytes (fib->old_hash_values))
- memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
- if (vec_bytes (fib->new_hash_values))
- memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
- fib->new_hash_values[0] = adj_index;
-
- /* Make sure adj index is valid. */
- if (CLIB_DEBUG > 0)
- (void) ip_get_adjacency (lm, adj_index);
-
- hash = fib->adj_index_by_dst_address[dst_address_length];
-
- hash = _hash_set3 (hash, dst_address_u32,
- fib->new_hash_values, fib->old_hash_values);
-
- fib->adj_index_by_dst_address[dst_address_length] = hash;
-}
-
-/** Initialize the adjacency index by destination address vector for IP4 FIB.
- * Copied from ip4_forward since it's static */
-static void
-ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
- ip4_fib_t * fib, u32 address_length)
-{
- hash_t *h;
- uword max_index;
-
- ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
- lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword))
- / sizeof (uword);
-
- fib->adj_index_by_dst_address[address_length] =
- hash_create (32 /* elts */ , lm->fib_result_n_words * sizeof (uword));
-
- hash_set_flags (fib->adj_index_by_dst_address[address_length],
- HASH_FLAG_NO_AUTO_SHRINK);
-
- h = hash_header (fib->adj_index_by_dst_address[address_length]);
- max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
-
- /* Initialize new/old hash value vectors. */
- vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
- vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
-}
-
-/** Add/del src route to IP4 SD FIB. */
-static void
-ip4_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm,
- ip4_add_del_route_args_t * a)
-{
- ip_lookup_main_t *lm = lgm->lm4;
- ip4_fib_t *fib;
- u32 dst_address, dst_address_length, adj_index, old_adj_index;
- uword *hash, is_del;
-
- /* Either create new adjacency or use given one depending on arguments. */
- if (a->n_add_adj > 0)
- ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
- else
- adj_index = a->adj_index;
-
- dst_address = a->dst_address.data_u32;
- dst_address_length = a->dst_address_length;
-
- fib = pool_elt_at_index (lgm->ip4_src_fibs, a->table_index_or_table_id);
-
- if (!fib->adj_index_by_dst_address[dst_address_length])
- ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
-
- hash = fib->adj_index_by_dst_address[dst_address_length];
-
- is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
-
- if (is_del)
- {
- fib->old_hash_values[0] = ~0;
- hash = _hash_unset (hash, dst_address, fib->old_hash_values);
- fib->adj_index_by_dst_address[dst_address_length] = hash;
- }
- else
- ip4_sd_fib_set_adj_index (lgm, fib, a->flags, dst_address,
- dst_address_length, adj_index);
-
- old_adj_index = fib->old_hash_values[0];
-
- /* Avoid spurious reference count increments */
- if (old_adj_index == adj_index
- && adj_index != ~0 && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
- {
- ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
- if (adj->share_count > 0)
- adj->share_count--;
- }
-
- ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
- is_del ? old_adj_index : adj_index, is_del);
-
- /* Delete old adjacency index if present and changed. */
- if (!(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
- && old_adj_index != ~0 && old_adj_index != adj_index)
- ip_del_adjacency (lm, old_adj_index);
-}
-
-/** Get src route from IP4 SD FIB. */
-static void *
-ip4_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index,
- ip4_address_t * src, u32 address_length)
-{
- ip4_fib_t *fib = pool_elt_at_index (lgm->ip4_src_fibs, src_fib_index);
- uword *hash, *p;
- hash = fib->adj_index_by_dst_address[address_length];
- p = hash_get (hash, src->as_u32);
- return (void *) p;
-}
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct ip4_route {
- ip4_address_t address;
- u32 address_length : 6;
- u32 index : 26;
-}) ip4_route_t;
-/* *INDENT-ON* */
-
-/** Remove all routes from src IP4 FIB */
-void
-ip4_sd_fib_clear_src_fib (lisp_gpe_main_t * lgm, ip4_fib_t * fib)
-{
- ip4_route_t *routes = 0, *r;
- u32 i;
-
- vec_reset_length (routes);
-
- for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
- {
- uword *hash = fib->adj_index_by_dst_address[i];
- hash_pair_t *p;
- ip4_route_t x;
-
- x.address_length = i;
- x.index = 0; /* shut up coverity */
-
- /* *INDENT-OFF* */
- hash_foreach_pair (p, hash,
- ({
- x.address.data_u32 = p->key;
- vec_add1 (routes, x);
- }));
- /* *INDENT-ON* */
- }
-
- vec_foreach (r, routes)
- {
- ip4_add_del_route_args_t a;
-
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL;
- a.table_index_or_table_id = fib - lgm->ip4_src_fibs;
- a.dst_address = r->address;
- a.dst_address_length = r->address_length;
- a.adj_index = ~0;
-
- ip4_sd_fib_add_del_src_route (lgm, &a);
- }
-}
-
-/** Test if IP4 FIB is empty */
-static u8
-ip4_fib_is_empty (ip4_fib_t * fib)
-{
- u8 fib_is_empty;
- int i;
-
- fib_is_empty = 1;
- for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 0; i--)
- {
- uword *hash = fib->adj_index_by_dst_address[i];
- uword n_elts = hash_elts (hash);
- if (n_elts)
- {
- fib_is_empty = 0;
- break;
- }
- }
- return fib_is_empty;
-}
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/load_balance.h>
/**
- * @brief Add/del route to IP4 SD FIB.
- *
- * Adds/remove routes to both destination and source FIBs. Entries added
- * to destination FIB are associated to adjacencies that point to the source
- * FIB and store the index of the particular source FIB associated to the
- * destination. Source FIBs are locally managed (see @ref lgm->ip4_src_fibs
- * and @ref lgm->ip6_src_fibs), but the adjacencies are allocated out of the
- * global adjacency pool.
+ * @brief Add route to IP4 or IP6 Destination FIB.
*
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] dst_prefix Destination IP4 prefix.
- * @param[in] src_prefix Source IP4 prefix.
- * @param[in] table_id Table id.
- * @param[in] add_adj Pointer to the adjacency to be added.
- * @param[in] is_add Add/del flag.
+ * Add a route to the destination FIB that results in the lookup
+ * in the SRC FIB. The SRC FIB is created if it does not yet exist.
*
- * @return 0 on success.
+ * @param[in] dst_table_id Destination FIB Table-ID
+ * @param[in] dst_prefix Destination IP prefix.
+ * @param[out] src_fib_index The index/ID of the SRC FIB created.
*/
-static int
-ip4_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id,
- ip_adjacency_t * add_adj, u8 is_add)
-{
- uword *p;
- ip4_add_del_route_args_t a;
- ip_adjacency_t *dst_adjp, dst_adj;
- ip4_address_t dst = ip_prefix_v4 (dst_prefix), src;
- u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0;
- ip4_fib_t *src_fib;
-
- if (src_prefix)
- {
- src = ip_prefix_v4 (src_prefix);
- src_address_length = ip_prefix_len (src_prefix);
- }
- else
- memset (&src, 0, sizeof (src));
-
- /* lookup dst adj */
- p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length);
-
- if (is_add)
- {
- /* insert dst prefix to ip4 fib, if it's not in yet */
- if (p == 0)
- {
- /* allocate and init src ip4 fib */
- pool_get (lgm->ip4_src_fibs, src_fib);
- ip4_mtrie_init (&src_fib->mtrie);
-
- /* configure adjacency */
- memset (&dst_adj, 0, sizeof (dst_adj));
-
- /* reuse rewrite header to store pointer to src fib */
- dst_adj.rewrite_header.sw_if_index = src_fib - lgm->ip4_src_fibs;
-
- /* dst adj should point to lisp gpe lookup */
- dst_adj.lookup_next_index = lgm->ip4_lookup_next_lgpe_ip4_lookup;
-
- /* explicit_fib_index is used in IP6 FIB lookup, don't reuse it */
- dst_adj.explicit_fib_index = ~0;
- dst_adj.n_adj = 1;
-
- /* make sure we have different signatures for adj in different tables
- * but with the same lookup_next_index and for adj in the same table
- * but associated to different destinations */
- dst_adj.if_address_index = table_id;
- dst_adj.indirect.next_hop.ip4 = dst;
-
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id; /* vrf */
- a.adj_index = ~0;
- a.dst_address_length = dst_address_length;
- a.dst_address = dst;
- a.flags |= IP4_ROUTE_FLAG_ADD;
- a.add_adj = &dst_adj;
- a.n_add_adj = 1;
-
- ip4_add_del_route (lgm->im4, &a);
-
- /* lookup dst adj to obtain the adj index */
- p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8,
- dst_address_length);
-
- /* make sure insertion succeeded */
- if (CLIB_DEBUG)
- {
- ASSERT (p != 0);
- dst_adjp = ip_get_adjacency (lgm->lm4, p[0]);
- ASSERT (dst_adjp->rewrite_header.sw_if_index
- == dst_adj.rewrite_header.sw_if_index);
- }
- }
- }
- else
- {
- if (p == 0)
- {
- clib_warning
- ("Trying to delete inexistent dst route for %U. Aborting",
- format_ip4_address_and_length, dst.as_u8, dst_address_length);
- return -1;
- }
- }
-
- dst_adjp = ip_get_adjacency (lgm->lm4, p[0]);
-
- /* add/del src prefix to src fib */
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index;
- a.adj_index = ~0;
- a.flags |= is_add ? IP4_ROUTE_FLAG_ADD : IP4_ROUTE_FLAG_DEL;
- a.add_adj = add_adj;
- a.n_add_adj = is_add ? 1 : 0;
- /* if src prefix is null, add 0/0 */
- a.dst_address_length = src_address_length;
- a.dst_address = src;
- ip4_sd_fib_add_del_src_route (lgm, &a);
-
- /* make sure insertion succeeded */
- if (CLIB_DEBUG && is_add)
- {
- uword *sai;
- ip_adjacency_t *src_adjp;
- sai = ip4_sd_get_src_route (lgm, dst_adjp->rewrite_header.sw_if_index,
- &src, src_address_length);
- ASSERT (sai != 0);
- src_adjp = ip_get_adjacency (lgm->lm4, sai[0]);
- ASSERT (src_adjp->if_address_index == add_adj->if_address_index);
- }
-
- /* if a delete, check if there are elements left in the src fib */
- if (!is_add)
- {
- src_fib = pool_elt_at_index (lgm->ip4_src_fibs,
- dst_adjp->rewrite_header.sw_if_index);
- if (!src_fib)
- return 0;
-
- /* if there's nothing left */
- if (ip4_fib_is_empty (src_fib))
- {
- /* remove the src fib .. */
- pool_put (lgm->ip4_src_fibs, src_fib);
-
- /* .. and remove dst route */
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id; /* vrf */
- a.adj_index = ~0;
- a.dst_address_length = dst_address_length;
- a.dst_address = dst;
- a.flags |= IP4_ROUTE_FLAG_DEL;
-
- ip4_add_del_route (lgm->im4, &a);
- }
- }
-
- return 0;
-}
-
-/**
- * @brief Retrieve IP4 SD FIB entry.
- *
- * Looks up SD IP4 route by first looking up the destination in VPP's main FIB
- * and subsequently the source in the src FIB. The index of the source FIB is
- * stored in the dst adjacency's rewrite_header.sw_if_index. If source is 0
- * do search with 0/0 src.
- *
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] dst_prefix Destination IP4 prefix.
- * @param[in] src_prefix Source IP4 prefix.
- * @param[in] table_id Table id.
- *
- * @return pointer to the adjacency if route found.
- */
-static void *
-ip4_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id)
-{
- uword *p;
- ip4_address_t dst = ip_prefix_v4 (dst_prefix), src;
- u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0;
- ip_adjacency_t *dst_adj;
-
- if (src_prefix)
- {
- src = ip_prefix_v4 (src_prefix);
- src_address_length = ip_prefix_len (src_prefix);
- }
- else
- memset (&src, 0, sizeof (src));
-
- /* lookup dst adj */
- p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length);
- if (p == 0)
- return p;
-
- dst_adj = ip_get_adjacency (lgm->lm4, p[0]);
- return ip4_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src,
- src_address_length);
-}
-
-/** Get src route from IP6 SD FIB. */
-static u32
-ip6_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index,
- ip6_address_t * src, u32 address_length)
-{
- int rv;
- BVT (clib_bihash_kv) kv, value;
- ip6_src_fib_t *fib = pool_elt_at_index (lgm->ip6_src_fibs, src_fib_index);
-
- ip6_address_t *mask;
-
- ASSERT (address_length <= 128);
-
- mask = &fib->fib_masks[address_length];
-
- kv.key[0] = src->as_u64[0] & mask->as_u64[0];
- kv.key[1] = src->as_u64[1] & mask->as_u64[1];
- kv.key[2] = address_length;
-
- rv = BV (clib_bihash_search_inline_2) (&fib->ip6_lookup_table, &kv, &value);
- if (rv == 0)
- return value.value;
-
- return 0;
-}
-
-static void
-compute_prefix_lengths_in_search_order (ip6_src_fib_t * fib)
-{
- int i;
- vec_reset_length (fib->prefix_lengths_in_search_order);
- /* Note: bitmap reversed so this is in fact a longest prefix match */
-
- /* *INDENT-OFF* */
- clib_bitmap_foreach(i, fib->non_empty_dst_address_length_bitmap, ({
- int dst_address_length = 128 - i;
- vec_add1 (fib->prefix_lengths_in_search_order, dst_address_length);
- }));
- /* *INDENT-ON* */
-}
-
-/** Add/del src route to IP6 SD FIB. Rewrite of ip6_add_del_route() because
- * it uses im6 to find the FIB .*/
-static void
-ip6_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm,
- ip6_add_del_route_args_t * a)
+u32
+ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix)
{
- ip_lookup_main_t *lm = lgm->lm6;
- ip6_src_fib_t *fib;
- ip6_address_t dst_address;
- u32 dst_address_length, adj_index;
- uword is_del;
- u32 old_adj_index = ~0;
- BVT (clib_bihash_kv) kv, value;
-
- vlib_smp_unsafe_warning ();
-
- is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0;
+ fib_node_index_t src_fib_index;
+ fib_prefix_t dst_fib_prefix;
+ fib_node_index_t dst_fei;
- /* Either create new adjacency or use given one depending on arguments. */
- if (a->n_add_adj > 0)
- {
- ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
- }
- else
- adj_index = a->adj_index;
+ ASSERT (NULL != dst_prefix);
- dst_address = a->dst_address;
- dst_address_length = a->dst_address_length;
- fib = pool_elt_at_index (lgm->ip6_src_fibs, a->table_index_or_table_id);
+ ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix);
- ASSERT (dst_address_length < ARRAY_LEN (fib->fib_masks));
- ip6_address_mask (&dst_address, &fib->fib_masks[dst_address_length]);
+ /*
+ * lookup the destination prefix in the VRF table and retrieve the
+ * LISP associated data
+ */
+ dst_fei = fib_table_lookup_exact_match (dst_fib_index, &dst_fib_prefix);
- /* refcount accounting */
- if (is_del)
+ /*
+ * If the FIB entry is not present, or not LISP sourced, add it
+ */
+ if (dst_fei == FIB_NODE_INDEX_INVALID ||
+ NULL == fib_entry_get_source_data (dst_fei, FIB_SOURCE_LISP))
{
- ASSERT (fib->dst_address_length_refcounts[dst_address_length] > 0);
- if (--fib->dst_address_length_refcounts[dst_address_length] == 0)
- {
- fib->non_empty_dst_address_length_bitmap =
- clib_bitmap_set (fib->non_empty_dst_address_length_bitmap,
- 128 - dst_address_length, 0);
- compute_prefix_lengths_in_search_order (fib);
- }
+ dpo_id_t src_lkup_dpo = DPO_NULL;
+
+ /* create a new src FIB. */
+ src_fib_index =
+ fib_table_create_and_lock (dst_fib_prefix.fp_proto,
+ "LISP-src for [%d,%U]",
+ dst_fib_index,
+ format_fib_prefix, &dst_fib_prefix);
+
+ /*
+ * create a data-path object to perform the source address lookup
+ * in the SRC FIB
+ */
+ lookup_dpo_add_or_lock_w_fib_index (src_fib_index,
+ (ip_prefix_version (dst_prefix) ==
+ IP6 ? DPO_PROTO_IP6 :
+ DPO_PROTO_IP4),
+ LOOKUP_INPUT_SRC_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ &src_lkup_dpo);
+
+ /*
+ * add the entry to the destination FIB that uses the lookup DPO
+ */
+ dst_fei = fib_table_entry_special_dpo_add (dst_fib_index,
+ &dst_fib_prefix,
+ FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &src_lkup_dpo);
+
+ /*
+ * the DPO is locked by the FIB entry, and we have no further
+ * need for it.
+ */
+ dpo_unlock (&src_lkup_dpo);
+
+ /*
+ * save the SRC FIB index on the entry so we can retrieve it for
+ * subsequent routes.
+ */
+ fib_entry_set_source_data (dst_fei, FIB_SOURCE_LISP, &src_fib_index);
}
else
{
- fib->dst_address_length_refcounts[dst_address_length]++;
-
- fib->non_empty_dst_address_length_bitmap =
- clib_bitmap_set (fib->non_empty_dst_address_length_bitmap,
- 128 - dst_address_length, 1);
- compute_prefix_lengths_in_search_order (fib);
- }
-
- kv.key[0] = dst_address.as_u64[0];
- kv.key[1] = dst_address.as_u64[1];
- kv.key[2] = dst_address_length;
-
- if (BV (clib_bihash_search) (&fib->ip6_lookup_table, &kv, &value) == 0)
- old_adj_index = value.value;
-
- if (is_del)
- BV (clib_bihash_add_del) (&fib->ip6_lookup_table, &kv, 0 /* is_add */ );
- else
- {
- /* Make sure adj index is valid. */
- if (CLIB_DEBUG > 0)
- (void) ip_get_adjacency (lm, adj_index);
-
- kv.value = adj_index;
-
- BV (clib_bihash_add_del) (&fib->ip6_lookup_table, &kv, 1 /* is_add */ );
+ /*
+ * destination FIB entry already present
+ */
+ src_fib_index = *(u32 *) fib_entry_get_source_data (dst_fei,
+ FIB_SOURCE_LISP);
}
- /* Avoid spurious reference count increments */
- if (old_adj_index == adj_index
- && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
- {
- ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
- if (adj->share_count > 0)
- adj->share_count--;
- }
-
- /* Delete old adjacency index if present and changed. */
- {
- if (!(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
- && old_adj_index != ~0 && old_adj_index != adj_index)
- ip_del_adjacency (lm, old_adj_index);
- }
-}
-
-static void
-ip6_src_fib_init (ip6_src_fib_t * fib)
-{
- uword i;
-
- for (i = 0; i < ARRAY_LEN (fib->fib_masks); i++)
- {
- u32 j, i0, i1;
-
- i0 = i / 32;
- i1 = i % 32;
-
- for (j = 0; j < i0; j++)
- fib->fib_masks[i].as_u32[j] = ~0;
-
- if (i1)
- fib->fib_masks[i].as_u32[i0] =
- clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
- }
-
- if (fib->lookup_table_nbuckets == 0)
- fib->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
-
- fib->lookup_table_nbuckets = 1 << max_log2 (fib->lookup_table_nbuckets);
-
- if (fib->lookup_table_size == 0)
- fib->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
-
- BV (clib_bihash_init) (&fib->ip6_lookup_table, "ip6 lookup table",
- fib->lookup_table_nbuckets, fib->lookup_table_size);
-
+ return (src_fib_index);
}
/**
- * @brief Add/del route to IP6 SD FIB.
- *
- * Adds/remove routes to both destination and source FIBs. Entries added
- * to destination FIB are associated to adjacencies that point to the source
- * FIB and store the index of the particular source FIB associated to the
- * destination. Source FIBs are locally managed (see @ref lgm->ip4_src_fibs
- * and @ref lgm->ip6_src_fibs), but the adjacencies are allocated out of the
- * global adjacency pool.
+ * @brief Delete route from IP4 or IP6 SD FIB.
*
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] dst_prefix Destination IP6 prefix.
- * @param[in] src_prefix Source IP6 prefix.
- * @param[in] table_id Table id.
- * @param[in] add_adj Pointer to the adjacency to be added.
- * @param[in] is_add Add/del flag.
+ * Remove routes from both destination and source FIBs.
*
- * @return 0 on success.
+ * @param[in] src_fib_index The index/ID of the SRC FIB
+ * @param[in] src_prefix Source IP prefix.
+ * @param[in] dst_fib_index The index/ID of the DST FIB
+ * @param[in] dst_prefix Destination IP prefix.
*/
-static int
-ip6_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id,
- ip_adjacency_t * add_adj, u8 is_add)
+void
+ip_src_dst_fib_del_route (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ u32 dst_fib_index, const ip_prefix_t * dst_prefix)
{
- u32 adj_index;
- ip6_add_del_route_args_t a;
- ip_adjacency_t *dst_adjp, dst_adj;
- ip6_address_t dst = ip_prefix_v6 (dst_prefix), src;
- u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0;
- ip6_src_fib_t *src_fib;
-
- if (src_prefix)
- {
- src = ip_prefix_v6 (src_prefix);
- src_address_length = ip_prefix_len (src_prefix);
- }
- else
- memset (&src, 0, sizeof (src));
-
- /* lookup dst adj and create it if it doesn't exist */
- adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, dst_address_length);
+ fib_prefix_t dst_fib_prefix, src_fib_prefix;
- if (is_add)
- {
- /* insert dst prefix to ip6 fib, if it's not in yet */
- if (adj_index == 0)
- {
- /* allocate and init src ip6 fib */
- pool_get (lgm->ip6_src_fibs, src_fib);
- memset (src_fib, 0, sizeof (src_fib[0]));
- ip6_src_fib_init (src_fib);
-
- memset (&dst_adj, 0, sizeof (dst_adj));
-
- /* reuse rewrite header to store pointer to src fib */
- dst_adj.rewrite_header.sw_if_index = src_fib - lgm->ip6_src_fibs;
-
- /* dst adj should point to lisp gpe ip lookup */
- dst_adj.lookup_next_index = lgm->ip6_lookup_next_lgpe_ip6_lookup;
-
- /* explicit_fib_index is used in IP6 FIB lookup, don't reuse it */
- dst_adj.explicit_fib_index = ~0;
- dst_adj.n_adj = 1;
-
- /* make sure we have different signatures for adj in different tables
- * but with the same lookup_next_index and for adj in the same table
- * but associated to different destinations */
- dst_adj.if_address_index = table_id;
- dst_adj.indirect.next_hop.ip6 = dst;
-
- memset (&a, 0, sizeof (a));
- a.flags = IP6_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id; /* vrf */
- a.adj_index = ~0;
- a.dst_address_length = dst_address_length;
- a.dst_address = dst;
- a.flags |= IP6_ROUTE_FLAG_ADD;
- a.add_adj = &dst_adj;
- a.n_add_adj = 1;
-
- ip6_add_del_route (lgm->im6, &a);
-
- /* lookup dst adj to obtain the adj index */
- adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst,
- dst_address_length);
-
- /* make sure insertion succeeded */
- if (CLIB_DEBUG)
- {
- ASSERT (adj_index != 0);
- dst_adjp = ip_get_adjacency (lgm->lm6, adj_index);
- ASSERT (dst_adjp->rewrite_header.sw_if_index
- == dst_adj.rewrite_header.sw_if_index);
- }
- }
- }
- else
- {
- if (adj_index == 0)
- {
- clib_warning
- ("Trying to delete inexistent dst route for %U. Aborting",
- format_ip_prefix, dst_prefix);
- return -1;
- }
- }
-
- dst_adjp = ip_get_adjacency (lgm->lm6, adj_index);
-
- /* add/del src prefix to src fib */
- memset (&a, 0, sizeof (a));
- a.flags = IP6_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index;
- a.adj_index = ~0;
- a.flags |= is_add ? IP6_ROUTE_FLAG_ADD : IP6_ROUTE_FLAG_DEL;
- a.add_adj = add_adj;
- a.n_add_adj = is_add ? 1 : 0;
- /* if src prefix is null, add ::0 */
- a.dst_address_length = src_address_length;
- a.dst_address = src;
- ip6_sd_fib_add_del_src_route (lgm, &a);
-
- /* make sure insertion succeeded */
- if (CLIB_DEBUG && is_add)
- {
- u32 sai;
- ip_adjacency_t *src_adjp;
- sai = ip6_sd_get_src_route (lgm, dst_adjp->rewrite_header.sw_if_index,
- &src, src_address_length);
- ASSERT (sai != 0);
- src_adjp = ip_get_adjacency (lgm->lm6, sai);
- ASSERT (src_adjp->if_address_index == add_adj->if_address_index);
- }
-
- /* if a delete, check if there are elements left in the src fib */
- if (!is_add)
- {
- src_fib = pool_elt_at_index (lgm->ip6_src_fibs,
- dst_adjp->rewrite_header.sw_if_index);
- if (!src_fib)
- return 0;
-
- /* if there's nothing left */
- if (clib_bitmap_count_set_bits
- (src_fib->non_empty_dst_address_length_bitmap) == 0)
- {
- /* remove src fib .. */
- pool_put (lgm->ip6_src_fibs, src_fib);
+ ASSERT (NULL != dst_prefix);
+ ASSERT (NULL != src_prefix);
- /* .. and remove dst route */
- memset (&a, 0, sizeof (a));
- a.flags = IP6_ROUTE_FLAG_TABLE_ID;
- a.table_index_or_table_id = table_id; /* vrf */
- a.adj_index = ~0;
- a.dst_address_length = dst_address_length;
- a.dst_address = dst;
- a.flags |= IP6_ROUTE_FLAG_DEL;
+ ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix);
+ ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix);
- ip6_add_del_route (lgm->im6, &a);
- }
- }
-
- return 0;
-}
+ fib_table_entry_delete (src_fib_index, &src_fib_prefix, FIB_SOURCE_LISP);
-/**
- * @brief Retrieve IP6 SD FIB entry.
- *
- * Looks up SD IP6 route by first looking up the destination in VPP's main FIB
- * and subsequently the source in the src FIB. The index of the source FIB is
- * stored in the dst adjacency's @ref rewrite_header.sw_if_index. If source is
- * 0 do search with ::/0 src.
- *
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] dst_prefix Destination IP6 prefix.
- * @param[in] src_prefix Source IP6 prefix.
- * @param[in] table_id Table id.
- *
- * @return adjacency index if route found.
- */
-static u32
-ip6_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id)
-{
- u32 adj_index;
- ip6_address_t dst = ip_prefix_v6 (dst_prefix), src;
- u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0;
- ip_adjacency_t *dst_adj;
-
- if (src_prefix)
+ if (0 == fib_table_get_num_entries (src_fib_index,
+ src_fib_prefix.fp_proto,
+ FIB_SOURCE_LISP))
{
- src = ip_prefix_v6 (src_prefix);
- src_address_length = ip_prefix_len (src_prefix);
+ /*
+ * there's nothing left, unlock the source FIB and the
+ * destination route
+ */
+ fib_table_entry_special_remove (dst_fib_index,
+ &dst_fib_prefix, FIB_SOURCE_LISP);
+ fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto);
}
- else
- memset (&src, 0, sizeof (src));
-
- /* lookup dst adj */
- adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, dst_address_length);
- if (adj_index == 0)
- return adj_index;
-
- dst_adj = ip_get_adjacency (lgm->lm6, adj_index);
- return ip6_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src,
- src_address_length);
-}
-
-/**
- * @brief Add/del route to IP4 or IP6 SD FIB.
- *
- * Adds/remove routes to both destination and source FIBs. Entries added
- * to destination FIB are associated to adjacencies that point to the source
- * FIB and store the index of the particular source FIB associated to the
- * destination. Source FIBs are locally managed (see @ref lgm->ip4_src_fibs
- * and @ref lgm->ip6_src_fibs), but the adjacencies are allocated out of the
- * global adjacency pool.
- *
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] dst_prefix Destination IP prefix.
- * @param[in] src_prefix Source IP prefix.
- * @param[in] table_id Table id.
- * @param[in] add_adj Pointer to the adjacency to be added.
- * @param[in] is_add Add/del flag.
- *
- * @return 0 on success.
- */
-int
-ip_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id,
- ip_adjacency_t * add_adj, u8 is_add)
-{
- return (ip_prefix_version (dst_prefix) == IP4 ?
- ip4_sd_fib_add_del_route : ip6_sd_fib_add_del_route) (lgm,
- dst_prefix,
- src_prefix,
- table_id,
- add_adj,
- is_add);
}
/**
- * @brief Retrieve IP4 or IP6 SD FIB entry.
+ * @brief Add route to IP4 or IP6 SRC FIB.
*
- * Looks up SD IP route by first looking up the destination in VPP's main FIB
- * and subsequently the source in the src FIB. The index of the source FIB is
- * stored in the dst adjacency's @ref rewrite_header.sw_if_index. If source is
- * 0 do search with ::/0 src.
+ * Adds a route to the LISP SRC FIB with the result of the route
+ * being the DPO passed.
*
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] dst_prefix Destination IP prefix.
+ * @param[in] src_fib_index The index/ID of the SRC FIB
* @param[in] src_prefix Source IP prefix.
- * @param[in] table_id Table id.
- *
- * @return adjacency index if route found.
+ * @param[in] src_dpo The DPO the route will link to.
*/
-u32
-ip_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id)
-{
- if (ip_prefix_version (dst_prefix) == IP4)
- {
- u32 *adj_index = ip4_sd_fib_get_route (lgm, dst_prefix, src_prefix,
- table_id);
- return (adj_index == 0) ? 0 : adj_index[0];
- }
- else
- return ip6_sd_fib_get_route (lgm, dst_prefix, src_prefix, table_id);
-}
-
-always_inline void
-ip4_src_fib_lookup_one (lisp_gpe_main_t * lgm, u32 src_fib_index0,
- ip4_address_t * addr0, u32 * src_adj_index0)
-{
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- ip4_fib_mtrie_t *mtrie0;
-
- /* if default route not hit in ip4 lookup */
- if (PREDICT_TRUE (src_fib_index0 != (u32) ~ 0))
- {
- mtrie0 = &vec_elt_at_index (lgm->ip4_src_fibs, src_fib_index0)->mtrie;
-
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
-
- /* Handle default route. */
- leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY) ?
- mtrie0->default_leaf : leaf0;
- src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- }
- else
- src_adj_index0[0] = ~0;
-}
-
-always_inline void
-ip4_src_fib_lookup_two (lisp_gpe_main_t * lgm, u32 src_fib_index0,
- u32 src_fib_index1, ip4_address_t * addr0,
- ip4_address_t * addr1, u32 * src_adj_index0,
- u32 * src_adj_index1)
+void
+ip_src_fib_add_route_w_dpo (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ const dpo_id_t * src_dpo)
{
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- ip4_fib_mtrie_t *mtrie0, *mtrie1;
-
- /* if default route not hit in ip4 lookup */
- if (PREDICT_TRUE
- (src_fib_index0 != (u32) ~ 0 && src_fib_index1 != (u32) ~ 0))
- {
- mtrie0 = &vec_elt_at_index (lgm->ip4_src_fibs, src_fib_index0)->mtrie;
- mtrie1 = &vec_elt_at_index (lgm->ip4_src_fibs, src_fib_index1)->mtrie;
-
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0);
+ fib_prefix_t src_fib_prefix;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
+ ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
+ /*
+ * add the entry into the source fib.
+ */
+ fib_node_index_t src_fei;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
+ src_fei = fib_table_lookup_exact_match (src_fib_index, &src_fib_prefix);
- /* Handle default route. */
- leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY) ?
- mtrie0->default_leaf : leaf0;
- leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY) ?
- mtrie1->default_leaf : leaf1;
- src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
- }
- else
+ if (FIB_NODE_INDEX_INVALID == src_fei ||
+ !fib_entry_is_sourced (src_fei, FIB_SOURCE_LISP))
{
- ip4_src_fib_lookup_one (lgm, src_fib_index0, addr0, src_adj_index0);
- ip4_src_fib_lookup_one (lgm, src_fib_index1, addr1, src_adj_index1);
+ fib_table_entry_special_dpo_add (src_fib_index,
+ &src_fib_prefix,
+ FIB_SOURCE_LISP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, src_dpo);
}
}
-/**
- * @brief IPv4 src lookup node.
- * @node lgpe-ip4-lookup
- *
- * The LISP IPv4 source lookup dispatch node.
- *
- * This is the IPv4 source lookup dispatch node. It first looks up the
- * adjacency hit in the main (destination) FIB and then uses its
- * <code>rewrite_header.sw_if_index</code>to find the source FIB wherein
- * the source IP is subsequently looked up. Data in the resulting adjacency
- * is used to decide the next node (the lisp_gpe interface) and if a flow
- * hash must be computed, when traffic can be load balanced over multiple
- * tunnels.
- *
- *
- * @param[in] vm vlib_main_t corresponding to current thread.
- * @param[in] node vlib_node_runtime_t data for this node.
- * @param[in] frame vlib_frame_t whose contents should be dispatched.
- *
- * @return number of vectors in frame.
- */
-always_inline uword
-lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+static void
+ip_address_to_46 (const ip_address_t * addr,
+ ip46_address_t * a, fib_protocol_t * proto)
{
- u32 n_left_from, next_index, *from, *to_next;
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
+ *proto = (IP4 == ip_addr_version (addr) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ switch (*proto)
{
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip0, *ip1;
- u32 dst_adj_index0, src_adj_index0, src_fib_index0;
- u32 dst_adj_index1, src_adj_index1, src_fib_index1;
- ip_adjacency_t *dst_adj0, *src_adj0, *dst_adj1, *src_adj1;
- u32 next0, next1;
-
- next0 = next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- ip0 = vlib_buffer_get_current (b0);
- ip1 = vlib_buffer_get_current (b1);
-
- /* dst lookup was done by ip4 lookup */
- dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- dst_adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
-
- dst_adj0 = ip_get_adjacency (lgm->lm4, dst_adj_index0);
- dst_adj1 = ip_get_adjacency (lgm->lm4, dst_adj_index1);
-
- src_fib_index0 = dst_adj0->rewrite_header.sw_if_index;
- src_fib_index1 = dst_adj1->rewrite_header.sw_if_index;
-
- ip4_src_fib_lookup_two (lgm, src_fib_index0, src_fib_index1,
- &ip0->src_address, &ip1->src_address,
- &src_adj_index0, &src_adj_index1);
-
- /* if a source fib exists */
- if (PREDICT_TRUE ((u32) ~ 0 != src_adj_index0
- && (u32) ~ 0 != src_adj_index1))
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0;
- vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1;
-
- src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0);
- src_adj1 = ip_get_adjacency (lgm->lm4, src_adj_index1);
-
- next0 = src_adj0->explicit_fib_index;
- next1 = src_adj1->explicit_fib_index;
-
- /* prepare buffer for lisp-gpe output node */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] =
- src_adj0->rewrite_header.sw_if_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] =
- src_adj1->rewrite_header.sw_if_index;
-
- /* if multipath: saved_lookup_next_index is reused to store
- * nb of sub-tunnels. If greater than 1, multipath is on.
- * Note that flow hash should be 0 after ipx lookup! */
- if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1))
- vnet_buffer (b0)->ip.flow_hash =
- ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
-
- if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1))
- vnet_buffer (b1)->ip.flow_hash =
- ip4_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- if ((u32) ~ 0 != src_adj_index0)
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0;
- src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0);
- next0 = src_adj0->explicit_fib_index;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] =
- src_adj0->rewrite_header.sw_if_index;
-
- if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1))
- vnet_buffer (b0)->ip.flow_hash =
- ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- }
-
- if ((u32) ~ 0 != src_adj_index1)
- {
- vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1;
- src_adj1 = ip_get_adjacency (lgm->lm4, src_adj_index1);
- next1 = src_adj1->explicit_fib_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] =
- src_adj1->rewrite_header.sw_if_index;
- if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1))
- vnet_buffer (b1)->ip.flow_hash =
- ip4_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- }
- }
-
- /* mark the packets for CP lookup if needed */
- if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0))
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP;
- if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next1))
- vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_IP;
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, next0,
- next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip4_header_t *ip0;
- u32 bi0, dst_adj_index0, src_adj_index0, src_fib_index0;
- u32 next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- ip_adjacency_t *dst_adj0, *src_adj0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (b0);
-
- /* dst lookup was done by ip4 lookup */
- dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- dst_adj0 = ip_get_adjacency (lgm->lm4, dst_adj_index0);
- src_fib_index0 = dst_adj0->rewrite_header.sw_if_index;
-
- /* do src lookup */
- ip4_src_fib_lookup_one (lgm, src_fib_index0, &ip0->src_address,
- &src_adj_index0);
-
- /* if a source fib exists */
- if (PREDICT_TRUE ((u32) ~ 0 != src_adj_index0))
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0;
- src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0);
- next0 = src_adj0->explicit_fib_index;
-
- /* prepare packet for lisp-gpe output node */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] =
- src_adj0->rewrite_header.sw_if_index;
-
- /* if multipath: saved_lookup_next_index is reused to store
- * nb of sub-tunnels. If greater than 1, multipath is on */
- if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1))
- vnet_buffer (b0)->ip.flow_hash =
- ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- }
-
- if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0))
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP;
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ case FIB_PROTOCOL_IP4:
+ a->ip4 = addr->ip.v4;
+ break;
+ case FIB_PROTOCOL_IP6:
+ a->ip6 = addr->ip.v6;
+ break;
+ default:
+ ASSERT (0);
+ break;
}
- return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (lgpe_ip4_lookup_node) = {
- .function = lgpe_ip4_lookup,
- .name = "lgpe-ip4-lookup",
- .vector_size = sizeof (u32),
-
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_next_nodes = LGPE_IP4_LOOKUP_N_NEXT,
- .next_nodes = {
-#define _(sym,str) [LGPE_IP4_LOOKUP_NEXT_##sym] = str,
- foreach_lgpe_ip4_lookup_next
-#undef _
- },
-};
-/* *INDENT-ON* */
-
-static u32
-ip6_src_fib_lookup (lisp_gpe_main_t * lgm, u32 src_fib_index,
- ip6_address_t * src)
+/**
+ * @brief Build the FIB route paths for the best-priority LISP paths.
+ *
+ * Walks the LISP forwarding paths and converts each one whose priority
+ * equals that of paths[0] into a fib_route_path_t (next-hop address and
+ * protocol taken from the LISP adjacency's remote RLOC, interface from
+ * the adjacency, weight defaulting to 1 when the path weight is 0).
+ * The walk stops at the first path with a different priority.
+ * NOTE(review): this assumes the caller supplies paths ordered so that
+ * the best priority comes first - confirm against the caller.
+ *
+ * @param[in] paths Vector of LISP forwarding paths; must not be empty.
+ *
+ * @return Vector of route paths holding only the best-priority entries.
+ * Presumably owned (and freed) by the caller - verify.
+ */
+static fib_route_path_t *
+ip_src_fib_mk_paths (const lisp_fwd_path_t * paths)
{
- int i, len;
- int rv;
- BVT (clib_bihash_kv) kv, value;
- ip6_src_fib_t *fib = pool_elt_at_index (lgm->ip6_src_fibs, src_fib_index);
+ const lisp_gpe_adjacency_t *ladj;
+ fib_route_path_t *rpaths = NULL;
+ u8 best_priority;
+ u32 ii;
- len = vec_len (fib->prefix_lengths_in_search_order);
+ /* vec_len (paths) - 1 underflows and paths[0] is invalid when empty */
+ ASSERT (0 != vec_len (paths));
+ vec_validate (rpaths, vec_len (paths) - 1);
- for (i = 0; i < len; i++)
- {
- int dst_address_length = fib->prefix_lengths_in_search_order[i];
- ip6_address_t *mask;
+ best_priority = paths[0].priority;
- ASSERT (dst_address_length >= 0 && dst_address_length <= 128);
+ vec_foreach_index (ii, paths)
+ {
+ /* only paths sharing the best priority contribute to the route */
+ if (paths[ii].priority != best_priority)
+ break;
- mask = &fib->fib_masks[dst_address_length];
+ ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj);
- kv.key[0] = src->as_u64[0] & mask->as_u64[0];
- kv.key[1] = src->as_u64[1] & mask->as_u64[1];
- kv.key[2] = dst_address_length;
+ ip_address_to_46 (&ladj->remote_rloc,
+ &rpaths[ii].frp_addr, &rpaths[ii].frp_proto);
- rv =
- BV (clib_bihash_search_inline_2) (&fib->ip6_lookup_table, &kv,
- &value);
- if (rv == 0)
- return value.value;
- }
-
- return 0;
-}
+ rpaths[ii].frp_sw_if_index = ladj->sw_if_index;
+ rpaths[ii].frp_weight = (paths[ii].weight ? paths[ii].weight : 1);
+ rpaths[ii].frp_label = MPLS_LABEL_INVALID;
+ }
+ /*
+ * rpaths was pre-sized for every path; drop the zeroed tail left
+ * behind when the walk stopped early at a lower-priority path.
+ * ii is the number of entries filled in.
+ */
+ _vec_len (rpaths) = ii;
-always_inline void
-ip6_src_fib_lookup_one (lisp_gpe_main_t * lgm, u32 src_fib_index0,
- ip6_address_t * addr0, u32 * src_adj_index0)
-{
- /* if default route not hit in ip6 lookup */
- if (PREDICT_TRUE (src_fib_index0 != (u32) ~ 0))
- src_adj_index0[0] = ip6_src_fib_lookup (lgm, src_fib_index0, addr0);
- else
- src_adj_index0[0] = ~0;
-}
+ ASSERT (0 != vec_len (rpaths));
-always_inline void
-ip6_src_fib_lookup_two (lisp_gpe_main_t * lgm, u32 src_fib_index0,
- u32 src_fib_index1, ip6_address_t * addr0,
- ip6_address_t * addr1, u32 * src_adj_index0,
- u32 * src_adj_index1)
-{
- /* if default route not hit in ip6 lookup */
- if (PREDICT_TRUE
- (src_fib_index0 != (u32) ~ 0 && src_fib_index1 != (u32) ~ 0))
- {
- src_adj_index0[0] = ip6_src_fib_lookup (lgm, src_fib_index0, addr0);
- src_adj_index1[0] = ip6_src_fib_lookup (lgm, src_fib_index1, addr1);
- }
- else
- {
- ip6_src_fib_lookup_one (lgm, src_fib_index0, addr0, src_adj_index0);
- ip6_src_fib_lookup_one (lgm, src_fib_index1, addr1, src_adj_index1);
- }
+ return (rpaths);
}
/**
- * @brief IPv6 src lookup node.
- * @node lgpe-ip6-lookup
- *
- * The LISP IPv6 source lookup dispatch node.
+ * @brief Add route to IP4 or IP6 SRC FIB.
*
- * This is the IPv6 source lookup dispatch node. It first looks up the
- * adjacency hit in the main (destination) FIB and then uses its
- * <code>rewrite_header.sw_if_index</code>to find the source FIB wherein
- * the source IP is subsequently looked up. Data in the resulting adjacency
- * is used to decide the next node (the lisp_gpe interface) and if a flow
- * hash must be computed, when traffic can be load balanced over multiple
- * tunnels.
+ * Adds a route to the LISP SRC FIB for the tunnel.
*
- * @param[in] vm vlib_main_t corresponding to current thread.
- * @param[in] node vlib_node_runtime_t data for this node.
- * @param[in] frame vlib_frame_t whose contents should be dispatched.
- *
- * @return number of vectors in frame.
+ * @param[in] src_fib_index The index/ID of the SRC FIB
+ * @param[in] src_prefix Source IP prefix.
+ * @param[in] paths The paths from which to construct the
+ * load balance
*/
-always_inline uword
-lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+void
+ip_src_fib_add_route (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ const lisp_fwd_path_t * paths)
{
- u32 n_left_from, next_index, *from, *to_next;
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
- ip6_header_t *ip0, *ip1;
- u32 dst_adj_index0, src_adj_index0, src_fib_index0, dst_adj_index1,
- src_adj_index1, src_fib_index1;
- ip_adjacency_t *dst_adj0, *src_adj0, *dst_adj1, *src_adj1;
- u32 next0, next1;
-
- next0 = next1 = LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- ip0 = vlib_buffer_get_current (b0);
- ip1 = vlib_buffer_get_current (b1);
-
- /* dst lookup was done by ip6 lookup */
- dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- dst_adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
-
- dst_adj0 = ip_get_adjacency (lgm->lm6, dst_adj_index0);
- dst_adj1 = ip_get_adjacency (lgm->lm6, dst_adj_index1);
-
- src_fib_index0 = dst_adj0->rewrite_header.sw_if_index;
- src_fib_index1 = dst_adj1->rewrite_header.sw_if_index;
-
- ip6_src_fib_lookup_two (lgm, src_fib_index0, src_fib_index1,
- &ip0->src_address, &ip1->src_address,
- &src_adj_index0, &src_adj_index1);
-
- /* if a source fib exists */
- if (PREDICT_TRUE ((u32) ~ 0 != src_adj_index0
- && (u32) ~ 0 != src_adj_index1))
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0;
- vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1;
+ fib_prefix_t src_fib_prefix;
+ fib_route_path_t *rpaths;
- src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0);
- src_adj1 = ip_get_adjacency (lgm->lm6, src_adj_index1);
+ ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix);
- next0 = src_adj0->explicit_fib_index;
- next1 = src_adj1->explicit_fib_index;
+ rpaths = ip_src_fib_mk_paths (paths);
- /* prepare buffer for lisp-gpe output node */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] =
- src_adj0->rewrite_header.sw_if_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] =
- src_adj1->rewrite_header.sw_if_index;
-
- /* if multipath: saved_lookup_next_index is reused to store
- * nb of sub-tunnels. If greater than 1, multipath is on.
- * Note that flow hash should be 0 after ipx lookup! */
- if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1))
- vnet_buffer (b0)->ip.flow_hash =
- ip6_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
-
- if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1))
- vnet_buffer (b1)->ip.flow_hash =
- ip6_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- if (src_adj_index0 != (u32) ~ 0)
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0;
- src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0);
- next0 = src_adj0->explicit_fib_index;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] =
- src_adj0->rewrite_header.sw_if_index;
-
- if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1))
- vnet_buffer (b0)->ip.flow_hash =
- ip6_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- }
-
- if (src_adj_index1 != (u32) ~ 0)
- {
- vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1;
- src_adj1 = ip_get_adjacency (lgm->lm6, src_adj_index1);
- next1 = src_adj1->explicit_fib_index;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] =
- src_adj1->rewrite_header.sw_if_index;
-
- if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1))
- vnet_buffer (b1)->ip.flow_hash =
- ip6_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- }
- }
-
- /* mark the packets for CP lookup if needed */
- if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0))
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP;
- if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next1))
- vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_IP;
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, next0,
- next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip6_header_t *ip0;
- u32 bi0, dst_adj_index0, src_adj_index0, src_fib_index0;
- u32 next0 = LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP;
- ip_adjacency_t *dst_adj0, *src_adj0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (b0);
-
- /* dst lookup was done by ip6 lookup */
- dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
- dst_adj0 = ip_get_adjacency (lgm->lm6, dst_adj_index0);
- src_fib_index0 = dst_adj0->rewrite_header.sw_if_index;
-
- /* do src lookup */
- ip6_src_fib_lookup_one (lgm, src_fib_index0, &ip0->src_address,
- &src_adj_index0);
-
- /* if a source fib exists */
- if (PREDICT_TRUE (src_adj_index0 != (u32) ~ 0))
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0;
- src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0);
- next0 = src_adj0->explicit_fib_index;
-
- /* prepare packet for lisp-gpe output node */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] =
- src_adj0->rewrite_header.sw_if_index;
-
- /* if multipath: saved_lookup_next_index is reused to store
- * nb of sub-tunnels. If greater than 1, multipath is on */
- if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1))
- vnet_buffer (b0)->ip.flow_hash =
- ip6_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT);
- }
- else
- {
- next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP;
- }
-
- /* mark the packets for CP lookup if needed */
- if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0))
- vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP;
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- return from_frame->n_vectors;
+ fib_table_entry_update (src_fib_index,
+ &src_fib_prefix,
+ FIB_SOURCE_LISP, FIB_ENTRY_FLAG_NONE, rpaths);
+ vec_free (rpaths);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (lgpe_ip6_lookup_node) = {
- .function = lgpe_ip6_lookup,
- .name = "lgpe-ip6-lookup",
- .vector_size = sizeof (u32),
-
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_next_nodes = LGPE_IP6_LOOKUP_N_NEXT,
- .next_nodes = {
-#define _(sym,str) [LGPE_IP6_LOOKUP_NEXT_##sym] = str,
- foreach_lgpe_ip6_lookup_next
-#undef _
- },
-};
-/* *INDENT-ON* */
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c
index 579422b484b..f05c6a2028f 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe.c
+++ b/vnet/vnet/lisp-gpe/lisp_gpe.c
@@ -19,435 +19,230 @@
*/
#include <vnet/lisp-gpe/lisp_gpe.h>
-#include <vppinfra/math.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/load_balance.h>
/** LISP-GPE global state */
lisp_gpe_main_t lisp_gpe_main;
/**
- * @brief Compute IP-UDP-GPE sub-tunnel encap/rewrite header.
- *
- * @param[in] t Parent of the sub-tunnel.
- * @param[in] st Sub-tunnel.
- * @param[in] lp Local and remote locators used in the encap header.
- *
- * @return 0 on success.
+ * @brief A Pool of all LISP forwarding entries
*/
-static int
-lisp_gpe_rewrite (lisp_gpe_tunnel_t * t, lisp_gpe_sub_tunnel_t * st,
- locator_pair_t * lp)
-{
- u8 *rw = 0;
- lisp_gpe_header_t *lisp0;
- int len;
-
- if (ip_addr_version (&lp->lcl_loc) == IP4)
- {
- ip4_header_t *ip0;
- ip4_udp_lisp_gpe_header_t *h0;
- len = sizeof (*h0);
+static lisp_fwd_entry_t *lisp_fwd_entry_pool;
- vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
+/**
+ * DB of all forwarding entries. The Key is:{l-EID,r-EID,vni}
+ * where the EID encodes L2 or L3
+ */
+static uword *lisp_gpe_fwd_entries;
- h0 = (ip4_udp_lisp_gpe_header_t *) rw;
+static void
+create_fib_entries (lisp_fwd_entry_t * lfe)
+{
+ dpo_proto_t dproto;
- /* Fixed portion of the (outer) ip4 header */
- ip0 = &h0->ip4;
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_UDP;
+ dproto = (ip_prefix_version (&lfe->key->rmt.ippref) == IP4 ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
- /* we fix up the ip4 header length and checksum after-the-fact */
- ip_address_copy_addr (&ip0->src_address, &lp->lcl_loc);
- ip_address_copy_addr (&ip0->dst_address, &lp->rmt_loc);
- ip0->checksum = ip4_header_checksum (ip0);
+ lfe->src_fib_index = ip_dst_fib_add_route (lfe->eid_fib_index,
+ &lfe->key->rmt.ippref);
- /* UDP header, randomize src port on something, maybe? */
- h0->udp.src_port = clib_host_to_net_u16 (4341);
- h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe);
+ if (LISP_FWD_ENTRY_TYPE_NEGATIVE == lfe->type)
+ {
+ dpo_id_t dpo = DPO_NULL;
- /* LISP-gpe header */
- lisp0 = &h0->lisp;
+ switch (lfe->action)
+ {
+ case LISP_NO_ACTION:
+ /* TODO update timers? */
+ case LISP_FORWARD_NATIVE:
+ /* TODO check if route/next-hop for eid exists in fib and add
+ * more specific for the eid with the next-hop found */
+ case LISP_SEND_MAP_REQUEST:
+ /* insert tunnel that always sends map-request */
+ dpo_set (&dpo, DPO_LISP_CP, 0, dproto);
+ break;
+ case LISP_DROP:
+ /* for drop fwd entries, just add route, no need to add encap tunnel */
+ dpo_copy (&dpo, drop_dpo_get (dproto));
+ break;
+ }
+ ip_src_fib_add_route_w_dpo (lfe->src_fib_index,
+ &lfe->key->lcl.ippref, &dpo);
+ dpo_reset (&dpo);
}
else
{
- ip6_header_t *ip0;
- ip6_udp_lisp_gpe_header_t *h0;
- len = sizeof (*h0);
-
- vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
-
- h0 = (ip6_udp_lisp_gpe_header_t *) rw;
-
- /* Fixed portion of the (outer) ip6 header */
- ip0 = &h0->ip6;
- ip0->ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (0x6 << 28);
- ip0->hop_limit = 254;
- ip0->protocol = IP_PROTOCOL_UDP;
-
- /* we fix up the ip6 header length after-the-fact */
- ip_address_copy_addr (&ip0->src_address, &lp->lcl_loc);
- ip_address_copy_addr (&ip0->dst_address, &lp->rmt_loc);
-
- /* UDP header, randomize src port on something, maybe? */
- h0->udp.src_port = clib_host_to_net_u16 (4341);
- h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe);
-
- /* LISP-gpe header */
- lisp0 = &h0->lisp;
+ ip_src_fib_add_route (lfe->src_fib_index,
+ &lfe->key->lcl.ippref, lfe->paths);
}
-
- lisp0->flags = t->flags;
- lisp0->ver_res = t->ver_res;
- lisp0->res = t->res;
- lisp0->next_protocol = t->next_protocol;
- lisp0->iid = clib_host_to_net_u32 (t->vni);
-
- st->is_ip4 = ip_addr_version (&lp->lcl_loc) == IP4;
- st->rewrite = rw;
- return 0;
}
-static int
-weight_cmp (normalized_sub_tunnel_weights_t * a,
- normalized_sub_tunnel_weights_t * b)
+static void
+delete_fib_entries (lisp_fwd_entry_t * lfe)
{
- int cmp = a->weight - b->weight;
- return (cmp == 0
- ? a->sub_tunnel_index - b->sub_tunnel_index : (cmp > 0 ? -1 : 1));
+ ip_src_dst_fib_del_route (lfe->src_fib_index,
+ &lfe->key->lcl.ippref,
+ lfe->eid_fib_index, &lfe->key->rmt.ippref);
}
-/**
- * @brief Computes sub-tunnel load balancing vector.
- *
- * Algorithm is identical to that used for building unequal-cost multipath
- * adjacencies. Saves normalized sub-tunnel weights and builds load-balancing
- * vector consisting of list of sub-tunnel indexes replicated according to
- * weight.
- *
- * @param[in] t Tunnel for which load balancing vector is computed.
- */
static void
-compute_sub_tunnels_balancing_vector (lisp_gpe_tunnel_t * t)
+gid_to_dp_address (gid_address_t * g, dp_address_t * d)
{
- uword n_sts, i, n_nsts, n_nsts_left;
- f64 sum_weight, norm, error, tolerance;
- normalized_sub_tunnel_weights_t *nsts = 0, *stp;
- lisp_gpe_sub_tunnel_t *sts = t->sub_tunnels;
- u32 *st_lbv = 0;
-
- /* Accept 1% error */
- tolerance = .01;
-
- n_sts = vec_len (sts);
- vec_validate (nsts, 2 * n_sts - 1);
-
- sum_weight = 0;
- for (i = 0; i < n_sts; i++)
+ switch (gid_address_type (g))
{
- /* Find total weight to normalize weights. */
- sum_weight += sts[i].weight;
-
- /* build normalized sub tunnels vector */
- nsts[i].weight = sts[i].weight;
- nsts[i].sub_tunnel_index = i;
- }
-
- n_nsts = n_sts;
- if (n_sts == 1)
- {
- nsts[0].weight = 1;
- _vec_len (nsts) = 1;
- goto build_lbv;
+ case GID_ADDR_IP_PREFIX:
+ case GID_ADDR_SRC_DST:
+ ip_prefix_copy (&d->ippref, &gid_address_ippref (g));
+ d->type = FID_ADDR_IP_PREF;
+ break;
+ case GID_ADDR_MAC:
+ default:
+ mac_copy (&d->mac, &gid_address_mac (g));
+ d->type = FID_ADDR_MAC;
+ break;
}
+}
- /* Sort sub-tunnels by weight */
- qsort (nsts, n_nsts, sizeof (u32), (void *) weight_cmp);
+static lisp_fwd_entry_t *
+find_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
+ lisp_gpe_fwd_entry_key_t * key)
+{
+ uword *p;
- /* Save copies of all next hop weights to avoid being overwritten in loop below. */
- for (i = 0; i < n_nsts; i++)
- nsts[n_nsts + i].weight = nsts[i].weight;
+ memset (key, 0, sizeof (*key));
- /* Try larger and larger power of 2 sized blocks until we
- find one where traffic flows to within 1% of specified weights. */
- for (n_nsts = max_pow2 (n_sts);; n_nsts *= 2)
+ if (GID_ADDR_IP_PREFIX == gid_address_type (&a->rmt_eid))
{
- error = 0;
-
- norm = n_nsts / sum_weight;
- n_nsts_left = n_nsts;
- for (i = 0; i < n_sts; i++)
- {
- f64 nf = nsts[n_sts + i].weight * norm;
- word n = flt_round_nearest (nf);
-
- n = n > n_nsts_left ? n_nsts_left : n;
- n_nsts_left -= n;
- error += fabs (nf - n);
- nsts[i].weight = n;
- }
-
- nsts[0].weight += n_nsts_left;
-
- /* Less than 5% average error per adjacency with this size adjacency block? */
- if (error <= tolerance * n_nsts)
- {
- /* Truncate any next hops with zero weight. */
- _vec_len (nsts) = i;
- break;
- }
+ /*
+ * The IP version of the source prefix is not set to IPv6 when the
+ * source is all zeros; force it to match the remote prefix.
+ */
+ ip_prefix_version (&gid_address_ippref (&a->lcl_eid)) =
+ ip_prefix_version (&gid_address_ippref (&a->rmt_eid));
}
-build_lbv:
+ gid_to_dp_address (&a->rmt_eid, &key->rmt);
+ gid_to_dp_address (&a->lcl_eid, &key->lcl);
+ key->vni = a->vni;
- /* build load balancing vector */
- vec_foreach (stp, nsts)
- {
- for (i = 0; i < stp[0].weight; i++)
- vec_add1 (st_lbv, stp[0].sub_tunnel_index);
- }
+ p = hash_get_mem (lisp_gpe_fwd_entries, key);
- t->sub_tunnels_lbv = st_lbv;
- t->sub_tunnels_lbv_count = n_nsts;
- t->norm_sub_tunnel_weights = nsts;
+ if (NULL != p)
+ {
+ return (pool_elt_at_index (lisp_fwd_entry_pool, p[0]));
+ }
+ return (NULL);
}
-/** Create sub-tunnels and load-balancing vector for all locator pairs
- * associated to a tunnel.*/
-static void
-create_sub_tunnels (lisp_gpe_main_t * lgm, lisp_gpe_tunnel_t * t)
+static int
+lisp_gpe_fwd_entry_path_sort (void *a1, void *a2)
{
- lisp_gpe_sub_tunnel_t st;
- locator_pair_t *lp = 0;
- int i;
-
- /* create sub-tunnels for all locator pairs */
- for (i = 0; i < vec_len (t->locator_pairs); i++)
- {
- lp = &t->locator_pairs[i];
- st.locator_pair_index = i;
- st.parent_index = t - lgm->tunnels;
- st.weight = lp->weight;
-
- /* compute rewrite for sub-tunnel */
- lisp_gpe_rewrite (t, &st, lp);
- vec_add1 (t->sub_tunnels, st);
- }
+ lisp_fwd_path_t *p1 = a1, *p2 = a2;
- /* normalize weights and compute sub-tunnel load balancing vector */
- compute_sub_tunnels_balancing_vector (t);
+ return (p1->priority - p2->priority);
}
-#define foreach_copy_field \
-_(encap_fib_index) \
-_(decap_fib_index) \
-_(decap_next_index) \
-_(vni) \
-_(action)
-
/**
- * @brief Create/delete IP encapsulated tunnel.
+ * @brief Add LISP IP forwarding entry.
*
- * Builds GPE tunnel for L2 or L3 packets and populates tunnel pool
- * @ref lisp_gpe_tunnel_by_key in @ref lisp_gpe_main_t.
+ * Creates a forwarding entry for the IP LISP overlay.
*
- * @param[in] a Tunnel parameters.
- * @param[in] is_l2 Flag indicating if encapsulated content is l2.
- * @param[out] tun_index_res Tunnel index.
+ * @param[in] lgm Reference to @ref lisp_gpe_main_t.
+ * @param[in] a Parameters for building the forwarding entry.
*
* @return 0 on success.
*/
static int
-add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, u8 is_l2,
- u32 * tun_index_res)
+add_ip_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
{
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
- lisp_gpe_tunnel_t *t = 0;
- lisp_gpe_tunnel_key_t key;
- lisp_gpe_sub_tunnel_t *stp = 0;
- uword *p;
-
- /* prepare tunnel key */
- memset (&key, 0, sizeof (key));
-
- /* fill in the key's remote eid */
- if (!is_l2)
- ip_prefix_copy (&key.rmt.ippref, &gid_address_ippref (&a->rmt_eid));
- else
- mac_copy (&key.rmt.mac, &gid_address_mac (&a->rmt_eid));
-
- key.vni = clib_host_to_net_u32 (a->vni);
-
- p = mhash_get (&lgm->lisp_gpe_tunnel_by_key, &key);
-
- if (a->is_add)
- {
- /* adding a tunnel: tunnel must not already exist */
- if (p)
- return VNET_API_ERROR_INVALID_VALUE;
-
- if (a->decap_next_index >= LISP_GPE_INPUT_N_NEXT)
- return VNET_API_ERROR_INVALID_DECAP_NEXT;
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_fwd_entry_t *lfe;
+ fib_protocol_t fproto;
- pool_get_aligned (lgm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- memset (t, 0, sizeof (*t));
+ lfe = find_fwd_entry (lgm, a, &key);
- /* copy from arg structure */
-#define _(x) t->x = a->x;
- foreach_copy_field;
-#undef _
+ if (NULL != lfe)
+ /* don't support updates */
+ return VNET_API_ERROR_INVALID_VALUE;
- t->locator_pairs = vec_dup (a->locator_pairs);
+ pool_get (lisp_fwd_entry_pool, lfe);
+ memset (lfe, 0, sizeof (*lfe));
+ lfe->key = clib_mem_alloc (sizeof (key));
+ memcpy (lfe->key, &key, sizeof (key));
- /* if vni is non-default */
- if (a->vni)
- t->flags = LISP_GPE_FLAGS_I;
+ hash_set_mem (lisp_gpe_fwd_entries, lfe->key, lfe - lisp_fwd_entry_pool);
- /* work in lisp-gpe not legacy mode */
- t->flags |= LISP_GPE_FLAGS_P;
+ fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
- /* next proto */
- if (!is_l2)
- t->next_protocol = ip_prefix_version (&key.rmt.ippref) == IP4 ?
- LISP_GPE_NEXT_PROTO_IP4 : LISP_GPE_NEXT_PROTO_IP6;
- else
- t->next_protocol = LISP_GPE_NEXT_PROTO_ETHERNET;
-
- /* build sub-tunnels for lowest priority locator-pairs */
- if (!a->is_negative)
- create_sub_tunnels (lgm, t);
-
- mhash_set (&lgm->lisp_gpe_tunnel_by_key, &key, t - lgm->tunnels, 0);
+ lfe->type = (a->is_negative ?
+ LISP_FWD_ENTRY_TYPE_NEGATIVE : LISP_FWD_ENTRY_TYPE_NORMAL);
+ lfe->eid_table_id = a->table_id;
+ lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto,
+ lfe->eid_table_id);
- /* return tunnel index */
- if (tun_index_res)
- tun_index_res[0] = t - lgm->tunnels;
- }
- else
+ if (LISP_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
{
- /* deleting a tunnel: tunnel must exist */
- if (!p)
- {
- clib_warning ("Tunnel for eid %U doesn't exist!",
- format_gid_address, &a->rmt_eid);
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
-
- t = pool_elt_at_index (lgm->tunnels, p[0]);
+ lisp_fwd_path_t *path;
+ u32 index;
- mhash_unset (&lgm->lisp_gpe_tunnel_by_key, &key, 0);
+ vec_validate (lfe->paths, vec_len (a->locator_pairs) - 1);
- vec_foreach (stp, t->sub_tunnels)
+ vec_foreach_index (index, a->locator_pairs)
{
- vec_free (stp->rewrite);
+ path = &lfe->paths[index];
+
+ path->priority = a->locator_pairs[index].priority;
+ path->weight = a->locator_pairs[index].weight;
+
+ path->lisp_adj =
+ lisp_gpe_adjacency_find_or_create_and_lock (&a->locator_pairs
+ [index],
+ lfe->eid_table_id,
+ lfe->key->vni);
}
- vec_free (t->sub_tunnels);
- vec_free (t->sub_tunnels_lbv);
- vec_free (t->locator_pairs);
- pool_put (lgm->tunnels, t);
+ vec_sort_with_function (lfe->paths, lisp_gpe_fwd_entry_path_sort);
}
- return 0;
+ create_fib_entries (lfe);
+
+ return (0);
}
-/**
- * @brief Build IP adjacency for LISP Source/Dest FIB.
- *
- * Because LISP forwarding does not follow typical IP forwarding path, the
- * adjacency's fields are overloaded (i.e., hijacked) to carry LISP specific
- * data concerning the lisp-gpe interface the packets hitting the adjacency
- * should be sent to and the tunnel that should be used.
- *
- * @param[in] lgm Reference to @ref lisp_gpe_main_t.
- * @param[out] adj Adjacency to be populated.
- * @param[in] table_id VRF for adjacency.
- * @param[in] vni Virtual Network identifier (tenant id).
- * @param[in] tun_index Tunnel index.
- * @param[in] n_sub_tun Number of sub-tunnels.
- * @param[in] is_negative Flag to indicate if the adjacency is for a
- * negative mapping.
- * @param[in] action Action to be taken for negative mapping.
- * @param[in] ip_ver IP version for the adjacency.
- *
- * @return 0 on success.
- */
-static int
-build_ip_adjacency (lisp_gpe_main_t * lgm, ip_adjacency_t * adj, u32 table_id,
- u32 vni, u32 tun_index, u32 n_sub_tun, u8 is_negative,
- u8 action, u8 ip_ver)
+static void
+del_ip_fwd_entry_i (lisp_fwd_entry_t * lfe)
{
- uword *lookup_next_index, *lgpe_sw_if_index, *lnip;
+ lisp_fwd_path_t *path;
+ fib_protocol_t fproto;
- memset (adj, 0, sizeof (adj[0]));
- adj->n_adj = 1;
- /* fill in lookup_next_index with a 'legal' value to avoid problems */
- adj->lookup_next_index = (ip_ver == IP4) ?
- lgm->ip4_lookup_next_lgpe_ip4_lookup :
- lgm->ip6_lookup_next_lgpe_ip6_lookup;
+ vec_foreach (path, lfe->paths)
+ {
+ lisp_gpe_adjacency_unlock (path->lisp_adj);
+ }
- /* positive mapping */
- if (!is_negative)
- {
- /* send packets that hit this adj to lisp-gpe interface output node in
- * requested vrf. */
- lnip = (ip_ver == IP4) ?
- lgm->lgpe_ip4_lookup_next_index_by_table_id :
- lgm->lgpe_ip6_lookup_next_index_by_table_id;
- lookup_next_index = hash_get (lnip, table_id);
- lgpe_sw_if_index = hash_get (lgm->l3_ifaces.sw_if_index_by_vni, vni);
-
- /* the assumption is that the interface must've been created before
- * programming the dp */
- ASSERT (lookup_next_index != 0 && lgpe_sw_if_index != 0);
-
- /* hijack explicit fib index to store lisp interface node index,
- * if_address_index for the tunnel index and saved lookup next index
- * for the number of sub tunnels */
- adj->explicit_fib_index = lookup_next_index[0];
- adj->if_address_index = tun_index;
- adj->rewrite_header.sw_if_index = lgpe_sw_if_index[0];
- adj->saved_lookup_next_index = n_sub_tun;
- }
- /* negative mapping */
- else
- {
- adj->rewrite_header.sw_if_index = ~0;
- adj->rewrite_header.next_index = ~0;
- adj->if_address_index = tun_index;
+ delete_fib_entries (lfe);
- switch (action)
- {
- case LISP_NO_ACTION:
- /* TODO update timers? */
- case LISP_FORWARD_NATIVE:
- /* TODO check if route/next-hop for eid exists in fib and add
- * more specific for the eid with the next-hop found */
- case LISP_SEND_MAP_REQUEST:
- /* insert tunnel that always sends map-request */
- adj->explicit_fib_index = (ip_ver == IP4) ?
- LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP :
- LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP;
- break;
- case LISP_DROP:
- /* for drop fwd entries, just add route, no need to add encap tunnel */
- adj->explicit_fib_index = (ip_ver == IP4 ?
- LGPE_IP4_LOOKUP_NEXT_DROP :
- LGPE_IP6_LOOKUP_NEXT_DROP);
- break;
- default:
- return -1;
- }
- }
- return 0;
+ fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ fib_table_unlock (lfe->eid_fib_index, fproto);
+
+ hash_unset_mem (lisp_gpe_fwd_entries, lfe->key);
+ clib_mem_free (lfe->key);
+ pool_put (lisp_fwd_entry_pool, lfe);
}
/**
* @brief Add/Delete LISP IP forwarding entry.
*
- * Coordinates the creation/removal of forwarding entries for IP LISP overlay:
- * creates lisp-gpe tunnel, builds tunnel customized forwarding entry and
- * injects new route in Source/Dest FIB.
+ * Removes a forwarding entry for the IP LISP overlay.
*
* @param[in] lgm Reference to @ref lisp_gpe_main_t.
* @param[in] a Parameters for building the forwarding entry.
@@ -455,63 +250,21 @@ build_ip_adjacency (lisp_gpe_main_t * lgm, ip_adjacency_t * adj, u32 table_id,
* @return 0 on success.
*/
static int
-add_del_ip_fwd_entry (lisp_gpe_main_t * lgm,
- vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+del_ip_fwd_entry (lisp_gpe_main_t * lgm,
+ vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
{
- ip_adjacency_t adj, *adjp;
- lisp_gpe_tunnel_t *t;
- u32 rv, tun_index = ~0, n_sub_tuns = 0;
- ip_prefix_t *rmt_pref, *lcl_pref;
- u8 ip_ver;
-
- rmt_pref = &gid_address_ippref (&a->rmt_eid);
- lcl_pref = &gid_address_ippref (&a->lcl_eid);
- ip_ver = ip_prefix_version (rmt_pref);
-
- /* add/del tunnel to tunnels pool and prepares rewrite */
- if (0 != a->locator_pairs)
- {
- rv = add_del_ip_tunnel (a, 0 /* is_l2 */ , &tun_index);
- if (rv)
- {
- clib_warning ("failed to build tunnel!");
- return rv;
- }
- if (a->is_add)
- {
- t = pool_elt_at_index (lgm->tunnels, tun_index);
- n_sub_tuns = t->sub_tunnels_lbv_count;
- }
- }
-
- /* setup adjacency for eid */
- rv = build_ip_adjacency (lgm, &adj, a->table_id, a->vni, tun_index,
- n_sub_tuns, a->is_negative, a->action, ip_ver);
-
- /* add/delete route for eid */
- rv |= ip_sd_fib_add_del_route (lgm, rmt_pref, lcl_pref, a->table_id, &adj,
- a->is_add);
-
- if (rv)
- {
- clib_warning ("failed to insert route for tunnel!");
- return rv;
- }
+ lisp_gpe_fwd_entry_key_t key;
+ lisp_fwd_entry_t *lfe;
- /* check that everything worked */
- if (CLIB_DEBUG && a->is_add)
- {
- u32 adj_index;
- adj_index = ip_sd_fib_get_route (lgm, rmt_pref, lcl_pref, a->table_id);
- ASSERT (adj_index != 0);
+ lfe = find_fwd_entry (lgm, a, &key);
- adjp = ip_get_adjacency ((ip_ver == IP4) ? lgm->lm4 : lgm->lm6,
- adj_index);
+ if (NULL == lfe)
+ /* no such entry */
+ return VNET_API_ERROR_INVALID_VALUE;
- ASSERT (adjp != 0 && adjp->if_address_index == tun_index);
- }
+ del_ip_fwd_entry_i (lfe);
- return rv;
+ return (0);
}
static void
@@ -536,7 +289,7 @@ make_mac_fib_key (BVT (clib_bihash_kv) * kv, u16 bd_index, u8 src_mac[6],
*
* @return index of mapping matching the lookup key.
*/
-u32
+index_t
lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6],
u8 dst_mac[6])
{
@@ -555,7 +308,7 @@ lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6],
return value.value;
}
- return ~0;
+ return lisp_gpe_main.l2_lb_miss;
}
/**
@@ -601,6 +354,12 @@ l2_fib_init (lisp_gpe_main_t * lgm)
BV (clib_bihash_init) (&lgm->l2_fib, "l2 fib",
1 << max_log2 (L2_FIB_DEFAULT_HASH_NUM_BUCKETS),
L2_FIB_DEFAULT_HASH_MEMORY_SIZE);
+
+ /*
+ * the result from a 'miss' in a L2 Table
+ */
+ lgm->l2_lb_miss = load_balance_create (1, DPO_PROTO_IP4, 0);
+ load_balance_set_bucket (lgm->l2_lb_miss, 0, drop_dpo_get (DPO_PROTO_IP4));
}
/**
@@ -618,27 +377,75 @@ static int
add_del_l2_fwd_entry (lisp_gpe_main_t * lgm,
vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
{
- int rv;
- u32 tun_index;
- bd_main_t *bdm = &bd_main;
- uword *bd_indexp;
-
- /* create tunnel */
- rv = add_del_ip_tunnel (a, 1 /* is_l2 */ , &tun_index);
- if (rv)
- return rv;
-
- bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id);
- if (!bd_indexp)
- {
- clib_warning ("bridge domain %d doesn't exist", a->bd_id);
- return -1;
- }
-
- /* add entry to l2 lisp fib */
- lisp_l2_fib_add_del_entry (lgm, bd_indexp[0], gid_address_mac (&a->lcl_eid),
- gid_address_mac (&a->rmt_eid), tun_index,
- a->is_add);
+ /* lisp_gpe_fwd_entry_key_t key; */
+ /* lisp_fwd_entry_t *lfe; */
+ /* fib_protocol_t fproto; */
+ /* uword *bd_indexp; */
+
+ /* bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); */
+ /* if (!bd_indexp) */
+ /* { */
+ /* clib_warning ("bridge domain %d doesn't exist", a->bd_id); */
+ /* return -1; */
+ /* } */
+
+ /* lfe = find_fwd_entry(lgm, a, &key); */
+
+ /* if (NULL != lfe) */
+ /* /\* don't support updates *\/ */
+ /* return VNET_API_ERROR_INVALID_VALUE; */
+
+ /* int rv; */
+ /* u32 tun_index; */
+ /* fib_node_index_t old_path_list; */
+ /* bd_main_t *bdm = &bd_main; */
+ /* fib_route_path_t *rpaths; */
+ /* lisp_gpe_tunnel_t *t; */
+ /* const dpo_id_t *dpo; */
+ /* index_t lbi; */
+
+ /* /\* create tunnel *\/ */
+ /* rv = add_del_ip_tunnel (a, 1 /\* is_l2 *\/ , &tun_index, NULL); */
+ /* if (rv) */
+ /* return rv; */
+
+ /* bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); */
+ /* if (!bd_indexp) */
+ /* { */
+ /* clib_warning ("bridge domain %d doesn't exist", a->bd_id); */
+ /* return -1; */
+ /* } */
+
+ /* t = pool_elt_at_index (lgm->tunnels, tun_index); */
+ /* old_path_list = t->l2_path_list; */
+
+ /* if (LISP_NO_ACTION == t->action) */
+ /* { */
+ /* rpaths = lisp_gpe_mk_paths_for_sub_tunnels (t); */
+
+ /* t->l2_path_list = fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, */
+ /* rpaths); */
+
+ /* vec_free (rpaths); */
+ /* fib_path_list_lock (t->l2_path_list); */
+
+ /* dpo = fib_path_list_contribute_forwarding (t->l2_path_list, */
+ /* FIB_FORW_CHAIN_TYPE_UNICAST_IP); */
+ /* lbi = dpo->dpoi_index; */
+ /* } */
+ /* else if (LISP_SEND_MAP_REQUEST == t->action) */
+ /* { */
+ /* lbi = lgm->l2_lb_cp_lkup; */
+ /* } */
+ /* else */
+ /* { */
+ /* lbi = lgm->l2_lb_miss; */
+ /* } */
+ /* fib_path_list_unlock (old_path_list); */
+
+ /* /\* add entry to l2 lisp fib *\/ */
+ /* lisp_l2_fib_add_del_entry (lgm, bd_indexp[0], gid_address_mac (&a->lcl_eid), */
+ /* gid_address_mac (&a->rmt_eid), lbi, a->is_add); */
return 0;
}
@@ -669,7 +476,11 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
switch (type)
{
case GID_ADDR_IP_PREFIX:
- return add_del_ip_fwd_entry (lgm, a);
+ if (a->is_add)
+ return add_ip_fwd_entry (lgm, a);
+ else
+ return del_ip_fwd_entry (lgm, a);
+ break;
case GID_ADDR_MAC:
return add_del_l2_fwd_entry (lgm, a);
default:
@@ -807,103 +618,77 @@ done:
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = {
- .path = "lisp gpe tunnel",
- .short_help = "lisp gpe tunnel add/del vni <vni> vrf <vrf> [leid <leid>]"
+ .path = "lisp gpe entry",
+ /* every fragment ends in a space so the concatenated usage stays readable */
+ .short_help = "lisp gpe entry add/del vni <vni> vrf <vrf> [leid <leid>] "
"reid <reid> [loc-pair <lloc> <rloc> p <priority> w <weight>] "
"[negative action <action>]",
.function = lisp_gpe_add_del_fwd_entry_command_fn,
};
/* *INDENT-ON* */
-/** Format LISP-GPE next indexes. */
+/**
+ * @brief Format one LISP forwarding path: priority, weight and adjacency.
+ *
+ * Used as a "%U" formatter. Like the other formatters in this file it takes
+ * a pointer to the caller's va_list, from which it consumes one
+ * lisp_fwd_path_t pointer.
+ *
+ * @param s    the vector being appended to
+ * @param args pointer to the argument list holding the path to print
+ * @return the (possibly reallocated) vector
+ */
static u8 *
-format_decap_next (u8 * s, va_list * args)
+format_lisp_fwd_path (u8 * s, va_list * args)
{
- u32 next_index = va_arg (*args, u32);
+  lisp_fwd_path_t *lfp = va_arg (*args, lisp_fwd_path_t *);
- switch (next_index)
- {
- case LISP_GPE_INPUT_NEXT_DROP:
- return format (s, "drop");
- case LISP_GPE_INPUT_NEXT_IP4_INPUT:
- return format (s, "ip4");
- case LISP_GPE_INPUT_NEXT_IP6_INPUT:
- return format (s, "ip6");
- default:
- return format (s, "unknown %d", next_index);
- }
- return s;
+  s = format (s, "priority:%d weight:%d ", lfp->priority, lfp->weight);
+  s = format (s, "adj:[%U]\n",
+	      format_lisp_gpe_adjacency,
+	      lisp_gpe_adjacency_get (lfp->lisp_adj),
+	      LISP_GPE_ADJ_FORMAT_FLAG_NONE);
+
+  return (s);
}
-/** Format LISP-GPE tunnel. */
-u8 *
-format_lisp_gpe_tunnel (u8 * s, va_list * args)
+/**
+ * @brief Format a LISP forwarding entry: its key, then either the negative
+ * action or the list of paths.
+ *
+ * Used as a "%U" formatter; takes a pointer to the caller's va_list (as the
+ * other formatters in this file do) and consumes one lisp_fwd_entry_t pointer.
+ *
+ * @param s    the vector being appended to
+ * @param args pointer to the argument list holding the entry to print
+ * @return the (possibly reallocated) vector
+ */
+static u8 *
+format_lisp_gpe_fwd_entry (u8 * s, va_list * args)
{
- lisp_gpe_tunnel_t *t = va_arg (*args, lisp_gpe_tunnel_t *);
- lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
- locator_pair_t *lp = 0;
- normalized_sub_tunnel_weights_t *nstw;
-
- s =
- format (s, "tunnel %d vni %d (0x%x)\n", t - lgm->tunnels, t->vni, t->vni);
- s =
- format (s, " fibs: encap %d, decap %d decap next %U\n",
- t->encap_fib_index, t->decap_fib_index, format_decap_next,
- t->decap_next_index);
- s = format (s, " lisp ver %d ", (t->ver_res >> 6));
-
-#define _(n,v) if (t->flags & v) s = format (s, "%s-bit ", #n);
- foreach_lisp_gpe_flag_bit;
-#undef _
-
- s = format (s, "next_protocol %d ver_res %x res %x\n",
- t->next_protocol, t->ver_res, t->res);
-
- s = format (s, " locator-pairs:\n");
- vec_foreach (lp, t->locator_pairs)
- {
- s = format (s, " local: %U remote: %U weight %d\n",
- format_ip_address, &lp->lcl_loc, format_ip_address,
- &lp->rmt_loc, lp->weight);
- }
+  lisp_fwd_entry_t *lfe = va_arg (*args, lisp_fwd_entry_t *);
- s = format (s, " active sub-tunnels:\n");
- vec_foreach (nstw, t->norm_sub_tunnel_weights)
- {
- lp = vec_elt_at_index (t->locator_pairs, nstw->sub_tunnel_index);
- s = format (s, " local: %U remote: %U weight %d\n", format_ip_address,
- &lp->lcl_loc, format_ip_address, &lp->rmt_loc, nstw->weight);
- }
- return s;
+  s = format (s, "VNI:%d VRF:%d EID: %U -> %U",
+	      lfe->key->vni, lfe->eid_table_id,
+	      format_fid_address, &lfe->key->lcl,
+	      format_fid_address, &lfe->key->rmt);
+  if (LISP_FWD_ENTRY_TYPE_NEGATIVE == lfe->type)
+    {
+      /* 'action' and 'paths' share a union; 'type' selects the valid one */
+      s = format (s, "\n Negative - action:%U",
+		  format_negative_mapping_action, lfe->action);
+    }
+  else
+    {
+      lisp_fwd_path_t *path;
+
+      s = format (s, "\n via:");
+      vec_foreach (path, lfe->paths)
+      {
+	s = format (s, "\n %U", format_lisp_fwd_path, path);
+      }
+    }
+
+  return (s);
}
-/** CLI command to show LISP-GPE tunnels. */
+/**
+ * CLI handler: print every element of lisp_fwd_entry_pool using
+ * format_lisp_gpe_fwd_entry. The 'input' and 'cmd' arguments are not read.
+ * Always returns NULL (no error).
+ */
static clib_error_t *
-show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+lisp_gpe_fwd_entry_show (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
{
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
- lisp_gpe_tunnel_t *t;
-
- if (pool_elts (lgm->tunnels) == 0)
- vlib_cli_output (vm, "No lisp-gpe tunnels configured...");
+ lisp_fwd_entry_t *lfe;
- /* *INDENT-OFF* */
- pool_foreach (t, lgm->tunnels,
+/* *INDENT-OFF* */
+ pool_foreach (lfe, lisp_fwd_entry_pool,
({
- vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, t);
+ vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe);
}));
- /* *INDENT-ON* */
+/* *INDENT-ON* */
- return 0;
+ return (NULL);
}
/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
-{
- .path = "show lisp gpe tunnel",
- .function = show_lisp_gpe_tunnel_command_fn,
+/*
+ * NOTE(review): the short_help advertises vni/vrf/leid/reid filters, but the
+ * handler registered here walks the whole entry pool without parsing its
+ * input - confirm whether filtering is still to be implemented.
+ */
+VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = {
+ .path = "show lisp gpe entry",
+ .short_help = "show lisp gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>",
+ .function = lisp_gpe_fwd_entry_show,
};
/* *INDENT-ON* */
@@ -921,29 +706,9 @@ clib_error_t *
vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a)
{
lisp_gpe_main_t *lgm = &lisp_gpe_main;
- vnet_main_t *vnm = lgm->vnet_main;
if (a->is_en)
{
- /* add lgpe_ip4_lookup as possible next_node for ip4 lookup */
- if (lgm->ip4_lookup_next_lgpe_ip4_lookup == ~0)
- {
- lgm->ip4_lookup_next_lgpe_ip4_lookup =
- vlib_node_add_next (vnm->vlib_main, ip4_lookup_node.index,
- lgpe_ip4_lookup_node.index);
- }
- /* add lgpe_ip6_lookup as possible next_node for ip6 lookup */
- if (lgm->ip6_lookup_next_lgpe_ip6_lookup == ~0)
- {
- lgm->ip6_lookup_next_lgpe_ip6_lookup =
- vlib_node_add_next (vnm->vlib_main, ip6_lookup_node.index,
- lgpe_ip6_lookup_node.index);
- }
- else
- {
- /* ask cp to re-add ifaces and defaults */
- }
-
lgm->is_en = 1;
}
else
@@ -951,37 +716,17 @@ vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a)
CLIB_UNUSED (uword * val);
hash_pair_t *p;
u32 *dp_tables = 0, *dp_table;
- lisp_gpe_tunnel_key_t *tunnels = 0, *tunnel;
- vnet_lisp_gpe_add_del_fwd_entry_args_t _at, *at = &_at;
vnet_lisp_gpe_add_del_iface_args_t _ai, *ai = &_ai;
+ lisp_fwd_entry_t *lfe;
- /* remove all tunnels */
-
+ /* remove all entries */
/* *INDENT-OFF* */
- mhash_foreach(tunnel, val, &lgm->lisp_gpe_tunnel_by_key, ({
- vec_add1(tunnels, tunnel[0]);
+ pool_foreach (lfe, lisp_fwd_entry_pool,
+ ({
+ del_ip_fwd_entry_i (lfe);
}));
/* *INDENT-ON* */
- vec_foreach (tunnel, tunnels)
- {
- memset (at, 0, sizeof (at[0]));
- at->is_add = 0;
- if (tunnel->rmt.type == GID_ADDR_IP_PREFIX)
- {
- gid_address_type (&at->rmt_eid) = GID_ADDR_IP_PREFIX;
- ip_prefix_copy (&gid_address_ippref (&at->rmt_eid),
- &tunnel->rmt.ippref);
- }
- else
- {
- gid_address_type (&at->rmt_eid) = GID_ADDR_MAC;
- mac_copy (&gid_address_mac (&at->rmt_eid), &tunnel->rmt.mac);
- }
- vnet_lisp_gpe_add_del_fwd_entry (at, 0);
- }
- vec_free (tunnels);
-
/* disable all l3 ifaces */
/* *INDENT-OFF* */
@@ -1109,6 +854,7 @@ format_vnet_lisp_gpe_status (u8 * s, va_list * args)
return format (s, "%s", lgm->is_en ? "enabled" : "disabled");
}
+
/** LISP-GPE init function. */
clib_error_t *
lisp_gpe_init (vlib_main_t * vm)
@@ -1128,11 +874,10 @@ lisp_gpe_init (vlib_main_t * vm)
lgm->im6 = &ip6_main;
lgm->lm4 = &ip4_main.lookup_main;
lgm->lm6 = &ip6_main.lookup_main;
- lgm->ip4_lookup_next_lgpe_ip4_lookup = ~0;
- lgm->ip6_lookup_next_lgpe_ip6_lookup = ~0;
- mhash_init (&lgm->lisp_gpe_tunnel_by_key, sizeof (uword),
- sizeof (lisp_gpe_tunnel_key_t));
+ lisp_gpe_fwd_entries = hash_create_mem (0,
+ sizeof (lisp_gpe_fwd_entry_key_t),
+ sizeof (uword));
l2_fib_init (lgm);
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h
index 4a8bdfe7f93..66009cc1947 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe.h
+++ b/vnet/vnet/lisp-gpe/lisp_gpe.h
@@ -30,6 +30,7 @@
#include <vnet/ip/udp.h>
#include <vnet/lisp-cp/lisp_types.h>
#include <vnet/lisp-gpe/lisp_gpe_packet.h>
+#include <vnet/adj/adj_types.h>
/** IP4-UDP-LISP encap header */
/* *INDENT-OFF* */
@@ -49,37 +50,6 @@ typedef CLIB_PACKED (struct {
}) ip6_udp_lisp_gpe_header_t;
/* *INDENT-ON* */
-/** LISP-GPE tunnel key */
-typedef struct
-{
- union
- {
- struct
- {
- dp_address_t rmt;
- dp_address_t lcl;
- u32 vni;
- };
- u8 as_u8[40];
- };
-} lisp_gpe_tunnel_key_t;
-
-typedef struct lisp_gpe_sub_tunnel
-{
- /** Rewrite string. $$$$ embed vnet_rewrite header */
- u8 *rewrite;
- u32 parent_index;
- u32 locator_pair_index;
- u8 weight;
- u8 is_ip4;
-} lisp_gpe_sub_tunnel_t;
-
-typedef struct nomalized_sub_tunnel
-{
- u32 sub_tunnel_index;
- u8 weight;
-} normalized_sub_tunnel_weights_t;
-
/** LISP-GPE tunnel structure */
typedef struct
{
@@ -87,17 +57,7 @@ typedef struct
locator_pair_t *locator_pairs;
/** locator-pairs with best priority become sub-tunnels */
- lisp_gpe_sub_tunnel_t *sub_tunnels;
-
- /** sub-tunnels load balancing vector: contains list of sub-tunnel
- * indexes replicated according to weight */
- u32 *sub_tunnels_lbv;
-
- /** number of entries in load balancing vector */
- u32 sub_tunnels_lbv_count;
-
- /** normalized sub tunnel weights */
- normalized_sub_tunnel_weights_t *norm_sub_tunnel_weights;
+ u32 *sub_tunnels;
/** decap next index */
u32 decap_next_index;
@@ -109,10 +69,16 @@ typedef struct
u32 encap_fib_index; /* tunnel partner lookup here */
u32 decap_fib_index; /* inner IP lookup here */
+ /** index of the source address lookup FIB */
+ u32 src_fib_index;
+
/** vnet intfc hw/sw_if_index */
u32 hw_if_index;
u32 sw_if_index;
+ /** L2 path-list */
+ fib_node_index_t l2_path_list;
+
/** action for 'negative' tunnels */
u8 action;
@@ -124,6 +90,112 @@ typedef struct
u32 vni;
} lisp_gpe_tunnel_t;
+/**
+ * @brief A path on which to forward LISP traffic
+ */
+typedef struct lisp_fwd_path_t_
+{
+ /**
+ * Index of the LISP adjacency constructed for the locator pair
+ */
+ index_t lisp_adj;
+
+ /**
+ * Priority. Only the paths with the best priority will be installed in FIB
+ */
+ u8 priority;
+
+ /**
+ * [UE]CMP weight for the path
+ */
+ u8 weight;
+
+} lisp_fwd_path_t;
+
+/**
+ * @brief A Forwarding entry can be 'normal' or 'negative'
+ * 'Negative' implies we deliberately want to add a FIB entry for an EID
+ * that results in 'special' behaviour determined by an 'action'.
+ * 'Normal' means send it down some tunnels.
+ */
+typedef enum lisp_fwd_entry_type_t_
+{
+ LISP_FWD_ENTRY_TYPE_NORMAL,
+ LISP_FWD_ENTRY_TYPE_NEGATIVE,
+} lisp_fwd_entry_type_t;
+
+/**
+ * @brief The action stored in a 'negative' forwarding entry (and carried in
+ * the add/del arguments as the "action for negative mappings").
+ */
+typedef enum
+{
+ NO_ACTION,
+ FORWARD_NATIVE,
+ SEND_MAP_REQUEST,
+ DROP
+} negative_fwd_actions_e;
+
+/**
+ * LISP-GPE fwd entry key: {remote EID, local EID, VNI}.
+ *
+ * The forwarding-entry hash is created with sizeof() of this structure as its
+ * key size.
+ * NOTE(review): presumably the key bytes are compared raw, in which case
+ * instances should be zeroed before the fields are filled in - confirm.
+ */
+typedef struct lisp_gpe_fwd_entry_key_t_
+{
+ dp_address_t rmt;
+ dp_address_t lcl;
+ u32 vni;
+} lisp_gpe_fwd_entry_key_t;
+
+/**
+ * @brief A LISP Forwarding Entry
+ *
+ * A forwarding entry is from a local EID to a remote EID over a set of RLOC
+ * pairs
+ */
+typedef struct lisp_fwd_entry_t_
+{
+ /**
+ * The Entry's key: {l-EID,r-EID,vni}
+ */
+ lisp_gpe_fwd_entry_key_t *key;
+
+ /**
+ * The VRF (in the case of L3) or Bridge-Domain (for L2) index
+ */
+ union
+ {
+ u32 eid_table_id;
+ u32 eid_bd_index;
+ };
+
+ /**
+ * The forwarding entry type; selects which member of the union below is
+ * valid
+ */
+ lisp_fwd_entry_type_t type;
+
+ union
+ {
+ /**
+ * @brief When the type is 'normal'
+ * The RLOC pairs that form the route's paths, i.e. where to send
+ * packets for this route.
+ */
+ lisp_fwd_path_t *paths;
+
+ /**
+ * @brief When the type is negative. The action to take.
+ */
+ negative_fwd_actions_e action;
+ };
+
+ /**
+ * The FIB index for the overlay, i.e. the FIB in which the EIDs
+ * are present
+ */
+ u32 eid_fib_index;
+
+ /**
+ * The index of the SRC-FIB created for adding source-route entries
+ */
+ u32 src_fib_index;
+} lisp_fwd_entry_t;
+
+
#define foreach_lisp_gpe_ip_input_next \
_(DROP, "error-drop") \
_(IP4_INPUT, "ip4-input") \
@@ -147,30 +219,6 @@ typedef enum
LISP_GPE_N_ERROR,
} lisp_gpe_error_t;
-/** IP4 source FIB.
- * As a first step, reuse v4 fib. The goal of the typedef is
- * to shield consumers from future updates that may result in the lisp ip4 fib
- * diverging from ip4 fib
- */
-typedef ip4_fib_t ip4_src_fib_t;
-
-/** IP6 source FIB */
-typedef struct ip6_src_fib
-{
- BVT (clib_bihash) ip6_lookup_table;
-
- /** bitmap/vector of mask widths to search */
- uword *non_empty_dst_address_length_bitmap;
- u8 *prefix_lengths_in_search_order;
- ip6_address_t fib_masks[129];
- i32 dst_address_length_refcounts[129];
-
- /** ip6 lookup table config parameters */
- u32 lookup_table_nbuckets;
- uword lookup_table_size;
-} ip6_src_fib_t;
-
-/** Tunnel lookup structure for L2 and L3 tunnels */
typedef struct tunnel_lookup
{
/** Lookup lisp-gpe interfaces by dp table (eg. vrf/bridge index) */
@@ -178,6 +226,8 @@ typedef struct tunnel_lookup
/** lookup decap tunnel termination sw_if_index by vni and vice versa */
uword *sw_if_index_by_vni;
+
+ // FIXME - Need this?
uword *vni_by_sw_if_index;
} tunnel_lookup_t;
@@ -187,9 +237,6 @@ typedef struct lisp_gpe_main
/** pool of encap tunnel instances */
lisp_gpe_tunnel_t *tunnels;
- /** lookup tunnel by key */
- mhash_t lisp_gpe_tunnel_by_key;
-
/** Free vlib hw_if_indices */
u32 *free_tunnel_hw_if_indices;
@@ -197,21 +244,8 @@ typedef struct lisp_gpe_main
/* L3 data structures
* ================== */
-
- /** Pool of src fibs that are paired with dst fibs */
- ip4_src_fib_t *ip4_src_fibs;
- ip6_src_fib_t *ip6_src_fibs;
-
tunnel_lookup_t l3_ifaces;
- /** Lookup lgpe_ipX_lookup_next by vrf */
- uword *lgpe_ip4_lookup_next_index_by_table_id;
- uword *lgpe_ip6_lookup_next_index_by_table_id;
-
- /** next node indexes that point ip4/6 lookup to lisp gpe ip lookup */
- u32 ip4_lookup_next_lgpe_ip4_lookup;
- u32 ip6_lookup_next_lgpe_ip6_lookup;
-
/* L2 data structures
* ================== */
@@ -220,6 +254,10 @@ typedef struct lisp_gpe_main
tunnel_lookup_t l2_ifaces;
+ /** Load-balance for a miss in the table */
+ index_t l2_lb_miss;
+ index_t l2_lb_cp_lkup;
+
/** convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
@@ -238,10 +276,10 @@ vnet_lisp_gpe_get_main ()
return &lisp_gpe_main;
}
-extern vlib_node_registration_t lgpe_ip4_lookup_node;
-extern vlib_node_registration_t lgpe_ip6_lookup_node;
+
extern vlib_node_registration_t lisp_gpe_ip4_input_node;
extern vlib_node_registration_t lisp_gpe_ip6_input_node;
+extern vnet_hw_interface_class_t lisp_gpe_hw_class;
u8 *format_lisp_gpe_header_with_length (u8 * s, va_list * args);
@@ -291,7 +329,7 @@ typedef struct
u8 is_negative;
/** action for negative mappings */
- u8 action;
+ negative_fwd_actions_e action;
/** local eid */
gid_address_t lcl_eid;
@@ -332,13 +370,23 @@ int
vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
u32 * hw_if_indexp);
-int
-ip_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id,
- ip_adjacency_t * add_adj, u8 is_add);
-u32
-ip_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix,
- ip_prefix_t * src_prefix, u32 table_id);
+extern void
+ip_src_fib_add_route (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ const lisp_fwd_path_t * paths);
+extern void
+ip_src_dst_fib_del_route (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ u32 dst_table_id, const ip_prefix_t * dst_prefix);
+extern void
+ip_src_fib_add_route_w_dpo (u32 src_fib_index,
+ const ip_prefix_t * src_prefix,
+ const dpo_id_t * src_dpo);
+extern u32
+ip_dst_fib_add_route (u32 dst_table_id, const ip_prefix_t * dst_prefix);
+
+extern fib_route_path_t *lisp_gpe_mk_paths_for_sub_tunnels (lisp_gpe_tunnel_t
+ * t);
#define foreach_lgpe_ip4_lookup_next \
_(DROP, "error-drop") \
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c
new file mode 100644
index 00000000000..861f0dd38c0
--- /dev/null
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencies.
+ *
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/adj/adj_midchain.h>
+
+/**
+ * Memory pool of all adjacencies
+ */
+static lisp_gpe_adjacency_t *lisp_adj_pool;
+
+/**
+ * Hash table of all adjacencies. key:{nh, itf}
+ * We never have an all zeros address since the interfaces are multi-access,
+ * therefore there is no ambiguity between a v4 and v6 next-hop, so we don't
+ * need to add the protocol to the key.
+ */
+static
+BVT (clib_bihash)
+ lisp_adj_db;
+
+/**
+ * Fill a bihash key/value pair's key from a next-hop address and an
+ * interface index. All 128 bits of the address union are copied.
+ * Wrapped in do/while (0) so the expansion is a single statement and stays
+ * correct inside an unbraced if/else.
+ */
+#define LISP_ADJ_SET_KEY(_key, _itf, _nh)         \
+do {                                              \
+  (_key).key[0] = (_nh)->ip.v6.as_u64[0];         \
+  (_key).key[1] = (_nh)->ip.v6.as_u64[1];         \
+  (_key).key[2] = (_itf);                         \
+} while (0)
+
+/**
+ * @brief Look up the adjacency DB by {next-hop address, interface index}.
+ *
+ * @return the value stored for the key, or INDEX_INVALID when the search
+ *         reports failure.
+ */
+static index_t
+lisp_adj_find (const ip_address_t * addr, u32 sw_if_index)
+{
+  BVT (clib_bihash_kv) lookup;
+  int rv;
+
+  LISP_ADJ_SET_KEY (lookup, sw_if_index, addr);
+
+  /* the same pair is used as search key and as result holder */
+  rv = BV (clib_bihash_search) (&lisp_adj_db, &lookup, &lookup);
+  if (rv < 0)
+    return (INDEX_INVALID);
+
+  return (lookup.value);
+}
+
+/**
+ * @brief Record adjacency index @c ai in the DB under
+ * {next-hop address, interface index}.
+ */
+static void
+lisp_adj_insert (const ip_address_t * addr, u32 sw_if_index, index_t ai)
+{
+  BVT (clib_bihash_kv) entry;
+
+  entry.value = ai;
+  LISP_ADJ_SET_KEY (entry, sw_if_index, addr);
+
+  BV (clib_bihash_add_del) (&lisp_adj_db, &entry, 1 /* is_add */ );
+}
+
+/**
+ * @brief Delete the DB entry keyed by {next-hop address, interface index}.
+ */
+static void
+lisp_adj_remove (const ip_address_t * addr, u32 sw_if_index)
+{
+  BVT (clib_bihash_kv) entry;
+
+  LISP_ADJ_SET_KEY (entry, sw_if_index, addr);
+
+  BV (clib_bihash_add_del) (&lisp_adj_db, &entry, 0 /* is_add */ );
+}
+
+/**
+ * @brief Return the (mutable) pool element at adjacency index @c lai.
+ */
+static lisp_gpe_adjacency_t *
+lisp_gpe_adjacency_get_i (index_t lai)
+{
+  lisp_gpe_adjacency_t *ladj = pool_elt_at_index (lisp_adj_pool, lai);
+
+  return (ladj);
+}
+
+/**
+ * @brief Map the IP version of the adjacency's remote RLOC to the matching
+ * unicast FIB forwarding chain type.
+ *
+ * An unexpected version trips an ASSERT and otherwise falls back to the
+ * IPv4 chain type.
+ */
+fib_forward_chain_type_t
+lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj)
+{
+  const int version = ip_addr_version (&ladj->remote_rloc);
+
+  if (IP6 == version)
+    {
+      return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+    }
+  if (IP4 != version)
+    {
+      ASSERT (0);
+    }
+
+  return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+}
+
+/**
+ * @brief Stack each of the adjacency's midchains on the forwarding
+ * contributed by the tunnel's FIB entry.
+ */
+static void
+lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
+{
+  const lisp_gpe_tunnel_2_t *tunnel =
+    lisp_gpe_tunnel_get (ladj->tunnel_index);
+  dpo_id_t via_dpo = DPO_NULL;
+  fib_link_t link;
+
+  fib_entry_contribute_forwarding (tunnel->fib_entry_index,
+				   lisp_gpe_adj_get_fib_chain_type (ladj),
+				   &via_dpo);
+
+  /* the same contributed forwarding is used for every link type */
+  FOR_EACH_FIB_IP_LINK (link)
+  {
+    adj_nbr_midchain_stack (ladj->adjs[link], &via_dpo);
+  }
+
+  dpo_reset (&via_dpo);
+}
+
+/**
+ * @brief Translate a FIB link type into the value used as the payload
+ * protocol when the tunnel rewrite is built.
+ *
+ * NOTE(review): the declared return type is lisp_gpe_next_protocol_e, yet the
+ * values returned are LISP_GPE_INPUT_NEXT_* constants - confirm the two
+ * enumerations agree numerically or switch to the protocol enumerators.
+ */
+static lisp_gpe_next_protocol_e
+lisp_gpe_adj_proto_from_fib_link_type (fib_link_t linkt)
+{
+  if (FIB_LINK_IP4 == linkt)
+    return (LISP_GPE_INPUT_NEXT_IP4_INPUT);
+
+  if (FIB_LINK_IP6 == linkt)
+    return (LISP_GPE_INPUT_NEXT_IP6_INPUT);
+
+  /* any other link type is a programming error */
+  ASSERT (0);
+  return (LISP_GPE_INPUT_NEXT_DROP);
+}
+
+/**
+ * @brief Find the adjacency for {pair's remote RLOC, L3 sub-interface},
+ * creating it if it is not in the DB, and take a lock on it.
+ *
+ * @param pair the locator pair; lcl_loc selects the sub-interface, rmt_loc
+ *             is the adjacency's next-hop
+ * @param overlay_table_id passed to the sub-interface find-or-create
+ * @param vni the VNI, passed to the sub-interface find-or-create and stored
+ *            on a new adjacency
+ * @return the index of the adjacency in the adjacency pool
+ */
+index_t
+lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
+ u32 overlay_table_id, u32 vni)
+{
+ const lisp_gpe_tunnel_2_t *lgt;
+ lisp_gpe_adjacency_t *ladj;
+ index_t lai, l3si;
+
+ /*
+ * first find the L3 sub-interface that corresponds to the local-rloc and vni
+ */
+ l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc,
+ overlay_table_id,
+ vni);
+
+ /*
+ * find an existing or create a new adj
+ */
+ lai = lisp_adj_find (&pair->rmt_loc, l3si);
+
+ if (INDEX_INVALID == lai)
+ {
+ const lisp_gpe_sub_interface_t *l3s;
+ u8 *rewrite = NULL;
+ fib_link_t linkt;
+ fib_prefix_t nh;
+
+ pool_get (lisp_adj_pool, ladj);
+ memset (ladj, 0, sizeof (*ladj));
+ lai = (ladj - lisp_adj_pool);
+
+ ladj->remote_rloc = pair->rmt_loc;
+ ladj->vni = vni;
+ /* transfer the sub-interface lock taken above to the adj */
+ ladj->lisp_l3_sub_index = l3si;
+
+ l3s = lisp_gpe_sub_interface_get (l3si);
+ ladj->sw_if_index = l3s->sw_if_index;
+
+ /* if vni is non-default */
+ if (ladj->vni)
+ ladj->flags = LISP_GPE_FLAGS_I;
+
+ /* work in lisp-gpe not legacy mode */
+ ladj->flags |= LISP_GPE_FLAGS_P;
+
+ /*
+ * find the tunnel that will provide the underlying transport
+ * and hence the rewrite.
+ * The RLOC FIB index is default table - always.
+ */
+ ladj->tunnel_index = lisp_gpe_tunnel_find_or_create_and_lock (pair, 0);
+
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+
+ /*
+ * become a child of the RLOC FIB entry so we are updated when
+ * its reachability changes, allowing us to re-stack the midchains
+ */
+ ladj->fib_entry_child_index = fib_entry_child_add (lgt->fib_entry_index,
+ FIB_NODE_TYPE_LISP_ADJ,
+ lai);
+ ip_address_to_fib_prefix (&pair->rmt_loc, &nh);
+
+ /*
+ * construct and stack the FIB midchain adjacencies, one per link type,
+ * each with a rewrite built for that link type's payload protocol
+ */
+ FOR_EACH_FIB_IP_LINK (linkt)
+ {
+ ladj->adjs[linkt] = adj_nbr_add_or_lock (nh.fp_proto,
+ linkt,
+ &nh.fp_addr,
+ ladj->sw_if_index);
+
+ rewrite =
+ lisp_gpe_tunnel_build_rewrite (lgt, ladj,
+ lisp_gpe_adj_proto_from_fib_link_type
+ (linkt));
+
+ adj_nbr_midchain_update_rewrite (ladj->adjs[linkt],
+ vnet_get_sup_hw_interface
+ (vnet_get_main (),
+ ladj->sw_if_index)->tx_node_index,
+ rewrite);
+
+ vec_free (rewrite);
+ }
+
+ lisp_gpe_adj_stack (ladj);
+
+ /* keyed by the sub-interface index, matching the find above */
+ lisp_adj_insert (&ladj->remote_rloc, ladj->lisp_l3_sub_index, lai);
+ }
+ else
+ {
+ /* the existing adj already holds its own lock; drop the one from the find. */
+ lisp_gpe_sub_interface_unlock (l3si);
+ ladj = lisp_gpe_adjacency_get_i (lai);
+ }
+
+ ladj->locks++;
+
+ return (lai);
+}
+
+/**
+ * @brief Recover the adjacency that embeds the given FIB node by stepping
+ * back over the offset of its fib_node member.
+ */
+static lisp_gpe_adjacency_t *
+lisp_gpe_adjacency_from_fib_node (const fib_node_t * node)
+{
+  char *base =
+    (char *) node - STRUCT_OFFSET_OF (lisp_gpe_adjacency_t, fib_node);
+
+  return ((lisp_gpe_adjacency_t *) base);
+}
+
+/**
+ * @brief Tear down an adjacency whose last lock has been released.
+ *
+ * Removes it from the DB, detaches it from the RLOC FIB entry, releases the
+ * tunnel and sub-interface locks it holds and returns it to the pool.
+ *
+ * NOTE(review): the per-link adjacencies obtained with adj_nbr_add_or_lock
+ * at creation are not released here - confirm whether they need unlocking.
+ */
+static void
+lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj)
+{
+  const lisp_gpe_tunnel_2_t *lgt;
+
+  lisp_adj_remove (&ladj->remote_rloc, ladj->lisp_l3_sub_index);
+
+  /*
+   * stop being a child of the RLOC FIB entry, so it no longer holds a
+   * reference to this pool element. This must happen while we still hold
+   * the tunnel lock, since the entry index is read from the tunnel.
+   */
+  lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+  fib_entry_child_remove (lgt->fib_entry_index, ladj->fib_entry_child_index);
+
+  /*
+   * unlock the resources this adj holds
+   */
+  lisp_gpe_tunnel_unlock (ladj->tunnel_index);
+  lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index);
+
+  pool_put (lisp_adj_pool, ladj);
+}
+
+/**
+ * @brief Drop one lock on the adjacency; when the count reaches zero the
+ * adjacency is torn down.
+ */
+void
+lisp_gpe_adjacency_unlock (index_t lai)
+{
+  lisp_gpe_adjacency_t *adj = lisp_gpe_adjacency_get_i (lai);
+
+  if (0 == --adj->locks)
+    lisp_gpe_adjacency_last_lock_gone (adj);
+}
+
+/**
+ * @brief Read-only access to the adjacency at pool index @c lai.
+ */
+const lisp_gpe_adjacency_t *
+lisp_gpe_adjacency_get (index_t lai)
+{
+  return (pool_elt_at_index (lisp_adj_pool, lai));
+}
+
+
+/**
+ * @brief LISP GPE adjacency back walk
+ *
+ * Called for the adjacency's FIB node when the graph is walked back from the
+ * FIB entry it is a child of: re-stack the midchains on the entry's current
+ * forwarding, then let the walk continue.
+ */
+static fib_node_back_walk_rc_t
+lisp_gpe_adjacency_back_walk (fib_node_t * node,
+			      fib_node_back_walk_ctx_t * ctx)
+{
+  lisp_gpe_adjacency_t *ladj = lisp_gpe_adjacency_from_fib_node (node);
+
+  lisp_gpe_adj_stack (ladj);
+
+  return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * @brief Return the FIB node embedded in the adjacency at the given pool
+ * index.
+ */
+static fib_node_t *
+lisp_gpe_adjacency_get_fib_node (fib_node_index_t index)
+{
+  return (&pool_elt_at_index (lisp_adj_pool, index)->fib_node);
+}
+
+/**
+ * @brief FIB-node flavour of the last-lock handler: resolve the node to its
+ * adjacency and tear that down.
+ */
+static void
+lisp_gpe_adjacency_last_fib_lock_gone (fib_node_t * node)
+{
+  lisp_gpe_adjacency_t *ladj = lisp_gpe_adjacency_from_fib_node (node);
+
+  lisp_gpe_adjacency_last_lock_gone (ladj);
+}
+
+/**
+ * The virtual function table registered for the LISP adjacency FIB node type.
+ */
+static const fib_node_vft_t lisp_gpe_tuennel_vft = {
+  .fnv_get = lisp_gpe_adjacency_get_fib_node,
+  .fnv_last_lock = lisp_gpe_adjacency_last_fib_lock_gone,
+  .fnv_back_walk = lisp_gpe_adjacency_back_walk,
+};
+
+/**
+ * @brief Format a LISP GPE adjacency.
+ *
+ * "%U" arguments, in order: a lisp_gpe_adjacency_t pointer, then a
+ * lisp_gpe_adjacency_format_flags_t. With the DETAIL flag the pool index,
+ * lock count, sub-interface, tunnel and per-link FIB adjacencies are
+ * printed; otherwise only the sub-interface and tunnel indices.
+ *
+ * @param s    the vector being appended to
+ * @param args pointer to the caller's argument list
+ * @return the (possibly reallocated) vector
+ */
+u8 *
+format_lisp_gpe_adjacency (u8 * s, va_list * args)
+{
+  lisp_gpe_adjacency_t *ladj = va_arg (*args, lisp_gpe_adjacency_t *);
+  /* both arguments must be pulled from the dereferenced list */
+  lisp_gpe_adjacency_format_flags_t flags =
+    va_arg (*args, lisp_gpe_adjacency_format_flags_t);
+
+  if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL)
+    {
+      s =
+	format (s, "index %d locks:%d\n", ladj - lisp_adj_pool, ladj->locks);
+    }
+
+  s = format (s, " vni: %d,", ladj->vni);
+  s = format (s, " remote-RLOC: %U,", format_ip_address, &ladj->remote_rloc);
+
+  if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL)
+    {
+      s = format (s, " %U\n",
+		  format_lisp_gpe_sub_interface,
+		  lisp_gpe_sub_interface_get (ladj->lisp_l3_sub_index));
+      s = format (s, " %U\n",
+		  format_lisp_gpe_tunnel,
+		  lisp_gpe_tunnel_get (ladj->tunnel_index));
+      s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d\n",
+		  ladj->adjs[FIB_LINK_IP4], ladj->adjs[FIB_LINK_IP6]);
+    }
+  else
+    {
+      s = format (s, " LISP L3 sub-interface index: %d,",
+		  ladj->lisp_l3_sub_index);
+      s = format (s, " LISP tunnel index: %d", ladj->tunnel_index);
+    }
+
+  return (s);
+}
+
+/**
+ * @brief CLI handler for showing LISP GPE adjacencies.
+ *
+ * With a numeric argument the adjacency at that pool index is shown in
+ * detail; without one every adjacency is listed in brief. An index that does
+ * not name an in-use pool element is reported rather than dereferenced.
+ *
+ * @return always 0 (no error)
+ */
+static clib_error_t *
+lisp_gpe_adjacency_show (vlib_main_t * vm,
+			 unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  lisp_gpe_adjacency_t *ladj;
+  index_t index;
+
+  if (pool_elts (lisp_adj_pool) == 0)
+    vlib_cli_output (vm, "No lisp-gpe Adjacencies");
+
+  if (unformat (input, "%d", &index))
+    {
+      /* the index comes straight from the user: validate before use */
+      if (pool_is_free_index (lisp_adj_pool, index))
+	{
+	  vlib_cli_output (vm, "No lisp-gpe Adjacency with index %d", index);
+	  return 0;
+	}
+
+      ladj = lisp_gpe_adjacency_get_i (index);
+      vlib_cli_output (vm, "%U", format_lisp_gpe_adjacency, ladj,
+		       LISP_GPE_ADJ_FORMAT_FLAG_DETAIL);
+    }
+  else
+    {
+      /* *INDENT-OFF* */
+      pool_foreach (ladj, lisp_adj_pool,
+      ({
+	vlib_cli_output (vm, "[%d] %U\n",
+			 ladj - lisp_adj_pool,
+			 format_lisp_gpe_adjacency, ladj,
+			 LISP_GPE_ADJ_FORMAT_FLAG_NONE);
+      }));
+      /* *INDENT-ON* */
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration for the adjacency show handler (optional pool index). */
+VLIB_CLI_COMMAND (lisp_gpe_adjacency_show_command, static) =
+{
+  .path = "show lisp gpe adjacency",
+  .short_help = "show lisp gpe adjacency [<index>]",
+  .function = lisp_gpe_adjacency_show,
+};
+/* *INDENT-ON* */
+
+/* Sizing passed to the adjacency DB's bihash at init time */
+#define LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (256)
+#define LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (1<<20)
+
+/**
+ * @brief Module init: create the adjacency DB and register the LISP
+ * adjacency node type, with its function table, with the FIB.
+ */
+static clib_error_t *
+lisp_gpe_adj_module_init (vlib_main_t * vm)
+{
+ BV (clib_bihash_init) (&lisp_adj_db,
+ "Adjacency Neighbour table",
+ LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
+ LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+
+ fib_node_register_type (FIB_NODE_TYPE_LISP_ADJ, &lisp_gpe_tuennel_vft);
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_adj_module_init)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h
new file mode 100644
index 00000000000..f6a66cddf0b
--- /dev/null
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencies.
+ *
+ */
+
+#ifndef LISP_GPE_ADJACENCY_H__
+#define LISP_GPE_ADJACENCY_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * @brief A LISP GPE Adjacency.
+ *
+ * An adjacency represents a peer on an L3 sub-interface to which to send
+ * traffic. Adjacencies are thus present in the EID space.
+ * The peer is identified by the key:{remote-rloc, sub-interface}, which is
+ * equivalent to the usual adjacency key {next-hop, interface}. So curiously
+ * the rloc address from the underlay is used as a next hop address in the
+ * overlay.
+ * This is OK because:
+ * 1 - the RLOC is unique in the underlay AND there is only one underlay VRF per
+ * overlay
+ * 2 - the RLOC may overlap with an address in the overlay, but we do not create
+ * an adj-fib (i.e. a route in the overlay FIB for the rloc)
+ */
+typedef struct lisp_gpe_adjacency_t_
+{
+ /**
+ * The LISP adj is a part of the FIB control plane graph.
+ */
+ fib_node_t fib_node;
+
+ /**
+ * remote RLOC. The adjacency's next-hop
+ */
+ ip_address_t remote_rloc;
+
+ /**
+ * The VNI. Used in combination with the local-rloc to get the sub-interface
+ */
+ u32 vni;
+
+ /**
+ * The number of locks/reference counts on the adjacency.
+ */
+ u32 locks;
+
+ /**
+ * The index of the LISP L3 subinterface
+ */
+ u32 lisp_l3_sub_index;
+
+ /**
+ * The SW IF index of the sub-interface this adjacency uses.
+ * Cached for convenience from the LISP L3 sub-interface
+ */
+ u32 sw_if_index;
+
+ /**
+ * The index of the LISP GPE tunnel that provides the transport
+ * in the underlay.
+ */
+ u32 tunnel_index;
+
+ /**
+ * Per-link-type FIB adjacencies contributed.
+ * These will be used as a result of a FIB lookup.
+ */
+ adj_index_t adjs[FIB_LINK_NUM];
+
+ /**
+ * This adjacency is a child of the FIB entry to reach the RLOC.
+ * This is so when the reachability of that RLOC changes, we can restack
+ * the FIB adjacencies.
+ */
+ u32 fib_entry_child_index;
+
+ /**
+ * LISP header fields in HOST byte order
+ */
+ u8 flags;
+ u8 ver_res;
+ u8 res;
+ u8 next_protocol;
+
+} lisp_gpe_adjacency_t;
+
+/**
+ * @brief Find, or create if absent, the adjacency for the pair's remote RLOC
+ * on the sub-interface selected by the pair's local RLOC and the VNI, and
+ * take a lock on it.
+ * @return the adjacency's index
+ */
+extern index_t lisp_gpe_adjacency_find_or_create_and_lock (const
+							   locator_pair_t *
+							   pair,
+							   u32
+							   overlay_table_id,
+							   u32 vni);
+
+/**
+ * @brief Release one lock on the adjacency with index @c lai.
+ */
+extern void lisp_gpe_adjacency_unlock (index_t lai);
+
+/**
+ * @brief Read-only access to the adjacency with index @c lai.
+ */
+extern const lisp_gpe_adjacency_t *lisp_gpe_adjacency_get (index_t lai);
+
+/**
+ * @brief Flags for displaying the adjacency.
+ * Passed as the second "%U" argument to format_lisp_gpe_adjacency, which
+ * tests for DETAIL with a bitwise AND.
+ */
+typedef enum lisp_gpe_adjacency_format_flags_t_
+{
+ LISP_GPE_ADJ_FORMAT_FLAG_NONE,
+ LISP_GPE_ADJ_FORMAT_FLAG_DETAIL,
+} lisp_gpe_adjacency_format_flags_t;
+
+extern u8 *format_lisp_gpe_adjacency (u8 * s, va_list * args);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c
new file mode 100644
index 00000000000..220802b17c7
--- /dev/null
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP sub-interfaces.
+ *
+ */
+#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/interface.h>
+
+/**
+ * @brief Pool of all l3-sub-interfaces
+ */
+static lisp_gpe_sub_interface_t *lisp_gpe_sub_interface_pool;
+
+/**
+ * A DB of all LISP L3 sub-interfaces. The key is:{VNI,l-RLOC}
+ */
+static uword *lisp_gpe_sub_interfaces;
+
+/**
+ * A DB of all VNET L3 sub-interfaces. The key is:{VNI,l-RLOC}
+ * Used in the data-plane for interface lookup on decap.
+ */
+uword *lisp_gpe_sub_interfaces_sw_if_index;
+
+/**
+ * The next available sub-interface ID. FIXME
+ */
+static u32 lisp_gpe_sub_interface_id;
+
+
+/**
+ * @brief Look up a sub-interface in the DB by local RLOC and VNI.
+ *
+ * @param[in] lrloc The local RLOC of the sub-interface.
+ * @param[in] vni   The VNI in host byte order. It is converted to network
+ *                  byte order, which is how it is held in the stored keys.
+ *
+ * @return The value stored in the DB for the key, or INDEX_INVALID if the
+ *         key is not present.
+ */
+static index_t
+lisp_gpe_sub_interface_db_find (const ip_address_t * lrloc, u32 vni)
+{
+  lisp_gpe_sub_interface_key_t key;
+  uword *p;
+
+  /*
+   * The DB is created with a fixed key size, so the whole key structure
+   * takes part in the lookup. Clear it first so that any padding between
+   * the fields is zero, in the same way the stored keys are built.
+   */
+  memset (&key, 0, sizeof (key));
+  key.local_rloc = *lrloc;
+  key.vni = clib_host_to_net_u32 (vni);
+
+  p = hash_get_mem (lisp_gpe_sub_interfaces, &key);
+
+  if (NULL == p)
+    return (INDEX_INVALID);
+  else
+    return (p[0]);
+}
+
+/**
+ * @brief Insert a sub-interface into both DBs.
+ *
+ * The first DB maps the key to the sub-interface's pool index, the second
+ * maps the same key to its SW interface index.
+ *
+ * @param[in] l3s The sub-interface to insert. Its key must already be
+ *                allocated and filled in.
+ */
+static void
+lisp_gpe_sub_interface_db_insert (const lisp_gpe_sub_interface_t * l3s)
+{
+  /*
+   * l3s->key is itself the pointer to the heap allocated key. Passing
+   * &l3s->key would instead hand the hash the address of the pointer field
+   * inside the pool element, so the key contents would never be compared.
+   */
+  hash_set_mem (lisp_gpe_sub_interfaces,
+                l3s->key, l3s - lisp_gpe_sub_interface_pool);
+  hash_set_mem (lisp_gpe_sub_interfaces_sw_if_index,
+                l3s->key, l3s->sw_if_index);
+}
+
+/**
+ * @brief Remove a sub-interface from both DBs.
+ *
+ * @param[in] l3s The sub-interface to remove. Its key must still be valid.
+ */
+static void
+lisp_gpe_sub_interface_db_remove (const lisp_gpe_sub_interface_t * l3s)
+{
+  /*
+   * Use the heap allocated key the pointer refers to, not the address of
+   * the pointer field (&l3s->key).
+   */
+  hash_unset_mem (lisp_gpe_sub_interfaces, l3s->key);
+  hash_unset_mem (lisp_gpe_sub_interfaces_sw_if_index, l3s->key);
+}
+
+/**
+ * @brief Get the (non-const) sub-interface object at the given pool index.
+ */
+lisp_gpe_sub_interface_t *
+lisp_gpe_sub_interface_get_i (index_t l3si)
+{
+  lisp_gpe_sub_interface_t *l3s;
+
+  l3s = pool_elt_at_index (lisp_gpe_sub_interface_pool, l3si);
+
+  return (l3s);
+}
+
+/**
+ * @brief Bind an interface to the IPv4 and IPv6 FIB tables with the given
+ * table ID and enable IP4 and IP6 on it.
+ *
+ * @param[in] sw_if_index The SW interface index to bind.
+ * @param[in] table_id The table ID; the table is found, or created, and
+ * locked for each of IPv4 and IPv6.
+ */
+static void
+lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id)
+{
+ fib_node_index_t fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
+ ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
+
+ /* make sure the per-interface vector has a slot for this sw_if_index */
+ vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ // FIXME. enable When we get an adj
+ ip4_sw_interface_enable_disable (sw_if_index, 1);
+
+ /* and the same again for IPv6 */
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id);
+ ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
+
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ // FIXME. enable When we get an adj
+ ip6_sw_interface_enable_disable (sw_if_index, 1);
+}
+
+/**
+ * @brief Reset an interface's IPv4 and IPv6 FIB index to 0 and disable
+ * IP4 and IP6 on it.
+ *
+ * @param[in] sw_if_index The SW interface index to unbind.
+ * @param[in] table_id Not used by this function.
+ *
+ * NOTE(review): no FIB table unlock is performed here for the locks taken
+ * when the table was set - confirm whether one is needed.
+ */
+static void
+lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id)
+{
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0;
+ ip4_sw_interface_enable_disable (sw_if_index, 0);
+
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0;
+ ip6_sw_interface_enable_disable (sw_if_index, 0);
+}
+
+/**
+ * @brief Find the sub-interface for a {local-RLOC, VNI} pair, creating it
+ * if it is not in the DB, and take a lock on it.
+ *
+ * A new sub-interface is created as a VNET sub-interface of the main
+ * interface found for the VNI. It is then bound to the overlay table, set
+ * admin-up and inserted in the DBs.
+ *
+ * @param[in] lrloc The local RLOC.
+ * @param[in] overlay_table_id The table ID a newly created sub-interface is
+ * bound to. Not used when the sub-interface already exists.
+ * @param[in] vni The VNI, in host byte order.
+ *
+ * @return The pool index of the sub-interface, or INDEX_INVALID if no main
+ * interface is found for the VNI or the VNET sub-interface cannot be
+ * created. No lock is taken when INDEX_INVALID is returned.
+ */
+index_t
+lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc,
+ u32 overlay_table_id, u32 vni)
+{
+ lisp_gpe_sub_interface_t *l3s;
+ lisp_gpe_main_t *lgm = &lisp_gpe_main;
+ index_t l3si;
+
+ l3si = lisp_gpe_sub_interface_db_find (lrloc, vni);
+
+ if (INDEX_INVALID == l3si)
+ {
+ vnet_hw_interface_t *hi;
+ clib_error_t *error;
+ u32 sub_sw_if_index;
+ uword *p;
+
+ /*
+ * find the main interface from the VNI
+ */
+ p = hash_get (lgm->l3_ifaces.sw_if_index_by_vni, vni);
+
+ if (NULL == p)
+ return (INDEX_INVALID);
+
+ /*
+ * NOTE(review): the value comes from a table named sw_if_index_by_vni
+ * but is used here as a HW interface index - confirm they are the same.
+ */
+ hi = vnet_get_hw_interface (vnet_get_main (), p[0]);
+
+ if (NULL == hi)
+ return (INDEX_INVALID);
+
+ vnet_sw_interface_t sub_itf_template = {
+ .type = VNET_SW_INTERFACE_TYPE_SUB,
+ .sup_sw_if_index = hi->sw_if_index,
+ .sub.id = lisp_gpe_sub_interface_id++,
+ };
+
+ error = vnet_create_sw_interface (vnet_get_main (),
+ &sub_itf_template, &sub_sw_if_index);
+
+ /*
+ * NOTE(review): on failure the error is neither reported nor freed, and
+ * the sub-interface ID taken above is not handed back.
+ */
+ if (NULL != error)
+ return (INDEX_INVALID);
+
+ pool_get (lisp_gpe_sub_interface_pool, l3s);
+ memset (l3s, 0, sizeof (*l3s));
+ /* the key lives on the heap; it is zeroed before the fields are set */
+ l3s->key = clib_mem_alloc (sizeof (*l3s->key));
+ memset (l3s->key, 0, sizeof (*l3s->key));
+
+ l3s->key->local_rloc = *lrloc;
+ l3s->key->vni = clib_host_to_net_u32 (vni);
+ l3s->main_sw_if_index = hi->sw_if_index;
+ l3s->sw_if_index = sub_sw_if_index;
+ l3s->eid_table_id = overlay_table_id;
+
+ l3si = (l3s - lisp_gpe_sub_interface_pool);
+
+ lisp_gpe_sub_interface_set_table (l3s->sw_if_index, l3s->eid_table_id);
+ vnet_sw_interface_set_flags (vnet_get_main (),
+ l3s->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ lisp_gpe_sub_interface_db_insert (l3s);
+ }
+ else
+ {
+ l3s = lisp_gpe_sub_interface_get_i (l3si);
+ }
+
+ /* one lock per successful find or create */
+ l3s->locks++;
+
+ return (l3si);
+}
+
+/**
+ * @brief Release one lock on a sub-interface.
+ *
+ * When the lock count reaches 0 the sub-interface is unbound from its
+ * tables, set admin-down, deleted from VNET, removed from the DBs and its
+ * key and pool element are freed.
+ *
+ * @param[in] l3si The pool index of the sub-interface.
+ */
+void
+lisp_gpe_sub_interface_unlock (index_t l3si)
+{
+ lisp_gpe_sub_interface_t *l3s;
+
+ l3s = lisp_gpe_sub_interface_get_i (l3si);
+
+ l3s->locks--;
+
+ if (0 == l3s->locks)
+ {
+ lisp_gpe_sub_interface_unset_table (l3s->sw_if_index,
+ l3s->eid_table_id);
+
+ vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, 0);
+ vnet_delete_sub_interface (l3s->sw_if_index);
+
+ /* remove from the DBs while the key is still allocated */
+ lisp_gpe_sub_interface_db_remove (l3s);
+
+ clib_mem_free (l3s->key);
+ pool_put (lisp_gpe_sub_interface_pool, l3s);
+ }
+}
+
+/**
+ * @brief Get a read-only view of the sub-interface at the given pool index.
+ */
+const lisp_gpe_sub_interface_t *
+lisp_gpe_sub_interface_get (index_t l3si)
+{
+  const lisp_gpe_sub_interface_t *l3s;
+
+  l3s = lisp_gpe_sub_interface_get_i (l3si);
+
+  return (l3s);
+}
+
+/**
+ * @brief Format one sub-interface as a row: interface name, VNI (converted
+ * to host byte order), SW interface index and local RLOC.
+ *
+ * The single variadic argument is a lisp_gpe_sub_interface_t pointer.
+ *
+ * NOTE(review): this takes the va_list by value, whereas
+ * format_lisp_gpe_tunnel takes a va_list pointer - confirm this matches
+ * what the %U handler passes on all supported targets.
+ *
+ * @return The vector s with the formatted row appended.
+ */
+u8 *
+format_lisp_gpe_sub_interface (u8 * s, va_list ap)
+{
+ lisp_gpe_sub_interface_t *l3s = va_arg (ap, lisp_gpe_sub_interface_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+
+ s = format (s, "%=16U",
+ format_vnet_sw_interface_name,
+ vnm, vnet_get_sw_interface (vnm, l3s->sw_if_index));
+ /* the key holds the VNI in network byte order */
+ s = format (s, "%=10d", clib_net_to_host_u32 (l3s->key->vni));
+ s = format (s, "%=12d", l3s->sw_if_index);
+ s = format (s, "%U", format_ip_address, &l3s->key->local_rloc);
+
+ return (s);
+}
+
+/**
+ * CLI command to show LISP-GPE interfaces.
+ *
+ * Prints a header row followed by one row for each sub-interface in the
+ * pool. The input and cmd arguments are not used.
+ *
+ * @return 0 always.
+ */
+static clib_error_t *
+lisp_gpe_sub_interface_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ lisp_gpe_sub_interface_t *l3s;
+
+ /* column widths match those used by format_lisp_gpe_sub_interface */
+ vlib_cli_output (vm, "%=16s%=10s%=12s%s", "Name", "VNI", "SW IF Index",
+ "local RLOC");
+
+ /* *INDENT-OFF* */
+ pool_foreach (l3s, lisp_gpe_sub_interface_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_lisp_gpe_sub_interface, l3s);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/**
+ * Registration of the "show lisp gpe sub-interface" CLI command, handled by
+ * lisp_gpe_sub_interface_show.
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lisp_gpe_sub_interface_command) = {
+ .path = "show lisp gpe sub-interface",
+ .short_help = "show lisp gpe sub-interface",
+ .function = lisp_gpe_sub_interface_show,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Module init: create the two sub-interface DBs. Both are keyed by
+ * lisp_gpe_sub_interface_key_t and hold uword sized values.
+ *
+ * @return NULL always.
+ */
+static clib_error_t *
+lisp_gpe_sub_interface_module_init (vlib_main_t * vm)
+{
+  const uword key_bytes = sizeof (lisp_gpe_sub_interface_key_t);
+  const uword value_bytes = sizeof (uword);
+
+  lisp_gpe_sub_interfaces = hash_create_mem (0, key_bytes, value_bytes);
+  lisp_gpe_sub_interfaces_sw_if_index =
+    hash_create_mem (0, key_bytes, value_bytes);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_sub_interface_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h
new file mode 100644
index 00000000000..ad942f415d1
--- /dev/null
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP sub-interfaces.
+ *
+ */
+
+#ifndef __LISP_GPE_SUB_INTERFACE_H__
+#define __LISP_GPE_SUB_INTERFACE_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * A Key for lookup in the L3 sub-interface DB
+ */
+typedef struct lisp_gpe_sub_interface_key_t_
+{
+ /**
+ * The local-RLOC. This is the interface's 'source' address.
+ */
+ ip_address_t local_rloc;
+
+ /**
+ * The VNI. In network byte order!
+ */
+ u32 vni;
+} lisp_gpe_sub_interface_key_t;
+
+/**
+ * @brief A LISP L3 sub-interface
+ *
+ * A LISP sub-interface is a multi-access interface, whose local address is a
+ * single local-RLOC. Adjacencies that form on this sub-interface represent
+ * remote RLOCs.
+ * This is analogous to an ethernet interface.
+ * As with all interface types it can only be present in one VRF, hence a
+ * LISP sub-interface is per-local-rloc and per-VNI.
+ */
+typedef struct lisp_gpe_sub_interface_t_
+{
+ /**
+ * The interface's key in the DB; rloc & vni.
+ * The key is allocated from the heap so it can be used in the hash-table.
+ * If it were part of the object it would be subject to realloc (the
+ * object lives in a pool), which would not work for a key the hash-table
+ * refers to.
+ */
+ lisp_gpe_sub_interface_key_t *key;
+
+ /**
+ * The Table-ID in the overlay that this interface is bound to.
+ */
+ u32 eid_table_id;
+
+ /**
+ * A reference counting lock on the number of users of this interface.
+ * When this count drops to 0 the interface is deleted.
+ */
+ u32 locks;
+
+ /**
+ * The SW if index assigned to this sub-interface
+ */
+ u32 sw_if_index;
+
+ /**
+ * The SW IF index assigned to the main interface of which this is a sub.
+ */
+ u32 main_sw_if_index;
+} lisp_gpe_sub_interface_t;
+
+extern index_t lisp_gpe_sub_interface_find_or_create_and_lock (const
+ ip_address_t *
+ lrloc,
+ u32
+ eid_table_id,
+ u32 vni);
+
+extern u8 *format_lisp_gpe_sub_interface (u8 * s, va_list ap);
+
+extern void lisp_gpe_sub_interface_unlock (index_t itf);
+
+extern const lisp_gpe_sub_interface_t *lisp_gpe_sub_interface_get (index_t
+ itf);
+
+/**
+ * A DB of all L3 sub-interfaces. The key is:{VNI,l-RLOC}
+ */
+extern uword *lisp_gpe_sub_interfaces_sw_if_index;
+
+/**
+ * @brief
+ * Get a VNET L3 interface matching the local-RLOC and VNI
+ * Called from the data-plane
+ *
+ * @param[in] addr The IPv6 local RLOC.
+ * @param[in] vni  The VNI. It is placed in the key unmodified, and the keys
+ *                 in the DB hold the VNI in network byte order.
+ *
+ * @return The SW interface index stored in the DB for the key, or
+ *         INDEX_INVALID if there is no match.
+ */
+always_inline u32
+lisp_gpe_sub_interface_find_ip6 (const ip6_address_t * addr, u32 vni)
+{
+  lisp_gpe_sub_interface_key_t key;
+  const uword *p;
+
+  /*
+   * The whole key structure takes part in the lookup, so clear it before
+   * setting the fields to keep any padding zero.
+   */
+  memset (&key, 0, sizeof (key));
+  key.local_rloc.ip.v6.as_u64[0] = addr->as_u64[0];
+  key.local_rloc.ip.v6.as_u64[1] = addr->as_u64[1];
+  key.local_rloc.version = IP6;
+  key.vni = vni;
+
+  /* pass the hash itself, not the address of the variable holding it */
+  p = hash_get_mem (lisp_gpe_sub_interfaces_sw_if_index, &key);
+
+  if (NULL != p)
+    return p[0];
+
+  return (INDEX_INVALID);
+}
+
+/**
+ * @brief
+ * Get a VNET L3 interface matching the local-RLOC and VNI
+ * Called from the data-plane
+ *
+ * @param[in] addr The IPv4 local RLOC.
+ * @param[in] vni  The VNI. It is placed in the key unmodified, and the keys
+ *                 in the DB hold the VNI in network byte order.
+ *
+ * @return The SW interface index stored in the DB for the key, or
+ *         INDEX_INVALID if there is no match.
+ */
+always_inline index_t
+lisp_gpe_sub_interface_find_ip4 (const ip4_address_t * addr, u32 vni)
+{
+  lisp_gpe_sub_interface_key_t key;
+  const uword *p;
+
+  /*
+   * The whole key structure takes part in the lookup. Only 4 bytes of the
+   * address union are written for IPv4, so clear the key first; otherwise
+   * the remaining address bytes and any padding are uninitialised.
+   */
+  memset (&key, 0, sizeof (key));
+  key.local_rloc.ip.v4.as_u32 = addr->as_u32;
+  key.local_rloc.version = IP4;
+  key.vni = vni;
+
+  /* pass the hash itself, not the address of the variable holding it */
+  p = hash_get_mem (lisp_gpe_sub_interfaces_sw_if_index, &key);
+
+  if (NULL != p)
+    return p[0];
+
+  return (INDEX_INVALID);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c
new file mode 100644
index 00000000000..0aecc0a1aa4
--- /dev/null
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels.
+ *
+ */
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
+
+#include <vnet/fib/fib_table.h>
+
+/**
+ * @brief Pool of all LISP tunnels
+ */
+static lisp_gpe_tunnel_2_t *lisp_gpe_tunnel_pool;
+
+/**
+ * @brief a DB of all tunnels
+ */
+static uword *lisp_gpe_tunnel_db;
+
+/**
+ * @brief Build the IP-UDP-GPE encap/rewrite header for a tunnel.
+ *
+ * The outer IP header is IPv4 or IPv6 according to the version of the
+ * tunnel's local RLOC.
+ *
+ * @param[in] lgt The tunnel; its key provides the local and remote RLOCs
+ * used as the outer source and destination addresses.
+ * @param[in] ladj The adjacency; provides the LISP header flags and the
+ * VNI (written to the header in network byte order).
+ * @param[in] payload_proto The value written to the LISP header's
+ * next_protocol field.
+ *
+ * @return A newly allocated vector holding the rewrite header.
+ */
+u8 *
+lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt,
+ const lisp_gpe_adjacency_t * ladj,
+ lisp_gpe_next_protocol_e payload_proto)
+{
+ lisp_gpe_header_t *lisp0;
+ u8 *rw = 0;
+ int len;
+
+ if (IP4 == ip_addr_version (&lgt->key->lcl))
+ {
+ ip4_udp_lisp_gpe_header_t *h0;
+ ip4_header_t *ip0;
+
+ len = sizeof (*h0);
+
+ vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip4_udp_lisp_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip4;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip_address_copy_addr (&ip0->src_address, &lgt->key->lcl);
+ ip_address_copy_addr (&ip0->dst_address, &lgt->key->rmt);
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4341);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe);
+
+ /* LISP-gpe header */
+ lisp0 = &h0->lisp;
+ }
+ else
+ {
+ ip6_udp_lisp_gpe_header_t *h0;
+ ip6_header_t *ip0;
+
+ len = sizeof (*h0);
+
+ vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip6_udp_lisp_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip6 header */
+ ip0 = &h0->ip6;
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ ip0->hop_limit = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip6 header length after-the-fact */
+ ip_address_copy_addr (&ip0->src_address, &lgt->key->lcl);
+ ip_address_copy_addr (&ip0->dst_address, &lgt->key->rmt);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4341);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe);
+
+ /* LISP-gpe header */
+ lisp0 = &h0->lisp;
+ }
+
+ /* only the flags and VNI come from the adjacency; ver_res and res are 0 */
+ lisp0->flags = ladj->flags;
+ lisp0->ver_res = 0;
+ lisp0->res = 0;
+ lisp0->next_protocol = payload_proto;
+ lisp0->iid = clib_host_to_net_u32 (ladj->vni);
+
+ return (rw);
+}
+
+/**
+ * @brief Look up a tunnel in the DB.
+ *
+ * @return The tunnel at the pool index stored for the key, or NULL if the
+ *         key is not in the DB.
+ */
+static lisp_gpe_tunnel_2_t *
+lisp_gpe_tunnel_db_find (const lisp_gpe_tunnel_key_t * key)
+{
+  uword *slot = hash_get_mem (lisp_gpe_tunnel_db, (void *) key);
+
+  if (NULL == slot)
+    return (NULL);
+
+  return (pool_elt_at_index (lisp_gpe_tunnel_pool, slot[0]));
+}
+
+/**
+ * @brief Get the (non-const) tunnel object at the given pool index.
+ */
+lisp_gpe_tunnel_2_t *
+lisp_gpe_tunnel_get_i (index_t lgti)
+{
+  lisp_gpe_tunnel_2_t *lgt;
+
+  lgt = pool_elt_at_index (lisp_gpe_tunnel_pool, lgti);
+
+  return (lgt);
+}
+
+/**
+ * @brief Find the tunnel for a locator-pair and RLOC FIB index, creating it
+ * if it is not in the DB, and take a lock on it.
+ *
+ * A new tunnel sources a FIB entry for the remote RLOC and is inserted in
+ * the tunnel DB.
+ *
+ * @param[in] pair           The local and remote locators.
+ * @param[in] rloc_fib_index The FIB index in which the remote RLOC is
+ *                           resolved.
+ *
+ * @return The pool index of the tunnel.
+ */
+index_t
+lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair,
+                                         u32 rloc_fib_index)
+{
+  lisp_gpe_tunnel_key_t key;
+  lisp_gpe_tunnel_2_t *lgt;
+  fib_prefix_t pfx;
+
+  /*
+   * The whole key structure takes part in the lookup, so clear it before
+   * setting the fields to keep any padding zero, in the same way the
+   * stored keys are built below.
+   */
+  memset (&key, 0, sizeof (key));
+  key.lcl = pair->lcl_loc;
+  key.rmt = pair->rmt_loc;
+  key.fib_index = rloc_fib_index;
+
+  lgt = lisp_gpe_tunnel_db_find (&key);
+
+  if (NULL == lgt)
+    {
+      pool_get (lisp_gpe_tunnel_pool, lgt);
+      memset (lgt, 0, sizeof (*lgt));
+
+      lgt->key = clib_mem_alloc (sizeof (*lgt->key));
+      memset (lgt->key, 0, sizeof (*lgt->key));
+
+      lgt->key->rmt = pair->rmt_loc;
+      lgt->key->lcl = pair->lcl_loc;
+      lgt->key->fib_index = rloc_fib_index;
+
+      /*
+       * source the FIB entry for the RLOC so we can track its forwarding
+       * chain
+       */
+      ip_address_to_fib_prefix (&lgt->key->rmt, &pfx);
+
+      lgt->fib_entry_index = fib_table_entry_special_add (rloc_fib_index,
+                                                          &pfx,
+                                                          FIB_SOURCE_RR,
+                                                          FIB_ENTRY_FLAG_NONE,
+                                                          ADJ_INDEX_INVALID);
+
+      /*
+       * lgt->key is itself the pointer to the heap allocated key; passing
+       * &lgt->key would store the address of the pointer field inside the
+       * pool element, and lookups by key content would never match.
+       */
+      hash_set_mem (lisp_gpe_tunnel_db, lgt->key,
+                    (lgt - lisp_gpe_tunnel_pool));
+    }
+
+  lgt->locks++;
+
+  return (lgt - lisp_gpe_tunnel_pool);
+}
+
+/**
+ * @brief Release one lock on a tunnel.
+ *
+ * When the lock count reaches 0 the tunnel is removed from the DB and its
+ * key and pool element are freed.
+ *
+ * NOTE(review): the FIB entry sourced when the tunnel was created is not
+ * removed here - confirm whether that is intended.
+ *
+ * @param[in] lgti The pool index of the tunnel.
+ */
+void
+lisp_gpe_tunnel_unlock (index_t lgti)
+{
+  lisp_gpe_tunnel_2_t *lgt;
+
+  lgt = lisp_gpe_tunnel_get_i (lgti);
+  lgt->locks--;
+
+  if (0 == lgt->locks)
+    {
+      /*
+       * Use the heap allocated key the pointer refers to, not the address
+       * of the pointer field (&lgt->key).
+       */
+      hash_unset_mem (lisp_gpe_tunnel_db, lgt->key);
+      clib_mem_free (lgt->key);
+      pool_put (lisp_gpe_tunnel_pool, lgt);
+    }
+}
+
+/**
+ * @brief Get a read-only view of the tunnel at the given pool index.
+ */
+const lisp_gpe_tunnel_2_t *
+lisp_gpe_tunnel_get (index_t lgti)
+{
+  const lisp_gpe_tunnel_2_t *lgt;
+
+  lgt = lisp_gpe_tunnel_get_i (lgti);
+
+  return (lgt);
+}
+
+/**
+ * Format LISP-GPE tunnel.
+ *
+ * The single variadic argument is a lisp_gpe_tunnel_2_t pointer. The output
+ * lists the tunnel's pool index, FIB index, lock count, locator-pair and
+ * the index of the RLOC FIB entry.
+ *
+ * @return The vector s with the formatted text appended.
+ */
+u8 *
+format_lisp_gpe_tunnel (u8 * s, va_list * args)
+{
+ lisp_gpe_tunnel_2_t *lgt = va_arg (*args, lisp_gpe_tunnel_2_t *);
+
+ s = format (s, "tunnel %d\n", lgt - lisp_gpe_tunnel_pool);
+ s = format (s, " fib-index: %d, locks:%d \n",
+ lgt->key->fib_index, lgt->locks);
+ /* the version is printed as a fixed string, it is not read from the tunnel */
+ s = format (s, " lisp ver 0\n");
+
+ s = format (s, " locator-pair:\n");
+ s = format (s, " local: %U remote: %U\n",
+ format_ip_address, &lgt->key->lcl,
+ format_ip_address, &lgt->key->rmt);
+ s = format (s, " RLOC FIB entry: %d\n", lgt->fib_entry_index);
+
+ return s;
+}
+
+/**
+ * CLI command to show LISP-GPE tunnels.
+ *
+ * With a numeric argument only the tunnel at that pool index is shown,
+ * otherwise all tunnels in the pool are shown.
+ *
+ * @return 0 always.
+ */
+static clib_error_t *
+show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+  lisp_gpe_tunnel_2_t *lgt;
+  index_t index;
+
+  if (pool_elts (lisp_gpe_tunnel_pool) == 0)
+    vlib_cli_output (vm, "No lisp-gpe tunnels configured...");
+
+  if (unformat (input, "%d", &index))
+    {
+      /*
+       * The index is user input; do not hand an unused or out-of-range
+       * index to the pool accessor.
+       */
+      if (pool_is_free_index (lisp_gpe_tunnel_pool, index))
+        {
+          vlib_cli_output (vm, "No lisp-gpe tunnel with index %d", index);
+          return 0;
+        }
+
+      lgt = lisp_gpe_tunnel_get_i (index);
+      vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt);
+    }
+  else
+    {
+      /* *INDENT-OFF* */
+      pool_foreach (lgt, lisp_gpe_tunnel_pool,
+      ({
+        vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt);
+      }));
+      /* *INDENT-ON* */
+    }
+
+  return 0;
+}
+
+/**
+ * Registration of the "show lisp gpe tunnel" CLI command, handled by
+ * show_lisp_gpe_tunnel_command_fn. The handler accepts an optional numeric
+ * tunnel index.
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
+{
+  .path = "show lisp gpe tunnel",
+  .short_help = "show lisp gpe tunnel [<index>]",
+  .function = show_lisp_gpe_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Module init: create the tunnel DB.
+ *
+ * The DB is keyed by lisp_gpe_tunnel_key_t, which is the type of the keys
+ * that are inserted and looked up, and holds uword sized values.
+ *
+ * @return NULL always.
+ */
+static clib_error_t *
+lisp_gpe_tunnel_module_init (vlib_main_t * vm)
+{
+  /* the key size must be that of the tunnel key, not the fwd-entry key */
+  lisp_gpe_tunnel_db = hash_create_mem (0,
+                                        sizeof (lisp_gpe_tunnel_key_t),
+                                        sizeof (uword));
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lisp_gpe_tunnel_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h
new file mode 100644
index 00000000000..d417fa991a8
--- /dev/null
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels.
+ *
+ */
+
+#ifndef LISP_GPE_TUNNEL_H__
+#define LISP_GPE_TUNNEL_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vnet/lisp-gpe/lisp_gpe_packet.h>
+
+/**
+ * Forward declaration
+ */
+struct lisp_gpe_adjacency_t_;
+
+/**
+ * A Key for a tunnel
+ */
+typedef struct lisp_gpe_tunnel_key_t_
+{
+ /** The remote RLOC */
+ ip_address_t rmt;
+ /** The local RLOC */
+ ip_address_t lcl;
+ /** The index of the FIB in which the remote RLOC is resolved */
+ u32 fib_index;
+} lisp_gpe_tunnel_key_t;
+
+/**
+ * @brief A LISP GPE Tunnel.
+ *
+ * A tunnel represents an association between a local and remote RLOC.
+ * As such it represents a unique LISP rewrite.
+ */
+typedef struct lisp_gpe_tunnel_2_t_
+{
+ /**
+ * RLOC pair and rloc fib_index. This is the tunnel's key.
+ */
+ lisp_gpe_tunnel_key_t *key;
+
+ /**
+ * number of reference counting locks
+ */
+ u32 locks;
+
+ /**
+ * the FIB entry through which the remote rloc is reachable
+ */
+ fib_node_index_t fib_entry_index;
+} lisp_gpe_tunnel_2_t;
+
+extern index_t lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t *
+ pair,
+ u32 rloc_fib_index);
+
+extern void lisp_gpe_tunnel_unlock (index_t lgti);
+
+extern const lisp_gpe_tunnel_2_t *lisp_gpe_tunnel_get (index_t lgti);
+
+extern u8 *lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt,
+ const struct lisp_gpe_adjacency_t_
+ *ladj,
+ lisp_gpe_next_protocol_e
+ payload_proto);
+extern u8 *format_lisp_gpe_tunnel (u8 * s, va_list * args);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/vnet/vnet/map/map.c b/vnet/vnet/map/map.c
index 5b5bae54720..74a99057c90 100644
--- a/vnet/vnet/map/map.c
+++ b/vnet/vnet/map/map.c
@@ -15,6 +15,11 @@
* limitations under the License.
*/
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/map/map_dpo.h>
+
#include "map.h"
#ifndef __SSE4_2__
@@ -159,15 +164,12 @@ map_create_domain (ip4_address_t * ip4_prefix,
u8 psid_offset,
u8 psid_length, u32 * map_domain_index, u16 mtu, u8 flags)
{
+ u8 suffix_len, suffix_shift;
map_main_t *mm = &map_main;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
+ dpo_id_t dpo_v4 = DPO_NULL;
+ dpo_id_t dpo_v6 = DPO_NULL;
+ fib_node_index_t fei;
map_domain_t *d;
- ip_adjacency_t adj;
- ip4_add_del_route_args_t args4;
- ip6_add_del_route_args_t args6;
- u8 suffix_len, suffix_shift;
- uword *p;
/* Sanity check on the src prefix length */
if (flags & MAP_DOMAIN_TRANSLATION)
@@ -236,73 +238,82 @@ map_create_domain (ip4_address_t * ip4_prefix,
d->psid_mask = (1 << d->psid_length) - 1;
d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length;
- /* Init IP adjacency */
- memset (&adj, 0, sizeof (adj));
- adj.explicit_fib_index = ~0;
- adj.lookup_next_index =
- (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T :
- IP_LOOKUP_NEXT_MAP;
- p = (uword *) & adj.rewrite_data[0];
- *p = (uword) (*map_domain_index);
+ /* MAP data-plane object */
+ if (d->flags & MAP_DOMAIN_TRANSLATION)
+ map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4);
+ else
+ map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4);
+
+ /* Create ip4 route */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = d->ip4_prefix_len,
+ .fp_addr = {
+ .ip4 = d->ip4_prefix,
+ }
+ ,
+ };
+ fib_table_entry_special_dpo_add (0, &pfx,
+ FIB_SOURCE_MAP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
+ dpo_reset (&dpo_v4);
- if (ip4_get_route (im4, 0, 0, (u8 *) ip4_prefix, ip4_prefix_len))
+ /*
+ * Multiple MAP domains may share same source IPv6 TEP.
+ * In this case the route will exist and be MAP sourced.
+ * Find the adj (if any) already contributed and modify it
+ */
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_src_len,
+ .fp_addr = {
+ .ip6 = d->ip6_src,
+ }
+ ,
+ };
+ fei = fib_table_lookup_exact_match (0, &pfx6);
+
+ if (FIB_NODE_INDEX_INVALID != fei)
{
- clib_warning ("IPv4 route already defined: %U/%d", format_ip4_address,
- ip4_prefix, ip4_prefix_len);
- pool_put (mm->domains, d);
- return -1;
- }
+ dpo_id_t dpo = DPO_NULL;
- /* Create ip4 adjacency */
- memset (&args4, 0, sizeof (args4));
- args4.table_index_or_table_id = 0;
- args4.flags = IP4_ROUTE_FLAG_ADD;
- args4.dst_address.as_u32 = ip4_prefix->as_u32;
- args4.dst_address_length = ip4_prefix_len;
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_MAP, &dpo))
+ {
+ /*
+ * modify the existing MAP to indicate it's shared
+ * skip to route add.
+ */
+ const dpo_id_t *md_dpo;
+ map_dpo_t *md;
- args4.adj_index = ~0;
- args4.add_adj = &adj;
- args4.n_add_adj = 1;
- ip4_add_del_route (im4, &args4);
+ ASSERT (DPO_LOAD_BALANCE == dpo.dpoi_type);
- /* Multiple MAP domains may share same source IPv6 TEP */
- u32 ai = ip6_get_route (im6, 0, 0, ip6_src, ip6_src_len);
- if (ai > 0)
- {
- ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
- ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai);
- if (adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP &&
- adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP_T)
- {
- clib_warning ("BR source address already assigned: %U",
- format_ip6_address, ip6_src);
- pool_put (mm->domains, d);
- return -1;
- }
- /* Shared source */
- p = (uword *) & adj6->rewrite_data[0];
- p[0] = ~0;
+ md_dpo = load_balance_get_bucket (dpo.dpoi_index, 0);
+ md = map_dpo_get (md_dpo->dpoi_index);
- /*
- * Add refcount, so we don't accidentially delete the route
- * underneath someone
- */
- p[1]++;
+ md->md_domain = ~0;
+ dpo_copy (&dpo_v6, md_dpo);
+ dpo_reset (&dpo);
+
+ goto route_add;
+ }
}
+
+ if (d->flags & MAP_DOMAIN_TRANSLATION)
+ map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6);
else
- {
- /* Create ip6 adjacency. */
- memset (&args6, 0, sizeof (args6));
- args6.table_index_or_table_id = 0;
- args6.flags = IP6_ROUTE_FLAG_ADD;
- args6.dst_address.as_u64[0] = ip6_src->as_u64[0];
- args6.dst_address.as_u64[1] = ip6_src->as_u64[1];
- args6.dst_address_length = ip6_src_len;
- args6.adj_index = ~0;
- args6.add_adj = &adj;
- args6.n_add_adj = 1;
- ip6_add_del_route (im6, &args6);
- }
+ map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6);
+
+route_add:
+ /*
+ * Create ip6 route. This is a reference counted add. If the prefix
+ * already exists and is MAP sourced, it is now MAP source n+1 times
+ * and will need to be removed n+1 times.
+ */
+ fib_table_entry_special_dpo_add (0, &pfx6,
+ FIB_SOURCE_MAP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6);
+ dpo_reset (&dpo_v6);
/* Validate packet/byte counters */
map_domain_counter_lock (mm);
@@ -332,12 +343,7 @@ int
map_delete_domain (u32 map_domain_index)
{
map_main_t *mm = &map_main;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
map_domain_t *d;
- ip_adjacency_t adj;
- ip4_add_del_route_args_t args4;
- ip6_add_del_route_args_t args6;
if (pool_is_free_index (mm->domains, map_domain_index))
{
@@ -348,47 +354,26 @@ map_delete_domain (u32 map_domain_index)
d = pool_elt_at_index (mm->domains, map_domain_index);
- memset (&adj, 0, sizeof (adj));
- adj.explicit_fib_index = ~0;
- adj.lookup_next_index =
- (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T :
- IP_LOOKUP_NEXT_MAP;
-
- /* Delete ip4 adjacency */
- memset (&args4, 0, sizeof (args4));
- args4.table_index_or_table_id = 0;
- args4.flags = IP4_ROUTE_FLAG_DEL;
- args4.dst_address.as_u32 = d->ip4_prefix.as_u32;
- args4.dst_address_length = d->ip4_prefix_len;
- args4.adj_index = 0;
- args4.add_adj = &adj;
- args4.n_add_adj = 0;
- ip4_add_del_route (im4, &args4);
-
- /* Delete ip6 adjacency */
- u32 ai = ip6_get_route (im6, 0, 0, &d->ip6_src, d->ip6_src_len);
- if (ai > 0)
- {
- ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
- ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai);
-
- uword *p = (uword *) & adj6->rewrite_data[0];
- /* Delete route when no other domains use this source */
- if (p[1] == 0)
- {
- memset (&args6, 0, sizeof (args6));
- args6.table_index_or_table_id = 0;
- args6.flags = IP6_ROUTE_FLAG_DEL;
- args6.dst_address.as_u64[0] = d->ip6_src.as_u64[0];
- args6.dst_address.as_u64[1] = d->ip6_src.as_u64[1];
- args6.dst_address_length = d->ip6_src_len;
- args6.adj_index = 0;
- args6.add_adj = &adj;
- args6.n_add_adj = 0;
- ip6_add_del_route (im6, &args6);
- }
- p[1]--;
- }
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = d->ip4_prefix_len,
+ .fp_addr = {
+ .ip4 = d->ip4_prefix,
+ }
+ ,
+ };
+ fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP);
+
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_src_len,
+ .fp_addr = {
+ .ip6 = d->ip6_src,
+ }
+ ,
+ };
+ fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP);
+
/* Deleting rules */
if (d->rules)
clib_mem_free (d->rules);
@@ -448,17 +433,18 @@ static void
map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6)
{
map_main_t *mm = &map_main;
- ip4_main_t *im4 = &ip4_main;
ip6_main_t *im6 = &ip6_main;
if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)
{
- mm->adj6_index = ip6_fib_lookup_with_table (im6, 0, ip6);
+ // FIXME NOT an ADJ
+ mm->adj6_index = ip6_fib_table_fwding_lookup (im6, 0, ip6);
clib_warning ("FIB lookup results in: %u", mm->adj6_index);
}
if (ip4->as_u32 != 0)
{
- mm->adj4_index = ip4_fib_lookup_with_table (im4, 0, ip4, 0);
+ // FIXME NOT an ADJ
+ mm->adj4_index = ip4_fib_table_lookup_lb (0, ip4);
clib_warning ("FIB lookup results in: %u", mm->adj4_index);
}
}
@@ -2156,6 +2142,8 @@ map_init (vlib_main_t * vm)
mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
map_ip6_reass_reinit (NULL, NULL);
+ map_dpo_module_init ();
+
return 0;
}
diff --git a/vnet/vnet/map/map.h b/vnet/vnet/map/map.h
index fb532291f8a..b76891b69b3 100644
--- a/vnet/vnet/map/map.h
+++ b/vnet/vnet/map/map.h
@@ -17,6 +17,11 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/map/map_dpo.h>
+#include <vnet/dpo/load_balance.h>
#define MAP_SKIP_IP6_LOOKUP 1
@@ -105,6 +110,9 @@ typedef struct
u8 ip4_prefix_len;
} map_domain_t;
+_Static_assert ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES),
+ "MAP domain fits in one cacheline");
+
#define MAP_REASS_INDEX_NONE ((u16)0xffff)
/*
@@ -381,16 +389,17 @@ map_get_ip4 (ip6_address_t *addr)
* Get the MAP domain from an IPv4 lookup adjacency.
*/
static_always_inline map_domain_t *
-ip4_map_get_domain (u32 adj_index, u32 *map_domain_index)
+ip4_map_get_domain (u32 mdi,
+ u32 *map_domain_index)
{
map_main_t *mm = &map_main;
- ip_lookup_main_t *lm = &ip4_main.lookup_main;
- ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index);
- ASSERT(adj);
- uword *p = (uword *)adj->rewrite_data;
- ASSERT(p);
- *map_domain_index = p[0];
- return pool_elt_at_index(mm->domains, p[0]);
+ map_dpo_t *md;
+
+ md = map_dpo_get(mdi);
+
+ ASSERT(md);
+ *map_domain_index = md->md_domain;
+ return pool_elt_at_index(mm->domains, *map_domain_index);
}
/*
@@ -399,36 +408,34 @@ ip4_map_get_domain (u32 adj_index, u32 *map_domain_index)
* The IPv4 address is used otherwise.
*/
static_always_inline map_domain_t *
-ip6_map_get_domain (u32 adj_index, ip4_address_t *addr,
+ip6_map_get_domain (u32 mdi, ip4_address_t *addr,
u32 *map_domain_index, u8 *error)
{
map_main_t *mm = &map_main;
- ip4_main_t *im4 = &ip4_main;
- ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ map_dpo_t *md;
/*
* Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA.
* (That's done implicitly when MAP domain is looked up in the IPv4 FIB)
*/
#ifdef MAP_NONSHARED_DOMAIN_ENABLED
- ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
- ip_adjacency_t *adj = ip_get_adjacency(lm6, adj_index);
- ASSERT(adj);
- uword *p = (uword *)adj->rewrite_data;
- ASSERT(p);
- *map_domain_index = p[0];
- if (p[0] != ~0)
- return pool_elt_at_index(mm->domains, p[0]);
-#endif
+ md = map_dpo_get(mdi);
- u32 ai = ip4_fib_lookup_with_table(im4, 0, addr, 0);
- ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai);
- if (PREDICT_TRUE(adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP ||
- adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP_T)) {
- uword *p = (uword *)adj4->rewrite_data;
- *map_domain_index = p[0];
+ ASSERT(md);
+ *map_domain_index = md->md_domain;
+ if (*map_domain_index != ~0)
return pool_elt_at_index(mm->domains, *map_domain_index);
- }
+#endif
+
+ u32 lbi = ip4_fib_forwarding_lookup(0, addr);
+ const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0);
+ if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type ||
+ dpo->dpoi_type == map_t_dpo_type))
+ {
+ md = map_dpo_get(dpo->dpoi_index);
+ *map_domain_index = md->md_domain;
+ return pool_elt_at_index(mm->domains, *map_domain_index);
+ }
*error = MAP_ERROR_NO_DOMAIN;
return NULL;
}
diff --git a/vnet/vnet/map/map_dpo.c b/vnet/vnet/map/map_dpo.c
new file mode 100644
index 00000000000..df2b5fa4197
--- /dev/null
+++ b/vnet/vnet/map/map_dpo.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/map/map_dpo.h>
+
+/**
+ * pool of all MAP DPOs
+ */
+map_dpo_t *map_dpo_pool;
+
+/**
+ * The registered MAP and MAP-T DPO types
+ */
+dpo_type_t map_dpo_type;
+dpo_type_t map_t_dpo_type;
+
+static map_dpo_t *
+map_dpo_alloc (void)
+{
+ map_dpo_t *md;
+
+ pool_get_aligned(map_dpo_pool, md, CLIB_CACHE_LINE_BYTES);
+ memset(md, 0, sizeof(*md));
+
+ return (md);
+}
+
+static index_t
+map_dpo_get_index (map_dpo_t *md)
+{
+ return (md - map_dpo_pool);
+}
+
+void
+map_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_alloc();
+ md->md_domain = domain_index;
+ md->md_proto = dproto;
+
+ dpo_set(dpo,
+ map_dpo_type,
+ dproto,
+ map_dpo_get_index(md));
+}
+
+void
+map_t_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_alloc();
+ md->md_domain = domain_index;
+ md->md_proto = dproto;
+
+ dpo_set(dpo,
+ map_t_dpo_type,
+ dproto,
+ map_dpo_get_index(md));
+}
+
+
+u8*
+format_map_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ map_dpo_t *md;
+
+ md = map_dpo_get(index);
+
+ return (format(s, "map:[%d]:%U domain:%d",
+ index,
+ format_dpo_proto, md->md_proto,
+ md->md_domain));
+}
+
+u8*
+format_map_t_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ map_dpo_t *md;
+
+ md = map_dpo_get(index);
+
+ return (format(s, "map-t:[%d]:%U domain:%d",
+ index,
+ format_dpo_proto, md->md_proto,
+ md->md_domain));
+}
+
+
+static void
+map_dpo_lock (dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_get(dpo->dpoi_index);
+
+ md->md_locks++;
+}
+
+static void
+map_dpo_unlock (dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_get(dpo->dpoi_index);
+
+ md->md_locks--;
+
+ if (0 == md->md_locks)
+ {
+ pool_put(map_dpo_pool, md);
+ }
+}
+
+const static dpo_vft_t md_vft = {
+ .dv_lock = map_dpo_lock,
+ .dv_unlock = map_dpo_unlock,
+ .dv_format = format_map_dpo,
+};
+
+const static char* const map_ip4_nodes[] =
+{
+ "ip4-map",
+ NULL,
+};
+const static char* const map_ip6_nodes[] =
+{
+ "ip6-map",
+ NULL,
+};
+
+const static char* const * const map_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = map_ip4_nodes,
+ [DPO_PROTO_IP6] = map_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+const static dpo_vft_t md_t_vft = {
+ .dv_lock = map_dpo_lock,
+ .dv_unlock = map_dpo_unlock,
+ .dv_format = format_map_t_dpo,
+};
+
+const static char* const map_t_ip4_nodes[] =
+{
+ "ip4-map-t",
+ NULL,
+};
+const static char* const map_t_ip6_nodes[] =
+{
+ "ip6-map-t",
+ NULL,
+};
+
+const static char* const * const map_t_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = map_t_ip4_nodes,
+ [DPO_PROTO_IP6] = map_t_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+map_dpo_module_init (void)
+{
+ map_dpo_type = dpo_register_new_type(&md_vft, map_nodes);
+ map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes);
+}
diff --git a/vnet/vnet/map/map_dpo.h b/vnet/vnet/map/map_dpo.h
new file mode 100644
index 00000000000..be510dbaea6
--- /dev/null
+++ b/vnet/vnet/map/map_dpo.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MAP_DPO_H__
+#define __MAP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of a MAP DPO
+ */
+typedef struct map_dpo_t
+{
+ /**
+ * The data-plane protocol
+ */
+ dpo_proto_t md_proto;
+
+ /**
+ * the MAP domain index
+ */
+ u32 md_domain;
+
+ /**
+ * Number of locks/users of the MAP DPO
+ */
+ u16 md_locks;
+} map_dpo_t;
+
+extern void map_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
+extern void map_t_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
+
+extern u8* format_map_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern map_dpo_t *map_dpo_pool;
+extern dpo_type_t map_dpo_type;
+extern dpo_type_t map_t_dpo_type;
+
+static inline map_dpo_t *
+map_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(map_dpo_pool, index));
+}
+
+extern void map_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/mcast/mcast_test.c b/vnet/vnet/mcast/mcast_test.c
index 4561d7cdc00..be80c9fc982 100644
--- a/vnet/vnet/mcast/mcast_test.c
+++ b/vnet/vnet/mcast/mcast_test.c
@@ -40,91 +40,91 @@ mcast_test_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- u8 *rewrite_data;
- mcast_test_main_t * mtm = &mcast_test_main;
- mcast_main_t * mcm = mtm->mcast_main;
- ip_adjacency_t adj;
- u32 adj_index;
- mcast_group_t * g;
- mcast_group_member_t * member;
- unformat_input_t _line_input, * line_input = &_line_input;
- ip4_address_t dst_addr, zero;
- ip4_main_t * im = &ip4_main;
- ip_lookup_main_t * lm = &im->lookup_main;
-
- /* Get a line of input. */
- if (! unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- pool_get (mcm->groups, g);
- memset (g, 0, sizeof (*g));
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- vnet_hw_interface_t *hw;
- u32 next, sw_if_index;
-
- if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- mtm->vnet_main, &sw_if_index))
- {
- vec_add2 (g->members, member, 1);
- member->tx_sw_if_index = sw_if_index;
+ /* u8 *rewrite_data; */
+ /* mcast_test_main_t * mtm = &mcast_test_main; */
+ /* mcast_main_t * mcm = mtm->mcast_main; */
+ /* ip_adjacency_t adj; */
+ /* u32 adj_index; */
+ /* mcast_group_t * g; */
+ /* mcast_group_member_t * member; */
+ /* unformat_input_t _line_input, * line_input = &_line_input; */
+ /* ip4_address_t dst_addr, zero; */
+ /* ip4_main_t * im = &ip4_main; */
+ /* ip_lookup_main_t * lm = &im->lookup_main; */
+
+ /* /\* Get a line of input. *\/ */
+ /* if (! unformat_user (input, unformat_line_input, line_input)) */
+ /* return 0; */
+
+ /* pool_get (mcm->groups, g); */
+ /* memset (g, 0, sizeof (*g)); */
+
+ /* while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) */
+ /* { */
+ /* vnet_hw_interface_t *hw; */
+ /* u32 next, sw_if_index; */
+
+ /* if (unformat (line_input, "%U", unformat_vnet_sw_interface, */
+ /* mtm->vnet_main, &sw_if_index)) */
+ /* { */
+ /* vec_add2 (g->members, member, 1); */
+ /* member->tx_sw_if_index = sw_if_index; */
- hw = vnet_get_sup_hw_interface (mtm->vnet_main,
- sw_if_index);
+ /* hw = vnet_get_sup_hw_interface (mtm->vnet_main, */
+ /* sw_if_index); */
- next = vlib_node_add_next (mtm->vlib_main,
- mcast_prep_node.index,
- hw->output_node_index);
+ /* next = vlib_node_add_next (mtm->vlib_main, */
+ /* mcast_prep_node.index, */
+ /* hw->output_node_index); */
- /* Required to be the same next index... */
- vlib_node_add_next_with_slot (mtm->vlib_main,
- mcast_recycle_node.index,
- hw->output_node_index, next);
- member->prep_and_recycle_node_next_index = next;
- }
- else
- {
- return unformat_parse_error (line_input);
- }
- }
-
- if (vec_len (g->members) == 0)
- {
- pool_put (mcm->groups, g);
- vlib_cli_output (vm, "no group members specified");
- return 0;
- }
-
-
- adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
- adj.mcast_group_index = g - mcm->groups;
- rewrite_data = format (0, "abcdefg");
-
- vnet_rewrite_for_tunnel
- (mtm->vnet_main,
- (u32)~0, /* tx_sw_if_index, we dont know yet */
- ip4_rewrite_node.index,
- mcast_prep_node.index,
- &adj.rewrite_header,
- rewrite_data, vec_len(rewrite_data));
-
- ip_add_adjacency (lm, &adj, 1 /* one adj */,
- &adj_index);
+ /* /\* Required to be the same next index... *\/ */
+ /* vlib_node_add_next_with_slot (mtm->vlib_main, */
+ /* mcast_recycle_node.index, */
+ /* hw->output_node_index, next); */
+ /* member->prep_and_recycle_node_next_index = next; */
+ /* } */
+ /* else */
+ /* { */
+ /* return unformat_parse_error (line_input); */
+ /* } */
+ /* } */
+
+ /* if (vec_len (g->members) == 0) */
+ /* { */
+ /* pool_put (mcm->groups, g); */
+ /* vlib_cli_output (vm, "no group members specified"); */
+ /* return 0; */
+ /* } */
+
+
+ /* adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; */
+ /* adj.mcast_group_index = g - mcm->groups; */
+ /* rewrite_data = format (0, "abcdefg"); */
+
+ /* vnet_rewrite_for_tunnel */
+ /* (mtm->vnet_main, */
+ /* (u32)~0, /\* tx_sw_if_index, we dont know yet *\/ */
+ /* ip4_rewrite_node.index, */
+ /* mcast_prep_node.index, */
+ /* &adj.rewrite_header, */
+ /* rewrite_data, vec_len(rewrite_data)); */
+
+ /* ip_add_adjacency (lm, &adj, 1 /\* one adj *\/, */
+ /* &adj_index); */
- dst_addr.as_u32 = clib_host_to_net_u32 (0x0a000002);
- zero.as_u32 = 0;
-
- ip4_add_del_route_next_hop (im,
- IP4_ROUTE_FLAG_ADD,
- &dst_addr,
- 24 /* mask width */,
- &zero /* no next hop */,
+ /* dst_addr.as_u32 = clib_host_to_net_u32 (0x0a000002); */
+ /* zero.as_u32 = 0; */
+
+ /* ip4_add_del_route_next_hop (im, */
+ /* IP4_ROUTE_FLAG_ADD, */
+ /* &dst_addr, */
+ /* 24 /\* mask width *\/, */
+ /* &zero /\* no next hop *\/, */
- 0, // next hop sw if index
- 1, // weight
- adj_index,
- 0 /* explicit fib 0 */);
+ /* 0, // next hop sw if index */
+ /* 1, // weight */
+ /* adj_index, */
+ /* 0 /\* explicit fib 0 *\/); */
return 0;
}
diff --git a/vnet/vnet/misc.c b/vnet/vnet/misc.c
index c0729f73c16..4c8c4cad5a7 100644
--- a/vnet/vnet/misc.c
+++ b/vnet/vnet/misc.c
@@ -38,6 +38,7 @@
*/
#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
vnet_main_t vnet_main;
@@ -79,6 +80,9 @@ vnet_main_init (vlib_main_t * vm)
if ((error = vlib_call_init_function (vm, vnet_interface_init)))
return error;
+ if ((error = vlib_call_init_function (vm, fib_module_init)))
+ return error;
+
if ((error = vlib_call_init_function (vm, ip_main_init)))
return error;
@@ -88,6 +92,9 @@ vnet_main_init (vlib_main_t * vm)
if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
return error;
+ if ((error = vlib_call_init_function (vm, mpls_init)))
+ return error;
+
vnm->vlib_main = vm;
hw_if_index = vnet_register_interface
@@ -98,6 +105,11 @@ vnet_main_init (vlib_main_t * vm)
vnm->local_interface_hw_if_index = hw_if_index;
vnm->local_interface_sw_if_index = hw->sw_if_index;
+ /* the local interface is used as an input interface when decapping from
+ * an IPSEC tunnel. so it needs to be IP enabled */
+ ip4_sw_interface_enable_disable (hw->sw_if_index, 1);
+ ip6_sw_interface_enable_disable (hw->sw_if_index, 1);
+
return 0;
}
diff --git a/vnet/vnet/mpls-gre/node.c b/vnet/vnet/mpls-gre/node.c
deleted file mode 100644
index 474e2e2a9a4..00000000000
--- a/vnet/vnet/mpls-gre/node.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * node.c: mpls-o-gre decap processing
- *
- * Copyright (c) 2012-2014 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/pg/pg.h>
-#include <vnet/mpls-gre/mpls.h>
-
-typedef struct {
- u32 next_index;
- u32 decap_index;
- u32 tx_fib_index;
- u32 label_host_byte_order;
-} mpls_rx_trace_t;
-
-u8 * format_mpls_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- mpls_rx_trace_t * t = va_arg (*args, mpls_rx_trace_t *);
- char * next_name;
-
- next_name = "BUG!";
-
-#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
- foreach_mpls_input_next;
-#undef _
-
- s = format (s, "MPLS: next %s, lookup fib index %d, decap index %d\n",
- next_name, t->next_index, t->tx_fib_index, t->decap_index);
- if (t->decap_index != ~0)
- {
- s = format (s, " label %d",
- vnet_mpls_uc_get_label(t->label_host_byte_order));
- }
- return s;
-}
-
-vlib_node_registration_t mpls_input_node;
-
-typedef struct {
- u32 last_label;
- u32 last_inner_fib_index;
- u32 last_outer_fib_index;
- mpls_main_t * mpls_main;
-} mpls_input_runtime_t;
-
-static inline uword
-mpls_input_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, int is_mpls_o_gre)
-{
- u32 n_left_from, next_index, * from, * to_next;
- ip4_main_t * im = &ip4_main;
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
- mpls_input_runtime_t * rt;
- mpls_main_t * mm;
-
- rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
- mm = rt->mpls_main;
- /*
- * Force an initial lookup every time, in case the control-plane
- * changed the label->FIB mapping.
- */
- rt->last_label = ~0;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index,
- to_next, n_left_to_next);
-
-#if 0
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t * b0, * b1;
- mpls_unicast_header_t * h0, * h1;
- int li0, li1;
- u64 key0, key1;
- u32 label0, label1;
- u32 next0, next1;
- uword * p0, * p1;
- u32 fib_index0, fib_index1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- /* $$$$$ dual loop me */
-
- vlib_buffer_advance (b0, sizeof (*h0));
- vlib_buffer_advance (b1, sizeof (*h1));
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
-#endif
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t * b0;
- mpls_unicast_header_t * h0;
- u32 label0;
- u32 next0;
- u64 key0;
- uword * p0;
- u32 rx_fib_index0;
- mpls_decap_t *d0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- h0 = vlib_buffer_get_current (b0);
-
- if (is_mpls_o_gre)
- {
- rx_fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer(b0)->sw_if_index[VLIB_RX]);
- }
- else
- {
-#if 0
- /* If separate RX numbering spaces are required... */
- rx_fib_index0 = vec_elt (mm->fib_index_by_sw_if_index,
- vnet_buffer(b0)->sw_if_index[VLIB_RX]);
-#endif
- rx_fib_index0 = 0;
- }
-
- next0 = ~0;
- d0 = 0;
-
- /*
- * Expect the control-plane team to squeal like pigs.
- * If they don't program a decap label entry for each
- * and every label in the stack, packets go into the trash...
- */
-
- do
- {
- label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
- /* TTL expired? */
- if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
- {
- next0 = MPLS_INPUT_NEXT_DROP;
- b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
- break;
- }
-
- key0 = ((u64)rx_fib_index0<<32)
- | ((u64)vnet_mpls_uc_get_label (label0)<<12)
- | ((u64)vnet_mpls_uc_get_s (label0)<<8);
-
- /*
- * The architecture crew claims that we won't need
- * separate ip4, ip6, mpls-o-ethernet label numbering
- * spaces. Use the low 8 key bits as a discriminator.
- */
-
- p0 = hash_get (mm->mpls_decap_by_rx_fib_and_label, key0);
- if (p0 == 0)
- {
- next0 = MPLS_INPUT_NEXT_DROP;
- b0->error = node->errors[MPLS_ERROR_BAD_LABEL];
- break;
- }
- d0 = pool_elt_at_index (mm->decaps, p0[0]);
- next0 = d0->next_index;
- vnet_buffer(b0)->sw_if_index[VLIB_TX] = d0->tx_fib_index;
- vlib_buffer_advance (b0, sizeof (*h0));
- h0 = vlib_buffer_get_current (b0);
- } while (!vnet_mpls_uc_get_s(label0));
-
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_rx_trace_t *tr = vlib_add_trace (vm, node,
- b0, sizeof (*tr));
- tr->next_index = next0;
- tr->decap_index = d0 ? d0 - mm->decaps : ~0;
- tr->tx_fib_index = vnet_buffer(b0)->sw_if_index[VLIB_TX];
- tr->label_host_byte_order = label0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- vlib_node_increment_counter (vm, mpls_input_node.index,
- MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
- return from_frame->n_vectors;
-}
-
-static uword
-mpls_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return mpls_input_inline (vm, node, from_frame, 1 /* is mpls-o-gre */);
-}
-
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
-VLIB_REGISTER_NODE (mpls_input_node) = {
- .function = mpls_input,
- .name = "mpls-gre-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
-
- .runtime_data_bytes = sizeof(mpls_input_runtime_t),
-
- .n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
-
- .n_next_nodes = MPLS_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
- foreach_mpls_input_next
-#undef _
- },
-
- .format_buffer = format_mpls_gre_header_with_length,
- .format_trace = format_mpls_rx_trace,
- .unformat_buffer = unformat_mpls_gre_header,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)
-
-static uword
-mpls_ethernet_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return mpls_input_inline (vm, node, from_frame, 0 /* is mpls-o-gre */);
-}
-
-
-VLIB_REGISTER_NODE (mpls_ethernet_input_node) = {
- .function = mpls_ethernet_input,
- .name = "mpls-ethernet-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
-
- .runtime_data_bytes = sizeof(mpls_input_runtime_t),
-
- .n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
-
- .n_next_nodes = MPLS_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
- foreach_mpls_input_next
-#undef _
- },
-
- .format_buffer = format_mpls_eth_header_with_length,
- .format_trace = format_mpls_rx_trace,
- .unformat_buffer = unformat_mpls_gre_header,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (mpls_ethernet_input_node, mpls_ethernet_input)
-
-static void
-mpls_setup_nodes (vlib_main_t * vm)
-{
- vlib_node_t * n = vlib_get_node (vm, mpls_input_node.index);
- pg_node_t * pn = pg_get_node (mpls_input_node.index);
- mpls_input_runtime_t * rt;
-
- n->format_buffer = format_mpls_gre_header_with_length;
- n->unformat_buffer = unformat_mpls_gre_header;
- pn->unformat_edit = unformat_pg_mpls_header;
-
- rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
- rt->last_label = (u32) ~0;
- rt->last_inner_fib_index = 0;
- rt->last_outer_fib_index = 0;
- rt->mpls_main = &mpls_main;
-
- n = vlib_get_node (vm, mpls_ethernet_input_node.index);
-
- n->format_buffer = format_mpls_eth_header_with_length;
-
- n->unformat_buffer = 0; /* unformat_mpls_ethernet_header; */
-
- rt = vlib_node_get_runtime_data (vm, mpls_ethernet_input_node.index);
- rt->last_label = (u32) ~0;
- rt->last_inner_fib_index = 0;
- rt->last_outer_fib_index = 0;
- rt->mpls_main = &mpls_main;
-
- ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST,
- mpls_ethernet_input_node.index);
-}
-
-static clib_error_t * mpls_input_init (vlib_main_t * vm)
-{
- clib_error_t * error;
-
- error = vlib_call_init_function (vm, mpls_init);
- if (error)
- clib_error_report (error);
-
- mpls_setup_nodes (vm);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (mpls_input_init);
diff --git a/vnet/vnet/mpls-gre/packet.h b/vnet/vnet/mpls-gre/packet.h
deleted file mode 100644
index baa01818f09..00000000000
--- a/vnet/vnet/mpls-gre/packet.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef included_vnet_mpls_packet_h
-#define included_vnet_mpls_packet_h
-
-/*
- * MPLS packet format
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-typedef struct {
- /* Label: top 20 bits [in network byte order] */
- /* Experimental: 3 bits ... */
- /* S (bottom of label stack): 1 bit */
- /* TTL: 8 bits */
- u32 label_exp_s_ttl;
-} mpls_unicast_header_t;
-
-static inline u32 vnet_mpls_uc_get_label (u32 label_exp_s_ttl)
-{
- return (label_exp_s_ttl>>12);
-}
-
-static inline u32 vnet_mpls_uc_get_exp (u32 label_exp_s_ttl)
-{
- return ((label_exp_s_ttl>>9) & 0x7);
-}
-
-static inline u32 vnet_mpls_uc_get_s (u32 label_exp_s_ttl)
-{
- return ((label_exp_s_ttl>>8) & 0x1);
-}
-
-static inline u32 vnet_mpls_uc_get_ttl (u32 label_exp_s_ttl)
-{
- return (label_exp_s_ttl & 0xff);
-}
-
-#endif /* included_vnet_mpls_packet_h */
diff --git a/vnet/vnet/mpls-gre/error.def b/vnet/vnet/mpls/error.def
index 424ab50a030..de8b9665dfb 100644
--- a/vnet/vnet/mpls-gre/error.def
+++ b/vnet/vnet/mpls/error.def
@@ -26,3 +26,6 @@ mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set")
mpls_error (BAD_LABEL, "invalid FIB id in label")
mpls_error (NOT_IP4, "non-ip4 packets dropped")
mpls_error (DISALLOWED_FIB, "disallowed FIB id")
+mpls_error (NOT_ENABLED, "MPLS not enabled")
+mpls_error (DROP, "MPLS DROP DPO")
+mpls_error (PUNT, "MPLS PUNT DPO")
diff --git a/vnet/vnet/mpls-gre/interface.c b/vnet/vnet/mpls/interface.c
index dd61a803f4c..9ef4c293494 100644
--- a/vnet/vnet/mpls-gre/interface.c
+++ b/vnet/vnet/mpls/interface.c
@@ -18,7 +18,10 @@
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vnet/gre/gre.h>
-#include <vnet/mpls-gre/mpls.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/dpo/classify_dpo.h>
static uword mpls_gre_set_rewrite (vnet_main_t * vnm,
u32 sw_if_index,
@@ -525,24 +528,23 @@ VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = {
.set_rewrite = mpls_eth_set_rewrite,
};
-#define foreach_mpls_post_rewrite_next \
- _ (IP4_LOOKUP, "ip4-lookup")
-
-typedef enum {
-#define _(s,n) MPLS_POST_REWRITE_NEXT_##s,
- foreach_mpls_post_rewrite_next
-#undef _
- MPLS_POST_REWRITE_N_NEXT,
-} mpls_post_rewrite_next_t;
-
+/**
+ * A conversion of DPO next object types to VLIB graph next nodes from
+ * the mpls_post_rewrite node
+ */
+static const int dpo_next_2_mpls_post_rewrite[DPO_LAST] = {
+ [DPO_LOAD_BALANCE] = IP_LOOKUP_NEXT_LOAD_BALANCE,
+};
static uword
mpls_post_rewrite (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
+ ip4_main_t * im = &ip4_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
u32 n_left_from, next_index, * from, * to_next;
- u16 old_l0 = 0, old_l1 = 0;
+ u16 old_l0 = 0; //, old_l1 = 0;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -556,78 +558,103 @@ mpls_post_rewrite (vlib_main_t * vm,
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t * b0, * b1;
- ip4_header_t * ip0, * ip1;
- u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP;
- u32 next1 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP;
- u16 new_l0, new_l1;
- ip_csum_t sum0, sum1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* { */
+ /* u32 bi0, bi1; */
+ /* vlib_buffer_t * b0, * b1; */
+ /* ip4_header_t * ip0, * ip1; */
+ /* u32 next0; */
+ /* u32 next1; */
+ /* u16 new_l0, new_l1, adj_index0, adj_index1; */
+ /* ip_csum_t sum0, sum1; */
+ /* ip_adjacency_t *adj0, *adj1; */
+
+ /* /\* Prefetch next iteration. *\/ */
+ /* { */
+ /* vlib_buffer_t * p2, * p3; */
+
+ /* p2 = vlib_get_buffer (vm, from[2]); */
+ /* p3 = vlib_get_buffer (vm, from[3]); */
+
+ /* vlib_prefetch_buffer_header (p2, LOAD); */
+ /* vlib_prefetch_buffer_header (p3, LOAD); */
+
+ /* CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */
+ /* CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */
+ /* } */
+
+ /* bi0 = from[0]; */
+ /* bi1 = from[1]; */
+ /* to_next[0] = bi0; */
+ /* to_next[1] = bi1; */
+ /* from += 2; */
+ /* to_next += 2; */
+ /* n_left_to_next -= 2; */
+ /* n_left_from -= 2; */
+
+
+ /* b0 = vlib_get_buffer (vm, bi0); */
+ /* b1 = vlib_get_buffer (vm, bi1); */
+ /* ip0 = vlib_buffer_get_current (b0); */
+ /* ip1 = vlib_buffer_get_current (b1); */
+
+ /* /\* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] *\/ */
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ /* /\* set the GRE (outer) ip packet length, fix the bloody checksum *\/ */
+ /* sum0 = ip0->checksum; */
+ /* sum1 = ip1->checksum; */
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ /* /\* old_l0, old_l1 always 0, see the rewrite setup *\/ */
+ /* new_l0 = */
+ /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); */
+ /* new_l1 = */
+ /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); */
+
+ /* sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, */
+ /* length /\* changed member *\/); */
+ /* sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, */
+ /* length /\* changed member *\/); */
+ /* ip0->checksum = ip_csum_fold (sum0); */
+ /* ip1->checksum = ip_csum_fold (sum1); */
+ /* ip0->length = new_l0; */
+ /* ip1->length = new_l1; */
- CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- }
+ /* /\* replace the TX adj in the packet with the next in the chain *\/ */
+ /* adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; */
+ /* adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; */
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
+ /* adj0 = ip_get_adjacency (lm, adj_index0); */
+ /* adj1 = ip_get_adjacency (lm, adj_index1); */
+ /* ASSERT(adj0->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */
+ /* ASSERT(adj1->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- ip0 = vlib_buffer_get_current (b0);
- ip1 = vlib_buffer_get_current (b1);
-
- /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */
+ /* adj_index0 = adj0->sub_type.midchain.adj_index; */
+ /* adj_index1 = adj1->sub_type.midchain.adj_index; */
- /* set the GRE (outer) ip packet length, fix the bloody checksum */
- sum0 = ip0->checksum;
- sum1 = ip1->checksum;
+ /* vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; */
+ /* vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1; */
- /* old_l0, old_l1 always 0, see the rewrite setup */
- new_l0 =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- new_l1 =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
-
- sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
- length /* changed member */);
- sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
- length /* changed member */);
- ip0->checksum = ip_csum_fold (sum0);
- ip1->checksum = ip_csum_fold (sum1);
- ip0->length = new_l0;
- ip1->length = new_l1;
+ /* /\* get the next adj in the chain to determine the next graph node *\/ */
+ /* adj0 = ip_get_adjacency (lm, adj_index0); */
+ /* adj1 = ip_get_adjacency (lm, adj_index1); */
+
+ /* next0 = adj0->lookup_next_index; */
+ /* next1 = adj1->lookup_next_index; */
+
+ /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */
+ /* to_next, n_left_to_next, */
+ /* bi0, bi1, next0, next1); */
+ /* } */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
while (n_left_from > 0 && n_left_to_next > 0)
{
+ ip_adjacency_t * adj0;
u32 bi0;
vlib_buffer_t * b0;
ip4_header_t * ip0;
- u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP;
- u16 new_l0;
+ u32 next0;
+ u16 new_l0, adj_index0;
ip_csum_t sum0;
bi0 = from[0];
@@ -653,6 +680,20 @@ mpls_post_rewrite (vlib_main_t * vm,
ip0->checksum = ip_csum_fold (sum0);
ip0->length = new_l0;
+ /* replace the TX adj in the packet with the next in the chain */
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+
+ ASSERT(adj_index0);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ ASSERT(adj0->sub_type.midchain.next_dpo.dpoi_index != ADJ_INDEX_INVALID);
+ adj_index0 = adj0->sub_type.midchain.next_dpo.dpoi_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+ /* get the next adj in the chain to determine the next graph node */
+ ASSERT(0);
+ next0 = 0; //adj0->sub_type.midchain.next_dpo.dpoi_next;
+
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
@@ -673,12 +714,8 @@ VLIB_REGISTER_NODE (mpls_post_rewrite_node) = {
.runtime_data_bytes = 0,
- .n_next_nodes = MPLS_POST_REWRITE_N_NEXT,
- .next_nodes = {
-#define _(s,n) [MPLS_POST_REWRITE_NEXT_##s] = n,
- foreach_mpls_post_rewrite_next
-#undef _
- },
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = IP4_LOOKUP_NEXT_NODES,
};
VLIB_NODE_FUNCTION_MULTIARCH (mpls_post_rewrite_node, mpls_post_rewrite)
@@ -725,237 +762,512 @@ static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t)
return (rewrite_data);
}
-int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src,
- ip4_address_t *dst,
- ip4_address_t *intfc,
- u32 mask_width,
- u32 inner_fib_id, u32 outer_fib_id,
- u32 * tunnel_sw_if_index,
- u8 l2_only,
- u8 is_add)
+/**
+ * Return the MPLS enable count recorded for sw_if_index (non-zero means
+ * enabled), or 0 when the interface has no slot in the per-interface vector.
+ */
+u8
+mpls_sw_interface_is_enabled (u32 sw_if_index)
{
- ip4_main_t * im = &ip4_main;
- ip_lookup_main_t * lm = &im->lookup_main;
- mpls_main_t * mm = &mpls_main;
- vnet_main_t * vnm = vnet_get_main();
- ip4_address_t zero;
- mpls_gre_tunnel_t *tp;
- int need_route_add_del = 1;
- u32 inner_fib_index = 0;
- u32 outer_fib_index = 0;
- ip_adjacency_t adj;
- u32 adj_index;
- u8 * rewrite_data;
- int found_tunnel = 0;
- mpls_encap_t * e = 0;
- u32 hw_if_index = ~0;
- vnet_hw_interface_t * hi;
- u32 slot;
- u32 dummy;
-
- zero.as_u32 = 0;
-
- /* No questions, no answers */
- if (tunnel_sw_if_index == 0)
- tunnel_sw_if_index = &dummy;
+ mpls_main_t * mm = &mpls_main;
- *tunnel_sw_if_index = ~0;
+ /* valid indices are 0 .. vec_len-1, so an index equal to the length is out of range too */
+ if (vec_len(mm->mpls_enabled_by_sw_if_index) <= sw_if_index)
+ return (0);
- if (inner_fib_id != (u32)~0)
+ return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
+}
+
+/**
+ * Reference-counted enable/disable of MPLS on an interface.
+ *
+ * Each enable increments, and each disable decrements, the per-interface
+ * counter. Only the 0->1 and 1->0 transitions do any work: lock/unlock the
+ * MPLS FIB, add/remove the RX lookup feature and notify the registered
+ * state-change callbacks. All other calls return early.
+ */
+void
+mpls_sw_interface_enable_disable (mpls_main_t * mm,
+ u32 sw_if_index,
+ u8 is_enable)
+{
+ mpls_interface_state_change_callback_t *callback;
+ vlib_main_t * vm = vlib_get_main();
+ ip_config_main_t * cm = &mm->rx_config_mains;
+ vnet_config_main_t * vcm = &cm->config_main;
+ u32 lookup_feature_index;
+ fib_node_index_t lfib_index;
+ u32 ci;
+
+ vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
{
- uword * p;
-
- p = hash_get (im->fib_index_by_table_id, inner_fib_id);
- if (! p)
- return VNET_API_ERROR_NO_SUCH_INNER_FIB;
- inner_fib_index = p[0];
- }
+ if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index])
+ return;
- if (outer_fib_id != 0)
+ lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID);
+ /* size the vector for the slot written below, not just slot 0 */
+ vec_validate(mm->fib_index_by_sw_if_index, sw_if_index);
+ mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index;
+ }
+ else
{
- uword * p;
-
- p = hash_get (im->fib_index_by_table_id, outer_fib_id);
- if (! p)
- return VNET_API_ERROR_NO_SUCH_FIB;
- outer_fib_index = p[0];
+ ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index])
+ return;
+
+ fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_MPLS);
}
- /* suppress duplicate mpls interface generation. */
- pool_foreach (tp, mm->gre_tunnels,
- ({
- /*
- * If we have a tunnel which matches (src, dst, intfc/mask)
- * AND the expected route is in the FIB, it's a dup
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ lookup_feature_index = mm->mpls_rx_feature_lookup;
+
+ /* add or remove the MPLS lookup feature on this interface's RX config */
+ if (is_enable)
+ ci = vnet_config_add_feature (vm, vcm,
+ ci,
+ lookup_feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ else
+ ci = vnet_config_del_feature (vm, vcm, ci,
+ lookup_feature_index,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+
+ cm->config_index_by_sw_if_index[sw_if_index] = ci;
+
+ /*
+ * notify all interested clients of the change of state.
+ */
+ vec_foreach(callback, mm->mpls_interface_state_change_callbacks)
+ {
+ (*callback)(sw_if_index, is_enable);
+ }
+}
+
+/* Convert a FIB graph node pointer back into the MPLS GRE tunnel it denotes. */
+static mpls_gre_tunnel_t *
+mpls_gre_tunnel_from_fib_node (fib_node_t *node)
+{
+ mpls_gre_tunnel_t *mgt;
+
+ /* the node pointer is handed back reinterpreted as a tunnel pointer */
+ mgt = (mpls_gre_tunnel_t*) node;
+
+#if (CLIB_DEBUG > 0)
+ /* debug images check the node really is of the MPLS GRE tunnel type */
+ ASSERT(FIB_NODE_TYPE_MPLS_GRE_TUNNEL == node->fn_type);
+#endif
+ return (mgt);
+}
+
+/*
+ * mpls_gre_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
+ */
+static void
+mpls_gre_tunnel_stack (mpls_gre_tunnel_t *mgt)
+{
+ /*
+ * find the adjacency that is contributed by the FIB entry
+ * that this tunnel resolves via, and use it as the next adj
+ * in the midchain
*/
- if (!memcmp (&tp->tunnel_src, src, sizeof (*src))
- && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst))
- && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc))
- && tp->inner_fib_index == inner_fib_index)
- {
- ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index);
- uword * hash = fib->adj_index_by_dst_address[mask_width];
- uword key = intfc->as_u32 & im->fib_masks[mask_width];
- uword *p = hash_get (hash, key);
+ adj_nbr_midchain_stack(mgt->adj_index,
+ fib_entry_contribute_ip_forwarding(mgt->fei));
+}
- found_tunnel = 1;
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+mpls_gre_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ mpls_gre_tunnel_stack(mpls_gre_tunnel_from_fib_node(node));
- if (is_add)
- {
- /* A dup, and the route is in the fib. Done */
- if (p || l2_only)
- return 1;
- else
- {
- /* Reinstall the route (and other stuff) */
- e = mpls_encap_by_fib_and_dest (mm, inner_fib_index,
- dst->as_u32);
- if (e == 0)
- return VNET_API_ERROR_NO_SUCH_LABEL;
- goto reinstall_it;
- }
- }
- else
- {
- /* Delete, the route is already gone? */
- if (!p)
- need_route_add_del = 0;
- goto add_del_route;
- }
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
- }
- }));
-
- /* Delete, and we can't find the tunnel */
- if (is_add == 0 && found_tunnel == 0)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+mpls_gre_tunnel_fib_node_get (fib_node_index_t index)
+{
+ mpls_gre_tunnel_t * mgt;
+ mpls_main_t * mm;
- e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32);
- if (e == 0)
- return VNET_API_ERROR_NO_SUCH_LABEL;
+ mm = &mpls_main;
+ mgt = pool_elt_at_index(mm->gre_tunnels, index);
- pool_get(mm->gre_tunnels, tp);
- memset (tp, 0, sizeof (*tp));
+ return (&mgt->mgt_node);
+}
- if (vec_len (mm->free_gre_sw_if_indices) > 0)
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+mpls_gre_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+ * The MPLS GRE tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by MPLS GRE tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t mpls_gre_vft = {
+ .fnv_get = mpls_gre_tunnel_fib_node_get,
+ .fnv_last_lock = mpls_gre_tunnel_last_lock_gone,
+ .fnv_back_walk = mpls_gre_tunnel_back_walk,
+};
+
+static mpls_gre_tunnel_t *
+mpls_gre_tunnel_find (ip4_address_t *src,
+ ip4_address_t *dst,
+ ip4_address_t *intfc,
+ u32 inner_fib_index)
+{
+ mpls_main_t * mm = &mpls_main;
+ mpls_gre_tunnel_t *tp;
+ int found_tunnel = 0;
+
+ /* suppress duplicate mpls interface generation. */
+ pool_foreach (tp, mm->gre_tunnels,
+ ({
+ /*
+ * A tunnel is a match when its (src, dst, intfc address) and
+ * its inner FIB index all equal the ones asked for.
+ */
+ if (!memcmp (&tp->tunnel_src, src, sizeof (*src))
+ && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst))
+ && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc))
+ && tp->inner_fib_index == inner_fib_index)
+ {
+ found_tunnel = 1;
+ goto found;
+ }
+ }));
+
+found:
+ if (found_tunnel)
{
- hw_if_index =
- mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1];
- _vec_len (mm->free_gre_sw_if_indices) -= 1;
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- hi->dev_instance = tp - mm->gre_tunnels;
- hi->hw_instance = tp - mm->gre_tunnels;
+ return (tp);
}
- else
+ return (NULL);
+}
+
+int mpls_gre_tunnel_add (ip4_address_t *src, /* tunnel source; with dst forms the tunnel_by_key hash key */
+ ip4_address_t *dst, /* tunnel destination; also used to look up the MPLS encap */
+ ip4_address_t *intfc, /* address of the subnet routed through the tunnel */
+ u32 mask_width, /* prefix length of that subnet */
+ u32 inner_fib_index,
+ u32 outer_fib_index,
+ u32 * tunnel_sw_if_index, /* out: written unconditionally below, so must not be NULL */
+ u8 l2_only) /* non-zero: skip all FIB/adjacency programming */
+{
+ mpls_main_t * mm = &mpls_main;
+ gre_main_t * gm = &gre_main;
+ vnet_main_t * vnm = vnet_get_main();
+ mpls_gre_tunnel_t *tp;
+ ip_adjacency_t adj;
+ u8 * rewrite_data;
+ mpls_encap_t * e = 0;
+ u32 hw_if_index = ~0;
+ vnet_hw_interface_t * hi;
+ u32 slot;
+ const ip46_address_t zero_nh = {
+ .ip4.as_u32 = 0,
+ };
+
+ tp = mpls_gre_tunnel_find(src,dst,intfc,inner_fib_index);
+
+ /* Add, duplicate */
+ if (NULL != tp)
+ return VNET_API_ERROR_NO_SUCH_ENTRY; /* NOTE(review): a duplicate add reports NO_SUCH_ENTRY; an "already exists" code looks intended - confirm */
+
+ e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32);
+ if (e == 0)
+ return VNET_API_ERROR_NO_SUCH_LABEL;
+
+ pool_get(mm->gre_tunnels, tp);
+ memset (tp, 0, sizeof (*tp));
+ fib_node_init(&tp->mgt_node,
+ FIB_NODE_TYPE_MPLS_GRE_TUNNEL);
+
+ if (vec_len (mm->free_gre_sw_if_indices) > 0) /* reuse a previously freed tunnel interface if one is available */
{
- hw_if_index = vnet_register_interface
- (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels,
- mpls_gre_hw_interface_class.index,
- tp - mm->gre_tunnels);
- hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hw_if_index =
+ mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1];
+ _vec_len (mm->free_gre_sw_if_indices) -= 1;
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = tp - mm->gre_tunnels;
+ hi->hw_instance = tp - mm->gre_tunnels;
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels,
+ mpls_gre_hw_interface_class.index,
+ tp - mm->gre_tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ /* ... to make the IP and L2 x-connect cases identical */
+ slot = vlib_node_add_named_next_with_slot
+ (vnm->vlib_main, hi->tx_node_index,
+ "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE);
+
+ ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE);
+ }
+
+ *tunnel_sw_if_index = hi->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vec_validate(ip4_main.fib_index_by_sw_if_index, *tunnel_sw_if_index);
+ ip4_main.fib_index_by_sw_if_index[*tunnel_sw_if_index] = outer_fib_index;
+
+ tp->hw_if_index = hw_if_index;
+
+ /* bind the MPLS and IPv4 FIBs to the interface and enable */
+ vec_validate(mm->fib_index_by_sw_if_index, hi->sw_if_index);
+ mm->fib_index_by_sw_if_index[hi->sw_if_index] = inner_fib_index;
+ mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 1); /* NOTE(review): on first enable this call stores the default MPLS table index in the slot set on the previous line - confirm intended */
+ ip4_main.fib_index_by_sw_if_index[hi->sw_if_index] = inner_fib_index; /* NOTE(review): same slot as the outer_fib_index store above (*tunnel_sw_if_index == hi->sw_if_index), so this value wins - confirm */
+ ip4_sw_interface_enable_disable(hi->sw_if_index, 1);
+
+ tp->tunnel_src.as_u32 = src->as_u32;
+ tp->tunnel_dst.as_u32 = dst->as_u32;
+ tp->intfc_address.as_u32 = intfc->as_u32;
+ tp->mask_width = mask_width;
+ tp->inner_fib_index = inner_fib_index;
+ tp->outer_fib_index = outer_fib_index;
+ tp->encap_index = e - mm->encaps;
+ tp->l2_only = l2_only;
+
+ /* Add the tunnel to the hash table of all GRE tunnels */
+ u64 key = (u64)src->as_u32 << 32 | (u64)dst->as_u32; /* src in the high 32 bits, dst in the low 32 bits */
+
+ ASSERT(NULL == hash_get (gm->tunnel_by_key, key));
+ hash_set (gm->tunnel_by_key, key, tp - mm->gre_tunnels);
+
+ /* Create the adjacency and add to v4 fib */
+ memset(&adj, 0, sizeof (adj)); /* NOTE(review): adj is filled in here but not read again in this function */
+ adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+
+ rewrite_data = mpls_gre_rewrite (mm, tp);
+ if (rewrite_data == 0)
+ {
+ if (*tunnel_sw_if_index != ~0)
+ {
+ hi = vnet_get_hw_interface (vnm, tp->hw_if_index);
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ 0 /* admin down */);
+ vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index);
+ }
+ pool_put (mm->gre_tunnels, tp); /* NOTE(review): the tunnel_by_key entry and the MPLS/IP4 enables done above are not undone on this path - confirm */
+ return VNET_API_ERROR_NO_SUCH_LABEL;
+ }
- /* ... to make the IP and L2 x-connect cases identical */
- slot = vlib_node_add_named_next_with_slot
- (vnm->vlib_main, hi->tx_node_index,
- "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE);
+ /* Save a copy of the rewrite data for L2 x-connect */
+ vec_free (tp->rewrite_data);
- ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE);
+ tp->rewrite_data = rewrite_data;
+
+ if (!l2_only)
+ {
+ /*
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ const fib_prefix_t tun_dst_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = *dst,
+ }
+ };
+
+ tp->fei = fib_table_entry_special_add(outer_fib_index,
+ &tun_dst_pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE,
+ ADJ_INDEX_INVALID);
+ tp->sibling_index = fib_entry_child_add(tp->fei,
+ FIB_NODE_TYPE_MPLS_GRE_TUNNEL,
+ tp - mm->gre_tunnels);
+
+ /*
+ * create and update the midchain adj this tunnel sources.
+ * This is the adj the route we add below will resolve to.
+ */
+ tp->adj_index = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_IP4,
+ &zero_nh,
+ hi->sw_if_index);
+
+ adj_nbr_midchain_update_rewrite(tp->adj_index,
+ mpls_post_rewrite_node.index,
+ rewrite_data);
+ mpls_gre_tunnel_stack(tp);
+
+ /*
+ * Update the route for the tunnel's subnet to point through the tunnel
+ */
+ const fib_prefix_t tun_sub_net_pfx = {
+ .fp_len = tp->mask_width,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = tp->intfc_address,
+ },
+ };
+
+ fib_table_entry_update_one_path(inner_fib_index,
+ &tun_sub_net_pfx,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ FIB_PROTOCOL_IP4,
+ &zero_nh,
+ hi->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
}
+
+ return 0;
+}
+
+/**
+ * Delete the MPLS GRE tunnel matching (src, dst, intfc, inner_fib_index).
+ *
+ * Unless l2_only is set, the tunnel's FIB state is removed first: the child
+ * link and RR-sourced entry for the tunnel destination, the midchain adj
+ * lock, and the interface-sourced route for the tunnel subnet. The tunnel is
+ * then removed from the GRE tunnel hash, MPLS and IP4 are disabled on its
+ * interface, the interface is set admin down and queued for reuse, and the
+ * pool element is released.
+ *
+ * Returns 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if no tunnel matches.
+ * mask_width and tunnel_sw_if_index are not read by this function.
+ */
+static int
+mpls_gre_tunnel_del (ip4_address_t *src,
+ ip4_address_t *dst,
+ ip4_address_t *intfc,
+ u32 mask_width,
+ u32 inner_fib_index,
+ u32 outer_fib_index,
+ u32 * tunnel_sw_if_index,
+ u8 l2_only)
+{
+ mpls_main_t * mm = &mpls_main;
+ vnet_main_t * vnm = vnet_get_main();
+ gre_main_t * gm = &gre_main;
+ mpls_gre_tunnel_t *tp;
+ vnet_hw_interface_t * hi;
- *tunnel_sw_if_index = hi->sw_if_index;
- vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ tp = mpls_gre_tunnel_find(src,dst,intfc,inner_fib_index);
- tp->hw_if_index = hw_if_index;
-
- reinstall_it:
- tp->tunnel_src.as_u32 = src->as_u32;
- tp->tunnel_dst.as_u32 = dst->as_u32;
- tp->intfc_address.as_u32 = intfc->as_u32;
- tp->mask_width = mask_width;
- tp->inner_fib_index = inner_fib_index;
- tp->outer_fib_index = outer_fib_index;
- tp->encap_index = e - mm->encaps;
- tp->l2_only = l2_only;
+ /* Delete, and we can't find the tunnel */
+ if (NULL == tp)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- /* Create the adjacency and add to v4 fib */
- memset(&adj, 0, sizeof (adj));
- adj.explicit_fib_index = ~0;
- adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
-
- rewrite_data = mpls_gre_rewrite (mm, tp);
- if (rewrite_data == 0)
+ hi = vnet_get_hw_interface (vnm, tp->hw_if_index);
+
+ if (!l2_only)
{
- if (*tunnel_sw_if_index != ~0)
- {
- hi = vnet_get_hw_interface (vnm, tp->hw_if_index);
- vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
- 0 /* admin down */);
- vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index);
- }
- pool_put (mm->gre_tunnels, tp);
- return VNET_API_ERROR_NO_SUCH_LABEL;
+ /*
+ * unsource the FIB entry for the tunnel's destination
+ */
+ const fib_prefix_t tun_dst_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = *dst,
+ }
+ };
+
+ fib_entry_child_remove(tp->fei,
+ tp->sibling_index);
+ fib_table_entry_special_remove(outer_fib_index,
+ &tun_dst_pfx,
+ FIB_SOURCE_RR);
+ tp->fei = FIB_NODE_INDEX_INVALID;
+ adj_unlock(tp->adj_index);
+
+ /*
+ * unsource the route for the tunnel's subnet
+ */
+ const fib_prefix_t tun_sub_net_pfx = {
+ .fp_len = tp->mask_width,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = tp->intfc_address,
+ },
+ };
+
+ fib_table_entry_delete(inner_fib_index,
+ &tun_sub_net_pfx,
+ FIB_SOURCE_INTERFACE);
}
-
- /* Save a copy of the rewrite data for L2 x-connect */
- vec_free (tp->rewrite_data);
- tp->rewrite_data = rewrite_data;
+ /* same layout as the key built on add: src in the high 32 bits, dst in the low */
+ u64 key = ((u64)tp->tunnel_src.as_u32 << 32 |
+ (u64)tp->tunnel_dst.as_u32);
- vnet_rewrite_for_tunnel
- (vnm,
- outer_fib_index /* tx_sw_if_index, aka outer fib ID */,
- ip4_rewrite_node.index,
- mpls_post_rewrite_node.index,
- &adj.rewrite_header,
- rewrite_data, vec_len(rewrite_data));
-
- if (!l2_only)
- ip_add_adjacency (lm, &adj, 1 /* one adj */,
- &adj_index);
+ hash_unset (gm->tunnel_by_key, key);
+ mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 0);
+ ip4_sw_interface_enable_disable(hi->sw_if_index, 0);
+
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ 0 /* admin down */);
+ vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index);
+ vec_free (tp->rewrite_data);
+ fib_node_deinit(&tp->mgt_node);
+ pool_put (mm->gre_tunnels, tp);
+
+ return 0;
+}
+
+int
+vnet_mpls_gre_add_del_tunnel (ip4_address_t *src,
+ ip4_address_t *dst,
+ ip4_address_t *intfc,
+ u32 mask_width,
+ u32 inner_fib_id, u32 outer_fib_id,
+ u32 * tunnel_sw_if_index,
+ u8 l2_only,
+ u8 is_add)
+{
+ u32 inner_fib_index = 0;
+ u32 outer_fib_index = 0;
+ u32 dummy;
+ ip4_main_t * im = &ip4_main;
- add_del_route:
+ /* No questions, no answers */
+ if (NULL == tunnel_sw_if_index)
+ tunnel_sw_if_index = &dummy;
- if (need_route_add_del && !l2_only)
+ *tunnel_sw_if_index = ~0;
+
+ if (inner_fib_id != (u32)~0)
{
- if (is_add)
- ip4_add_del_route_next_hop (im,
- IP4_ROUTE_FLAG_ADD,
- &tp->intfc_address,
- tp->mask_width,
- &zero /* no next hop */,
- (u32)~0 /* next_hop_sw_if_index */,
- 1 /* weight */,
- adj_index,
- tp->inner_fib_index);
- else
- {
- ip4_add_del_route_args_t a;
- memset (&a, 0, sizeof (a));
-
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL;
- a.table_index_or_table_id = tp->inner_fib_index;
- a.dst_address = tp->intfc_address;
- a.dst_address_length = tp->mask_width;
- a.adj_index = ~0;
-
- ip4_add_del_route (im, &a);
- ip4_maybe_remap_adjacencies (im, tp->inner_fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX);
- }
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, inner_fib_id);
+ if (! p)
+ return VNET_API_ERROR_NO_SUCH_INNER_FIB;
+ inner_fib_index = p[0];
}
- if (is_add == 0 && found_tunnel)
+ if (outer_fib_id != 0)
{
- hi = vnet_get_hw_interface (vnm, tp->hw_if_index);
- vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
- 0 /* admin down */);
- vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index);
- vec_free (tp->rewrite_data);
- pool_put (mm->gre_tunnels, tp);
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, outer_fib_id);
+ if (! p)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ outer_fib_index = p[0];
}
- return 0;
+ if (is_add)
+ {
+ return (mpls_gre_tunnel_add(src,dst,intfc, mask_width,
+ inner_fib_index,
+ outer_fib_index,
+ tunnel_sw_if_index,
+ l2_only));
+ }
+ else
+ {
+ return (mpls_gre_tunnel_del(src,dst,intfc, mask_width,
+ inner_fib_index,
+ outer_fib_index,
+ tunnel_sw_if_index,
+ l2_only));
+ }
}
/*
@@ -963,21 +1275,17 @@ int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src,
*/
int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id)
{
- ip4_main_t * im = &ip4_main;
mpls_main_t * mm = &mpls_main;
vnet_main_t * vnm = mm->vnet_main;
mpls_gre_tunnel_t *tp;
u32 fib_index = 0;
- uword * p;
u32 * tunnels_to_delete = 0;
vnet_hw_interface_t * hi;
- ip4_fib_t * fib;
int i;
- p = hash_get (im->fib_index_by_table_id, fib_id);
- if (! p)
+ fib_index = ip4_fib_index_from_table_id(fib_id);
+ if (~0 == fib_index)
return VNET_API_ERROR_NO_SUCH_INNER_FIB;
- fib_index = p[0];
pool_foreach (tp, mm->gre_tunnels,
({
@@ -985,28 +1293,40 @@ int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id)
vec_add1 (tunnels_to_delete, tp - mm->gre_tunnels);
}));
- fib = vec_elt_at_index (im->fibs, fib_index);
-
for (i = 0; i < vec_len(tunnels_to_delete); i++) {
tp = pool_elt_at_index (mm->gre_tunnels, tunnels_to_delete[i]);
- uword * hash = fib->adj_index_by_dst_address[tp->mask_width];
- uword key = tp->intfc_address.as_u32 & im->fib_masks[tp->mask_width];
- uword *p = hash_get (hash, key);
- ip4_add_del_route_args_t a;
/* Delete, the route if not already gone */
- if (p && !tp->l2_only)
- {
- memset (&a, 0, sizeof (a));
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL;
- a.table_index_or_table_id = tp->inner_fib_index;
- a.dst_address = tp->intfc_address;
- a.dst_address_length = tp->mask_width;
- a.adj_index = ~0;
- ip4_add_del_route (im, &a);
- ip4_maybe_remap_adjacencies (im, tp->inner_fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX);
- }
+ if (FIB_NODE_INDEX_INVALID != tp->fei && !tp->l2_only)
+ {
+ const fib_prefix_t tun_dst_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = tp->tunnel_dst,
+ }
+ };
+
+ fib_entry_child_remove(tp->fei,
+ tp->sibling_index);
+ fib_table_entry_special_remove(tp->outer_fib_index,
+ &tun_dst_pfx,
+ FIB_SOURCE_RR);
+ tp->fei = FIB_NODE_INDEX_INVALID;
+ adj_unlock(tp->adj_index);
+
+ const fib_prefix_t tun_sub_net_pfx = {
+ .fp_len = tp->mask_width,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = tp->intfc_address,
+ },
+ };
+
+ fib_table_entry_delete(tp->inner_fib_index,
+ &tun_sub_net_pfx,
+ FIB_SOURCE_INTERFACE);
+ }
hi = vnet_get_hw_interface (vnm, tp->hw_if_index);
vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
@@ -1229,11 +1549,15 @@ VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = {
.function = show_mpls_tunnel_command_fn,
};
+
/* force inclusion from application's main.c */
clib_error_t *mpls_interface_init (vlib_main_t *vm)
{
clib_error_t * error;
+ fib_node_register_type(FIB_NODE_TYPE_MPLS_GRE_TUNNEL,
+ &mpls_gre_vft);
+
if ((error = vlib_call_init_function (vm, mpls_policy_encap_init)))
return error;
@@ -1286,9 +1610,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
ip_lookup_main_t * lm = &im->lookup_main;
mpls_main_t * mm = &mpls_main;
vnet_main_t * vnm = vnet_get_main();
- ip4_address_t zero;
mpls_eth_tunnel_t *tp;
- int need_route_add_del = 1;
u32 inner_fib_index = 0;
ip_adjacency_t adj;
u32 adj_index;
@@ -1300,8 +1622,6 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
u32 slot;
u32 dummy;
- zero.as_u32 = 0;
-
if (tunnel_sw_if_index == 0)
tunnel_sw_if_index = &dummy;
@@ -1326,18 +1646,14 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
*/
if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst))
&& !memcmp (&tp->intfc_address, intfc, sizeof (*intfc))
- && tp->inner_fib_index == inner_fib_index)
+ && tp->inner_fib_index == inner_fib_index
+ && FIB_NODE_INDEX_INVALID != tp->fei)
{
- ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index);
- uword * hash = fib->adj_index_by_dst_address[mask_width];
- uword key = intfc->as_u32 & im->fib_masks[mask_width];
- uword *p = hash_get (hash, key);
-
found_tunnel = 1;
if (is_add)
{
- if (p || l2_only)
+ if (l2_only)
return 1;
else
{
@@ -1351,9 +1667,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
}
else
{
- /* Delete, the route is already gone? */
- if (!p)
- need_route_add_del = 0;
+ /* Delete */
goto add_del_route;
}
@@ -1413,7 +1727,6 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
/* Create the adjacency and add to v4 fib */
memset(&adj, 0, sizeof (adj));
- adj.explicit_fib_index = ~0;
adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
rewrite_data = mpls_ethernet_rewrite (mm, tp);
@@ -1465,33 +1778,26 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
add_del_route:
- if (need_route_add_del && !l2_only)
+ if (!l2_only)
{
+ const fib_prefix_t pfx = {
+ .fp_addr = {
+ .ip4 = tp->intfc_address,
+ },
+ .fp_len = tp->mask_width,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
if (is_add)
- ip4_add_del_route_next_hop (im,
- IP4_ROUTE_FLAG_ADD,
- &tp->intfc_address,
- tp->mask_width,
- &zero /* no next hop */,
- (u32)~0 /* next_hop_sw_if_index */,
- 1 /* weight */,
- adj_index,
- tp->inner_fib_index);
+ tp->fei = fib_table_entry_special_add(tp->inner_fib_index,
+ &pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ adj_index);
else
{
- ip4_add_del_route_args_t a;
- memset (&a, 0, sizeof (a));
-
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL;
- a.table_index_or_table_id = tp->inner_fib_index;
- a.dst_address = tp->intfc_address;
- a.dst_address_length = tp->mask_width;
- a.adj_index = ~0;
-
- ip4_add_del_route (im, &a);
- ip4_maybe_remap_adjacencies (im, tp->inner_fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX);
- }
+ fib_table_entry_delete(tp->inner_fib_index, &pfx, FIB_SOURCE_API);
+ tp->fei = FIB_NODE_INDEX_INVALID;
+ }
}
if (is_add == 0 && found_tunnel)
{
@@ -1667,15 +1973,10 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst,
u8 is_add)
{
ip4_main_t * im = &ip4_main;
- ip_lookup_main_t * lm = &im->lookup_main;
mpls_main_t * mm = &mpls_main;
vnet_main_t * vnm = vnet_get_main();
- ip4_address_t zero;
mpls_eth_tunnel_t *tp;
- int need_route_add_del = 1;
u32 inner_fib_index = 0;
- ip_adjacency_t adj;
- u32 adj_index;
int found_tunnel = 0;
mpls_encap_t * e = 0;
u32 hw_if_index = ~0;
@@ -1683,8 +1984,6 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst,
u32 slot;
u32 dummy;
- zero.as_u32 = 0;
-
if (tunnel_sw_if_index == 0)
tunnel_sw_if_index = &dummy;
@@ -1709,18 +2008,14 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst,
*/
if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst))
&& !memcmp (&tp->intfc_address, intfc, sizeof (*intfc))
- && tp->inner_fib_index == inner_fib_index)
+ && tp->inner_fib_index == inner_fib_index
+ && FIB_NODE_INDEX_INVALID != tp->fei)
{
- ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index);
- uword * hash = fib->adj_index_by_dst_address[mask_width];
- uword key = intfc->as_u32 & im->fib_masks[mask_width];
- uword *p = hash_get (hash, key);
-
found_tunnel = 1;
if (is_add)
{
- if (p || l2_only)
+ if (l2_only)
return 1;
else
{
@@ -1729,9 +2024,7 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst,
}
else
{
- /* Delete, the route is already gone? */
- if (!p)
- need_route_add_del = 0;
+ /* Delete */
goto add_del_route;
}
@@ -1784,49 +2077,44 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst,
tp->encap_index = e - mm->encaps;
tp->tx_sw_if_index = tx_sw_if_index;
tp->l2_only = l2_only;
+ tp->fei = FIB_NODE_INDEX_INVALID;
if (new_tunnel_index)
*new_tunnel_index = tp - mm->eth_tunnels;
- /* Create the classify adjacency and add to v4 fib */
- memset(&adj, 0, sizeof (adj));
- adj.explicit_fib_index = ~0;
- adj.lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
- adj.classify.table_index = classify_table_index;
-
- if (!l2_only)
- ip_add_adjacency (lm, &adj, 1 /* one adj */,
- &adj_index);
-
add_del_route:
- if (need_route_add_del && !l2_only)
+ if (!l2_only)
{
+ const fib_prefix_t pfx = {
+ .fp_addr = {
+ .ip4 = tp->intfc_address,
+ },
+ .fp_len = tp->mask_width,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ dpo_id_t dpo = DPO_NULL;
+
if (is_add)
- ip4_add_del_route_next_hop (im,
- IP4_ROUTE_FLAG_ADD,
- &tp->intfc_address,
- tp->mask_width,
- &zero /* no next hop */,
- (u32)~0 /* next_hop_sw_if_index */,
- 1 /* weight */,
- adj_index,
- tp->inner_fib_index);
- else
{
- ip4_add_del_route_args_t a;
- memset (&a, 0, sizeof (a));
-
- a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL;
- a.table_index_or_table_id = tp->inner_fib_index;
- a.dst_address = tp->intfc_address;
- a.dst_address_length = tp->mask_width;
- a.adj_index = ~0;
-
- ip4_add_del_route (im, &a);
- ip4_maybe_remap_adjacencies (im, tp->inner_fib_index,
- IP4_ROUTE_FLAG_FIB_INDEX);
+ dpo_set(&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create(FIB_PROTOCOL_IP4,
+ classify_table_index));
+
+ tp->fei = fib_table_entry_special_dpo_add(tp->inner_fib_index,
+ &pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo);
}
+ else
+ {
+ fib_table_entry_delete(tp->inner_fib_index, &pfx, FIB_SOURCE_API);
+ tp->fei = FIB_NODE_INDEX_INVALID;
+ }
}
if (is_add == 0 && found_tunnel)
{
@@ -1945,3 +2233,44 @@ VLIB_CLI_COMMAND (create_mpls_ethernet_policy_tunnel_command, static) = {
" classify-table-index <nn>",
.function = create_mpls_ethernet_policy_tunnel_command_fn,
};
+
+/**
+ * CLI handler: parse "<interface> enable|disable" and apply it through
+ * mpls_sw_interface_enable_disable().
+ *
+ * Returns 0 on success, or a clib error describing the input that could
+ * not be parsed.
+ */
+static clib_error_t *
+mpls_interface_enable_disable (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index, enable;
+
+ sw_if_index = ~0;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "enable"))
+ enable = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ {
+ /* %U consumes the (format_unformat_error, input) pair so the bad token is shown */
+ error = clib_error_return (0, "expected 'enable' or 'disable', got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
+ .path = "set interface mpls",
+ .function = mpls_interface_enable_disable,
+ .short_help = "Enable/Disable an interface for MPLS forwarding",
+};
diff --git a/vnet/vnet/mpls-gre/mpls.c b/vnet/vnet/mpls/mpls.c
index d914b4c2b72..be5e882f1b3 100644
--- a/vnet/vnet/mpls-gre/mpls.c
+++ b/vnet/vnet/mpls/mpls.c
@@ -16,10 +16,86 @@
*/
#include <vnet/vnet.h>
-#include <vnet/mpls-gre/mpls.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+const static char* mpls_eos_bit_names[] = MPLS_EOS_BITS;
mpls_main_t mpls_main;
+/*
+ * format() callback: append an MPLS label to s. The reserved labels listed
+ * in the switch are printed by name, every other value as a decimal number.
+ */
+u8 * format_mpls_unicast_label (u8 * s, va_list * args)
+{
+ mpls_label_t label = va_arg (*args, mpls_label_t);
+ const char *reserved_name = NULL;
+
+ /* pick the symbolic name, if this label has one */
+ switch (label) {
+ case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL:
+ reserved_name = MPLS_IETF_IPV4_EXPLICIT_NULL_STRING;
+ break;
+ case MPLS_IETF_ROUTER_ALERT_LABEL:
+ reserved_name = MPLS_IETF_ROUTER_ALERT_STRING;
+ break;
+ case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL:
+ reserved_name = MPLS_IETF_IPV6_EXPLICIT_NULL_STRING;
+ break;
+ case MPLS_IETF_IMPLICIT_NULL_LABEL:
+ reserved_name = MPLS_IETF_IMPLICIT_NULL_STRING;
+ break;
+ case MPLS_IETF_ELI_LABEL:
+ reserved_name = MPLS_IETF_ELI_STRING;
+ break;
+ case MPLS_IETF_GAL_LABEL:
+ reserved_name = MPLS_IETF_GAL_STRING;
+ break;
+ default:
+ break;
+ }
+
+ if (NULL == reserved_name)
+ return format (s, "%d", label);
+
+ return format (s, "%s", reserved_name);
+}
+
+/*
+ * unformat() callback: parse an MPLS label, given either as one of the
+ * reserved-label names tried below or as a decimal number.
+ * Returns 1 and stores the label on a match; returns 0, leaving *label
+ * untouched, when the input matches none of the accepted forms.
+ */
+uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args)
+{
+ mpls_label_t *label = va_arg (*args, mpls_label_t*);
+
+ if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING))
+ *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+ else if (unformat (input, "%d", label))
+ ;
+ else
+ /* nothing matched: report failure rather than claim a parsed label */
+ return (0);
+
+ return (1);
+}
+
+u8 * format_mpls_eos_bit (u8 * s, va_list * args) /* format() callback: append the name of an EOS-bit value to s */
+{
+ mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t);
+
+ ASSERT(eb <= MPLS_EOS); /* eb is used as the index into mpls_eos_bit_names below */
+
+ s = format(s, "%s", mpls_eos_bit_names[eb]);
+
+ return (s);
+}
+
+u8 * format_mpls_header (u8 * s, va_list * args) /* format() callback: append one MPLS header to s as "[label:ttl:exp:eos]" */
+{
+ mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t); /* the header is taken by value, not by pointer */
+
+ return (format(s, "[%U:%d:%d:%U]",
+ format_mpls_unicast_label, /* first %U: label, by name or number */
+ vnet_mpls_uc_get_label(hdr.label_exp_s_ttl),
+ vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl), /* note the output order: TTL is printed before EXP */
+ vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl),
+ format_mpls_eos_bit, /* second %U: end-of-stack bit, by name */
+ vnet_mpls_uc_get_s(hdr.label_exp_s_ttl)));
+}
+
u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
@@ -203,8 +279,9 @@ int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id,
/* Reformat label into mpls_unicast_header_t */
label_host_byte_order <<= 12;
- if (i == vec_len(labels_host_byte_order) - 1)
- label_host_byte_order |= 1<<8; /* S=1 */
+ // FIXME NEOS AND EOS
+ //if (i == vec_len(labels_host_byte_order) - 1)
+ // label_host_byte_order |= 1<<8; /* S=1 */
label_host_byte_order |= 0xff; /* TTL=FF */
label_net_byte_order = clib_host_to_net_u32 (label_host_byte_order);
h.label_exp_s_ttl = label_net_byte_order;
@@ -385,7 +462,7 @@ int vnet_mpls_add_del_decap (u32 rx_fib_id,
rx_fib_index = p[0];
/* L3 decap => transform fib ID to fib index */
- if (next_index == MPLS_INPUT_NEXT_IP4_INPUT)
+ if (next_index == MPLS_LOOKUP_NEXT_IP4_INPUT)
{
p = hash_get (im->fib_index_by_table_id, tx_fib_id);
if (! p)
@@ -437,12 +514,12 @@ unformat_mpls_gre_input_next (unformat_input_t * input, va_list * args)
if (unformat (input, "lookup"))
{
- *result = MPLS_INPUT_NEXT_IP4_INPUT;
+ *result = MPLS_LOOKUP_NEXT_IP4_INPUT;
rv = 1;
}
else if (unformat (input, "output"))
{
- *result = MPLS_INPUT_NEXT_L2_OUTPUT;
+ *result = MPLS_LOOKUP_NEXT_L2_OUTPUT;
rv = 1;
}
return rv;
@@ -614,10 +691,7 @@ show_mpls_fib_command_fn (vlib_main_t * vm,
show_mpls_fib_t *records = 0;
show_mpls_fib_t *s;
mpls_main_t * mm = &mpls_main;
- ip4_main_t * im = &ip4_main;
- ip4_fib_t * rx_fib, * tx_fib;
- u32 tx_table_id;
- char *swif_tag;
+ ip4_fib_t * rx_fib;
hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest,
({
@@ -630,7 +704,6 @@ show_mpls_fib_command_fn (vlib_main_t * vm,
if (!vec_len(records))
{
vlib_cli_output (vm, "MPLS encap table empty");
- goto decap_table;
}
/* sort output by dst address within fib */
vec_sort_with_function (records, mpls_dest_cmp);
@@ -639,65 +712,174 @@ show_mpls_fib_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=6s%=16s%=16s", "Table", "Dest address", "Labels");
vec_foreach (s, records)
{
- rx_fib = vec_elt_at_index (im->fibs, s->fib_index);
+ rx_fib = ip4_fib_get (s->fib_index);
vlib_cli_output (vm, "%=6d%=16U%=16U", rx_fib->table_id,
format_ip4_address, &s->dest,
format_mpls_encap_index, mm, s->entry_index);
}
- decap_table:
- vec_reset_length(records);
+ vec_free(records);
+ return 0;
+}
- hash_foreach (key, value, mm->mpls_decap_by_rx_fib_and_label,
- ({
- vec_add2 (records, s, 1);
- s->fib_index = (u32)(key>>32);
- s->entry_index = (u32) value;
- s->label = ((u32) key)>>12;
- s->s_bit = (key & (1<<8)) != 0;
- }));
-
- if (!vec_len(records))
- {
- vlib_cli_output (vm, "MPLS decap table empty");
- goto out;
- }
+VLIB_CLI_COMMAND (show_mpls_fib_command, static) = {
+ .path = "show mpls encap",
+ .short_help = "show mpls encap",
+ .function = show_mpls_fib_command_fn,
+};
- vec_sort_with_function (records, mpls_label_cmp);
+/**
+ * @brief CLI handler for "mpls local-label".
+ *
+ * Parses one line of input made of any of: "table <id>", "add", "del",
+ * "eos", "non-eos", an IPv4 or IPv6 prefix, a local label value, and
+ * "ip4-lookup-in-table <id>", "ip6-lookup-in-table <id>" or
+ * "mpls-lookup-in-table <id>" paths.
+ *
+ * If a prefix was given, the local label is added to (or, with "del",
+ * removed from) that prefix's entry in the IP table. Otherwise an MPLS FIB
+ * entry for <label, eos> is created with the collected paths.
+ *
+ * @return 0 on success, otherwise a clib error describing the failure.
+ */
+static clib_error_t *
+vnet_mpls_local_label (vlib_main_t * vm,
+                       unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, * line_input = &_line_input;
+  fib_route_path_t *rpaths = NULL, rpath;
+  clib_error_t * error = 0;
+  u32 table_id, is_del, is_ip;
+  fib_prefix_t pfx;
+  mpls_label_t local_label;
+  mpls_eos_bit_t eos;
+
+  is_ip = 0;
+  /* default to "add" so is_del is never read uninitialised */
+  is_del = 0;
+  table_id = 0;
+  eos = MPLS_EOS;
+  /* sentinel used below to detect that no label was supplied */
+  local_label = MPLS_LABEL_INVALID;
+  /*
+   * Zero the prefix once, before parsing. Clearing it on every loop
+   * iteration would wipe a prefix parsed ahead of any other token.
+   */
+  memset(&pfx, 0, sizeof(pfx));
+
+  /* Get a line of input. */
+  if (! unformat_user (input, unformat_line_input, line_input))
+    return 0;
- vlib_cli_output (vm, "MPLS decap table");
- vlib_cli_output (vm, "%=10s%=15s%=6s%=6s", "RX Table", "TX Table/Intfc",
- "Label", "S-bit");
- vec_foreach (s, records)
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- mpls_decap_t * d;
- d = pool_elt_at_index (mm->decaps, s->entry_index);
- if (d->next_index == MPLS_INPUT_NEXT_IP4_INPUT)
- {
- tx_fib = vec_elt_at_index (im->fibs, d->tx_fib_index);
- tx_table_id = tx_fib->table_id;
- swif_tag = " ";
- }
+      /* each path keyword fills a fresh path before it is appended */
+      memset(&rpath, 0, sizeof(rpath));
+
+      if (unformat (line_input, "table %d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      else if (unformat (line_input, "add"))
+        is_del = 0;
+      else if (unformat (line_input, "eos"))
+        eos = MPLS_EOS;
+      else if (unformat (line_input, "non-eos"))
+        eos = MPLS_NON_EOS;
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip4_address,
+                         &pfx.fp_addr.ip4,
+                         &pfx.fp_len))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          is_ip = 1;
+        }
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip6_address,
+                         &pfx.fp_addr.ip6,
+                         &pfx.fp_len))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          is_ip = 1;
+        }
+      else if (unformat (line_input, "%d", &local_label))
+        ;
+      else if (unformat (line_input,
+                         "ip4-lookup-in-table %d",
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_label = MPLS_LABEL_INVALID;
+          rpath.frp_proto = FIB_PROTOCOL_IP4;
+          rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input,
+                         "ip6-lookup-in-table %d",
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_label = MPLS_LABEL_INVALID;
+          rpath.frp_proto = FIB_PROTOCOL_IP6;
+          rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input,
+                         "mpls-lookup-in-table %d",
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_label = MPLS_LABEL_INVALID;
+          /*
+           * NOTE(review): an MPLS lookup path is tagged FIB_PROTOCOL_IP4
+           * here - confirm whether FIB_PROTOCOL_MPLS was intended.
+           */
+          rpath.frp_proto = FIB_PROTOCOL_IP4;
+          rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+          vec_add1(rpaths, rpath);
+        }
else
- {
- tx_table_id = d->tx_fib_index;
- swif_tag = "(i) ";
- }
- rx_fib = vec_elt_at_index (im->fibs, s->fib_index);
+        {
+          /* report against the line being parsed, not the outer input */
+          error = clib_error_return (0, "unknown input: %U",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
- vlib_cli_output (vm, "%=10d%=10d%=5s%=6d%=6d", rx_fib->table_id,
- tx_table_id, swif_tag, s->label, s->s_bit);
}
- out:
- vec_free(records);
- return 0;
+  /* both branches below need a label; refuse to act on the sentinel */
+  if (MPLS_LABEL_INVALID == local_label)
+    {
+      error = clib_error_return (0, "local label not specified");
+      goto done;
+    }
+
+  if (is_ip)
+    {
+      u32 fib_index = fib_table_find(pfx.fp_proto, table_id);
+
+      if (FIB_NODE_INDEX_INVALID == fib_index)
+        {
+          error = clib_error_return (0, "%U table-id %d does not exist",
+                                     format_fib_protocol, pfx.fp_proto, table_id);
+          goto done;
+        }
+
+      if (is_del)
+        {
+          fib_table_entry_local_label_remove(fib_index, &pfx, local_label);
+        }
+      else
+        {
+          fib_table_entry_local_label_add(fib_index, &pfx, local_label);
+        }
+    }
+  else
+    {
+      fib_node_index_t lfe, fib_index;
+      fib_prefix_t prefix = {
+        .fp_proto = FIB_PROTOCOL_MPLS,
+        .fp_label = local_label,
+        .fp_eos = eos,
+      };
+
+      fib_index = mpls_fib_index_from_table_id(table_id);
+
+      if (FIB_NODE_INDEX_INVALID == fib_index)
+        {
+          error = clib_error_return (0, "MPLS table-id %d does not exist",
+                                     table_id);
+          goto done;
+        }
+
+      /*
+       * NOTE(review): is_del is not consulted on this branch, so "del"
+       * without a prefix still adds the paths - confirm intended.
+       */
+      lfe = fib_table_entry_path_add2(fib_index,
+                                      &prefix,
+                                      FIB_SOURCE_CLI,
+                                      FIB_ENTRY_FLAG_NONE,
+                                      rpaths);
+
+      if (FIB_NODE_INDEX_INVALID == lfe)
+        {
+          error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d",
+                                     format_mpls_unicast_label, local_label,
+                                     format_mpls_eos_bit, eos,
+                                     table_id);
+          goto done;
+        }
+    }
+
+done:
+  /* release the path vector and the per-line input on every exit path */
+  vec_free(rpaths);
+  unformat_free(line_input);
+  return error;
}
-VLIB_CLI_COMMAND (show_mpls_fib_command, static) = {
- .path = "show mpls fib",
- .short_help = "show mpls fib",
- .function = show_mpls_fib_command_fn,
+/* CLI registration: "mpls local-label" is handled by vnet_mpls_local_label */
+VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
+  .path = "mpls local-label",
+  .function = vnet_mpls_local_label,
+  .short_help = "Create/Delete MPLS local labels",
};
int mpls_fib_reset_labels (u32 fib_id)
@@ -764,7 +946,6 @@ static clib_error_t * mpls_init (vlib_main_t * vm)
mpls_main_t * mm = &mpls_main;
clib_error_t * error;
- memset (mm, 0, sizeof (mm[0]));
mm->vlib_main = vm;
mm->vnet_main = vnet_get_main();
diff --git a/vnet/vnet/mpls-gre/mpls.h b/vnet/vnet/mpls/mpls.h
index d8ffca22793..2aeae49df43 100644
--- a/vnet/vnet/mpls-gre/mpls.h
+++ b/vnet/vnet/mpls/mpls.h
@@ -17,9 +17,12 @@
#include <vnet/vnet.h>
#include <vnet/gre/gre.h>
-#include <vnet/mpls-gre/packet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/mpls/mpls_types.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
typedef CLIB_PACKED (struct {
ip4_header_t ip4; /* 20 bytes */
@@ -31,7 +34,7 @@ extern vnet_hw_interface_class_t mpls_gre_hw_interface_class;
typedef enum {
#define mpls_error(n,s) MPLS_ERROR_##n,
-#include <vnet/mpls-gre/error.def>
+#include <vnet/mpls/error.def>
#undef mpls_error
MPLS_N_ERROR,
} mpls_gre_error_t;
@@ -42,6 +45,7 @@ typedef enum {
*/
typedef struct {
+ fib_node_t mgt_node;
ip4_address_t tunnel_src;
ip4_address_t tunnel_dst;
ip4_address_t intfc_address;
@@ -52,6 +56,9 @@ typedef struct {
u32 hw_if_index; /* L2 x-connect capable tunnel intfc */
u8 * rewrite_data;
u8 l2_only;
+ fib_node_index_t fei; /* FIB Entry index for the tunnel's destination */
+ adj_index_t adj_index; /* The midchain adj this tunnel creates */
+ u32 sibling_index;
} mpls_gre_tunnel_t;
typedef struct {
@@ -64,6 +71,7 @@ typedef struct {
u32 hw_if_index;
u8 * rewrite_data;
u8 l2_only;
+ fib_node_index_t fei;
} mpls_eth_tunnel_t;
typedef struct {
@@ -78,7 +86,53 @@ typedef struct {
u32 next_index; /* e.g. ip4/6-input, l2-input */
} mpls_decap_t;
+#define MPLS_FIB_DEFAULT_TABLE_ID 0
+
+/**
+ * Type exposure is to allow the DP fast/inlined access
+ */
+#define MPLS_FIB_KEY_SIZE 21
+#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
+
+/**
+ * Per-table MPLS FIB state: a hash of entries plus a flat array of
+ * load-balance indices.
+ */
+typedef struct mpls_fib_t_
+{
+ /**
+ * A hash table of entries. 21 bit key
+ * Hash table for reduced memory footprint
+ */
+ uword * mf_entries;
+
+ /**
+ * The load-balance indices keyed by 21 bit label+eos bit.
+ * A flat array for maximum lookup performance.
+ * NOTE(review): MPLS_FIB_DB_SIZE is 1 << (MPLS_FIB_KEY_SIZE-1), i.e.
+ * 2^20 slots, while the key is described as 21 bits - confirm every key
+ * used to index this array is below MPLS_FIB_DB_SIZE.
+ */
+ index_t mf_lbs[MPLS_FIB_DB_SIZE];
+} mpls_fib_t;
+
+/**
+ * @brief Definition of a callback for receiving MPLS interface state change
+ * notifications
+ */
+typedef void (*mpls_interface_state_change_callback_t)(u32 sw_if_index,
+ u32 is_enable);
+
typedef struct {
+ /* MPLS FIB index for each software interface */
+ u32 *fib_index_by_sw_if_index;
+
+ /** A pool of all the MPLS FIBs */
+ struct fib_table_t_ *fibs;
+
+ /** A hash table to lookup the mpls_fib by table ID */
+ uword *fib_index_by_table_id;
+
+ /* rx/tx interface/feature configuration. */
+ ip_config_main_t rx_config_mains, tx_config_main;
+
+ /* Built-in unicast feature path indices, see ip_feature_init_cast(...) */
+ u32 mpls_rx_feature_lookup;
+ u32 mpls_rx_feature_not_enabled;
+
/* pool of gre tunnel instances */
mpls_gre_tunnel_t *gre_tunnels;
u32 * free_gre_sw_if_indices;
@@ -99,23 +153,53 @@ typedef struct {
u32 ip4_classify_mpls_policy_encap_next_index;
u32 ip6_classify_mpls_policy_encap_next_index;
+ /* feature path configuration lists */
+ vnet_ip_feature_registration_t * next_feature;
+
+ /* Save feature results for show command */
+ char **feature_nodes;
+
+ /* MPLS enabled count by software interface */
+ u8 * mpls_enabled_by_sw_if_index;
+
+ /* Functions to call when MPLS state on an interface changes. */
+ mpls_interface_state_change_callback_t * mpls_interface_state_change_callbacks;
+
/* convenience */
vlib_main_t * vlib_main;
vnet_main_t * vnet_main;
} mpls_main_t;
-mpls_main_t mpls_main;
+extern mpls_main_t mpls_main;
+
+/*
+ * Declare and register an MPLS feature.
+ * Expands to a forward declaration of a vnet_ip_feature_registration_t
+ * named uc_<x>, a constructor function that pushes it onto the head of
+ * mpls_main.next_feature, and finally the declarator of uc_<x> itself so
+ * the user can follow the macro with "= { ... };".
+ * The optional trailing arguments (e.g. static) are pasted in front of
+ * both declarations of uc_<x>.
+ */
+#define VNET_MPLS_FEATURE_INIT(x,...) \
+ __VA_ARGS__ vnet_ip_feature_registration_t uc_##x; \
+static void __vnet_add_feature_registration_uc_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vnet_add_feature_registration_uc_##x (void) \
+{ \
+ mpls_main_t * mm = &mpls_main; \
+ uc_##x.next = mm->next_feature; \
+ mm->next_feature = &uc_##x; \
+} \
+__VA_ARGS__ vnet_ip_feature_registration_t uc_##x
+
+extern clib_error_t * mpls_feature_init(vlib_main_t * vm);
format_function_t format_mpls_protocol;
-format_function_t format_mpls_header;
-format_function_t format_mpls_header_with_length;
format_function_t format_mpls_gre_header_with_length;
format_function_t format_mpls_eth_header_with_length;
-format_function_t format_mpls_unicast_label;
format_function_t format_mpls_encap_index;
+format_function_t format_mpls_eos_bit;
+format_function_t format_mpls_unicast_header_net_byte_order;
+format_function_t format_mpls_unicast_label;
+format_function_t format_mpls_header;
+
extern vlib_node_registration_t mpls_input_node;
extern vlib_node_registration_t mpls_policy_encap_node;
+extern vlib_node_registration_t mpls_output_node;
+extern vlib_node_registration_t mpls_midchain_node;
extern vnet_device_class_t mpls_gre_device_class;
@@ -126,6 +210,7 @@ unformat_function_t unformat_mpls_protocol_net_byte_order;
unformat_function_t unformat_mpls_label_net_byte_order;
unformat_function_t unformat_mpls_gre_header;
unformat_function_t unformat_pg_mpls_gre_header;
+unformat_function_t unformat_mpls_unicast_label;
/* Parse mpls header. */
unformat_function_t unformat_mpls_header;
@@ -135,6 +220,12 @@ unformat_function_t unformat_pg_mpls_header;
#define MPLS_GRE_OUTPUT_NEXT_LOOKUP 1
#define MPLS_GRE_OUTPUT_NEXT_DROP VNET_INTERFACE_TX_NEXT_DROP
+void mpls_sw_interface_enable_disable (mpls_main_t * mm,
+ u32 sw_if_index,
+ u8 is_enable);
+
+u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+
mpls_encap_t *
mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address);
@@ -176,6 +267,7 @@ int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id,
int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm,
mpls_encap_t * e,
u32 policy_tunnel_index);
+
typedef struct {
u32 lookup_miss;
@@ -198,8 +290,7 @@ u8 * format_mpls_gre_header (u8 * s, va_list * args);
#define foreach_mpls_input_next \
_(DROP, "error-drop") \
-_(IP4_INPUT, "ip4-input") \
-_(L2_OUTPUT, "l2-output")
+_(LOOKUP, "mpls-lookup")
typedef enum {
#define _(s,n) MPLS_INPUT_NEXT_##s,
@@ -208,6 +299,28 @@ typedef enum {
MPLS_INPUT_N_NEXT,
} mpls_input_next_t;
+/*
+ * X-macro list of (enum suffix, node name) pairs used to build
+ * mpls_lookup_next_t below.
+ */
+#define foreach_mpls_lookup_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(L2_OUTPUT, "l2-output")
+
+// FIXME remove.
+/* MPLS_LOOKUP_NEXT_<suffix> values, one per list entry, then the count */
+typedef enum {
+#define _(s,n) MPLS_LOOKUP_NEXT_##s,
+ foreach_mpls_lookup_next
+#undef _
+ MPLS_LOOKUP_N_NEXT,
+} mpls_lookup_next_t;
+
+/*
+ * X-macro list of (enum suffix, node name) pairs used to build
+ * mpls_output_next_t below.
+ */
+#define foreach_mpls_output_next \
+_(DROP, "error-drop")
+
+/* MPLS_OUTPUT_NEXT_<suffix> values, one per list entry, then the count */
+typedef enum {
+#define _(s,n) MPLS_OUTPUT_NEXT_##s,
+ foreach_mpls_output_next
+#undef _
+ MPLS_OUTPUT_N_NEXT,
+} mpls_output_next_t;
typedef struct {
u32 lookup_miss;
diff --git a/vnet/vnet/mpls/mpls_features.c b/vnet/vnet/mpls/mpls_features.c
new file mode 100644
index 00000000000..d3a726afd04
--- /dev/null
+++ b/vnet/vnet/mpls/mpls_features.c
@@ -0,0 +1,254 @@
+/*
+ * mpls_features.c: MPLS input and output features
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls.h>
+
+/*
+ * Common body of the terminating MPLS feature nodes.
+ * Hands every buffer in the frame to vlib_error_drop_buffers() on next
+ * slot 0, with error_code charged against the mpls-input node.
+ * Returns the number of buffers in the frame.
+ */
+always_inline uword
+mpls_terminate (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame,
+                int error_code)
+{
+  uword n_in_frame = frame->n_vectors;
+  u32 * buffer_indices = vlib_frame_vector_args (frame);
+
+  vlib_error_drop_buffers (vm, node, buffer_indices,
+                           /* stride */ 1,
+                           n_in_frame,
+                           /* next */ 0,
+                           mpls_input_node.index,
+                           error_code);
+
+  return (n_in_frame);
+}
+
+/* Node function for "mpls-punt": terminate the frame with MPLS_ERROR_PUNT */
+static uword
+mpls_punt (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  uword n_handled;
+
+  n_handled = mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT);
+
+  return (n_handled);
+}
+
+VLIB_REGISTER_NODE (mpls_punt_node) = {
+  .name = "mpls-punt",
+  .function = mpls_punt,
+  .vector_size = sizeof (u32),
+
+  /* the only next; mpls_terminate sends everything to slot 0 */
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-punt",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt)
+
+/* Node function for "mpls-drop": terminate the frame with MPLS_ERROR_DROP */
+static uword
+mpls_drop (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  uword n_handled;
+
+  n_handled = mpls_terminate(vm, node, frame, MPLS_ERROR_DROP);
+
+  return (n_handled);
+}
+
+VLIB_REGISTER_NODE (mpls_drop_node) = {
+  .name = "mpls-drop",
+  .function = mpls_drop,
+  .vector_size = sizeof (u32),
+
+  /* the only next; mpls_terminate sends everything to slot 0 */
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop)
+
+/*
+ * Node function for "mpls-not-enabled": terminate the frame with
+ * MPLS_ERROR_NOT_ENABLED.
+ */
+static uword
+mpls_not_enabled (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame)
+{
+  uword n_handled;
+
+  n_handled = mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED);
+
+  return (n_handled);
+}
+
+VLIB_REGISTER_NODE (mpls_not_enabled_node) = {
+  .name = "mpls-not-enabled",
+  .function = mpls_not_enabled,
+  .vector_size = sizeof (u32),
+
+  /* the only next; mpls_terminate sends everything to slot 0 */
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled)
+
+/*
+ * Register "mpls-lookup" as an MPLS feature, ordered before
+ * "mpls-not-enabled". Its feature index is stored in
+ * mpls_main.mpls_rx_feature_lookup.
+ */
+VNET_MPLS_FEATURE_INIT (mpls_lookup, static) = {
+ .node_name = "mpls-lookup",
+ .runs_before = ORDER_CONSTRAINTS {"mpls-not-enabled", 0},
+ .feature_index = &mpls_main.mpls_rx_feature_lookup,
+};
+
+/*
+ * Register "mpls-not-enabled" as an MPLS feature with no ordering
+ * constraints. Its feature index is stored in
+ * mpls_main.mpls_rx_feature_not_enabled.
+ */
+VNET_MPLS_FEATURE_INIT (mpls_not_enabled, static) = {
+ .node_name = "mpls-not-enabled",
+ .runs_before = ORDER_CONSTRAINTS {0}, /* not before any other features */
+ .feature_index = &mpls_main.mpls_rx_feature_not_enabled,
+};
+
+/* Nodes at which the MPLS rx feature arc starts */
+static char * feature_start_nodes[] =
+{
+  "mpls-input",
+};
+
+/*
+ * Initialise the MPLS rx feature configuration held in
+ * mpls_main.rx_config_mains by delegating to ip_feature_init_cast().
+ * Returns whatever error (or 0) that call returns.
+ */
+clib_error_t *
+mpls_feature_init (vlib_main_t * vm)
+{
+  ip_config_main_t * rx_cm = &mpls_main.rx_config_mains;
+  clib_error_t * error;
+
+  error = ip_feature_init_cast (vm, rx_cm, &rx_cm->config_main,
+                                feature_start_nodes,
+                                ARRAY_LEN(feature_start_nodes),
+                                VNET_IP_RX_UNICAST_FEAT,
+                                VNET_L3_PACKET_TYPE_MPLS_UNICAST);
+
+  return (error);
+}
+
+/*
+ * Software interface add/delete callback.
+ * Grows the per-interface vectors to cover sw_if_index, then adds (on
+ * interface add) or removes (on delete) the "mpls-not-enabled" feature
+ * in the interface's rx feature config. On delete the interface's MPLS
+ * enabled count is also reset to 0.
+ */
+static clib_error_t *
+mpls_sw_interface_add_del (vnet_main_t * vnm,
+                           u32 sw_if_index,
+                           u32 is_add)
+{
+  mpls_main_t * mm = &mpls_main;
+  ip_config_main_t * cm = &mm->rx_config_mains;
+  vnet_config_main_t * vcm = &cm->config_main;
+  vlib_main_t * vm = vnm->vlib_main;
+  u32 not_enabled_feature, config_index;
+
+  vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+  vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0);
+  vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+
+  config_index = cm->config_index_by_sw_if_index[sw_if_index];
+  not_enabled_feature = mm->mpls_rx_feature_not_enabled;
+
+  if (!is_add)
+    {
+      config_index = vnet_config_del_feature (vm, vcm, config_index,
+                                              not_enabled_feature,
+                                              /* config data */ 0,
+                                              /* # bytes of config data */ 0);
+      mm->mpls_enabled_by_sw_if_index[sw_if_index] = 0;
+    }
+  else
+    {
+      config_index = vnet_config_add_feature (vm, vcm, config_index,
+                                              not_enabled_feature,
+                                              /* config data */ 0,
+                                              /* # bytes of config data */ 0);
+    }
+
+  /* remember the (possibly new) config index for this interface */
+  cm->config_index_by_sw_if_index[sw_if_index] = config_index;
+
+  return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del);
+
+/*
+ * CLI handler for "show mpls features".
+ * Prints each node name saved in mpls_main.feature_nodes.
+ */
+static clib_error_t *
+show_mpls_features_command_fn (vlib_main_t * vm,
+                               unformat_input_t * input,
+                               vlib_cli_command_t * cmd)
+{
+  mpls_main_t * mm = &mpls_main;
+  char ** features = mm->feature_nodes;
+  int i;
+
+  vlib_cli_output (vm, "Available MPLS feature nodes");
+
+  for (i = 0; i < vec_len(features); i++)
+    vlib_cli_output (vm, " %s\n", features[i]);
+
+  return 0;
+}
+
+/* Registration symbol named for MPLS rather than the copy-pasted "ip" name */
+VLIB_CLI_COMMAND (show_mpls_features_command, static) = {
+  .path = "show mpls features",
+  .short_help = "show mpls features",
+  .function = show_mpls_features_command_fn,
+};
+
+/*
+ * CLI handler for "show mpls interface features <intfc>".
+ * Looks up the rx feature config attached to the interface and prints the
+ * name of each feature node in it.
+ */
+static clib_error_t *
+show_mpls_interface_features_command_fn (vlib_main_t * vm,
+                                         unformat_input_t * input,
+                                         vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip_config_main_t * cm = &mpls_main.rx_config_mains;
+  vnet_config_main_t * vcm = &cm->config_main;
+  vnet_config_t * cfg;
+  u32 sw_if_index, user_index, pool_index;
+  int fi;
+
+  if (! unformat (input, "%U", unformat_vnet_sw_interface,
+                  vnm, &sw_if_index))
+    return clib_error_return (0, "Interface not specified...");
+
+  vlib_cli_output (vm, "MPLS feature paths configured on %U...",
+                   format_vnet_sw_if_index_name, vnm, sw_if_index);
+
+  /* per-interface config index -> config pool index -> config */
+  user_index = vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+
+  ASSERT(user_index < vec_len (vcm->config_pool_index_by_user_index));
+
+  pool_index = vcm->config_pool_index_by_user_index[user_index];
+  cfg = pool_elt_at_index (vcm->config_pool, pool_index);
+
+  for (fi = 0; fi < vec_len(cfg->features); fi++)
+    {
+      vnet_config_feature_t * feature = &cfg->features[fi];
+      vlib_node_t * feature_node = vlib_get_node (vm, feature->node_index);
+
+      vlib_cli_output (vm, " %v", feature_node->name);
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_mpls_interface_features_command, static) = {
+  .path = "show mpls interface features",
+  .short_help = "show mpls interface features <intfc>",
+  .function = show_mpls_interface_features_command_fn,
+};
+
diff --git a/vnet/vnet/mpls/mpls_lookup.c b/vnet/vnet/mpls/mpls_lookup.c
new file mode 100644
index 00000000000..31ad68c4bc6
--- /dev/null
+++ b/vnet/vnet/mpls/mpls_lookup.c
@@ -0,0 +1,278 @@
+/*
+ * mpls_lookup.c: MPLS label lookup and load-balance nodes
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/load_balance.h>
+
+vlib_node_registration_t mpls_lookup_node;
+
+/* Per-packet trace record written by the mpls-lookup node */
+typedef struct {
+  u32 next_index;           /* next node slot chosen for the packet */
+  u32 lb_index;             /* load-balance index returned by the lookup */
+  u32 lfib_index;           /* MPLS FIB index the lookup was done in */
+  u32 label_net_byte_order; /* header word as read from the packet */
+} mpls_lookup_trace_t;
+
+/*
+ * Format an mpls_lookup_trace_t.
+ * The stored header word is in network byte order, so it is converted to
+ * host order once and both the label and the eos bit are extracted from
+ * the converted value.
+ */
+static u8 *
+format_mpls_lookup_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
+  u32 label_host_byte_order;
+
+  label_host_byte_order = clib_net_to_host_u32(t->label_net_byte_order);
+
+  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d "
+              "label %d eos %d",
+              t->next_index, t->lfib_index, t->lb_index,
+              vnet_mpls_uc_get_label(label_host_byte_order),
+              vnet_mpls_uc_get_s(label_host_byte_order));
+  return s;
+}
+
+/*
+ * Compute flow hash.
+ * We'll use it to select which adjacency to use for this flow. And other things.
+ *
+ * hdr points at the MPLS header as it sits in the packet, i.e. in network
+ * byte order, so the word is converted to host order before the label is
+ * extracted. Without the conversion the value hashed is not the label.
+ */
+always_inline u32
+mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
+                        flow_hash_config_t flow_hash_config)
+{
+  // FIXME: flow_hash_config is not consulted yet; the hash is the label.
+  return (vnet_mpls_uc_get_label(
+            clib_net_to_host_u32(hdr->label_exp_s_ttl)));
+}
+
+/*
+ * Node function for "mpls-lookup".
+ * For each packet: find the MPLS FIB bound to the RX interface, look up
+ * the top label to get a load-balance object, choose one of its buckets,
+ * record the bucket's DPO index as the TX adjacency, pop the label and
+ * enqueue the packet to the bucket's next node.
+ * Returns the number of packets in the frame.
+ */
+static inline uword
+mpls_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, next_index, * from, * to_next;
+ mpls_main_t * mm = &mpls_main;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 lbi0, next0, lfib_index0, bi0, hash_c0;
+ const mpls_unicast_header_t * h0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ /* speculatively enqueue to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ /* the MPLS FIB is selected by the interface the packet arrived on */
+ lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+ lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
+ lb0 = load_balance_get(lbi0);
+
+ /* only compute a flow hash when there is more than one bucket */
+ hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+ {
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+ }
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ /* bucket count is a power of two, so masking selects the bucket */
+ dpo0 = load_balance_get_bucket_i(lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* counted before the pop below, so the byte count includes the label */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ /*
+ * pop the label that was just used in the lookup
+ */
+ vlib_buffer_advance(b0, sizeof(*h0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->lfib_index = lfib_index0;
+ /* h0 was taken before the advance and still points at the popped label */
+ tr->label_net_byte_order = h0->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ /* every packet in the frame is counted as decapsulated */
+ vlib_node_increment_counter (vm, mpls_lookup_node.index,
+ MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+/* Error strings, one per mpls_error() entry in error.def */
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+/*
+ * Registration of the "mpls-lookup" node. It declares no next nodes of
+ * its own; it is registered as a sibling of "ip4-lookup".
+ * NOTE(review): the buffer format/unformat hooks are the mpls-gre header
+ * ones - confirm that is intended for this node.
+ */
+VLIB_REGISTER_NODE (mpls_lookup_node) = {
+ .function = mpls_lookup,
+ .name = "mpls-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .sibling_of = "ip4-lookup",
+
+ .format_buffer = format_mpls_gre_header_with_length,
+ .format_trace = format_mpls_lookup_trace,
+ .unformat_buffer = unformat_mpls_gre_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup)
+
+/* Trace record for the mpls-load-balance node */
+typedef struct {
+  u32 next_index; /* next node slot chosen */
+  u32 lb_index;   /* load-balance index used */
+} mpls_load_balance_trace_t;
+
+/* Format an mpls_load_balance_trace_t as "MPLS: next [n], LB index i " */
+static u8 *
+format_mpls_load_balance_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  mpls_load_balance_trace_t * trace;
+
+  trace = va_arg (*args, mpls_load_balance_trace_t *);
+
+  return (format (s, "MPLS: next [%d], LB index %d ",
+                  trace->next_index, trace->lb_index));
+}
+
+/*
+ * Node function for "mpls-load-balance".
+ * The load-balance index is taken from the buffer's TX adjacency field.
+ * A flow hash is computed from the current MPLS header, a bucket is
+ * chosen by masking the hash, and the bucket's DPO index replaces the TX
+ * adjacency before the packet is sent to the bucket's next node.
+ * Returns the number of packets in the frame.
+ * NOTE(review): no trace record is added here although the node registers
+ * format_mpls_load_balance_trace - confirm intended.
+ */
+always_inline uword
+mpls_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const mpls_unicast_header_t *hdr0;
+ const load_balance_t *lb0;
+ u32 pi0, lbi0, hc0, next0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * p0;
+
+ /* speculatively place the buffer in the current next frame */
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ hdr0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+ hc0 = lb0->lb_hash_config;
+ vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(hdr0, hc0);
+
+ dpo0 = load_balance_get_bucket_i(lb0,
+ vnet_buffer(p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ /*
+ * Wrong guess: give the slot back, close the current frame, and
+ * re-enqueue the buffer on a frame for the correct next node.
+ */
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Registration of "mpls-load-balance", a sibling of "mpls-lookup" */
+VLIB_REGISTER_NODE (mpls_load_balance_node) = {
+ .function = mpls_load_balance,
+ .name = "mpls-load-balance",
+ .vector_size = sizeof (u32),
+ .sibling_of = "mpls-lookup",
+
+ .format_trace = format_mpls_load_balance_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)
diff --git a/vnet/vnet/mpls/mpls_output.c b/vnet/vnet/mpls/mpls_output.c
new file mode 100644
index 00000000000..932fcb8d0bd
--- /dev/null
+++ b/vnet/vnet/mpls/mpls_output.c
@@ -0,0 +1,343 @@
+/*
+ * mpls_output.c: MPLS Adj rewrite
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * Trace record for the mpls-output and mpls-midchain nodes.
+ * NOTE(review): packet_data is sized as 64 minus one u32 although two u32
+ * fields precede it - confirm the intended record size.
+ */
+typedef struct {
+ /* Adjacency taken. */
+ u32 adj_index;
+ u32 flow_hash;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[64 - 1*sizeof(u32)];
+} mpls_output_trace_t;
+
+/*
+ * Format an mpls_output_trace_t: the adjacency index, the adjacency
+ * itself and the flow hash on one line, then the captured packet data on
+ * a second line indented to match.
+ */
+static u8 *
+format_mpls_output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *);
+ vnet_main_t * vnm = vnet_get_main();
+ uword indent = format_get_indent (s);
+
+ s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
+ t->adj_index,
+ format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ vnm, t->adj_index,
+ t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+/**
+ * @brief Shared worker for the mpls-output and mpls-midchain nodes.
+ *
+ * For each packet: fetch the adjacency stored in the buffer's TX
+ * adjacency field, write the adjacency's rewrite string in front of the
+ * current data, check the resulting length against the adjacency's
+ * maximum L3 packet size, and either forward the packet to the
+ * adjacency's next node or send it to the drop next.
+ *
+ * Errors and the encap counter are always charged to mpls-output,
+ * whichever node is running.
+ *
+ * @return the number of packets in the frame.
+ */
+static inline uword
+mpls_output_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, * from, * to_next, cpu_index;
+  vlib_node_runtime_t * error_node;
+
+  cpu_index = os_get_cpu_number();
+  error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_adjacency_t * adj0;
+          mpls_unicast_header_t *hdr0;
+          vlib_buffer_t * p0;
+          u32 pi0, rw_len0, adj_index0, next0, error0;
+
+          pi0 = to_next[0] = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+          /* We should never rewrite a pkt using the MISS adjacency */
+          ASSERT(adj_index0);
+
+          adj0 = adj_get(adj_index0);
+          hdr0 = vlib_buffer_get_current (p0);
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_one_header (adj0[0], hdr0,
+                                   sizeof (ethernet_header_t));
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+
+          /* account for rewrite bytes beyond a plain Ethernet header */
+          if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
+            vlib_increment_combined_counter
+              (&adjacency_counters,
+               cpu_index, adj_index0,
+               /* packet increment */ 0,
+               /* byte increment */ rw_len0-sizeof(ethernet_header_t));
+
+          /*
+           * Check MTU of outgoing interface.
+           * NOTE(review): IP4_ERROR_* codes are used to index the error
+           * table of mpls-output, which is registered with MPLS_N_ERROR
+           * entries - confirm the indices line up.
+           */
+          error0 = (vlib_buffer_length_in_chain (vm, p0)
+                    > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED
+                    : IP4_ERROR_NONE);
+
+          p0->error = error_node->errors[error0];
+
+          /* Only expose the rewrite and pick the adjacency's next node
+           * when the packet fits; otherwise leave the buffer as is and
+           * drop it. */
+          if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+            {
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+                adj0[0].rewrite_header.sw_if_index;
+              next0 = adj0[0].rewrite_header.next_index;
+            }
+          else
+            {
+              next0 = MPLS_OUTPUT_NEXT_DROP;
+            }
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                        p0, sizeof (*tr));
+              tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+              tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+              /*
+               * Capture the packet as it now stands; the trace formatter
+               * prints packet_data, so it must not be left unset.
+               */
+              clib_memcpy (tr->packet_data,
+                           vlib_buffer_get_current (p0),
+                           sizeof (tr->packet_data));
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  /* every packet in the frame is counted, including any sent to drop */
+  vlib_node_increment_counter (vm, mpls_output_node.index,
+                               MPLS_ERROR_PKTS_ENCAP,
+                               from_frame->n_vectors);
+
+  return from_frame->n_vectors;
+}
+
+/* Error strings, one per mpls_error() entry in error.def */
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+/* Node function for "mpls-output": a thin wrapper over the shared worker */
+static inline uword
+mpls_output (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  uword n_processed;
+
+  n_processed = mpls_output_inline(vm, node, from_frame);
+
+  return (n_processed);
+}
+
+VLIB_REGISTER_NODE (mpls_output_node) = {
+  .name = "mpls-output",
+  .function = mpls_output,
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .format_trace = format_mpls_output_trace,
+
+  .n_errors = MPLS_N_ERROR,
+  .error_strings = mpls_error_strings,
+
+  /* next slots come from foreach_mpls_output_next */
+  .n_next_nodes = MPLS_OUTPUT_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n,
+    foreach_mpls_output_next
+#undef _
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output)
+
+/**
+ * @brief Node function named in the multiarch registration of
+ * mpls_midchain_node. It runs the same mpls_output_inline path as
+ * mpls_output.
+ */
+static inline uword
+mpls_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ uword n_packets = mpls_output_inline (vm, node, from_frame);
+
+ return n_packets;
+}
+
+/* Registration of the "mpls-midchain" graph node. It declares no next
+ * nodes of its own and is registered as a sibling of "mpls-output". */
+VLIB_REGISTER_NODE (mpls_midchain_node) = {
+ /* Dispatch to this node's own function, the same one named in the
+ * multiarch registration below, rather than borrowing mpls_output. */
+ .function = mpls_midchain,
+ .name = "mpls-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_output_trace,
+
+ .sibling_of = "mpls-output",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain)
+
+/**
+ * @brief Next index values from the MPLS incomplete adj node
+ */
+#define foreach_mpls_adj_incomplete_next \
+_(DROP, "error-drop") \
+_(IP4, "ip4-arp") \
+_(IP6, "ip6-discover-neighbor")
+
+/* One MPLS_ADJ_INCOMPLETE_NEXT_<name> enumerator per entry of the list
+ * above, followed by the number of entries. */
+typedef enum {
+#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s,
+ foreach_mpls_adj_incomplete_next
+#undef _
+ MPLS_ADJ_INCOMPLETE_N_NEXT,
+} mpls_adj_incomplete_next_t;
+
+/**
+ * @brief A struct to hold tracing information for the MPLS incomplete
+ * adjacency node.
+ */
+typedef struct mpls_adj_incomplete_trace_t_
+{
+ /* next-node index the packet was sent to (MPLS_ADJ_INCOMPLETE_NEXT_*) */
+ u32 next;
+} mpls_adj_incomplete_trace_t;
+
+
+/**
+ * @brief Graph node for incomplete MPLS adjacency.
+ * This node will push traffic to either the v4-arp or v6-nd node
+ * based on the next-hop proto of the adj.
+ * We pay a cost for this 'routing' node, but an incomplete adj is the
+ * exception case.
+ *
+ * @param vm vlib main
+ * @param node runtime of this node; supplies the cached next index
+ * @param from_frame frame of buffer indices to process
+ * @return the number of packets in from_frame
+ */
+static inline uword
+mpls_adj_incomplete (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* packets are handled one at a time; there is no dual loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, next0, adj_index0;
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+
+ /* speculatively enqueue to the current next frame */
+ pi0 = to_next[0] = from[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ /* the adjacency index is read from the buffer metadata and is
+ * asserted to be non-zero */
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ ASSERT(adj_index0);
+
+ adj0 = adj_get(adj_index0);
+
+ /* Any next-hop protocol other than IP4 takes the IP6 arc; the
+ * DROP arc is declared but never selected here. */
+ if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto))
+ {
+ next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4;
+ }
+ else
+ {
+ next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_adj_incomplete_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next = next0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+/* Trace formatter for mpls_adj_incomplete_trace_t records: prints the
+ * chosen next index, indented to the current indent of s. */
+static u8 *
+format_mpls_adj_incomplete_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_adj_incomplete_trace_t * trace =
+ va_arg (*args, mpls_adj_incomplete_trace_t *);
+ uword indent = format_get_indent (s);
+
+ return format (s, "%Unext:%d", format_white_space, indent, trace->next);
+}
+
+/* Registration of the "mpls-adj-incomplete" graph node. Its next-node
+ * table is generated from foreach_mpls_adj_incomplete_next. */
+VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = {
+ .function = mpls_adj_incomplete,
+ .name = "mpls-adj-incomplete",
+ /* This node records mpls_adj_incomplete_trace_t, so this must be the only
+ * .format_trace initialiser: a later duplicate would silently override it
+ * (the last designated initialiser for a member wins). */
+ .format_trace = format_mpls_adj_incomplete_trace,
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n,
+ foreach_mpls_adj_incomplete_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node,
+ mpls_adj_incomplete)
diff --git a/vnet/vnet/mpls/mpls_types.h b/vnet/vnet/mpls/mpls_types.h
new file mode 100644
index 00000000000..d7c629df832
--- /dev/null
+++ b/vnet/vnet/mpls/mpls_types.h
@@ -0,0 +1,39 @@
+#ifndef __MPLS_TYPES_H__
+#define __MPLS_TYPES_H__
+
+/* Whole 20-bit label space */
+#define MPLS_IETF_MIN_LABEL 0x00000
+#define MPLS_IETF_MAX_LABEL 0xfffff
+
+/* Reserved (special purpose) labels: 0..15 */
+#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000
+#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f
+
+/* Unreserved labels: 16..0xfffff */
+#define MPLS_IETF_MIN_UNRES_LABEL 0x00010
+#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff
+
+/* Individual reserved label values */
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000
+#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002
+#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003
+#define MPLS_IETF_ELI_LABEL 0x00007
+#define MPLS_IETF_GAL_LABEL 0x0000D
+
+/* Long and brief display names of the reserved labels */
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null"
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul"
+#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null"
+#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul"
+#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert"
+#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt"
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null"
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl"
+#define MPLS_IETF_ELI_STRING "entropy-label-indicator"
+#define MPLS_IETF_ELI_BRIEF_STRING "eli"
+#define MPLS_IETF_GAL_STRING "gal"
+#define MPLS_IETF_GAL_BRIEF_STRING "gal"
+
+/* One past the largest encodable label, so it can never match a real one */
+#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1)
+
+/* True when _lbl lies in the unreserved range, both bounds included.
+ * The lower bound is inclusive so that label 16, the first unreserved
+ * value, counts as real. */
+#define MPLS_LABEL_IS_REAL(_lbl) \
+ (((_lbl) >= MPLS_IETF_MIN_UNRES_LABEL) && \
+ ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL))
+
+#endif
diff --git a/vnet/vnet/mpls/node.c b/vnet/vnet/mpls/node.c
new file mode 100644
index 00000000000..6801cc7b3ae
--- /dev/null
+++ b/vnet/vnet/mpls/node.c
@@ -0,0 +1,223 @@
+/*
+ * node.c: mpls input processing
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+
+/* Per-packet trace record written by the mpls-input node */
+typedef struct {
+ /* next-node index selected for the packet */
+ u32 next_index;
+ /* the packet's first label stack entry, converted to host byte order */
+ u32 label_host_byte_order;
+} mpls_input_trace_t;
+
+/* Trace formatter for mpls-input: prints the chosen next node (name and
+ * index) followed by the label and TTL of the traced stack entry. */
+static u8 *
+format_mpls_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_input_trace_t * trace = va_arg (*args, mpls_input_trace_t *);
+ /* shown when next_index matches none of the known next nodes */
+ char * next_name = "BUG!";
+ u32 entry = trace->label_host_byte_order;
+
+#define _(a,b) if (trace->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
+ foreach_mpls_input_next;
+#undef _
+
+ return format (s, "MPLS: next %s[%d] label %d ttl %d",
+ next_name, trace->next_index,
+ vnet_mpls_uc_get_label(entry),
+ vnet_mpls_uc_get_ttl(entry));
+}
+
+/* Declared ahead of its VLIB_REGISTER_NODE definition so the node
+ * function can refer to mpls_input_node.index. */
+vlib_node_registration_t mpls_input_node;
+
+/* Runtime data of the mpls-input node (sized through runtime_data_bytes
+ * in the node registration). */
+typedef struct {
+ /* reset to ~0 by mpls_input_inline at the start of every frame */
+ u32 last_label;
+ /* both FIB indices are initialised to 0 by mpls_setup_nodes */
+ u32 last_inner_fib_index;
+ u32 last_outer_fib_index;
+ /* pointer to the global mpls_main, set by mpls_setup_nodes */
+ mpls_main_t * mpls_main;
+} mpls_input_runtime_t;
+
+/*
+ * Worker for the mpls-input node. For every buffer in the frame it reads
+ * the first label stack entry, drops the packet when its TTL is zero, and
+ * otherwise takes the next node from the per-interface rx feature config
+ * and bumps the interface MPLS counter.
+ *
+ * Returns the number of packets in from_frame.
+ */
+static inline uword
+mpls_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ mpls_input_runtime_t * rt;
+ mpls_main_t * mm;
+ u32 cpu_index = os_get_cpu_number();
+ vlib_simple_counter_main_t * cm;
+ vnet_main_t * vnm = vnet_get_main();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ mm = rt->mpls_main;
+ /*
+ * Force an initial lookup every time, in case the control-plane
+ * changed the label->FIB mapping.
+ */
+ rt->last_label = ~0;
+
+ next_index = node->cached_next_index;
+
+ /* per-interface simple counter for received MPLS packets */
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_MPLS);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* packets are handled one at a time; there is no dual loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ mpls_unicast_header_t * h0;
+ u32 label0;
+ u32 next0;
+ ip_config_main_t * cm0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* start the feature walk at the config index of the rx interface */
+ cm0 = &mm->rx_config_mains;
+ b0->current_config_index = vec_elt (cm0->config_index_by_sw_if_index,
+ sw_if_index0);
+
+ label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
+ /* TTL expired? */
+ if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
+ {
+ next0 = MPLS_INPUT_NEXT_DROP;
+ b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ /* next0 comes from the feature config; only packets that are
+ * not dropped are counted against the interface */
+ vnet_get_config_data (&cm0->config_main,
+ &b0->current_config_index,
+ &next0,
+ /* # bytes of config data */ 0);
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->label_host_byte_order = label0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ /* counts every packet of the frame, including those dropped above */
+ vlib_node_increment_counter (vm, mpls_input_node.index,
+ MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+/* Node function of "mpls-input": forwards the frame to mpls_input_inline
+ * and returns its result. */
+static uword
+mpls_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ uword n_packets = mpls_input_inline (vm, node, from_frame);
+
+ return n_packets;
+}
+
+/* Error strings for the mpls-input node. Each mpls_error(n,s) entry of
+ * error.def expands to its string s. */
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+/* Registration of the "mpls-input" graph node. Its next-node table is
+ * generated from foreach_mpls_input_next. */
+VLIB_REGISTER_NODE (mpls_input_node) = {
+ .function = mpls_input,
+ .name = "mpls-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ /* room for the mpls_input_runtime_t filled in by mpls_setup_nodes */
+ .runtime_data_bytes = sizeof(mpls_input_runtime_t),
+
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
+ foreach_mpls_input_next
+#undef _
+ },
+
+ .format_buffer = format_mpls_unicast_header_net_byte_order,
+ .format_trace = format_mpls_input_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)
+
+/* One-time wiring of the mpls-input node: installs the packet-generator
+ * edit hook, initialises the node runtime data and registers the node for
+ * the MPLS unicast ethernet type. */
+static void
+mpls_setup_nodes (vlib_main_t * vm)
+{
+ pg_node_t * pg_node = pg_get_node (mpls_input_node.index);
+ mpls_input_runtime_t * runtime;
+
+ pg_node->unformat_edit = unformat_pg_mpls_header;
+
+ runtime = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ runtime->last_label = (u32) ~0;
+ runtime->last_inner_fib_index = 0;
+ runtime->last_outer_fib_index = 0;
+ runtime->mpls_main = &mpls_main;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST,
+ mpls_input_node.index);
+}
+
+/* Init function of the MPLS input path: runs mpls_init first, sets up the
+ * mpls-input node and returns the result of mpls_feature_init. */
+static clib_error_t * mpls_input_init (vlib_main_t * vm)
+{
+ clib_error_t * init_error = vlib_call_init_function (vm, mpls_init);
+
+ /* a failure of mpls_init is reported but does not stop the setup */
+ if (init_error)
+ clib_error_report (init_error);
+
+ mpls_setup_nodes (vm);
+
+ return (mpls_feature_init(vm));
+}
+
+VLIB_INIT_FUNCTION (mpls_input_init);
diff --git a/vnet/vnet/mpls/packet.h b/vnet/vnet/mpls/packet.h
new file mode 100644
index 00000000000..bc67445be89
--- /dev/null
+++ b/vnet/vnet/mpls/packet.h
@@ -0,0 +1,125 @@
+#ifndef included_vnet_mpls_packet_h
+#define included_vnet_mpls_packet_h
+
+/*
+ * MPLS packet format
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A label value only, i.e. 20bits.
+ */
+typedef u32 mpls_label_t;
+
+/* One 32-bit MPLS label stack entry as it appears in the packet */
+typedef struct {
+ /* Label: top 20 bits [in network byte order] */
+ /* Experimental: 3 bits ... */
+ /* S (bottom of label stack): 1 bit */
+ /* TTL: 8 bits */
+ /* NOTE(review): despite the mpls_label_t type, this member carries the
+ * whole 32-bit entry (label, exp, S and TTL), not just the label. */
+ mpls_label_t label_exp_s_ttl;
+} mpls_unicast_header_t;
+
+/* Value of the S (end/bottom of stack) bit of a label stack entry */
+typedef enum mpls_eos_bit_t_
+{
+ MPLS_NON_EOS = 0,
+ MPLS_EOS = 1,
+} mpls_eos_bit_t;
+
+/* Array initialiser mapping each mpls_eos_bit_t value to a display name */
+#define MPLS_EOS_BITS { \
+ [MPLS_NON_EOS] = "neos", \
+ [MPLS_EOS] = "eos", \
+}
+
+/* Loop header that walks _eos over MPLS_NON_EOS and MPLS_EOS */
+#define FOR_EACH_MPLS_EOS_BIT(_eos) \
+ for (_eos = MPLS_NON_EOS; _eos <= MPLS_EOS; _eos++)
+
+/*
+ * Field layout of a label stack entry held in a 32-bit host-order value.
+ * For each field: _SHIFT is the bit position of its least significant
+ * bit, _MASK the right-aligned field mask, and _BITS/_BIT the mask in
+ * place.
+ */
+#define MPLS_ENTRY_LABEL_OFFSET 0
+#define MPLS_ENTRY_LABEL_SHIFT 12
+/* Unsigned on purpose: shifting a signed 0x000fffff left by 12 does not
+ * fit in an int, which is undefined behaviour. */
+#define MPLS_ENTRY_LABEL_MASK 0x000fffffu
+#define MPLS_ENTRY_LABEL_BITS \
+ (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT)
+
+#define MPLS_ENTRY_EXP_OFFSET 2 /* byte offset to EXP bits */
+#define MPLS_ENTRY_EXP_SHIFT 9
+#define MPLS_ENTRY_EXP_MASK 0x07
+#define MPLS_ENTRY_EXP(mpls) \
+ (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK)
+#define MPLS_ENTRY_EXP_BITS \
+ (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT)
+
+#define MPLS_ENTRY_EOS_OFFSET 2 /* byte offset to EOS bit */
+#define MPLS_ENTRY_EOS_SHIFT 8
+#define MPLS_ENTRY_EOS_MASK 0x01 /* EOS bit in its byte */
+#define MPLS_ENTRY_EOS(mpls) \
+ (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK)
+#define MPLS_ENTRY_EOS_BIT (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT)
+
+#define MPLS_ENTRY_TTL_OFFSET 3 /* byte offset to ttl field */
+#define MPLS_ENTRY_TTL_SHIFT 0
+#define MPLS_ENTRY_TTL_MASK 0xff
+#define MPLS_ENTRY_TTL(mpls) \
+ (((mpls) >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK)
+#define MPLS_ENTRY_TTL_BITS \
+ (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT)
+
+/* Return the label field of a stack entry: everything above the low
+ * MPLS_ENTRY_LABEL_SHIFT bits. */
+static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl)
+{
+ u32 label = label_exp_s_ttl >> MPLS_ENTRY_LABEL_SHIFT;
+
+ return label;
+}
+
+/* Return the 3-bit EXP field of a stack entry. */
+static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl)
+{
+ return (label_exp_s_ttl >> MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK;
+}
+
+/* Return the S (end of stack) bit of a stack entry as 0 or 1. */
+static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl)
+{
+ return (label_exp_s_ttl >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK;
+}
+
+/* Return the 8-bit TTL field of a stack entry. */
+static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl)
+{
+ return (label_exp_s_ttl >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK;
+}
+
+/* Replace the label field of *label_exp_s_ttl with value, leaving the
+ * other fields untouched. Bits of value beyond the field are discarded. */
+static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl,
+ u32 value)
+{
+ mpls_label_t others = (*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS);
+ mpls_label_t field = (value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT;
+
+ *label_exp_s_ttl = others | field;
+}
+
+/* Replace the EXP field of *label_exp_s_ttl with exp, leaving the other
+ * fields untouched. Bits of exp beyond the field are discarded. */
+static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl,
+ u32 exp)
+{
+ mpls_label_t others = (*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS);
+ mpls_label_t field = (exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT;
+
+ *label_exp_s_ttl = others | field;
+}
+
+/* Replace the S (end of stack) bit of *label_exp_s_ttl with the low bit
+ * of eos, leaving the other fields untouched. */
+static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl,
+ u32 eos)
+{
+ mpls_label_t others = (*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT);
+ mpls_label_t field = (eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT;
+
+ *label_exp_s_ttl = others | field;
+}
+
+/* Replace the TTL field of *label_exp_s_ttl with the low 8 bits of ttl,
+ * leaving the other fields untouched. */
+static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl,
+ u32 ttl)
+{
+ mpls_label_t others = (*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS);
+ mpls_label_t field = ttl & MPLS_ENTRY_TTL_MASK;
+
+ *label_exp_s_ttl = others | field;
+}
+
+#endif /* included_vnet_mpls_packet_h */
diff --git a/vnet/vnet/mpls-gre/pg.c b/vnet/vnet/mpls/pg.c
index 6b6a1017c58..f04b53075d3 100644
--- a/vnet/vnet/mpls-gre/pg.c
+++ b/vnet/vnet/mpls/pg.c
@@ -18,7 +18,7 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/gre/gre.h>
-#include <vnet/mpls-gre/mpls.h>
+#include <vnet/mpls/mpls.h>
typedef struct {
pg_edit_t label;
diff --git a/vnet/vnet/mpls-gre/policy_encap.c b/vnet/vnet/mpls/policy_encap.c
index 0ea051f56ec..278e8e6d7ce 100644
--- a/vnet/vnet/mpls-gre/policy_encap.c
+++ b/vnet/vnet/mpls/policy_encap.c
@@ -17,7 +17,7 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
-#include <vnet/mpls-gre/mpls.h>
+#include <vnet/mpls/mpls.h>
typedef struct {
u32 next_index;
diff --git a/vnet/vnet/pg/stream.c b/vnet/vnet/pg/stream.c
index 9f7e9e8df05..b66fb742ab4 100644
--- a/vnet/vnet/pg/stream.c
+++ b/vnet/vnet/pg/stream.c
@@ -40,6 +40,8 @@
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
/* Mark stream active or inactive. */
void
@@ -186,6 +188,10 @@ pg_interface_add_or_get (pg_main_t * pg, uword if_id)
pi->sw_if_index = hi->sw_if_index;
hash_set (pg->if_index_by_if_id, if_id, i);
+
+ ip4_sw_interface_enable_disable (pi->hw_if_index, 1);
+ ip6_sw_interface_enable_disable (pi->hw_if_index, 1);
+ mpls_sw_interface_enable_disable (&mpls_main, pi->hw_if_index, 1);
}
return i;
diff --git a/vnet/vnet/rewrite.c b/vnet/vnet/rewrite.c
index 0dcec408424..42d0688a5cc 100644
--- a/vnet/vnet/rewrite.c
+++ b/vnet/vnet/rewrite.c
@@ -70,27 +70,25 @@ format_vnet_rewrite (u8 * s, va_list * args)
vlib_main_t *vm = va_arg (*args, vlib_main_t *);
vnet_rewrite_header_t *rw = va_arg (*args, vnet_rewrite_header_t *);
u32 max_data_bytes = va_arg (*args, u32);
+ CLIB_UNUSED (uword indent) = va_arg (*args, u32);
vnet_main_t *vnm = vnet_get_main ();
vlib_node_t *next;
- uword indent;
next = vlib_get_next_node (vm, rw->node_index, rw->next_index);
- indent = format_get_indent (s);
-
if (rw->sw_if_index != ~0)
{
vnet_sw_interface_t *si;
si = vnet_get_sw_interface (vnm, rw->sw_if_index);
- s = format (s, "%U", format_vnet_sw_interface_name, vnm, si);
+ s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si);
}
else
- s = format (s, "%v", next->name);
+ s = format (s, "%v: ", next->name);
/* Format rewrite string. */
if (rw->data_bytes > 0)
- s = format (s, "\n%U%U",
- format_white_space, indent,
+
+ s = format (s, "%U",
next->format_buffer ? next->format_buffer : format_hex_bytes,
rw->data + max_data_bytes - rw->data_bytes, rw->data_bytes);
diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c
index 9c2d591102d..086cbe965b6 100644
--- a/vnet/vnet/sr/sr.c
+++ b/vnet/vnet/sr/sr.c
@@ -22,6 +22,8 @@
*/
#include <vnet/vnet.h>
#include <vnet/sr/sr.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
#include <openssl/hmac.h>
@@ -29,6 +31,11 @@ ip6_sr_main_t sr_main;
static vlib_node_registration_t sr_local_node;
/**
+ * @brief Dynamically added SR DPO type
+ */
+static dpo_type_t sr_dpo_type;
+
+/**
* @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines
*
* @param sm ip6_sr_main_t *
@@ -319,16 +326,12 @@ format_sr_rewrite_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
sr_rewrite_trace_t *t = va_arg (*args, sr_rewrite_trace_t *);
- ip6_main_t *im = &ip6_main;
ip6_sr_main_t *sm = &sr_main;
ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
ip6_fib_t *rx_fib, *tx_fib;
- rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
-
- tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
+ rx_fib = ip6_fib_get (tun->rx_fib_index);
+ tx_fib = ip6_fib_get (tun->tx_fib_index);
s = format
(s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n"
@@ -733,38 +736,18 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite)
u32 dst_address_length,
u32 rx_table_id)
{
- ip6_add_del_route_args_t a;
- ip6_address_t dst_address;
- ip6_fib_t *fib;
- ip6_main_t *im6 = &ip6_main;
- BVT (clib_bihash_kv) kv, value;
-
- fib = find_ip6_fib_by_table_index_or_id (im6, rx_table_id,
- IP6_ROUTE_FLAG_TABLE_ID);
- memset (&a, 0, sizeof (a));
- a.flags |= IP4_ROUTE_FLAG_DEL;
- a.dst_address_length = dst_address_length;
-
- dst_address = *dst_address_arg;
-
- ip6_address_mask (&dst_address, &im6->fib_masks[dst_address_length]);
-
- kv.key[0] = dst_address.as_u64[0];
- kv.key[1] = dst_address.as_u64[1];
- kv.key[2] = ((u64) ((fib - im6->fibs)) << 32) | dst_address_length;
-
- if (BV (clib_bihash_search) (&im6->ip6_lookup_table, &kv, &value) < 0)
- {
- clib_warning ("%U/%d not in FIB",
- format_ip6_address, &a.dst_address, a.dst_address_length);
- return -10;
- }
+ fib_prefix_t pfx = {
+ .fp_len = dst_address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = *dst_address_arg,
+ }
+ };
- a.adj_index = value.value;
- a.dst_address = dst_address;
+ fib_table_entry_delete (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6,
+ rx_table_id),
+ &pfx, FIB_SOURCE_SR);
- ip6_add_del_route (im6, &a);
- ip6_maybe_remap_adjacencies (im6, rx_table_id, IP6_ROUTE_FLAG_TABLE_ID);
return 0;
}
@@ -837,23 +820,20 @@ int
ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
{
ip6_main_t *im = &ip6_main;
- ip_lookup_main_t *lm = &im->lookup_main;
ip6_sr_tunnel_key_t key;
ip6_sr_tunnel_t *t;
uword *p, *n;
ip6_sr_header_t *h = 0;
u32 header_length;
ip6_address_t *addrp, *this_address;
- ip_adjacency_t adj, *ap, *add_adj = 0;
- u32 adj_index;
ip6_sr_main_t *sm = &sr_main;
u8 *key_copy;
u32 rx_fib_index, tx_fib_index;
- ip6_add_del_route_args_t aa;
u32 hmac_key_index_u32;
u8 hmac_key_index = 0;
ip6_sr_policy_t *pt;
int i;
+ dpo_id_t dpo = DPO_NULL;
/* Make sure that the rx FIB exists */
p = hash_get (im->fib_index_by_table_id, a->rx_table_id);
@@ -1057,15 +1037,6 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
clib_memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t));
hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels);
- memset (&adj, 0, sizeof (adj));
-
- /* Create an adjacency and add to v6 fib */
- adj.lookup_next_index = sm->ip6_lookup_sr_next_index;
- adj.explicit_fib_index = ~0;
-
- ap = ip_add_adjacency (lm, &adj, 1 /* one adj */ ,
- &adj_index);
-
/*
* Stick the tunnel index into the rewrite header.
*
@@ -1077,22 +1048,20 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
* We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
* at some point...
*/
- ap->rewrite_header.sw_if_index = t - sm->tunnels;
-
- vec_add1 (add_adj, ap[0]);
-
- clib_memcpy (aa.dst_address.as_u8, a->dst_address,
- sizeof (aa.dst_address.as_u8));
- aa.dst_address_length = a->dst_mask_width;
+ dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
- aa.flags = (a->is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD);
- aa.flags |= IP6_ROUTE_FLAG_FIB_INDEX;
- aa.table_index_or_table_id = rx_fib_index;
- aa.add_adj = add_adj;
- aa.adj_index = adj_index;
- aa.n_add_adj = 1;
- ip6_add_del_route (im, &aa);
- vec_free (add_adj);
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = a->dst_mask_width,
+ .fp_addr = {
+ .ip6 = *a->dst_address,
+ }
+ };
+ fib_table_entry_special_dpo_add (rx_fib_index,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
if (a->policy_name)
{
@@ -1126,6 +1095,48 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
}
/**
+ * @brief no-op lock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+static void
+sr_dpo_lock (dpo_id_t * dpo)
+{
+ /* intentionally empty */
+}
+
+/**
+ * @brief no-op unlock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+static void
+sr_dpo_unlock (dpo_id_t * dpo)
+{
+ /* intentionally empty */
+}
+
+/**
+ * @brief Format function of the SR DPO type.
+ * Takes an index_t and a u32 indent from args (the indent is not used)
+ * and prints the index as the SR tunnel number.
+ */
+u8 *
+format_sr_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ return (format (s, "SR: tunnel:[%d]", index));
+}
+
+/* Function table of the SR DPO type, passed to dpo_register_new_type */
+const static dpo_vft_t sr_vft = {
+ .dv_lock = sr_dpo_lock,
+ .dv_unlock = sr_dpo_unlock,
+ .dv_format = format_sr_dpo,
+};
+
+/* NULL-terminated list of graph node names for the IP6 slot of sr_nodes */
+const static char *const sr_ip6_nodes[] = {
+ "sr-rewrite",
+ NULL,
+};
+
+/* Node-name lists indexed by DPO protocol; only the IP6 slot is set.
+ * Passed to dpo_register_new_type together with sr_vft. */
+const static char *const *const sr_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_ip6_nodes,
+};
+
+/**
* @brief CLI parser for Add or Delete a Segment Routing tunnel.
*
* @param vm vlib_main_t *
@@ -1315,16 +1326,12 @@ VLIB_CLI_COMMAND (sr_tunnel_command, static) = {
void
ip6_sr_tunnel_display (vlib_main_t * vm, ip6_sr_tunnel_t * t)
{
- ip6_main_t *im = &ip6_main;
ip6_sr_main_t *sm = &sr_main;
ip6_fib_t *rx_fib, *tx_fib;
ip6_sr_policy_t *pt;
- rx_fib = find_ip6_fib_by_table_index_or_id (im, t->rx_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
-
- tx_fib = find_ip6_fib_by_table_index_or_id (im, t->tx_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
+ rx_fib = ip6_fib_get (t->rx_fib_index);
+ tx_fib = ip6_fib_get (t->tx_fib_index);
if (t->name)
vlib_cli_output (vm, "sr tunnel name: %s", (char *) t->name);
@@ -1678,13 +1685,8 @@ int
ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
{
uword *p;
- ip6_main_t *im = &ip6_main;
- ip_lookup_main_t *lm = &im->lookup_main;
ip6_sr_tunnel_t *t;
- ip_adjacency_t adj, *ap, *add_adj = 0;
- u32 adj_index;
ip6_sr_main_t *sm = &sr_main;
- ip6_add_del_route_args_t aa;
ip6_sr_policy_t *pt;
if (a->is_del)
@@ -1714,16 +1716,6 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
t = pool_elt_at_index (sm->tunnels, pt->tunnel_indices[0]);
- /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */
- memset (&adj, 0, sizeof (adj));
-
- /* Create an adjacency and add to v6 fib */
- adj.lookup_next_index = sm->ip6_lookup_sr_replicate_index;
- adj.explicit_fib_index = ~0;
-
- ap = ip_add_adjacency (lm, &adj, 1 /* one adj */ ,
- &adj_index);
-
/*
* Stick the tunnel index into the rewrite header.
*
@@ -1735,22 +1727,23 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
* We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
* at some point...
*/
- ap->rewrite_header.sw_if_index = t - sm->tunnels;
-
- vec_add1 (add_adj, ap[0]);
+ dpo_id_t dpo = DPO_NULL;
- memcpy (aa.dst_address.as_u8, a->multicast_address,
- sizeof (aa.dst_address.as_u8));
- aa.dst_address_length = 128;
+ dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
- aa.flags = (a->is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD);
- aa.flags |= IP6_ROUTE_FLAG_FIB_INDEX;
- aa.table_index_or_table_id = t->rx_fib_index;
- aa.add_adj = add_adj;
- aa.adj_index = adj_index;
- aa.n_add_adj = 1;
- ip6_add_del_route (im, &aa);
- vec_free (add_adj);
+ /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = *a->multicast_address,
+ }
+ };
+ fib_table_entry_special_dpo_add (t->rx_fib_index,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
u8 *mcast_copy = 0;
mcast_copy = vec_new (ip6_address_t, 1);
@@ -2224,10 +2217,6 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr)
ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
ASSERT (ip6_rewrite_node);
- /* Add a disposition to ip6_lookup for the sr rewrite node */
- sm->ip6_lookup_sr_next_index =
- vlib_node_add_next (vm, ip6_lookup_node->index, sr_rewrite_node.index);
-
#if DPDK > 0 /* Cannot run replicate without DPDK */
/* Add a disposition to sr_replicate for the sr multicast replicate node */
sm->ip6_lookup_sr_replicate_index =
@@ -2244,6 +2233,8 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr)
sm->md = (void *) EVP_get_digestbyname ("sha1");
sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX));
+ sr_dpo_type = dpo_register_new_type (&sr_vft, sr_nodes);
+
return error;
}
@@ -2884,41 +2875,48 @@ static clib_error_t *
set_ip6_sr_rewrite_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- ip6_address_t a;
- ip6_main_t *im = &ip6_main;
- ip_lookup_main_t *lm = &im->lookup_main;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ };
u32 fib_index = 0;
u32 fib_id = 0;
u32 adj_index;
- uword *p;
ip_adjacency_t *adj;
vnet_hw_interface_t *hi;
u32 sw_if_index;
ip6_sr_main_t *sm = &sr_main;
vnet_main_t *vnm = vnet_get_main ();
+ fib_node_index_t fei;
- if (!unformat (input, "%U", unformat_ip6_address, &a))
+ if (!unformat (input, "%U", unformat_ip6_address, &pfx.fp_addr.ip6))
return clib_error_return (0, "ip6 address missing in '%U'",
format_unformat_error, input);
if (unformat (input, "rx-table-id %d", &fib_id))
{
- p = hash_get (im->fib_index_by_table_id, fib_id);
- if (p == 0)
- return clib_error_return (0, "fib-id %d not found");
- fib_index = p[0];
+ fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_id);
+ if (fib_index == ~0)
+ return clib_error_return (0, "fib-id %d not found", fib_id);
}
- adj_index = ip6_fib_lookup_with_table (im, fib_index, &a);
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ return clib_error_return (0, "no match for %U",
+ format_ip6_address, &pfx.fp_addr.ip6);
+
+ adj_index = fib_entry_get_adj_for_source (fei, FIB_SOURCE_SR);
- if (adj_index == lm->miss_adj_index)
- return clib_error_return (0, "no match for %U", format_ip6_address, &a);
+ if (ADJ_INDEX_INVALID == adj_index)
+ return clib_error_return (0, "%U not SR sourced",
+ format_ip6_address, &pfx.fp_addr.ip6);
- adj = ip_get_adjacency (lm, adj_index);
+ adj = adj_get (adj_index);
if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)
return clib_error_return (0, "%U unresolved (not a rewrite adj)",
- format_ip6_address, &a);
+ format_ip6_address, &pfx.fp_addr.ip6);
adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index;
diff --git a/vnet/vnet/sr/sr.h b/vnet/vnet/sr/sr.h
index bd8fa8ebdc0..610b36996f3 100644
--- a/vnet/vnet/sr/sr.h
+++ b/vnet/vnet/sr/sr.h
@@ -199,9 +199,6 @@ typedef struct
/** multicast address to policy mapping */
uword *policy_index_by_multicast_address;
- /** ip6-lookup next index for imposition FIB entries */
- u32 ip6_lookup_sr_next_index;
-
/** hmac key id by shared secret */
uword *hmac_key_by_shared_secret;
diff --git a/vnet/vnet/sr/sr_replicate.c b/vnet/vnet/sr/sr_replicate.c
index 37c39442efd..5fd9ef04b0f 100644
--- a/vnet/vnet/sr/sr_replicate.c
+++ b/vnet/vnet/sr/sr_replicate.c
@@ -32,6 +32,7 @@
#include <vnet/devices/dpdk/dpdk.h>
#include <vnet/dpdk_replication.h>
#include <vnet/ip/ip.h>
+#include <vnet/fib/ip6_fib.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
@@ -76,16 +77,12 @@ format_sr_replicate_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *);
- ip6_main_t *im = &ip6_main;
ip6_sr_main_t *sm = &sr_main;
ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
ip6_fib_t *rx_fib, *tx_fib;
- rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
-
- tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index,
- IP6_ROUTE_FLAG_FIB_INDEX);
+ rx_fib = ip6_fib_get (tun->rx_fib_index);
+ tx_fib = ip6_fib_get (tun->tx_fib_index);
s = format
(s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n"
diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe.c b/vnet/vnet/vxlan-gpe/vxlan_gpe.c
index a2b8978241a..fae481c36bb 100644
--- a/vnet/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/vnet/vnet/vxlan-gpe/vxlan_gpe.c
@@ -18,6 +18,7 @@
*
*/
#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/fib/fib.h>
#include <vnet/ip/format.h>
vxlan_gpe_main_t vxlan_gpe_main;
@@ -419,56 +420,6 @@ int vnet_vxlan_gpe_add_del_tunnel
return 0;
}
-/**
- * @brief Find the IPv4 FIB index from the FIB ID
- *
- * @param fib_id
- *
- * @return fib_index
- *
- */
-static u32 fib4_index_from_fib_id (u32 fib_id)
-{
- ip4_main_t * im = &ip4_main;
- uword * p;
-
- p = hash_get (im->fib_index_by_table_id, fib_id);
- if (!p)
- return ~0;
-
- return p[0];
-}
-
-/**
- * @brief Find the IPv4 FIB index from the FIB ID
- *
- * @param fib_id
- *
- * @return fib_index
- *
- */
-static u32 fib6_index_from_fib_id (u32 fib_id)
-{
- ip6_main_t * im = &ip6_main;
- uword * p;
-
- p = hash_get (im->fib_index_by_table_id, fib_id);
- if (!p)
- return ~0;
-
- return p[0];
-}
-
-/**
- * @brief CLI function for Add/Del of IPv4/IPv6 VXLAN GPE tunnel
- *
- * @param *vm
- * @param *input
- * @param *cmd
- *
- * @return error
- *
- */
static clib_error_t *
vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -525,20 +476,19 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
else if (unformat (line_input, "encap-vrf-id %d", &tmp))
{
if (ipv6_set)
- encap_fib_index = fib6_index_from_fib_id (tmp);
+ encap_fib_index = ip6_fib_index_from_table_id (tmp);
else
- encap_fib_index = fib4_index_from_fib_id (tmp);
+ encap_fib_index = ip4_fib_index_from_table_id (tmp);
if (encap_fib_index == ~0)
return clib_error_return (0, "nonexistent encap fib id %d", tmp);
}
else if (unformat (line_input, "decap-vrf-id %d", &tmp))
{
-
if (ipv6_set)
- decap_fib_index = fib6_index_from_fib_id (tmp);
+ decap_fib_index = ip6_fib_index_from_table_id (tmp);
else
- decap_fib_index = fib4_index_from_fib_id (tmp);
+ decap_fib_index = ip4_fib_index_from_table_id (tmp);
if (decap_fib_index == ~0)
return clib_error_return (0, "nonexistent decap fib id %d", tmp);
diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c
index 32ad7533e58..da359a8d1bb 100644
--- a/vnet/vnet/vxlan/vxlan.c
+++ b/vnet/vnet/vxlan/vxlan.c
@@ -348,11 +348,13 @@ int vnet_vxlan_add_del_tunnel
vnet_sw_interface_set_flags (vnm, sw_if_index,
VNET_SW_INTERFACE_FLAG_ADMIN_UP);
if (!a->is_ip6) {
- vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
- im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
+ vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
+ im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
+ ip4_sw_interface_enable_disable(sw_if_index, 1);
} else {
vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
im6->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
+ ip6_sw_interface_enable_disable(sw_if_index, 1);
}
}
else
@@ -375,13 +377,16 @@ int vnet_vxlan_add_del_tunnel
= L2OUTPUT_NEXT_DEL_TUNNEL;
if (!a->is_ip6)
- hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ {
+ hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ ip4_sw_interface_enable_disable(sw_if_index, 1);
+ }
else
{
hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
clib_mem_free (t->key6);
+ ip6_sw_interface_enable_disable(sw_if_index, 1);
}
-
vec_free (t->rewrite);
pool_put (vxm->tunnels, t);
}