summaryrefslogtreecommitdiffstats
path: root/src/vnet/vxlan-gpe/decap.c
blob: d4c7424630dbd15129e5909548859b52b81af918 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3
""" Container integration tests """

import unittest
from config import config
from framework import VppTestCase, VppTestRunner
from scapy.layers.l2 import Ether
from scapy.packet import Raw
from scapy.layers.inet import IP, UDP, TCP
from scapy.packet import Packet
from socket import inet_pton, AF_INET, AF_INET6
from scapy.layers.inet6 import IPv6, ICMPv6Unknown, ICMPv6EchoRequest
from scapy.layers.inet6 import ICMPv6EchoReply, IPv6ExtHdrRouting
from scapy.layers.inet6 import IPv6ExtHdrFragment
from pprint import pprint
from random import randint
from util import L4_Conn


class Conn(L4_Conn):
    # for now same as L4_Conn
    pass


@unittest.skipUnless(config.extended, "part of extended tests")
class ContainerIntegrationTestCase(VppTestCase):
    """ Container integration extended testcases """

    @classmethod
    def setUpClass(cls):
        super(ContainerIntegrationTestCase, cls).setUpClass()
        # create pg0 and pg1
        cls.create_pg_interfaces(range(2))
        for i in cls.pg_interfaces:
            i.admin_up()
            i.config_ip4()
            i.config_ip6()
            i.resolve_arp()
            i.resolve_ndp()

    @classmethod
    def tearDownClass(cls):
        super(ContainerIntegrationTestCase, cls).tearDownClass()

    def tearDown(self):
        """Run standard test teardown and log various show commands
        """
        super(ContainerIntegrationTestCase, self).tearDown()

    def show_commands_at_teardown(self):
        self.logger.info(self.vapi.cli("show ip neighbors"))

    def run_basic_conn_test(self, af, acl_side):
        """ Basic connectivity test """
        conn1 = Conn(self, self.pg0, self.pg1, af, UDP, 42001, 4242)
        conn1.send_through(0)
        # the return packets should pass
        conn1.send_through(1)

    def run_negative_conn_test(self, af, acl_side):
        """ Packets with local spoofed address """
        conn1 = Conn(self, self.pg0, self.pg1, af, UDP, 42001, 4242)
        try:
            p2 = conn1.send_through(0).command()
        except:
            # If we asserted while waiting, it's good.
            # the conn should have timed out.
            p2 = None
        self.assert_equal(p2, None, ": packet should have been dropped")

    def test_0010_basic_conn_test(self):
        """ IPv4 basic connectivity test """
        self.run_basic_conn_test(AF_INET, 0)

    def test_0011_basic_conn_test(self):
        """ IPv6 basic connectivity test """
        self.run_basic_conn_test(AF_INET6, 0)

    def test_0050_loopback_prepare_test(self):
        """ Create loopbacks overlapping with remote addresses """
        self.create_loopback_interfaces(2)
        for i in range(2):
            intf = self.lo_interfaces[i]
            intf.admin_up()
            intf.local_ip4 = self.pg_interfaces[i].remote_ip4
            intf.local_ip4_prefix_len = 32
            intf.config_ip4()
            intf.local_ip6 = self.pg_interfaces[i].remote_ip6
            intf.local_ip6_prefix_len = 128
            intf.config_ip6()

    def test_0110_basic_conn_test(self):
        """ IPv4 local-spoof connectivity test """
        self.run_negative_conn_test(AF_INET, 0)

    def test_0111_basic_conn_test(self):
        """ IPv6 local-spoof connectivity test """
        self.run_negative_conn_test(AF_INET, 1)

    def test_0200_basic_conn_test(self):
        """ Configure container commands """
        for i in range(2):
            for addr in [self.pg_interfaces[i].remote_ip4,
                         self.pg_interfaces[i].remote_ip6]:
                self.vapi.ppcli("ip container " + addr + " " +
                                self.pg_interfaces[i].name)
                self.vapi.ppcli("stn rule address " + addr +
                                " interface " + self.pg_interfaces[i].name)

    def test_0210_basic_conn_test(self):
        """ IPv4 test after configuring container """
        self.run_basic_conn_test(AF_INET, 0)

    def test_0211_basic_conn_test(self):
        """ IPv6 test after configuring container """
        self.run_basic_conn_test(AF_INET, 1)

    def test_0300_unconfigure_commands(self):
        """ Unconfigure container commands """
        for i in range(2):
            for addr in [self.pg_interfaces[i].remote_ip4,
                         self.pg_interfaces[i].remote_ip6]:
                self.vapi.ppcli("ip container " + addr + " " +
                                self.pg_interfaces[i].name +
                                " del")
                self.vapi.ppcli("stn rule address " + addr +
                                " interface " + self.pg_interfaces[i].name +
                                " del")

    def test_0410_spoof_test(self):
        """ IPv4 local-spoof after unconfig test """
        self.run_negative_conn_test(AF_INET, 0)

    def test_0411_spoof_test(self):
        """ IPv6 local-spoof after unconfig test """
        self.run_negative_conn_test(AF_INET, 1)
a id='n1022' href='#n1022'>1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
/*
 * decap.c - decapsulate VXLAN GPE
 *
 * Copyright (c) 2013 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *  @file
 *  @brief Functions for decapsulating VXLAN GPE tunnels
 *
*/

#include <vlib/vlib.h>
#include <vnet/udp/udp_local.h>
#include <vnet/vxlan-gpe/vxlan_gpe.h>

/**
 * @brief Struct for VXLAN GPE decap packet tracing
 *
 */
typedef struct
{
  u32 next_index;
  u32 tunnel_index;
  u32 error;
} vxlan_gpe_rx_trace_t;

/**
 * @brief Tracing function for VXLAN GPE packet decapsulation
 *
 * @param *s
 * @param *args
 *
 * @return *s
 *
 */
static u8 *
format_vxlan_gpe_rx_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  vxlan_gpe_rx_trace_t *t = va_arg (*args, vxlan_gpe_rx_trace_t *);

  if (t->tunnel_index != ~0)
    {
      s = format (s, "VXLAN-GPE: tunnel %d next %d error %d", t->tunnel_index,
		  t->next_index, t->error);
    }
  else
    {
      s = format (s, "VXLAN-GPE: no tunnel next %d error %d\n", t->next_index,
		  t->error);
    }
  return s;
}

/**
 * @brief Tracing function for VXLAN GPE packet decapsulation including length
 *
 * @param *s
 * @param *args
 *
 * @return *s
 *
 */
static u8 *
format_vxlan_gpe_with_length (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);

  return s;
}

typedef struct
{
  vxlan4_gpe_tunnel_key_t key;
  vxlan_gpe_decap_info_t val;
} vxlan4_gpe_tunnel_cache_t;

static const vxlan_gpe_decap_info_t decap_not_found = {
  .tunnel_index = ~0,
  .next_index = VXLAN_GPE_INPUT_NEXT_DROP,
  .error = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL
};

always_inline vxlan_gpe_decap_info_t
vxlan4_gpe_find_tunnel (vxlan_gpe_main_t *nngm,
			vxlan4_gpe_tunnel_cache_t *cache,
			ip4_vxlan_gpe_header_t *iuvn4_0)
{
  /* Make sure VXLAN GPE tunnel exist according to packet S/D IP, UDP port and
   * VNI */
  vxlan4_gpe_tunnel_key_t key4 = {
    .local = iuvn4_0->ip4.dst_address.as_u32,
    .remote = iuvn4_0->ip4.src_address.as_u32,
    .vni = iuvn4_0->vxlan.vni_res,
    .port = (u32) iuvn4_0->udp.dst_port,
  };

  if (PREDICT_TRUE (key4.as_u64[0] == cache->key.as_u64[0] &&
		    key4.as_u64[1] == cache->key.as_u64[1]))
    {
      /* cache hit */
      return cache->val;
    }

  uword *p = hash_get_mem (nngm->vxlan4_gpe_tunnel_by_key, &key4);
  if (PREDICT_TRUE (p != 0))
    {
      u32 next = (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
		   nngm->decap_next_node_list[iuvn4_0->vxlan.protocol] :
		   VXLAN_GPE_INPUT_NEXT_DROP;

      cache->key.as_u64[0] = key4.as_u64[0];
      cache->key.as_u64[1] = key4.as_u64[1];

      cache->val.error = 0;
      cache->val.tunnel_index = p[0];
      cache->val.next_index = next;

      return cache->val;
    }

  return decap_not_found;
}

typedef struct
{
  vxlan6_gpe_tunnel_key_t key;
  vxlan_gpe_decap_info_t val;
} vxlan6_gpe_tunnel_cache_t;

always_inline vxlan_gpe_decap_info_t
vxlan6_gpe_find_tunnel (vxlan_gpe_main_t *nngm,
			vxlan6_gpe_tunnel_cache_t *cache,
			ip6_vxlan_gpe_header_t *iuvn6_0)
{
  /* Make sure VXLAN GPE tunnel exist according to packet S/D IP, UDP port and
   * VNI */
  vxlan6_gpe_tunnel_key_t key6;

  ip6_address_copy (&key6.local, &iuvn6_0->ip6.dst_address);
  ip6_address_copy (&key6.remote, &iuvn6_0->ip6.src_address);
  key6.vni = iuvn6_0->vxlan.vni_res;
  key6.port = iuvn6_0->udp.dst_port;

  if (PREDICT_TRUE (memcmp (&key6, &cache->key, sizeof (cache->key)) == 0))
    {
      /* cache hit */
      return cache->val;
    }

  uword *p = hash_get_mem (nngm->vxlan6_gpe_tunnel_by_key, &key6);
  if (PREDICT_TRUE (p != 0))
    {
      u32 next = (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
		   nngm->decap_next_node_list[iuvn6_0->vxlan.protocol] :
		   VXLAN_GPE_INPUT_NEXT_DROP;

      clib_memcpy_fast (&cache->key, &key6, sizeof (key6));
      cache->val.error = 0;
      cache->val.tunnel_index = p[0];
      cache->val.next_index = next;

      return cache->val;
    }

  return decap_not_found;
}

/**
 * @brief Common processing for IPv4 and IPv6 VXLAN GPE decap dispatch functions
 *
 * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
 * tunnels are "terminate local". This means that there is no "TX" interface for this
 * decap case, so that field in the buffer_metadata can be "used for something else".
 * The something else in this case is, for the IPv4/IPv6 inner-packet type case, the
 * FIB index used to look up the inner-packet's adjacency.
 *
 *      vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
 *
 * @param *vm
 * @param *node
 * @param *from_frame
 * @param is_ip4
 *
 * @return from_frame->n_vectors
 *
 */
always_inline uword
vxlan_gpe_input (vlib_main_t * vm,
		 vlib_node_runtime_t * node,
		 vlib_frame_t * from_frame, u8 is_ip4)
{
  u32 n_left_from, next_index, *from, *to_next;
  vxlan_gpe_main_t *nngm = &vxlan_gpe_main;
  vnet_main_t *vnm = nngm->vnet_main;
  vnet_interface_main_t *im = &vnm->interface_main;
  vxlan4_gpe_tunnel_cache_t last4;
  vxlan6_gpe_tunnel_cache_t last6;
  u32 pkts_decapsulated = 0;
  u32 thread_index = vm->thread_index;
  u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;

  if (is_ip4)
    clib_memset (&last4, 0xff, sizeof (last4));
  else
    clib_memset (&last6, 0xff, sizeof (last6));

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;

  next_index = node->cached_next_index;
  stats_sw_if_index = node->runtime_data[0];
  stats_n_packets = stats_n_bytes = 0;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  u32 bi0, bi1;
	  vlib_buffer_t *b0, *b1;
	  u32 next0, next1;
	  ip4_vxlan_gpe_header_t *iuvn4_0, *iuvn4_1;
	  ip6_vxlan_gpe_header_t *iuvn6_0, *iuvn6_1;
	  vxlan_gpe_decap_info_t di0, di1;
	  vxlan_gpe_tunnel_t *t0, *t1;
	  u32 error0, error1;
	  u32 sw_if_index0, sw_if_index1, len0, len1;

	  /* Prefetch next iteration. */
	  {
	    vlib_buffer_t *p2, *p3;

	    p2 = vlib_get_buffer (vm, from[2]);
	    p3 = vlib_get_buffer (vm, from[3]);

	    vlib_prefetch_buffer_header (p2, LOAD);
	    vlib_prefetch_buffer_header (p3, LOAD);

	    CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
	    CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
	  }

	  bi0 = from[0];
	  bi1 = from[1];
	  to_next[0] = bi0;
	  to_next[1] = bi1;
	  from += 2;
	  to_next += 2;
	  n_left_to_next -= 2;
	  n_left_from -= 2;

	  b0 = vlib_get_buffer (vm, bi0);
	  b1 = vlib_get_buffer (vm, bi1);

	  if (is_ip4)
	    {
	      /* udp leaves current_data pointing at the vxlan-gpe header */
	      vlib_buffer_advance (b0,
				   -(word) (sizeof (udp_header_t) +
					    sizeof (ip4_header_t)));
	      vlib_buffer_advance (b1,
				   -(word) (sizeof (udp_header_t) +
					    sizeof (ip4_header_t)));

	      iuvn4_0 = vlib_buffer_get_current (b0);
	      iuvn4_1 = vlib_buffer_get_current (b1);

	      /* pop (ip, udp, vxlan) */
	      vlib_buffer_advance (b0, sizeof (*iuvn4_0));
	      vlib_buffer_advance (b1, sizeof (*iuvn4_1));

	      di0 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_0);
	      di1 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_1);
	    }
	  else
	    {
	      /* udp leaves current_data pointing at the vxlan-gpe header */
	      vlib_buffer_advance (b0,
				   -(word) (sizeof (udp_header_t) +
					    sizeof (ip6_header_t)));
	      vlib_buffer_advance (b1,
				   -(word) (sizeof (udp_header_t) +
					    sizeof (ip6_header_t)));

	      iuvn6_0 = vlib_buffer_get_current (b0);
	      iuvn6_1 = vlib_buffer_get_current (b1);

	      /* pop (ip, udp, vxlan) */
	      vlib_buffer_advance (b0, sizeof (*iuvn6_0));
	      vlib_buffer_advance (b1, sizeof (*iuvn6_1));

	      di0 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_0);
	      di1 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_1);
	    }

	  /* Process packet 0 */
	  next0 = di0.next_index;
	  error0 = di0.error;
	  if (error0 != 0)
	    {
	      goto trace0;
	    }

	  t0 = pool_elt_at_index (nngm->tunnels, di0.tunnel_index);

	  sw_if_index0 = t0->sw_if_index;
	  len0 = vlib_buffer_length_in_chain (vm, b0);

	  /* Required to make the l2 tag push / pop code work on l2 subifs */
	  vnet_update_l2_len (b0);

	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;

      /**
       * ip[46] lookup in the configured FIB
       */
	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;

	  pkts_decapsulated++;
	  stats_n_packets += 1;
	  stats_n_bytes += len0;

	  if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
	    {
	      stats_n_packets -= 1;
	      stats_n_bytes -= len0;
	      if (stats_n_packets)
		vlib_increment_combined_counter (im->combined_sw_if_counters +
						 VNET_INTERFACE_COUNTER_RX,
						 thread_index,
						 stats_sw_if_index,
						 stats_n_packets,
						 stats_n_bytes);
	      stats_n_packets = 1;
	      stats_n_bytes = len0;
	      stats_sw_if_index = sw_if_index0;
	    }

	trace0:b0->error = error0 ? node->errors[error0] : 0;

	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      vxlan_gpe_rx_trace_t *tr =
		vlib_add_trace (vm, node, b0, sizeof (*tr));
	      tr->next_index = next0;
	      tr->error = error0;
	      tr->tunnel_index = di0.tunnel_index;
	    }

	  /* Process packet 1 */
	  next1 = di1.next_index;
	  error1 = di1.error;
	  if (error1 != 0)
	    {
	      goto trace1;
	    }

	  t1 = pool_elt_at_index (nngm->tunnels, di1.tunnel_index);

	  sw_if_index1 = t1->sw_if_index;
	  len1 = vlib_buffer_length_in_chain (vm, b1);

	  /* Required to make the l2 tag push / pop code work on l2 subifs */
	  vnet_update_l2_len (b1);

	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
	  vnet_buffer (b1)->sw_if_index[VLIB_RX] = t1->sw_if_index;

	  /*
	   * ip[46] lookup in the configured FIB
	   */
	  vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;

	  pkts_decapsulated++;
	  stats_n_packets += 1;
	  stats_n_bytes += len1;

	  /* Batch stats increment on the same vxlan tunnel so counter
	     is not incremented per packet */
	  if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
	    {
	      stats_n_packets -= 1;
	      stats_n_bytes -= len1;
	      if (stats_n_packets)
		vlib_increment_combined_counter (im->combined_sw_if_counters +
						 VNET_INTERFACE_COUNTER_RX,
						 thread_index,
						 stats_sw_if_index,
						 stats_n_packets,
						 stats_n_bytes);
	      stats_n_packets = 1;
	      stats_n_bytes = len1;
	      stats_sw_if_index = sw_if_index1;
	    }
	  vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;

	trace1:b1->error = error1 ? node->errors[error1] : 0;

	  if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      vxlan_gpe_rx_trace_t *tr =
		vlib_add_trace (vm, node, b1, sizeof (*tr));
	      tr->next_index = next1;
	      tr->error = error1;
	      tr->tunnel_index = di1.tunnel_index;
	    }

	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, bi1, next0,
					   next1);
	}

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip4_vxlan_gpe_header_t *iuvn4_0;
	  ip6_vxlan_gpe_header_t *iuvn6_0;
	  vxlan_gpe_decap_info_t di0;
	  vxlan_gpe_tunnel_t *t0;
	  u32 error0;
	  u32 sw_if_index0, len0;

	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  if (is_ip4)
	    {
	      /* udp leaves current_data pointing at the vxlan-gpe header */
	      vlib_buffer_advance (b0,
				   -(word) (sizeof (udp_header_t) +
					    sizeof (ip4_header_t)));

	      iuvn4_0 = vlib_buffer_get_current (b0);

	      /* pop (ip, udp, vxlan) */
	      vlib_buffer_advance (b0, sizeof (*iuvn4_0));

	      di0 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_0);
	    }
	  else
	    {
	      /* udp leaves current_data pointing at the vxlan-gpe header */
	      vlib_buffer_advance (b0,
				   -(word) (sizeof (udp_header_t) +
					    sizeof (ip6_header_t)));

	      iuvn6_0 = vlib_buffer_get_current (b0);

	      /* pop (ip, udp, vxlan) */
	      vlib_buffer_advance (b0, sizeof (*iuvn6_0));

	      di0 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_0);
		}

	  next0 = di0.next_index;
	  error0 = di0.error;
	  if (error0 != 0)
	    {
	      goto trace00;
	    }

	  t0 = pool_elt_at_index (nngm->tunnels, di0.tunnel_index);

	  sw_if_index0 = t0->sw_if_index;
	  len0 = vlib_buffer_length_in_chain (vm, b0);

	  /* Required to make the l2 tag push / pop code work on l2 subifs */
	  vnet_update_l2_len (b0);

	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;

	  /*
	   * ip[46] lookup in the configured FIB
	   */
	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;

	  pkts_decapsulated++;
	  stats_n_packets += 1;
	  stats_n_bytes += len0;

	  /* Batch stats increment on the same vxlan-gpe tunnel so counter
	     is not incremented per packet */
	  if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
	    {
	      stats_n_packets -= 1;
	      stats_n_bytes -= len0;
	      if (stats_n_packets)
		vlib_increment_combined_counter (im->combined_sw_if_counters +
						 VNET_INTERFACE_COUNTER_RX,
						 thread_index,
						 stats_sw_if_index,
						 stats_n_packets,
						 stats_n_bytes);
	      stats_n_packets = 1;
	      stats_n_bytes = len0;
	      stats_sw_if_index = sw_if_index0;
	    }

	trace00:b0->error = error0 ? node->errors[error0] : 0;

	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      vxlan_gpe_rx_trace_t *tr =
		vlib_add_trace (vm, node, b0, sizeof (*tr));
	      tr->next_index = next0;
	      tr->error = error0;
	      tr->tunnel_index = di0.tunnel_index;
	    }
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm,
			       is_ip4 ? vxlan4_gpe_input_node.index :
			       vxlan6_gpe_input_node.index,
			       VXLAN_GPE_ERROR_DECAPSULATED,
			       pkts_decapsulated);

  /* Increment any remaining batch stats */
  if (stats_n_packets)
    {
      vlib_increment_combined_counter (im->combined_sw_if_counters +
				       VNET_INTERFACE_COUNTER_RX,
				       thread_index, stats_sw_if_index,
				       stats_n_packets, stats_n_bytes);
      node->runtime_data[0] = stats_sw_if_index;
    }
  return from_frame->n_vectors;
}

/**
 * @brief Graph processing dispatch function for IPv4 VXLAN GPE
 *
 * @node vxlan4-gpe-input
 * @param *vm
 * @param *node
 * @param *from_frame
 *
 * @return from_frame->n_vectors
 *
 */
VLIB_NODE_FN (vxlan4_gpe_input_node) (vlib_main_t * vm,
				      vlib_node_runtime_t * node,
				      vlib_frame_t * from_frame)
{
  return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */ 1);
}

#ifndef CLIB_MARCH_VARIANT
void
vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index)
{
  vxlan_gpe_main_t *hm = &vxlan_gpe_main;
  hm->decap_next_node_list[protocol_id] = next_node_index;
  return;
}

void
vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index)
{
  vxlan_gpe_main_t *hm = &vxlan_gpe_main;
  hm->decap_next_node_list[protocol_id] = VXLAN_GPE_INPUT_NEXT_DROP;
  return;
}
#endif /* CLIB_MARCH_VARIANT */

/**
 * @brief Graph processing dispatch function for IPv6 VXLAN GPE
 *
 * @node vxlan6-gpe-input
 * @param *vm
 * @param *node
 * @param *from_frame
 *
 * @return from_frame->n_vectors - uword
 *
 */
VLIB_NODE_FN (vxlan6_gpe_input_node) (vlib_main_t * vm,
				      vlib_node_runtime_t * node,
				      vlib_frame_t * from_frame)
{
  return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */ 0);
}

/**
 * @brief VXLAN GPE error strings
 */
static char *vxlan_gpe_error_strings[] = {
#define vxlan_gpe_error(n,s) s,
#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
#undef vxlan_gpe_error
#undef _
};

VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
  .name = "vxlan4-gpe-input",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
  .error_strings = vxlan_gpe_error_strings,

  .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
  .next_nodes = {
#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
    foreach_vxlan_gpe_input_next
#undef _
  },

  .format_buffer = format_vxlan_gpe_with_length,
  .format_trace = format_vxlan_gpe_rx_trace,
  // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};

VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
  .name = "vxlan6-gpe-input",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
  .error_strings = vxlan_gpe_error_strings,

  .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
  .next_nodes = {
#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
    foreach_vxlan_gpe_input_next
#undef _
  },

  .format_buffer = format_vxlan_gpe_with_length,
  .format_trace = format_vxlan_gpe_rx_trace,
  // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};

typedef enum
{
  IP_VXLAN_BYPASS_NEXT_DROP,
  IP_VXLAN_BYPASS_NEXT_VXLAN,
  IP_VXLAN_BYPASS_N_NEXT,
} ip_vxlan_bypass_next_t;

always_inline uword
ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
			    vlib_node_runtime_t * node,
			    vlib_frame_t * frame, u32 is_ip4)
{
  vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
  u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);
  vtep4_key_t last_vtep4;	/* last IPv4 address / fib index
				   matching a local VTEP address */
  vtep6_key_t last_vtep6;	/* last IPv6 address / fib index
				   matching a local VTEP address */
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;

  vxlan4_gpe_tunnel_cache_t last4;
  vxlan6_gpe_tunnel_cache_t last6;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  vlib_get_buffers (vm, from, bufs, n_left_from);

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  if (is_ip4)
    {
      vtep4_key_init (&last_vtep4);
      clib_memset (&last4, 0xff, sizeof last4);
    }
  else
    {
      vtep6_key_init (&last_vtep6);
      clib_memset (&last6, 0xff, sizeof last6);
    }

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  vlib_buffer_t *b0, *b1;
	  ip4_header_t *ip40, *ip41;
	  ip6_header_t *ip60, *ip61;
	  udp_header_t *udp0, *udp1;
	  ip4_vxlan_gpe_header_t *iuvn4_0, *iuvn4_1;
	  ip6_vxlan_gpe_header_t *iuvn6_0, *iuvn6_1;
	  vxlan_gpe_decap_info_t di0, di1;
	  u32 bi0, ip_len0, udp_len0, flags0, next0;
	  u32 bi1, ip_len1, udp_len1, flags1, next1;
	  i32 len_diff0, len_diff1;
	  u8 error0, good_udp0, proto0;
	  u8 error1, good_udp1, proto1;

	  /* Prefetch next iteration. */
	  {
	    vlib_prefetch_buffer_header (b[2], LOAD);
	    vlib_prefetch_buffer_header (b[3], LOAD);

	    CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
	    CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
	  }

	  bi0 = to_next[0] = from[0];
	  bi1 = to_next[1] = from[1];
	  from += 2;
	  n_left_from -= 2;
	  to_next += 2;
	  n_left_to_next -= 2;

	  b0 = b[0];
	  b1 = b[1];
	  b += 2;
	  if (is_ip4)
	    {
	      ip40 = vlib_buffer_get_current (b0);
	      ip41 = vlib_buffer_get_current (b1);
	    }
	  else
	    {
	      ip60 = vlib_buffer_get_current (b0);
	      ip61 = vlib_buffer_get_current (b1);
	    }

	  /* Setup packet for next IP feature */
	  vnet_feature_next (&next0, b0);
	  vnet_feature_next (&next1, b1);

	  if (is_ip4)
	    {
	      proto0 = ip40->protocol;
	      proto1 = ip41->protocol;
	    }
	  else
	    {
	      proto0 = ip60->protocol;
	      proto1 = ip61->protocol;
	    }

	  /* Process packet 0 */
	  if (proto0 != IP_PROTOCOL_UDP)
	    goto exit0;		/* not UDP packet */

	  if (is_ip4)
	    {
	      udp0 = ip4_next_header (ip40);
	      iuvn4_0 = vlib_buffer_get_current (b0);
	      di0 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_0);
	    }
	  else
	    {
	      udp0 = ip6_next_header (ip60);
	      iuvn6_0 = vlib_buffer_get_current (b0);
	      di0 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_0);
	    }

	  if (PREDICT_FALSE (di0.tunnel_index == ~0))
	    goto exit0; /* unknown interface */

	  /* Validate DIP against VTEPs */
	  if (is_ip4)
	    {
#ifdef CLIB_HAVE_VEC512
	      if (!vtep4_check_vector (&ngm->vtep_table, b0, ip40, &last_vtep4,
				       &ngm->vtep4_u512))
#else
	      if (!vtep4_check (&ngm->vtep_table, b0, ip40, &last_vtep4))
#endif
		goto exit0;	/* no local VTEP for VXLAN packet */
	    }
	  else
	    {
	      if (!vtep6_check (&ngm->vtep_table, b0, ip60, &last_vtep6))
		goto exit0;	/* no local VTEP for VXLAN packet */
	    }

	  flags0 = b0->flags;
	  good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;

	  /* Don't verify UDP checksum for packets with explicit zero checksum. */
	  good_udp0 |= udp0->checksum == 0;

	  /* Verify UDP length */
	  if (is_ip4)
	    ip_len0 = clib_net_to_host_u16 (ip40->length);
	  else
	    ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
	  udp_len0 = clib_net_to_host_u16 (udp0->length);
	  len_diff0 = ip_len0 - udp_len0;

	  /* Verify UDP checksum */
	  if (PREDICT_FALSE (!good_udp0))
	    {
	      if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
		{
		  if (is_ip4)
		    flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
		  else
		    flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
		  good_udp0 =
		    (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
		}
	    }

	  if (is_ip4)
	    {
	      error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
	      error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
	    }
	  else
	    {
	      error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
	      error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
	    }

	  next0 = error0 ?
	    IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
	  b0->error = error0 ? error_node->errors[error0] : 0;

	  /* vxlan_gpe-input node expect current at VXLAN header */
	  if (is_ip4)
	    vlib_buffer_advance (b0,
				 sizeof (ip4_header_t) +
				 sizeof (udp_header_t));
	  else
	    vlib_buffer_advance (b0,
				 sizeof (ip6_header_t) +
				 sizeof (udp_header_t));

	exit0:
	  /* Process packet 1 */
	  if (proto1 != IP_PROTOCOL_UDP)
	    goto exit1;		/* not UDP packet */

	  if (is_ip4)
	    {
	      udp1 = ip4_next_header (ip41);
	      iuvn4_1 = vlib_buffer_get_current (b1);
	      di1 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_1);
	    }
	  else
	    {
	      udp1 = ip6_next_header (ip61);
	      iuvn6_1 = vlib_buffer_get_current (b1);
	      di1 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_1);
	    }

	  if (PREDICT_FALSE (di1.tunnel_index == ~0))
	    goto exit1; /* unknown interface */

	  /* Validate DIP against VTEPs */
	  if (is_ip4)
	    {
#ifdef CLIB_HAVE_VEC512
	      if (!vtep4_check_vector (&ngm->vtep_table, b1, ip41, &last_vtep4,
				       &ngm->vtep4_u512))
#else
	      if (!vtep4_check (&ngm->vtep_table, b1, ip41, &last_vtep4))
#endif
		goto exit1;	/* no local VTEP for VXLAN packet */
	    }
	  else
	    {
	      if (!vtep6_check (&ngm->vtep_table, b1, ip61, &last_vtep6))
		goto exit1;	/* no local VTEP for VXLAN packet */
	    }

	  flags1 = b1->flags;
	  good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;

	  /* Don't verify UDP checksum for packets with explicit zero checksum. */
	  good_udp1 |= udp1->checksum == 0;

	  /* Verify UDP length */
	  if (is_ip4)
	    ip_len1 = clib_net_to_host_u16 (ip41->length);
	  else
	    ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
	  udp_len1 = clib_net_to_host_u16 (udp1->length);
	  len_diff1 = ip_len1 - udp_len1;

	  /* Verify UDP checksum */
	  if (PREDICT_FALSE (!good_udp1))
	    {
	      if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
		{
		  if (is_ip4)
		    flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
		  else
		    flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
		  good_udp1 =
		    (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
		}
	    }

	  if (is_ip4)
	    {
	      error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
	      error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
	    }
	  else
	    {
	      error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
	      error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
	    }

	  next1 = error1 ?
	    IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
	  b1->error = error1 ? error_node->errors[error1] : 0;

	  /* vxlan_gpe-input node expect current at VXLAN header */
	  if (is_ip4)
	    vlib_buffer_advance (b1,
				 sizeof (ip4_header_t) +
				 sizeof (udp_header_t));
	  else
	    vlib_buffer_advance (b1,
				 sizeof (ip6_header_t) +
				 sizeof (udp_header_t));

	exit1:
	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, bi1, next0, next1);
	}

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  vlib_buffer_t *b0;
	  ip4_header_t *ip40;
	  ip6_header_t *ip60;
	  udp_header_t *udp0;
	  ip4_vxlan_gpe_header_t *iuvn4_0;
	  ip6_vxlan_gpe_header_t *iuvn6_0;
	  vxlan_gpe_decap_info_t di0;
	  u32 bi0, ip_len0, udp_len0, flags0, next0;
	  i32 len_diff0;
	  u8 error0, good_udp0, proto0;

	  bi0 = to_next[0] = from[0];
	  from += 1;
	  n_left_from -= 1;
	  to_next += 1;
	  n_left_to_next -= 1;

	  b0 = b[0];
	  b++;
	  if (is_ip4)
	    ip40 = vlib_buffer_get_current (b0);
	  else
	    ip60 = vlib_buffer_get_current (b0);

	  /* Setup packet for next IP feature */
	  vnet_feature_next (&next0, b0);

	  if (is_ip4)
	    proto0 = ip40->protocol;
	  else
	    proto0 = ip60->protocol;

	  if (proto0 != IP_PROTOCOL_UDP)
	    goto exit;		/* not UDP packet */

	  if (is_ip4)
	    {
	      udp0 = ip4_next_header (ip40);
	      iuvn4_0 = vlib_buffer_get_current (b0);
	      di0 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_0);
	    }
	  else
	    {
	      udp0 = ip6_next_header (ip60);
	      iuvn6_0 = vlib_buffer_get_current (b0);
	      di0 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_0);
	    }

	  if (PREDICT_FALSE (di0.tunnel_index == ~0))
	    goto exit; /* unknown interface */

	  /* Validate DIP against VTEPs */

	  if (is_ip4)
	    {
#ifdef CLIB_HAVE_VEC512
	      if (!vtep4_check_vector (&ngm->vtep_table, b0, ip40, &last_vtep4,
				       &ngm->vtep4_u512))
#else
	      if (!vtep4_check (&ngm->vtep_table, b0, ip40, &last_vtep4))
#endif
		goto exit;	/* no local VTEP for VXLAN packet */
	    }
	  else
	    {
	      if (!vtep6_check (&ngm->vtep_table, b0, ip60, &last_vtep6))
		goto exit;	/* no local VTEP for VXLAN packet */
	    }

	  flags0 = b0->flags;
	  good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;

	  /* Don't verify UDP checksum for packets with explicit zero checksum. */
	  good_udp0 |= udp0->checksum == 0;

	  /* Verify UDP length */
	  if (is_ip4)
	    ip_len0 = clib_net_to_host_u16 (ip40->length);
	  else
	    ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
	  udp_len0 = clib_net_to_host_u16 (udp0->length);
	  len_diff0 = ip_len0 - udp_len0;

	  /* Verify UDP checksum */
	  if (PREDICT_FALSE (!good_udp0))
	    {
	      if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
		{
		  if (is_ip4)
		    flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
		  else
		    flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
		  good_udp0 =
		    (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
		}
	    }

	  if (is_ip4)
	    {
	      error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
	      error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
	    }
	  else
	    {
	      error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
	      error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
	    }

	  next0 = error0 ?
	    IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
	  b0->error = error0 ? error_node->errors[error0] : 0;

	  /* vxlan_gpe-input node expect current at VXLAN header */
	  if (is_ip4)
	    vlib_buffer_advance (b0,
				 sizeof (ip4_header_t) +
				 sizeof (udp_header_t));
	  else
	    vlib_buffer_advance (b0,
				 sizeof (ip6_header_t) +
				 sizeof (udp_header_t));

	exit:
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

VLIB_NODE_FN (ip4_vxlan_gpe_bypass_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}

VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
  .name = "ip4-vxlan-gpe-bypass",
  .vector_size = sizeof (u32),

  .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
  .next_nodes = {
    [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
    [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-gpe-input",
  },

  .format_buffer = format_ip4_header,
  .format_trace = format_ip4_forward_next_trace,
};

#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
clib_error_t *
ip4_vxlan_gpe_bypass_init (vlib_main_t * vm)
{
  return 0;
}

VLIB_INIT_FUNCTION (ip4_vxlan_gpe_bypass_init);
#endif /* CLIB_MARCH_VARIANT */

VLIB_NODE_FN (ip6_vxlan_gpe_bypass_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}

VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
  .name = "ip6-vxlan-gpe-bypass",
  .vector_size = sizeof (u32),

  .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
  .next_nodes = {
    [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
    [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-gpe-input",
  },

  .format_buffer = format_ip6_header,
  .format_trace = format_ip6_forward_next_trace,
};

#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
clib_error_t *
ip6_vxlan_gpe_bypass_init (vlib_main_t * vm)
{
  return 0;
}

VLIB_INIT_FUNCTION (ip6_vxlan_gpe_bypass_init);
#endif /* CLIB_MARCH_VARIANT */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */