summaryrefslogtreecommitdiffstats
path: root/src
AgeCommit message (Expand)AuthorFilesLines
2017-08-18Python API: Fix error message typo.Ole Troan1-1/+1
2017-08-18API: More gracefully fail when opening shared memory segment fails.Ole Troan3-8/+15
2017-08-18IP4_FIB stats API not indexing after FIB changes - VPP-951Keith Burns (alagalah)1-1/+3
2017-08-18feature: exit on misconfigured features (VPP-915)Florin Coras1-3/+7
2017-08-18Use correct msg ID in the sw-interface-event from TAP and VHOSTNeale Ranns2-2/+2
2017-08-18Fix vpp crash when adding macip aclPavel Kotucek1-1/+1
2017-08-18Fix small issues in ACL apiPavel Kotucek1-3/+11
2017-08-18acl-plugin: time out the sessions created by main thread too (VPP-948)Andrew Yourtchenko4-13/+28
2017-08-17TCP source address automationDave Barach8-17/+486
2017-08-17Python API: VPP-947 Empty chroot_prefix fails on encode()Ole Troan3-4/+209
2017-08-16jvpp: suppress unwritten fields warrning found in DTO's hashCodeMarek Gradzki2-0/+42
2017-08-16ip4: cleanup ip localFlorin Coras1-153/+85
2017-08-16tcp: fix v6 sessionsroot7-81/+90
2017-08-16SNAT: Make proto optional in nat64_bib_dump (VPP-942)Matus Fabian4-45/+115
2017-08-16No context in SW interface eventNeale Ranns2-3/+8
2017-08-16tcp: improve builtin http serverFlorin Coras5-197/+108
2017-08-16Add missing counters for P2P ethernet interfacesPavel Kotucek2-0/+40
2017-08-16NAT64: Fallback to 3-tuple key for non TCP/UDP sessions (VPP-884)Matus Fabian10-134/+770
2017-08-16VCL: copy complete ip addr to/from vpe-api buf.Dave Wallace1-18/+7
2017-08-16jvpp: move JVppReply's id out of synchronized blockMarek Gradzki2-15/+17
2017-08-15Support proxy ARP on mirrored TAP interfacesNeale Ranns1-4/+16
2017-08-15acl-plugin: don't attempt to delete the ACLs on interface deletion if ACL plu...Andrew Yourtchenko1-0/+4
2017-08-15IPv6 local-address for P2P ethernet interfacesPavel Kotucek1-1/+2
2017-08-15VPP-939: Update CLI Helptext for rx-placement commandsBilly McFall2-94/+293
2017-08-15tcp: state machine improvementsFlorin Coras7-23/+40
2017-08-15coverity: remove dependency on uint32_t in fib_test.cMarek Gradzki1-1/+1
2017-08-14TCP: Update time_now once per burstFlorin Coras4-3/+15
2017-08-14FIB table add/delete API onlyNeale Ranns5-0/+163
2017-08-14TCP: update builtin server/client cli helpFlorin Coras2-5/+19
2017-08-14jvpp: ignore messages if callback method is missing (VPP-548)Marek Gradzki1-0/+8
2017-08-14dpdk: cleanup unused build option *_uses_dpdk_cryptodev_swSergio Gonzalez Monroy1-11/+4
2017-08-14jvpp: make shm_prefix configurable (VPP-591)Jan Srnicek5-24/+62
2017-08-13default update adjacency function deos not return multicast adjacencyNeale Ranns1-6/+40
2017-08-12LISP: fix fid nsh address formattingFlorin Coras1-1/+8
2017-08-11Fix tcp multi buffer segments retransmissionFlorin Coras8-155/+304
2017-08-11Fix IP fragment-id and offset issue in LB-GRE4Hongjun Ni1-0/+2
2017-08-11LISP: fix wrong reply message in map_register_fallback_threshold callFilip Tehlar1-1/+1
2017-08-11Dedicated SW Interface EventNeale Ranns11-43/+63
2017-08-11the automatic addition of IP address to the magic TAP interface should only h...Neale Ranns1-0/+21
2017-08-11Add VPP Communications Library (VCL)Dave Wallace9-1/+4795
2017-08-11acl-plugin: add the debug CLI to show macip ACLs and where they are applied (...Andrew Yourtchenko1-0/+76
2017-08-10Fix memory leaks found in policer code.Chaoyu Jin1-0/+34
2017-08-10Fix LISP cp buffer leakageFlorin Coras1-1/+3
2017-08-10acl-plugin: hash lookup bitmask not cleared when ACL is unapplied from interf...Andrew Yourtchenko3-4/+31
2017-08-10Improve the svm fifo allocatorDave Barach6-20/+151
2017-08-10acl-plugin: avoid crash in multithreaded setup adding/deleting ACLs with traf...Andrew Yourtchenko3-0/+83
2017-08-10MPLS tunnel - multiple labels on the CLI - fix cut and paste robot errorsNeale Ranns1-13/+4
2017-08-09VPP-933 VPP crashes when deleting an ARP entryMatthew Smith1-0/+7
2017-08-10TCP proxy prototypeDave Barach16-212/+801
2017-08-10acl-plugin: all TCP sessions treated as transient (VPP-932)Andrew Yourtchenko1-3/+6
5555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* 
Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "map.h"

#include <vnet/ip/ip_frag.h>
#include <vnet/ip/ip4_to_ip6.h>

typedef enum
{
  IP4_MAPT_NEXT_MAPT_TCP_UDP,
  IP4_MAPT_NEXT_MAPT_ICMP,
  IP4_MAPT_NEXT_MAPT_FRAGMENTED,
  IP4_MAPT_NEXT_ICMP_ERROR,
  IP4_MAPT_NEXT_DROP,
  IP4_MAPT_N_NEXT
} ip4_mapt_next_t;

typedef enum
{
  IP4_MAPT_ICMP_NEXT_IP6_LOOKUP,
  IP4_MAPT_ICMP_NEXT_IP6_REWRITE,
  IP4_MAPT_ICMP_NEXT_IP6_FRAG,
  IP4_MAPT_ICMP_NEXT_DROP,
  IP4_MAPT_ICMP_N_NEXT
} ip4_mapt_icmp_next_t;

typedef enum
{
  IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP,
  IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE,
  IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG,
  IP4_MAPT_TCP_UDP_NEXT_DROP,
  IP4_MAPT_TCP_UDP_N_NEXT
} ip4_mapt_tcp_udp_next_t;

typedef enum
{
  IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP,
  IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE,
  IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG,
  IP4_MAPT_FRAGMENTED_NEXT_DROP,
  IP4_MAPT_FRAGMENTED_N_NEXT
} ip4_mapt_fragmented_next_t;

/*
 * MAP-T pseudo header.
 *
 * The "ip4-map-t" node prepends this structure to the packet data to hand
 * the already computed IPv6 addresses to the per-protocol translation
 * nodes; the buffer metadata is too small to carry structures of this
 * size. The TCP/UDP and fragment nodes read the addresses from it, the
 * ICMP node simply skips over it.
 */
/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
  ip6_address_t daddr;	/* translated IPv6 destination address */
  ip6_address_t saddr;	/* translated IPv6 source address */
  /*
   * Padding: the IPv6 header + fragmentation header built by the
   * translation nodes will be written here, i.e.
   * sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4) bytes in front of the
   * IPv4 header, so that they never overwrite daddr/saddr above.
   */
  u8 unused[28];
}) ip4_mapt_pseudo_header_t;
/* *INDENT-ON* */

/*
 * Context passed (as the opaque "arg" pointer) to the address-setting
 * callbacks that ip4_map_t_icmp() hands to icmp_to_icmp6().
 */
typedef struct
{
  map_domain_t *d;		/* MAP domain the packet belongs to */
  u16 recv_port;		/* port value used to derive the MAP IPv6 address */
} icmp_to_icmp6_ctx_t;

/*
 * icmp_to_icmp6() callback for the outer header: fills in the IPv6
 * source and destination addresses from the IPv4 ones.
 *
 * The source becomes the embedded-address form of the IPv4 source; the
 * destination is derived from the IPv4 destination and the port stored
 * in the context.
 *
 * b   - buffer being translated (not used here).
 * ip4 - IPv4 header being translated.
 * ip6 - IPv6 header under construction.
 * arg - icmp_to_icmp6_ctx_t supplied by the caller.
 *
 * Always returns 0.
 */
static int
ip4_to_ip6_set_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4,
			ip6_header_t * ip6, void *arg)
{
  icmp_to_icmp6_ctx_t *xlate = (icmp_to_icmp6_ctx_t *) arg;
  map_domain_t *domain = xlate->d;
  u16 port = xlate->recv_port;

  /* IPv6 source: IPv4 source embedded per the domain's rules. */
  ip4_map_t_embedded_address (domain, &ip6->src_address, &ip4->src_address);

  /* IPv6 destination: prefix half first, then suffix half. */
  ip6->dst_address.as_u64[0] =
    map_get_pfx_net (domain, ip4->dst_address.as_u32, port);
  ip6->dst_address.as_u64[1] =
    map_get_sfx_net (domain, ip4->dst_address.as_u32, port);

  return 0;
}

/*
 * icmp_to_icmp6() callback for the inner (embedded) header of an ICMP
 * error: fills in the IPv6 addresses of the inner packet.
 *
 * The roles are mirrored compared to the outer header: the inner source
 * lies inside the MAP domain, the inner destination lies outside of it.
 *
 * b   - buffer being translated (not used here).
 * ip4 - inner IPv4 header.
 * ip6 - inner IPv6 header under construction.
 * arg - icmp_to_icmp6_ctx_t supplied by the caller.
 *
 * Always returns 0.
 */
static int
ip4_to_ip6_set_inner_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4,
			      ip6_header_t * ip6, void *arg)
{
  icmp_to_icmp6_ctx_t *xlate = (icmp_to_icmp6_ctx_t *) arg;
  map_domain_t *domain = xlate->d;
  u16 port = xlate->recv_port;
  ip4_address_t inner_dst;
  u32 inner_src;

  /* Snapshot both IPv4 addresses before any IPv6 field is written. */
  inner_src = ip4->src_address.as_u32;
  inner_dst.as_u32 = ip4->dst_address.as_u32;

  /* Destination is outside the domain: embedded-address form. */
  ip4_map_t_embedded_address (domain, &ip6->dst_address, &inner_dst);

  /* Source is inside the domain: derived from address and port. */
  ip6->src_address.as_u64[0] = map_get_pfx_net (domain, inner_src, port);
  ip6->src_address.as_u64[1] = map_get_sfx_net (domain, inner_src, port);

  return 0;
}

/*
 * Node function of "ip4-map-t-icmp".
 *
 * For every buffer: skips the MAP-T pseudo header, translates the ICMPv4
 * packet to ICMPv6 through icmp_to_icmp6(), then selects the IPv6 next
 * node (fragmentation, load-balance bypass or lookup). Packets that cannot
 * be translated are dropped with MAP_ERROR_ICMP. Successfully translated
 * packets are accounted in the domain TX counter using the original IPv4
 * total length.
 *
 * Returns the number of buffers in the frame.
 */
static uword
ip4_map_t_icmp (vlib_main_t * vm,
		vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  vlib_node_runtime_t *err_rt =
    vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index);
  vlib_combined_counter_main_t *counters = map_main.domain_counters;
  u32 thread_index = vm->thread_index;
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 *to_next, n_left_to_next;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  ip4_mapt_icmp_next_t next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP;
	  u8 error0 = MAP_ERROR_NONE;
	  icmp_to_icmp6_ctx_t ctx0;
	  vlib_buffer_t *b0;
	  ip4_header_t *ip40;
	  u32 bi0;
	  u16 len0;

	  /* Speculatively enqueue to the current next frame. */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  /* The pseudo header is not used by this node: step over it. */
	  vlib_buffer_advance (b0, sizeof (ip4_mapt_pseudo_header_t));
	  ip40 = vlib_buffer_get_current (b0);

	  /* Remember the IPv4 length now; translation rewrites the header. */
	  len0 = clib_net_to_host_u16 (ip40->length);

	  ctx0.d =
	    pool_elt_at_index (map_main.domains,
			       vnet_buffer (b0)->map_t.map_domain_index);
	  ctx0.recv_port = ip4_get_port (ip40, 0);

	  if (ctx0.recv_port == 0
	      && !(ctx0.d->ea_bits_len == 0 && ctx0.d->rules))
	    {
	      /* No port available and the domain is not a 1:1 mapping. */
	      error0 = MAP_ERROR_ICMP;
	    }
	  else if (icmp_to_icmp6
		   (b0, ip4_to_ip6_set_icmp_cb, &ctx0,
		    ip4_to_ip6_set_inner_icmp_cb, &ctx0))
	    {
	      error0 = MAP_ERROR_ICMP;
	    }
	  else if (vnet_buffer (b0)->map_t.mtu < b0->current_length)
	    {
	      /* Translated packet exceeds the domain MTU: fragment it. */
	      vnet_buffer (b0)->ip_frag.mtu = vnet_buffer (b0)->map_t.mtu;
	      vnet_buffer (b0)->ip_frag.next_index = IP_FRAG_NEXT_IP6_LOOKUP;
	      next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
	    }
	  else if (ip4_map_ip6_lookup_bypass (b0, NULL))
	    {
	      next0 = IP4_MAPT_ICMP_NEXT_IP6_REWRITE;
	    }

	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
	    {
	      vlib_increment_combined_counter (counters +
					       MAP_DOMAIN_COUNTER_TX,
					       thread_index,
					       vnet_buffer (b0)->
					       map_t.map_domain_index, 1,
					       len0);
	    }
	  else
	    {
	      next0 = IP4_MAPT_ICMP_NEXT_DROP;
	    }

	  b0->error = err_rt->errors[error0];
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next, bi0,
					   next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

/*
 * Translate a fragmented IPv4 packet to IPv6, in place.
 *
 * Called by ip4_map_t_fragmented() for packets the classifier routed to
 * the "fragmented" path (non-zero IPv4 fragment offset).
 *
 * p       - buffer whose current data points at the IPv4 header (the
 *           caller has already stepped over the MAP-T pseudo header).
 * pheader - pseudo header carrying the precomputed IPv6 source and
 *           destination addresses.
 *
 * An IPv6 header followed by an IPv6 fragment header is built so that the
 * fragment header ends exactly where the IPv4 header ended; the buffer's
 * current-data pointer is moved to the start of the new IPv6 header.
 *
 * NOTE(review): the new headers are written over / directly in front of
 * the IPv4 header being read, so the order of the statements below looks
 * significant (all IPv4 fields are read before the IPv6 addresses are
 * stored) - confirm before reordering anything.
 *
 * Returns 0; there is no failure path in this implementation.
 */
always_inline int
map_ip4_to_ip6_fragmented (vlib_buffer_t * p,
			   ip4_mapt_pseudo_header_t * pheader)
{
  ip4_header_t *ip4;
  ip6_header_t *ip6;
  ip6_frag_hdr_t *frag;

  ip4 = vlib_buffer_get_current (p);
  /* Fragment header occupies the last sizeof(*frag) bytes of the IPv4 header. */
  frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
  /* IPv6 header sits immediately in front of the fragment header. */
  ip6 =
    (ip6_header_t *) u8_ptr_add (ip4,
				 sizeof (*ip4) - sizeof (*frag) -
				 sizeof (*ip6));
  /* Negative advance: current data now starts at the IPv6 header. */
  vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));

  //We know that the protocol was one of ICMP, TCP or UDP
  //because the first fragment was found and cached
  frag->next_hdr =
    (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol;
  frag->identification = frag_id_4to6 (ip4->fragment_id);
  frag->rsv = 0;
  /* Carry over the IPv4 fragment offset and the "more fragments" flag. */
  frag->fragment_offset_and_more =
    ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4),
				  clib_net_to_host_u16
				  (ip4->flags_and_fragment_offset) &
				  IP4_HEADER_FLAG_MORE_FRAGMENTS);

  /* Version 6, traffic class copied from the IPv4 TOS, flow label 0. */
  ip6->ip_version_traffic_class_and_flow_label =
    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
  /* IPv6 payload = IPv4 payload + the added fragment header. */
  ip6->payload_length =
    clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
			  sizeof (*ip4) + sizeof (*frag));
  ip6->hop_limit = ip4->ttl;
  ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;

  /* Addresses were computed by "ip4-map-t" and stored in the pseudo header. */
  ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0];
  ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1];
  ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0];
  ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1];

  return 0;
}

/*
 * Node function of "ip4-map-t-fragmented".
 *
 * For every buffer: reads the MAP-T pseudo header, steps over it,
 * translates the IPv4 fragment to IPv6 and selects the IPv6 next node
 * (fragmentation when the result exceeds the domain MTU, otherwise
 * load-balance bypass or lookup). A translation failure drops the packet
 * with MAP_ERROR_FRAGMENT_DROPPED.
 *
 * Returns the number of buffers in the frame.
 */
static uword
ip4_map_t_fragmented (vlib_main_t * vm,
		      vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  vlib_node_runtime_t *err_rt =
    vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index);
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 *to_next, n_left_to_next;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  ip4_mapt_fragmented_next_t next0 =
	    IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
	  ip4_mapt_pseudo_header_t *hdr0;
	  vlib_buffer_t *b0;
	  u32 bi0;

	  /* Speculatively enqueue to the current next frame. */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  /* Pseudo header is at the front; the IPv4 header follows it. */
	  hdr0 = vlib_buffer_get_current (b0);
	  vlib_buffer_advance (b0, sizeof (*hdr0));

	  if (map_ip4_to_ip6_fragmented (b0, hdr0) == 0)
	    {
	      if (vnet_buffer (b0)->map_t.mtu < b0->current_length)
		{
		  /* Too big for the domain MTU: hand over to ip6-frag. */
		  vnet_buffer (b0)->ip_frag.mtu = vnet_buffer (b0)->map_t.mtu;
		  vnet_buffer (b0)->ip_frag.next_index =
		    IP_FRAG_NEXT_IP6_LOOKUP;
		  next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
		}
	      else if (ip4_map_ip6_lookup_bypass (b0, NULL))
		{
		  next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE;
		}
	    }
	  else
	    {
	      b0->error = err_rt->errors[MAP_ERROR_FRAGMENT_DROPPED];
	      next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP;
	    }

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next, bi0,
					   next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

/*
 * Translate an IPv4 UDP/TCP packet to IPv6, in place.
 *
 * p       - buffer whose current data points at the IPv4 header (the
 *           caller has already stepped over the MAP-T pseudo header).
 * pheader - pseudo header carrying the precomputed IPv6 source and
 *           destination addresses.
 *
 * Any protocol other than UDP is handled as TCP; the classifier in
 * "ip4-map-t" only sends TCP and UDP packets here.
 *
 * The IPv6 header (plus a fragment header when the IPv4 "more fragments"
 * flag is set) is written so that it ends where the IPv4 header ended,
 * and the L4 checksum is updated for the address change.
 *
 * NOTE(review): the new headers are written over / directly in front of
 * the IPv4 header being read, which is presumably why the IPv4 addresses
 * are copied to locals first - confirm before reordering anything.
 *
 * Returns 0; there is no failure path in this implementation.
 */
always_inline int
map_ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_mapt_pseudo_header_t * pheader)
{
  map_main_t *mm = &map_main;
  ip4_header_t *ip4;
  ip6_header_t *ip6;
  ip_csum_t csum;
  u16 *checksum;
  ip6_frag_hdr_t *frag;
  u32 frag_id;			/* only set and used on the fragment path */
  ip4_address_t old_src, old_dst;

  ip4 = vlib_buffer_get_current (p);

  if (ip4->protocol == IP_PROTOCOL_UDP)
    {
      udp_header_t *udp = ip4_next_header (ip4);
      checksum = &udp->checksum;

      /*
       * UDP checksum is optional over IPv4 but mandatory for IPv6 We
       * do not check udp->length sanity but use our safe computed
       * value instead
       */
      if (PREDICT_FALSE (!*checksum))
	{
	  /* UDP length derived from the IPv4 total length. */
	  u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
	  /* Sum over UDP header + payload, then the IPv4 pseudo header. */
	  csum = ip_incremental_checksum (0, udp, udp_len);
	  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
	  csum =
	    ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP));
	  /* 64-bit read starting at src_address; assumes dst_address
	   * immediately follows it, covering both addresses at once. */
	  csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
	  *checksum = ~ip_csum_fold (csum);
	}
    }
  else
    {
      tcp_header_t *tcp = ip4_next_header (ip4);
      if (mm->tcp_mss > 0)
	{
	  /* An MSS value is configured: let map_mss_clamping() adjust the
	   * segment and the running checksum, then store the result. */
	  csum = tcp->checksum;
	  map_mss_clamping (tcp, &csum, mm->tcp_mss);
	  tcp->checksum = ip_csum_fold (csum);
	}
      checksum = &tcp->checksum;
    }

  /* Keep the IPv4 addresses for the incremental checksum update below. */
  old_src.as_u32 = ip4->src_address.as_u32;
  old_dst.as_u32 = ip4->dst_address.as_u32;

  /* Deal with fragmented packets */
  if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
		     clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
    {
      /* "More fragments" is set: make room for IPv6 + fragment header. */
      ip6 =
	(ip6_header_t *) u8_ptr_add (ip4,
				     sizeof (*ip4) - sizeof (*ip6) -
				     sizeof (*frag));
      frag =
	(ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
      frag_id = frag_id_4to6 (ip4->fragment_id);
      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
    }
  else
    {
      /* Not fragmented: the IPv6 header alone replaces the IPv4 header. */
      ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
      frag = NULL;
    }

  /* Version 6, traffic class copied from the IPv4 TOS, flow label 0. */
  ip6->ip_version_traffic_class_and_flow_label =
    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
  /* IPv6 payload length = IPv4 total length minus the IPv4 header. */
  ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
  ip6->hop_limit = ip4->ttl;
  ip6->protocol = ip4->protocol;
  if (PREDICT_FALSE (frag != NULL))
    {
      /* Fragment header: offset 0, "more fragments" set. */
      frag->next_hdr = ip6->protocol;
      frag->identification = frag_id;
      frag->rsv = 0;
      frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
      ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
      ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
    }

  /* Addresses were computed by "ip4-map-t" and stored in the pseudo header. */
  ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0];
  ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1];
  ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0];
  ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1];

  /* Incremental L4 checksum update: remove the IPv4 addresses from the
   * sum and add the IPv6 ones. */
  csum = ip_csum_sub_even (*checksum, old_src.as_u32);
  csum = ip_csum_sub_even (csum, old_dst.as_u32);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
  *checksum = ip_csum_fold (csum);

  return 0;
}

/*
 * Node function of "ip4-map-t-tcp-udp".
 *
 * For every buffer: reads the MAP-T pseudo header, steps over it,
 * translates the IPv4 TCP/UDP packet to IPv6 and selects the IPv6 next
 * node (fragmentation when the result exceeds the domain MTU, otherwise
 * load-balance bypass or lookup). A translation failure drops the packet
 * with MAP_ERROR_UNKNOWN.
 *
 * Returns the number of buffers in the frame.
 */
static uword
ip4_map_t_tcp_udp (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  vlib_node_runtime_t *err_rt =
    vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index);
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 next_index = node->cached_next_index;
  u32 *to_next, n_left_to_next;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  ip4_mapt_tcp_udp_next_t next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
	  ip4_mapt_pseudo_header_t *hdr0;
	  vlib_buffer_t *b0;
	  u32 bi0;

	  /* Speculatively enqueue to the current next frame. */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  /* Pseudo header is at the front; the IPv4 header follows it. */
	  hdr0 = vlib_buffer_get_current (b0);
	  vlib_buffer_advance (b0, sizeof (*hdr0));

	  if (map_ip4_to_ip6_tcp_udp (b0, hdr0) == 0)
	    {
	      if (vnet_buffer (b0)->map_t.mtu < b0->current_length)
		{
		  /* Too big for the domain MTU: hand over to ip6-frag. */
		  vnet_buffer (b0)->ip_frag.mtu = vnet_buffer (b0)->map_t.mtu;
		  vnet_buffer (b0)->ip_frag.next_index =
		    IP_FRAG_NEXT_IP6_LOOKUP;
		  next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
		}
	      else if (ip4_map_ip6_lookup_bypass (b0, NULL))
		{
		  next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE;
		}
	    }
	  else
	    {
	      b0->error = err_rt->errors[MAP_ERROR_UNKNOWN];
	      next0 = IP4_MAPT_TCP_UDP_NEXT_DROP;
	    }

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next, bi0,
					   next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

/*
 * Classify an IPv4 packet for MAP-T translation.
 *
 * Chooses the per-protocol translation node and the port that will be
 * used to derive the MAP IPv6 destination address.
 *
 * p0          - buffer; map_t.checksum_offset is set for TCP and UDP.
 * d0          - MAP domain of the packet.
 * ip40        - IPv4 header of the packet.
 * ip4_len0    - IPv4 total length, host byte order.
 * dst_port0   - out: port for the address computation. Left untouched for
 *               ICMP types other than echo request/reply (unless the
 *               domain is a 1:1 mapping) and for unsupported protocols.
 * error0      - in/out: set to MAP_ERROR_MALFORMED for TCP/UDP packets
 *               shorter than the minimum header sizes, and to
 *               MAP_ERROR_BAD_PROTOCOL for unsupported protocols; otherwise
 *               left as it was.
 * next0       - out: next node; not written for unsupported protocols.
 * l4_dst_port - L4 destination port taken from the buffer metadata by the
 *               caller.
 */
static_always_inline void
ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0,
		    ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0,
		    u8 * error0, ip4_mapt_next_t * next0, u16 l4_dst_port)
{
  if (PREDICT_FALSE (ip4_get_fragment_offset (ip40)))
    {
      *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED;
      /*
       * In case of 1:1 mapping the port is irrelevant, otherwise use the
       * port provided by the caller. The previous "== -1" test on the
       * stored value was removed: a u16 widened to i32 can never be -1,
       * so MAP_ERROR_FRAGMENT_MEMORY could never be raised from here.
       */
      if (d0->ea_bits_len == 0 && d0->rules)
	*dst_port0 = 0;
      else
	*dst_port0 = l4_dst_port;
    }
  else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP))
    {
      /* 36: offset of the TCP checksum from the start of the IPv4 header
       * (NOTE(review): 20-byte IPv4 header + 16, confirm).
       * 40: presumably minimum IPv4 + TCP header size. */
      vnet_buffer (p0)->map_t.checksum_offset = 36;
      *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
      *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0;
      *dst_port0 = l4_dst_port;
    }
  else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP))
    {
      /* 26: offset of the UDP checksum from the start of the IPv4 header
       * (NOTE(review): 20-byte IPv4 header + 6, confirm).
       * 28: presumably minimum IPv4 + UDP header size. */
      vnet_buffer (p0)->map_t.checksum_offset = 26;
      *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
      *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0;
      *dst_port0 = l4_dst_port;
    }
  else if (ip40->protocol == IP_PROTOCOL_ICMP)
    {
      /* ICMP header directly follows the (option-less) IPv4 header. */
      icmp46_header_t *icmp0 =
	(icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40));

      *next0 = IP4_MAPT_NEXT_MAPT_ICMP;
      if (d0->ea_bits_len == 0 && d0->rules)
	*dst_port0 = 0;
      else if (icmp0->type == ICMP4_echo_reply
	       || icmp0->type == ICMP4_echo_request)
	*dst_port0 = l4_dst_port;
      /* Other ICMP types: *dst_port0 keeps the caller's value. */
    }
  else
    {
      *error0 = MAP_ERROR_BAD_PROTOCOL;
    }
}

/*
 * Node function of "ip4-map-t": entry point of IPv4 -> IPv6 MAP-T
 * translation on the "ip4-unicast" feature arc.
 *
 * For every buffer it:
 *  - looks up the MAP domain from the IPv4 destination address; packets
 *    without a domain continue along the feature arc untouched;
 *  - hands packets with TTL 1, or with DF set that would not fit the
 *    domain MTU once translated, to "ip4-icmp-error";
 *  - classifies the packet (TCP/UDP, ICMP, fragment) and applies the
 *    port security check;
 *  - prepends an ip4_mapt_pseudo_header_t holding the computed IPv6
 *    source and destination addresses for the per-protocol node.
 *
 * Returns the number of buffers in the frame.
 */
static uword
ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_map_t_node.index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  vlib_combined_counter_main_t *cm = map_main.domain_counters;
  u32 thread_index = vm->thread_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 pi0;
	  vlib_buffer_t *p0;
	  ip4_header_t *ip40;
	  map_domain_t *d0;
	  ip4_mapt_next_t next0 = 0;
	  u16 ip4_len0;
	  u8 error0;
	  i32 dst_port0;
	  ip4_mapt_pseudo_header_t *pheader0;

	  /* Speculatively enqueue to the current next frame. */
	  pi0 = to_next[0] = from[0];
	  from += 1;
	  n_left_from -= 1;
	  to_next += 1;
	  n_left_to_next -= 1;
	  error0 = MAP_ERROR_NONE;

	  p0 = vlib_get_buffer (vm, pi0);

	  /* L4 destination port from the buffer metadata; presumably filled
	   * in by "ip4-sv-reassembly-feature", which this feature is
	   * ordered after - TODO confirm. */
	  u16 l4_dst_port = vnet_buffer (p0)->ip.reass.l4_dst_port;

	  ip40 = vlib_buffer_get_current (p0);
	  ip4_len0 = clib_host_to_net_u16 (ip40->length);
	  /* Reject truncated buffers and IPv4 headers carrying options
	   * (anything other than version 4, header length 5 words). */
	  if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
			     ip40->ip_version_and_header_length != 0x45))
	    {
	      error0 = MAP_ERROR_UNKNOWN;
	    }

	  d0 = ip4_map_get_domain (&ip40->dst_address,
				   &vnet_buffer (p0)->map_t.map_domain_index,
				   &error0);

	  if (!d0)
	    {			/* Guess it wasn't for us */
	      /* Continue with the next feature on the arc; no tracing. */
	      vnet_feature_next (&next0, p0);
	      goto exit;
	    }

	  /* -1 = no port determined (yet). */
	  dst_port0 = -1;

	  if (PREDICT_FALSE (ip40->ttl == 1))
	    {
	      /* TTL would expire: answer with ICMP time exceeded. */
	      icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
					   ICMP4_time_exceeded_ttl_exceeded_in_transit,
					   0);
	      p0->error = error_node->errors[MAP_ERROR_TIME_EXCEEDED];
	      next0 = IP4_MAPT_NEXT_ICMP_ERROR;
	      goto trace;
	    }

	  bool df0 =
	    ip40->flags_and_fragment_offset &
	    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);

	  /* A domain MTU of 0 means "no limit". */
	  vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;

	  /* DF set (and honoured) but the packet, grown by the IPv6/IPv4
	   * header size difference, would exceed the domain MTU: answer
	   * with ICMP "fragmentation needed", advertising the MTU reduced
	   * by that same difference. */
	  if (PREDICT_FALSE
	      (df0 && !map_main.frag_ignore_df
	       &&
	       ((ip4_len0 +
		 (sizeof (ip6_header_t) - sizeof (ip4_header_t))) >
		vnet_buffer (p0)->map_t.mtu)))
	    {
	      icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
					   ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
					   vnet_buffer (p0)->map_t.mtu -
					   (sizeof (ip6_header_t) -
					    sizeof (ip4_header_t)));
	      p0->error = error_node->errors[MAP_ERROR_DF_SET];
	      next0 = IP4_MAPT_NEXT_ICMP_ERROR;
	      goto trace;
	    }

	  /* Pick the translation node and the port for the address
	   * computation. */
	  ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
			      &next0, l4_dst_port);

	  /* Verify that port is not among the well-known ports */
	  /* Only enforced when the domain has a PSID with a non-zero
	   * offset; ports below 1 << (16 - psid_offset) are rejected. */
	  if ((d0->psid_length > 0 && d0->psid_offset > 0)
	      && (clib_net_to_host_u16 (dst_port0) <
		  (0x1 << (16 - d0->psid_offset))))
	    {
	      error0 = MAP_ERROR_SEC_CHECK;
	    }

	  /* Add the MAP-T pseudo header in front of the packet. */
	  vlib_buffer_advance (p0, -sizeof (*pheader0));
	  pheader0 = vlib_buffer_get_current (p0);

	  /* Save the translated addresses within the packet. */
	  ip4_map_t_embedded_address (d0, &pheader0->saddr,
				      &ip40->src_address);
	  pheader0->daddr.as_u64[0] =
	    map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
	  pheader0->daddr.as_u64[1] =
	    map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);

	  /* ICMP packets are not counted here; "ip4-map-t-icmp" updates
	   * the TX counter itself once translation succeeded. */
	  if (PREDICT_TRUE
	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
	    {
	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
					       thread_index,
					       vnet_buffer (p0)->
					       map_t.map_domain_index, 1,
					       clib_net_to_host_u16
					       (ip40->length));
	    }

	  /* Any error recorded above overrides the classifier's choice. */
	  next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
	  p0->error = error_node->errors[error0];
	  /* ICMP-error paths jump here, skipping the pseudo header. */
	trace:
	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      map_add_trace (vm, node, p0, d0 - map_main.domains, dst_port0);
	    }
	  /* Packets without a MAP domain jump here, skipping the trace. */
	exit:
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next, pi0,
					   next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  return frame->n_vectors;
}

/* *INDENT-OFF* */
/*
 * Register "ip4-map-t" as a feature on the "ip4-unicast" arc, ordered
 * after "ip4-sv-reassembly-feature" and before "ip4-flow-classify".
 */
VNET_FEATURE_INIT (ip4_map_t_feature, static) = {
    .arc_name = "ip4-unicast",
    .node_name = "ip4-map-t",
    .runs_before = VNET_FEATURES ("ip4-flow-classify"),
    .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
};

/*
 * Graph node "ip4-map-t-fragmented": runs ip4_map_t_fragmented().
 * The next_nodes table is indexed by ip4_mapt_fragmented_next_t.
 */
VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
  .function = ip4_map_t_fragmented,
  .name = "ip4-map-t-fragmented",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  /* Error counters shared by all MAP nodes in this file. */
  .n_errors = MAP_N_ERROR,
  .error_counters = map_error_counters,

  .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT,
  .next_nodes = {
      [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup",
      [IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE] = "ip6-load-balance",
      [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
      [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
/*
 * Graph node "ip4-map-t-icmp": runs ip4_map_t_icmp().
 * The next_nodes table is indexed by ip4_mapt_icmp_next_t.
 */
VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
  .function = ip4_map_t_icmp,
  .name = "ip4-map-t-icmp",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  /* Error counters shared by all MAP nodes in this file. */
  .n_errors = MAP_N_ERROR,
  .error_counters = map_error_counters,

  .n_next_nodes = IP4_MAPT_ICMP_N_NEXT,
  .next_nodes = {
      [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup",
      [IP4_MAPT_ICMP_NEXT_IP6_REWRITE] = "ip6-load-balance",
      [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
      [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
/*
 * Graph node "ip4-map-t-tcp-udp": runs ip4_map_t_tcp_udp().
 * The next_nodes table is indexed by ip4_mapt_tcp_udp_next_t.
 */
VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
  .function = ip4_map_t_tcp_udp,
  .name = "ip4-map-t-tcp-udp",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  /* Error counters shared by all MAP nodes in this file. */
  .n_errors = MAP_N_ERROR,
  .error_counters = map_error_counters,

  .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT,
  .next_nodes = {
      [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup",
      [IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE] = "ip6-load-balance",
      [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
      [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
/*
 * Graph node "ip4-map-t": runs ip4_map_t(), the MAP-T classifier that
 * dispatches to the per-protocol translation nodes above.
 * The next_nodes table is indexed by ip4_mapt_next_t.
 */
VLIB_REGISTER_NODE(ip4_map_t_node) = {
  .function = ip4_map_t,
  .name = "ip4-map-t",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  /* Error counters shared by all MAP nodes in this file. */
  .n_errors = MAP_N_ERROR,
  .error_counters = map_error_counters,

  .n_next_nodes = IP4_MAPT_N_NEXT,
  .next_nodes = {
      [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
      [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
      [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
      [IP4_MAPT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
      [IP4_MAPT_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */