summaryrefslogtreecommitdiffstats
path: root/src/vnet/ip/ip_frag.c
blob: ca062bfd5e87fda2622b87086d24540fed410e19 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

@media only all and (prefers-color-scheme: dark) {
.highlight .hll { background-color: #49483e }
.highlight .c { color: #75715e } /* Comment */
.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
.highlight .k { color: #66d9ef } /* Keyword */
.highlight .l { color: #ae81ff } /* Literal */
.highlight .n { color: #f8f8f2 } /* Name */
.highlight .o { color: #f92672 } /* Operator */
.highlight .p { color: #f8f8f2 } /* Punctuation */
.highlight .ch { color: #75715e } /* Comment.Hashbang */
.highlight .cm { color: #75715e } /* Comment.Multiline */
.highlight .cp { color: #75715e } /* Comment.Preproc */
.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
.highlight .c1 { color: #75715e } /* Comment.Single */
.highlight .cs { color: #75715e } /* Comment.Special */
.highlight .gd { color: #f92672 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gi { color: #a6e22e } /* Generic.Inserted */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #75715e } /* Generic.Subheading */
.highlight .kc { color: #66d9ef } /* Keyword.Constant */
.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
.highlight .kn { color: #f92672 } /* Keyword.Namespace */
.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
.highlight .kt { color: #66d9ef } /* Keyword.Type */
.highlight .ld { color: #e6db74 } /* Literal.Date */
.highlight .m { color: #ae81ff } /* Literal.Number */
.highlight .s { color: #e6db74 } /* Literal.String */
.highlight .na { color: #a6e22e } /* Name.Attribute */
.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
.highlight .nc { color: #a6e22e } /* Name.Class */
.highlight .no { color: #66d9ef } /* Name.Constant */
.highlight .nd { color: #a6e22e } /* Name.Decorator */
.highlight .ni { color: #f8f8f2 } /* Name.Entity */
.highlight .ne { color: #a6e22e } /* Name.Exception */
.highlight .nf { color: #a6e22e } /* Name.Function */
.highlight .nl { color: #f8f8f2 } /* Name.Label */
.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
.highlight .nx { color: #a6e22e } /* Name.Other */
.highlight .py { color: #f8f8f2 } /* Name.Property */
.highlight .nt { color: #f92672 } /* Name.Tag */
.highlight .nv { color: #f8f8f2 } /* Name.Variable */
.highlight .ow { color: #f92672 } /* Operator.Word */
.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
.highlight .sc { color: #e6db74 } /* Literal.String.Char */
.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
.highlight .se { color: #ae81ff } /* Literal.String.Escape */
.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
.highlight .sx { color: #e6db74 } /* Literal.String.Other */
.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
}
@media (prefers-color-scheme: light) {
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
}
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2012 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef included_clib_elf_self_h
#define included_clib_elf_self_h

#include <vppinfra/elf.h>
#include <vppinfra/hash.h>

#define CLIB_ELF_SECTION_DATA_ALIGN 32

#define CLIB_ELF_SECTION_ADD_PREFIX(n) "clib_elf_section_" n

/* Attribute used is so that static registrations work even if
   variable is not referenced. */
#define CLIB_ELF_SECTION(SECTION)					\
   __attribute__ ((used,						\
		   aligned (CLIB_ELF_SECTION_DATA_ALIGN),		\
		   section (CLIB_ELF_SECTION_ADD_PREFIX (SECTION))))

/* Given pointer to previous data A get next pointer.  EXTRA gives extra
   space beyond A + 1 used in object. */
#define clib_elf_section_data_next(a,extra)				\
  uword_to_pointer (round_pow2 (pointer_to_uword (a + 1) + (extra),	\
				CLIB_ELF_SECTION_DATA_ALIGN),		\
		    void *)

typedef struct
{
  void *lo, *hi;
} clib_elf_section_bounds_t;

typedef struct
{
  /* Vector of bounds for this section.  Multiple shared objects may have instances
     of the same sections. */
  clib_elf_section_bounds_t *bounds;

  /* Name of ELF section (e.g. .text). */
  u8 *name;
} clib_elf_section_t;

typedef struct
{
  /* Vector of sections. */
  clib_elf_section_t *sections;

  /* Hash map of name to section index. */
  uword *section_by_name;

  /* Unix path that we were exec()ed with. */
  char *exec_path;

  elf_main_t *elf_mains;
} clib_elf_main_t;

always_inline void
clib_elf_main_free (clib_elf_main_t * m)
{
  clib_elf_section_t *s<
/*---------------------------------------------------------------------------
 * Copyright (c) 2009-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *---------------------------------------------------------------------------
 */
/*
 * IPv4 Fragmentation Node
 *
 *
 */

#include "ip_frag.h"

#include <vnet/ip/ip.h>


typedef struct
{
  u8 ipv6;
  u16 header_offset;
  u16 mtu;
  u8 next;
  u16 n_fragments;
} ip_frag_trace_t;

static u8 *
format_ip_frag_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
  s = format (s, "IPv%s offset: %u mtu: %u fragments: %u",
	      t->ipv6 ? "6" : "4", t->header_offset, t->mtu, t->n_fragments);
  return s;
}

static u32 running_fragment_id;

static void
ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
		      ip_frag_error_t * error)
{
  vlib_buffer_t *p;
  ip4_header_t *ip4;
  u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset;
  u8 *packet, more;

  vec_add1 (*buffer, pi);
  p = vlib_get_buffer (vm, pi);
  offset = vnet_buffer (p)->ip_frag.header_offset;
  mtu = vnet_buffer (p)->ip_frag.mtu;
  packet = (u8 *) vlib_buffer_get_current (p);
  ip4 = (ip4_header_t *) (packet + offset);

  rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
  ptr = 0;
  max = (mtu - sizeof (*ip4) - vnet_buffer (p)->ip_frag.header_offset) & ~0x7;

  if (rem < (p->current_length - offset - sizeof (*ip4)))
    {
      *error = IP_FRAG_ERROR_MALFORMED;
      return;
    }

  if (mtu < sizeof (*ip4))
    {
      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
      return;
    }

  if (ip4->flags_and_fragment_offset &
      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
    {
      *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
      return;
    }

  if (ip4_is_fragment (ip4))
    {
      ip_frag_id = ip4->fragment_id;
      ip_frag_offset = ip4_get_fragment_offset (ip4);
      more =
	! !(ip4->flags_and_fragment_offset &
	    clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
    }
  else
    {
      ip_frag_id = (++running_fragment_id);
      ip_frag_offset = 0;
      more = 0;
    }

  //Do the actual fragmentation
  while (rem)
    {
      u32 bi;
      vlib_buffer_t *b;
      ip4_header_t *fip4;

      len =
	(rem >
	 (mtu - sizeof (*ip4) -
	  vnet_buffer (p)->ip_frag.header_offset)) ? max : rem;

      if (ptr == 0)
	{
	  bi = pi;
	  b = p;
	  fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
	}
      else
	{
	  if (!vlib_buffer_alloc (vm, &bi, 1))
	    {
	      *error = IP_FRAG_ERROR_MEMORY;
	      return;
	    }
	  vec_add1 (*buffer, bi);
	  b = vlib_get_buffer (vm, bi);
	  vnet_buffer (b)->sw_if_index[VLIB_RX] =
	    vnet_buffer (p)->sw_if_index[VLIB_RX];
	  vnet_buffer (b)->sw_if_index[VLIB_TX] =
	    vnet_buffer (p)->sw_if_index[VLIB_TX];
	  fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);

	  //Copy offset and ip4 header
	  clib_memcpy (b->data, packet, offset + sizeof (*ip4));
	  //Copy data
	  clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4),
		       packet + offset + sizeof (*fip4) + ptr, len);
	}
      b->current_length = offset + len + sizeof (*fip4);

      fip4->fragment_id = ip_frag_id;
      fip4->flags_and_fragment_offset =
	clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset);
      fip4->flags_and_fragment_offset |=
	clib_host_to_net_u16 (((len != rem) || more) << 13);
      // ((len0 != rem0) || more0) << 13 is optimization for
      // ((len0 != rem0) || more0) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
      fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4));
      fip4->checksum = ip4_header_checksum (fip4);

      if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
	{
	  //Encapsulating ipv4 header
	  ip4_header_t *encap_header4 =
	    (ip4_header_t *) vlib_buffer_get_current (b);
	  encap_header4->length = clib_host_to_net_u16 (b->current_length);
	  encap_header4->checksum = ip4_header_checksum (encap_header4);
	}
      else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
	{
	  //Encapsulating ipv6 header
	  ip6_header_t *encap_header6 =
	    (ip6_header_t *) vlib_buffer_get_current (b);
	  encap_header6->payload_length =
	    clib_host_to_net_u16 (b->current_length -
				  sizeof (*encap_header6));
	}

      rem -= len;
      ptr += len;
    }
}

void
ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
			 u8 next_index, u8 flags)
{
  vnet_buffer (b)->ip_frag.header_offset = offset;
  vnet_buffer (b)->ip_frag.mtu = mtu;
  vnet_buffer (b)->ip_frag.next_index = next_index;
  vnet_buffer (b)->ip_frag.flags = flags;
}

static uword
ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_frag_node.index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 pi0, *frag_from, frag_left;
	  vlib_buffer_t *p0;
	  ip_frag_error_t error0;
	  ip4_frag_next_t next0;

	  //Note: The packet is not enqueued now.
	  //It is instead put in a vector where other fragments
	  //will be put as well.
	  pi0 = from[0];
	  from += 1;
	  n_left_from -= 1;
	  error0 = IP_FRAG_ERROR_NONE;

	  p0 = vlib_get_buffer (vm, pi0);
	  ip4_frag_do_fragment (vm, pi0, &buffer, &error0);

	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      ip_frag_trace_t *tr =
		vlib_add_trace (vm, node, p0, sizeof (*tr));
	      tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
	      tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
	      tr->ipv6 = 0;
	      tr->n_fragments = vec_len (buffer);
	      tr->next = vnet_buffer (p0)->ip_frag.next_index;
	    }

	  if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
	    {
	      icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
					   ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
					   vnet_buffer (p0)->ip_frag.mtu);
	      vlib_buffer_advance (p0,
				   vnet_buffer (p0)->ip_frag.header_offset);
	      next0 = IP4_FRAG_NEXT_ICMP_ERROR;
	    }
	  else
	    next0 =
	      (error0 ==
	       IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
	      ip_frag.next_index : IP4_FRAG_NEXT_DROP;

	  if (error0 == IP_FRAG_ERROR_NONE)
	    {
	      frag_sent += vec_len (buffer);
	      small_packets += (vec_len (buffer) == 1);
	    }
	  else
	    vlib_error_count (vm, ip4_frag_node.index, error0, 1);

	  //Send fragments that were added in the frame
	  frag_from = buffer;
	  frag_left = vec_len (buffer);

	  while (frag_left > 0)
	    {
	      while (frag_left > 0 && n_left_to_next > 0)
		{
		  u32 i;
		  i = to_next[0] = frag_from[0];
		  frag_from += 1;
		  frag_left -= 1;
		  to_next += 1;
		  n_left_to_next -= 1;

		  vlib_get_buffer (vm, i)->error = error_node->errors[error0];
		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						   to_next, n_left_to_next, i,
						   next0);
		}
	      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
	      vlib_get_next_frame (vm, node, next_index, to_next,
				   n_left_to_next);
	    }
	  vec_reset_length (buffer);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vec_free (buffer);

  vlib_node_increment_counter (vm, ip4_frag_node.index,
			       IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter (vm, ip4_frag_node.index,
			       IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}


static void
ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
		      ip_frag_error_t * error)
{
  vlib_buffer_t *p;
  ip6_header_t *ip6_hdr;
  ip6_frag_hdr_t *frag_hdr;
  u8 *payload, *next_header;

  p = vlib_get_buffer (vm, pi);

  //Parsing the IPv6 headers
  ip6_hdr =
    vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
  payload = (u8 *) (ip6_hdr + 1);
  next_header = &ip6_hdr->protocol;
  if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
    {
      next_header = payload;
      payload += payload[1] * 8;
    }

  if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
    {
      next_header = payload;
      payload += payload[1] * 8;
    }

  if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
    {
      next_header = payload;
      payload += payload[1] * 8;
    }

  if (PREDICT_FALSE
      (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
    {
      //A malicious packet could set an extension header with a too big size
      //and make us modify another vlib_buffer
      *error = IP_FRAG_ERROR_MALFORMED;
      return;
    }

  u8 has_more;
  u16 initial_offset;
  if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
    {
      //The fragmentation header is already there
      frag_hdr = (ip6_frag_hdr_t *) payload;
      has_more = ip6_frag_hdr_more (frag_hdr);
      initial_offset = ip6_frag_hdr_offset (frag_hdr);
    }
  else
    {
      //Insert a fragmentation header in the packet
      u8 nh = *next_header;
      *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
      vlib_buffer_advance (p, -sizeof (*frag_hdr));
      u8 *start = vlib_buffer_get_current (p);
      memmove (start, start + sizeof (*frag_hdr),
	       payload - (start + sizeof (*frag_hdr)));
      frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
      frag_hdr->identification = ++running_fragment_id;
      frag_hdr->next_hdr = nh;
      frag_hdr->rsv = 0;
      has_more = 0;
      initial_offset = 0;
    }
  payload = (u8 *) (frag_hdr + 1);

  u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
  u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len;
  u16 rem = p->current_length - headers_len;
  u16 ptr = 0;

  if (max_payload < 8)
    {
      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
      return;
    }

  while (rem)
    {
      u32 bi;
      vlib_buffer_t *b;
      u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
      rem -= len;

      if (ptr != 0)
	{
	  if (!vlib_buffer_alloc (vm, &bi, 1))
	    {
	      *error = IP_FRAG_ERROR_MEMORY;
	      return;
	    }
	  b = vlib_get_buffer (vm, bi);
	  vnet_buffer (b)->sw_if_index[VLIB_RX] =
	    vnet_buffer (p)->sw_if_index[VLIB_RX];
	  vnet_buffer (b)->sw_if_index[VLIB_TX] =
	    vnet_buffer (p)->sw_if_index[VLIB_TX];
	  clib_memcpy (vlib_buffer_get_current (b),
		       vlib_buffer_get_current (p), headers_len);
	  clib_memcpy (vlib_buffer_get_current (b) + headers_len,
		       payload + ptr, len);
	  frag_hdr =
	    vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
	}
      else
	{
	  bi = pi;
	  b = vlib_get_buffer (vm, bi);
	  //frag_hdr already set here
	}

      ip6_hdr =
	vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
      frag_hdr->fragment_offset_and_more =
	ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
				      (rem || has_more));
      b->current_length = headers_len + len;
      ip6_hdr->payload_length =
	clib_host_to_net_u16 (b->current_length -
			      vnet_buffer (p)->ip_frag.header_offset -
			      sizeof (*ip6_hdr));

      if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
	{
	  //Encapsulating ipv4 header
	  ip4_header_t *encap_header4 =
	    (ip4_header_t *) vlib_buffer_get_current (b);
	  encap_header4->length = clib_host_to_net_u16 (b->current_length);
	  encap_header4->checksum = ip4_header_checksum (encap_header4);
	}
      else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
	{
	  //Encapsulating ipv6 header
	  ip6_header_t *encap_header6 =
	    (ip6_header_t *) vlib_buffer_get_current (b);
	  encap_header6->payload_length =
	    clib_host_to_net_u16 (b->current_length -
				  sizeof (*encap_header6));
	}

      vec_add1 (*buffer, bi);

      ptr += len;
    }
}

static uword
ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_frag_node.index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 pi0, *frag_from, frag_left;
	  vlib_buffer_t *p0;
	  ip_frag_error_t error0;
	  ip6_frag_next_t next0;

	  pi0 = from[0];
	  from += 1;
	  n_left_from -= 1;
	  error0 = IP_FRAG_ERROR_NONE;

	  p0 = vlib_get_buffer (vm, pi0);
	  ip6_frag_do_fragment (vm, pi0, &buffer, &error0);

	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      ip_frag_trace_t *tr =
		vlib_add_trace (vm, node, p0, sizeof (*tr));
	      tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
	      tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
	      tr->ipv6 = 1;
	      tr->n_fragments = vec_len (buffer);
	      tr->next = vnet_buffer (p0)->ip_frag.next_index;
	    }

	  next0 =
	    (error0 ==
	     IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
	    ip_frag.next_index : IP6_FRAG_NEXT_DROP;
	  frag_sent += vec_len (buffer);
	  small_packets += (vec_len (buffer) == 1);

	  //Send fragments that were added in the frame
	  frag_from = buffer;
	  frag_left = vec_len (buffer);
	  while (frag_left > 0)
	    {
	      while (frag_left > 0 && n_left_to_next > 0)
		{
		  u32 i;
		  i = to_next[0] = frag_from[0];
		  frag_from += 1;
		  frag_left -= 1;
		  to_next += 1;
		  n_left_to_next -= 1;

		  vlib_get_buffer (vm, i)->error = error_node->errors[error0];
		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						   to_next, n_left_to_next, i,
						   next0);
		}
	      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
	      vlib_get_next_frame (vm, node, next_index, to_next,
				   n_left_to_next);
	    }
	  vec_reset_length (buffer);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vec_free (buffer);
  vlib_node_increment_counter (vm, ip6_frag_node.index,
			       IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter (vm, ip6_frag_node.index,
			       IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}

static char *ip4_frag_error_strings[] = {
#define _(sym,string) string,
  foreach_ip_frag_error
#undef _
};

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
  .function = ip4_frag,
  .name = IP4_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,

  .n_next_nodes = IP4_FRAG_N_NEXT,
  .next_nodes = {
    [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_FRAG_NEXT_DROP] = "error-drop"
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
  .function = ip6_frag,
  .name = IP6_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,

  .n_next_nodes = IP6_FRAG_N_NEXT,
  .next_nodes = {
    [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP6_FRAG_NEXT_DROP] = "error-drop"
  },
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */