summaryrefslogtreecommitdiffstats
path: root/src/vnet/ip/ip_frag.c
blob: fd5bc6fa0bad8418ee1a23412e3d1eedf76c450e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<
<?xml version="1.0" encoding="UTF-8"?>
<!--
 Copyright (c) 2017 Cisco and/or its affiliates.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at:

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
--><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <groupId>io.fd.hc2vpp.common</groupId>
        <artifactId>vpp-impl-parent</artifactId>
        <version>1.19.04-SNAPSHOT</version>
        <relativePath>../../vpp-common/vpp-impl-parent</relativePath>
    </parent>

    <modelVersion>4.0.0</modelVersion>
    <groupId>io.fd.hc2vpp.dhcp</groupId>
    <artifactId>dhcp-impl</artifactId>
    <name>dhcp-impl</name>
    <version>1.19.04-SNAPSHOT</version>
    <packaging>bundle</packaging>

    <dependencies>
        <dependency>
            <groupId>${project.groupId}</groupId>
            <artifactId>dhcp-api</artifactId>
            <version>${project.version}</version>
        </dependency>

        <!-- Honeycomb infrastructure-->
        <dependency>
            <groupId>io.fd.honeycomb</groupId>
            <artifactId>translate-api</artifactId>
        </dependency>

        <dependency>
            <groupId>io.fd.honeycomb</groupId>
            <artifactId>translate-spi</artifactId>
        </dependency>

        <!-- Translation -->
        <dependency>
            <groupId>io.fd.hc2vpp.common</groupId>
            <artifactId>vpp-translate-utils</artifactId>
        </dependency>

        <!-- DI -->
        <dependency>
            <groupId>com.google.inject</groupId>
            <artifactId>guice</artifactId>
        </dependency>
        <dependency>
            <groupId>net.jmob</groupId>
            <artifactId>guice.conf</artifactId>
        </dependency>
        <dependency>
            <groupId>com.google.inject.extensions</groupId>
            <artifactId>guice-multibindings</artifactId>
        </dependency>
        <dependency>
            <groupId>io.fd.honeycomb</groupId>
            <artifactId>translate-impl</artifactId>
            <version>1.19.04-SNAPSHOT</version>
        </dependency>


        <!-- Testing dependencies-->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.hamcrest</groupId>
            <artifactId>hamcrest-all</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.google.inject.extensions</groupId>
       
/*---------------------------------------------------------------------------
 * Copyright (c) 2009-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *---------------------------------------------------------------------------
 */
/*
 * IPv4 Fragmentation Node
 *
 *
 */

#include "ip_frag.h"

#include <vnet/ip/ip.h>


typedef struct
{
  u8 ipv6;
  u16 mtu;
  u8 next;
  u16 n_fragments;
} ip_frag_trace_t;

static u8 *
format_ip_frag_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
  s = format (s, "IPv%s mtu: %u fragments: %u",
	      t->ipv6 ? "6" : "4", t->mtu, t->n_fragments);
  return s;
}

static u32 running_fragment_id;

static void
frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
{
  vnet_buffer (to)->sw_if_index[VLIB_RX] =
    vnet_buffer (from)->sw_if_index[VLIB_RX];
  vnet_buffer (to)->sw_if_index[VLIB_TX] =
    vnet_buffer (from)->sw_if_index[VLIB_TX];

  /* Copy adj_index in case DPO based node is sending for the
   * fragmentation, the packet would be sent back to the proper
   * DPO next node and Index
   */
  vnet_buffer (to)->ip.adj_index[VLIB_RX] =
    vnet_buffer (from)->ip.adj_index[VLIB_RX];
  vnet_buffer (to)->ip.adj_index[VLIB_TX] =
    vnet_buffer (from)->ip.adj_index[VLIB_TX];

  /* Copy QoS Bits */
  if (PREDICT_TRUE (from->flags & VNET_BUFFER_F_QOS_DATA_VALID))
    {
      vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos;
      to->flags |= VNET_BUFFER_F_QOS_DATA_VALID;
    }
}

static vlib_buffer_t *
frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi)
{
  vlib_main_t *vm = vlib_get_main ();
  if (vlib_buffer_alloc (vm, bi, 1) != 1)
    return 0;

  vlib_buffer_t *b = vlib_get_buffer (vm, *bi);
  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
  vlib_buffer_copy_trace_flag (vm, org_b, *bi);

  return b;
}

/*
 * Limitation: Does follow buffer chains in the packet to fragment,
 * but does not generate buffer chains. I.e. a fragment is always
 * contained with in a single buffer and limited to the max buffer
 * size.
 */
void
ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
		      ip_frag_error_t * error)
{
  vlib_buffer_t *from_b;
  ip4_header_t *ip4;
  u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset;
  u8 *org_from_packet, more;

  from_b = vlib_get_buffer (vm, from_bi);
  mtu = vnet_buffer (from_b)->ip_frag.mtu;
  org_from_packet = vlib_buffer_get_current (from_b);
  ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b);

  rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
  max =
    (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) -
     sizeof (ip4_header_t)) & ~0x7;

  if (rem >
      (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
    {
      *error = IP_FRAG_ERROR_MALFORMED;
      return;
    }

  if (mtu < sizeof (ip4_header_t))
    {
      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
      return;
    }

  if (ip4->flags_and_fragment_offset &
      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
    {
      *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
      return;
    }

  if (ip4_is_fragment (ip4))
    {
      ip_frag_id = ip4->fragment_id;
      ip_frag_offset = ip4_get_fragment_offset (ip4);
      more =
	!(!(ip4->flags_and_fragment_offset &
	    clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)));
    }
  else
    {
      ip_frag_id = (++running_fragment_id);
      ip_frag_offset = 0;
      more = 0;
    }

  u8 *from_data = (void *) (ip4 + 1);
  vlib_buffer_t *org_from_b = from_b;
  u16 fo = 0;
  u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t);
  u16 ptr = 0;

  /* Do the actual fragmentation */
  while (rem)
    {
      u32 to_bi;
      vlib_buffer_t *to_b;
      ip4_header_t *to_ip4;
      u8 *to_data;

      len = (rem > max ? max : rem);
      if (len != rem)		/* Last fragment does not need to divisible by 8 */
	len &= ~0x7;
      if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
	{
	  *error = IP_FRAG_ERROR_MEMORY;
	  return;
	}
      vec_add1 (*buffer, to_bi);
      frag_set_sw_if_index (to_b, org_from_b);

      /* Copy ip4 header */
      clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t));
      to_ip4 = vlib_buffer_get_current (to_b);
      to_data = (void *) (to_ip4 + 1);

      /* Spin through from buffers filling up the to buffer */
      u16 left_in_to_buffer = len, to_ptr = 0;
      while (1)
	{
	  u16 bytes_to_copy;

	  /* Figure out how many bytes we can safely copy */
	  bytes_to_copy = left_in_to_buffer <= left_in_from_buffer ?
	    left_in_to_buffer : left_in_from_buffer;
	  clib_memcpy_fast (to_data + to_ptr, from_data + ptr, bytes_to_copy);
	  left_in_to_buffer -= bytes_to_copy;
	  ptr += bytes_to_copy;
	  left_in_from_buffer -= bytes_to_copy;
	  if (left_in_to_buffer == 0)
	    break;

	  ASSERT (left_in_from_buffer <= 0);
	  /* Move buffer */
	  if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
	    {
	      *error = IP_FRAG_ERROR_MALFORMED;
	      return;
	    }
	  from_b = vlib_get_buffer (vm, from_b->next_buffer);
	  from_data = (u8 *) vlib_buffer_get_current (from_b);
	  ptr = 0;
	  left_in_from_buffer = from_b->current_length;
	  to_ptr += bytes_to_copy;
	}

      to_b->current_length = len + sizeof (ip4_header_t);

      to_ip4->fragment_id = ip_frag_id;
      to_ip4->flags_and_fragment_offset =
	clib_host_to_net_u16 ((fo >> 3) + ip_frag_offset);
      to_ip4->flags_and_fragment_offset |=
	clib_host_to_net_u16 (((len != rem) || more) << 13);
      to_ip4->length = clib_host_to_net_u16 (len + sizeof (ip4_header_t));
      to_ip4->checksum = ip4_header_checksum (to_ip4);

      if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
	{
	  /* Encapsulating ipv4 header */
	  ip4_header_t *encap_header4 =
	    (ip4_header_t *) vlib_buffer_get_current (to_b);
	  encap_header4->length = clib_host_to_net_u16 (to_b->current_length);
	  encap_header4->checksum = ip4_header_checksum (encap_header4);
	}
      else if (vnet_buffer (org_from_b)->
	       ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
	{
	  /* Encapsulating ipv6 header */
	  ip6_header_t *encap_header6 =
	    (ip6_header_t *) vlib_buffer_get_current (to_b);
	  encap_header6->payload_length =
	    clib_host_to_net_u16 (to_b->current_length -
				  sizeof (*encap_header6));
	}
      rem -= len;
      fo += len;
    }
}

void
ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu, u8 next_index, u8 flags)
{
  vnet_buffer (b)->ip_frag.mtu = mtu;
  vnet_buffer (b)->ip_frag.next_index = next_index;
  vnet_buffer (b)->ip_frag.flags = flags;
}


static inline uword
frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
		  vlib_frame_t * frame, u32 node_index, bool is_ip6)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 pi0, *frag_from, frag_left;
	  vlib_buffer_t *p0;
	  ip_frag_error_t error0;
	  int next0;

	  /*
	   * Note: The packet is not enqueued now. It is instead put
	   * in a vector where other fragments will be put as well.
	   */
	  pi0 = from[0];
	  from += 1;
	  n_left_from -= 1;
	  error0 = IP_FRAG_ERROR_NONE;

	  p0 = vlib_get_buffer (vm, pi0);
	  if (is_ip6)
	    ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
	  else
	    ip4_frag_do_fragment (vm, pi0, &buffer, &error0);

	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
	    {
	      ip_frag_trace_t *tr =
		vlib_add_trace (vm, node, p0, sizeof (*tr));
	      tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
	      tr->ipv6 = is_ip6 ? 1 : 0;
	      tr->n_fragments = vec_len (buffer);
	      tr->next = vnet_buffer (p0)->ip_frag.next_index;
	    }

	  if (!is_ip6 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
	    {
	      icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
					   ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
					   vnet_buffer (p0)->ip_frag.mtu);
	      next0 = IP4_FRAG_NEXT_ICMP_ERROR;
	    }
	  else
	    {
	      if (is_ip6)
		next0 =
		  (error0 ==
		   IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
		  ip_frag.next_index : IP6_FRAG_NEXT_DROP;
	      else
		next0 =
		  (error0 ==
		   IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
		  ip_frag.next_index : IP4_FRAG_NEXT_DROP;
	    }

	  if (error0 == IP_FRAG_ERROR_NONE)
	    {
	      /* Free original buffer chain */
	      frag_sent += vec_len (buffer);
	      small_packets += (vec_len (buffer) == 1);
	      vlib_buffer_free_one (vm, pi0);	/* Free original packet */
	    }
	  else
	    {
	      vlib_error_count (vm, node_index, error0, 1);
	      vec_add1 (buffer, pi0);	/* Get rid of the original buffer */
	    }

	  /* Send fragments that were added in the frame */
	  frag_from = buffer;
	  frag_left = vec_len (buffer);

	  while (frag_left > 0)
	    {
	      while (frag_left > 0 && n_left_to_next > 0)
		{
		  u32 i;
		  i = to_next[0] = frag_from[0];
		  frag_from += 1;
		  frag_left -= 1;
		  to_next += 1;
		  n_left_to_next -= 1;

		  vlib_get_buffer (vm, i)->error = error_node->errors[error0];
		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						   to_next, n_left_to_next, i,
						   next0);
		}
	      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
	      vlib_get_next_frame (vm, node, next_index, to_next,
				   n_left_to_next);
	    }
	  vec_reset_length (buffer);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vec_free (buffer);

  vlib_node_increment_counter (vm, node_index,
			       IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter (vm, node_index,
			       IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}



static uword
ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return frag_node_inline (vm, node, frame, ip4_frag_node.index,
			   0 /* is_ip6 */ );
}

static uword
ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return frag_node_inline (vm, node, frame, ip6_frag_node.index,
			   1 /* is_ip6 */ );
}

/*
 * Fragments the packet given in from_bi. Fragments are returned in the buffer vector.
 * Caller must ensure the original packet is freed.
 */
void
ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
		      ip_frag_error_t * error)
{
  vlib_buffer_t *from_b;
  ip6_header_t *ip6;
  u16 mtu, len, max, rem, ip_frag_id;

  from_b = vlib_get_buffer (vm, from_bi);
  mtu = vnet_buffer (from_b)->ip_frag.mtu;
  ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b);

  rem = clib_net_to_host_u16 (ip6->payload_length);
  max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7;	// TODO: Is max correct??

  if (rem >
      (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
    {
      *error = IP_FRAG_ERROR_MALFORMED;
      return;
    }

  /* TODO: Look through header chain for fragmentation header */
  if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
    {
      *error = IP_FRAG_ERROR_MALFORMED;
      return;
    }

  u8 *from_data = (void *) (ip6 + 1);
  vlib_buffer_t *org_from_b = from_b;
  u16 fo = 0;
  u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t);
  u16 ptr = 0;

  ip_frag_id = ++running_fragment_id;	// Fix

  /* Do the actual fragmentation */
  while (rem)
    {
      u32 to_bi;
      vlib_buffer_t *to_b;
      ip6_header_t *to_ip6;
      ip6_frag_hdr_t *to_frag_hdr;
      u8 *to_data;

      len =
	(rem >
	 (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
      if (len != rem)		/* Last fragment does not need to divisible by 8 */
	len &= ~0x7;
      if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
	{
	  *error = IP_FRAG_ERROR_MEMORY;
	  return;
	}
      vec_add1 (*buffer, to_bi);
      frag_set_sw_if_index (to_b, org_from_b);

      /* Copy ip6 header */
      clib_memcpy_fast (to_b->data, ip6, sizeof (ip6_header_t));
      to_ip6 = vlib_buffer_get_current (to_b);
      to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
      to_data = (void *) (to_frag_hdr + 1);

      /* Spin through from buffers filling up the to buffer */
      u16 left_in_to_buffer = len, to_ptr = 0;
      while (1)
	{
	  u16 bytes_to_copy;

	  /* Figure out how many bytes we can safely copy */
	  bytes_to_copy = left_in_to_buffer <= left_in_from_buffer ?
	    left_in_to_buffer : left_in_from_buffer;
	  clib_memcpy_fast (to_data + to_ptr, from_data + ptr, bytes_to_copy);
	  left_in_to_buffer -= bytes_to_copy;
	  ptr += bytes_to_copy;
	  left_in_from_buffer -= bytes_to_copy;
	  if (left_in_to_buffer == 0)
	    break;

	  ASSERT (left_in_from_buffer <= 0);
	  /* Move buffer */
	  if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
	    {
	      *error = IP_FRAG_ERROR_MALFORMED;
	      return;
	    }
	  from_b = vlib_get_buffer (vm, from_b->next_buffer);
	  from_data = (u8 *) vlib_buffer_get_current (from_b);
	  ptr = 0;
	  left_in_from_buffer = from_b->current_length;
	  to_ptr += bytes_to_copy;
	}

      to_b->current_length =
	len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
      to_ip6->payload_length =
	clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
      to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
      to_frag_hdr->fragment_offset_and_more =
	ip6_frag_hdr_offset_and_more ((fo >> 3), len != rem);
      to_frag_hdr->identification = ip_frag_id;
      to_frag_hdr->next_hdr = ip6->protocol;
      to_frag_hdr->rsv = 0;

      rem -= len;
      fo += len;
    }
}

static char *ip4_frag_error_strings[] = {
#define _(sym,string) string,
  foreach_ip_frag_error
#undef _
};

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
  .function = ip4_frag,
  .name = IP4_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,

  .n_next_nodes = IP4_FRAG_N_NEXT,
  .next_nodes = {
    [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
    [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_FRAG_NEXT_DROP] = "ip4-drop"
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
  .function = ip6_frag,
  .name = IP6_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,

  .n_next_nodes = IP6_FRAG_N_NEXT,
  .next_nodes = {
    [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
    [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP6_FRAG_NEXT_DROP] = "ip6-drop"
  },
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */