summaryrefslogtreecommitdiffstats
path: root/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
blob: 7361e8eb0d6c0cfe93e859e119b24c0a57658fbe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencies.
 *
 */

#include <vnet/dpo/load_balance.h>
#include <vnet/lisp-cp/control.h>
#include <vnet/lisp-cp/lisp_types.h>
#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/adj/adj_midchain.h>
#include <vppinfra/bihash_24_8.h>
#include <vppinfra/bihash_template.h>

/**
 * Memory pool of all LISP-GPE adjacencies.
 * Elements are referred to by their pool index (an index_t) throughout
 * this file.
 */
static lisp_gpe_adjacency_t *lisp_adj_pool;

/**
 * Hash table of all adjacencies. key:{nh, itf}, value: index of the
 * adjacency in lisp_adj_pool.
 * We never have an all zeros address since the interfaces are multi-access,
 * therefore there is no ambiguity between a v4 and v6 next-hop, so we don't
 * need to add the protocol to the key.
 */
static
BVT (clib_bihash)
  lisp_adj_db;

/**
 * Fill in the 3 x u64 key used to index the adjacency DB.
 *
 * key[0] and key[1] are copied from the 16 bytes of the next-hop's v6
 * member, key[2] is the interface index. The value part of _key is not
 * touched.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * e.g. as the un-braced body of an if/else.
 *
 * NOTE(review): for a v4 next-hop the bytes of the v6 member not covered by
 * the v4 address are part of the key as well; callers are presumed to pass
 * addresses whose unused bytes are zero - confirm for every caller.
 */
#define LISP_ADJ_SET_KEY(_key, _itf, _nh)       \
do {						\
  (_key).key[0] = (_nh)->ip.v6.as_u64[0];	\
  (_key).key[1] = (_nh)->ip.v6.as_u64[1];	\
  (_key).key[2] = (_itf);			\
} while (0)

     static index_t lisp_adj_find (const ip_address_t * addr, u32 sw_if_index)
{
  /*
   * Look up the adjacency keyed by {addr, sw_if_index} in the DB.
   * Returns the value stored for the key, or INDEX_INVALID when the
   * search reports a miss (negative return).
   */
  BVT (clib_bihash_kv) lookup;
  int rv;

  LISP_ADJ_SET_KEY (lookup, sw_if_index, addr);

  /* the same key/value pair is used as both the search key and the result */
  rv = BV (clib_bihash_search) (&lisp_adj_db, &lookup, &lookup);

  if (rv >= 0)
    return (lookup.value);

  return (INDEX_INVALID);
}

/**
 * @brief Add the mapping {addr, sw_if_index} -> ai to the adjacency DB.
 */
static void
lisp_adj_insert (const ip_address_t * addr, u32 sw_if_index, index_t ai)
{
  const int is_add = 1;
  BVT (clib_bihash_kv) entry;

  entry.value = ai;
  LISP_ADJ_SET_KEY (entry, sw_if_index, addr);

  BV (clib_bihash_add_del) (&lisp_adj_db, &entry, is_add);
}

/**
 * @brief Delete the entry keyed by {addr, sw_if_index} from the adjacency DB.
 */
static void
lisp_adj_remove (const ip_address_t * addr, u32 sw_if_index)
{
  const int is_add = 0;
  BVT (clib_bihash_kv) entry;

  /* only the key part is filled in; the value is left unset, as before */
  LISP_ADJ_SET_KEY (entry, sw_if_index, addr);

  BV (clib_bihash_add_del) (&lisp_adj_db, &entry, is_add);
}

/**
 * @brief Get the (non-const) adjacency stored at pool index lai.
 */
static lisp_gpe_adjacency_t *
lisp_gpe_adjacency_get_i (index_t lai)
{
  lisp_gpe_adjacency_t *ladj;

  ladj = pool_elt_at_index (lisp_adj_pool, lai);

  return (ladj);
}

/**
 * @brief Map the IP version of the adjacency's remote RLOC to the matching
 * unicast FIB forwarding chain type.
 *
 * Any version other than IP4/IP6 trips an ASSERT and falls back to the
 * IPv4 unicast chain.
 */
fib_forward_chain_type_t
lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj)
{
  fib_forward_chain_type_t fct = FIB_FORW_CHAIN_TYPE_UNICAST_IP4;

  switch (ip_addr_version (&ladj->remote_rloc))
    {
    case IP6:
      fct = FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
      break;
    case IP4:
      /* already the default */
      break;
    default:
      ASSERT (0);
      break;
    }

  return (fct);
}

/**
 * @brief Convert an ip46_address_t into an ip_address_t.
 *
 * For a v4 source the destination is zeroed before the address is set;
 * for a v6 source the address is set directly.
 */
static void
ip46_address_to_ip_address (const ip46_address_t * a, ip_address_t * b)
{
  if (!ip46_address_is_ip4 (a))
    {
      ip_address_set (b, &a->ip6, IP6);
      return;
    }

  clib_memset (b, 0, sizeof (*b));
  ip_address_set (b, &a->ip4, IP4);
}

/**
 * @brief Stack one midchain adjacency on the forwarding chain of the FIB
 * entry used by the LISP adjacency's tunnel.
 *
 * @param ladj the LISP adjacency providing the tunnel and the chain type
 * @param ai   the midchain adjacency to stack
 */
static void
lisp_gpe_adj_stack_one (lisp_gpe_adjacency_t * ladj, adj_index_t ai)
{
  const lisp_gpe_tunnel_t *tunnel = lisp_gpe_tunnel_get (ladj->tunnel_index);
  fib_forward_chain_type_t fct = lisp_gpe_adj_get_fib_chain_type (ladj);

  adj_nbr_midchain_stack_on_fib_entry (ai, tunnel->fib_entry_index, fct);
}

/**
 * @brief Adjacency walk call back; restacks the visited adjacency.
 *
 * @param ai  the adjacency being visited
 * @param ctx the lisp_gpe_adjacency_t the walk was started for
 * @return always ADJ_WALK_RC_CONTINUE so that the walk visits every adjacency
 */
static adj_walk_rc_t
lisp_gpe_adj_walk_cb (adj_index_t ai, void *ctx)
{
  lisp_gpe_adj_stack_one ((lisp_gpe_adjacency_t *) ctx, ai);

  return (ADJ_WALK_RC_CONTINUE);
}

/**
 * @brief Restack every adjacency on the LISP adjacency's interface whose
 * next-hop is the remote RLOC.
 */
static void
lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
{
  ip46_address_t next_hop;
  fib_protocol_t fproto;

  /* the walk API takes the next-hop as an ip46 address plus its protocol */
  ip_address_to_46 (&ladj->remote_rloc, &next_hop, &fproto);

  adj_nbr_walk_nh (ladj->sw_if_index, fproto, &next_hop,
		   lisp_gpe_adj_walk_cb, ladj);
}

/**
 * @brief Translate a VNET link type into the LISP-GPE next-protocol value.
 *
 * Link types without a translation trip an ASSERT and yield
 * LISP_GPE_NEXT_PROTO_IP4.
 */
static lisp_gpe_next_protocol_e
lisp_gpe_adj_proto_from_vnet_link_type (vnet_link_t linkt)
{
  lisp_gpe_next_protocol_e proto = LISP_GPE_NEXT_PROTO_IP4;

  switch (linkt)
    {
    case VNET_LINK_IP4:
      break;
    case VNET_LINK_IP6:
      proto = LISP_GPE_NEXT_PROTO_IP6;
      break;
    case VNET_LINK_ETHERNET:
      proto = LISP_GPE_NEXT_PROTO_ETHERNET;
      break;
    case VNET_LINK_NSH:
      proto = LISP_GPE_NEXT_PROTO_NSH;
      break;
    default:
      ASSERT (0);
      break;
    }

  return (proto);
}

/*
 * Non-zero when the byte at _h has 4 in its high nibble, i.e. when _h points
 * at an IPv4 header. Both the argument and the whole expansion are
 * parenthesised so that the macro composes with surrounding operators
 * (e.g. !is_v4_packet (p) or is_v4_packet (p + n)).
 */
#define is_v4_packet(_h) ((*(const u8 *) (_h) & 0xF0) == 0x40)

/**
 * @brief Translate a VNET link type into a LISP AFI.
 *
 * Link types other than IP4, IP6 and ethernet map to LISP_AFI_NO_ADDR.
 */
static lisp_afi_e
lisp_afi_from_vnet_link_type (vnet_link_t link)
{
  if (VNET_LINK_IP4 == link)
    return LISP_AFI_IP;

  if (VNET_LINK_IP6 == link)
    return LISP_AFI_IP6;

  if (VNET_LINK_ETHERNET == link)
    return LISP_AFI_MAC;

  return LISP_AFI_NO_ADDR;
}

/**
 * @brief Account one packet against the combined counter of the
 * {forwarding entry, tunnel} pair it was sent on.
 *
 * The buffer's current data is taken to start at the outer IP header of the
 * encap. The outer IP, UDP and LISP-GPE headers are stepped over to reach the
 * inner packet, from which the source and destination EIDs are read and used
 * to find the forwarding entry.
 *
 * Every lookup failure results in the packet not being counted; the packet
 * itself is not modified (current_data is restored before returning).
 *
 * @param lcm LISP control plane main
 * @param adj the adjacency the packet is forwarded on
 * @param b   the packet's buffer
 */
static void
lisp_gpe_increment_stats_counters (lisp_cp_main_t * lcm, ip_adjacency_t * adj,
				   vlib_buffer_t * b)
{
  lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
  lisp_gpe_adjacency_t *ladj;
  ip_address_t rloc;
  index_t lai;
  u32 si, di;
  gid_address_t src, dst;
  uword *feip;

  ip46_address_to_ip_address (&adj->sub_type.nbr.next_hop, &rloc);
  si = vnet_buffer (b)->sw_if_index[VLIB_TX];
  lai = lisp_adj_find (&rloc, si);
  ASSERT (INDEX_INVALID != lai);

  /*
   * ASSERT compiles out of release images; never index the pool with an
   * invalid index, skip the accounting instead.
   */
  if (PREDICT_FALSE (INDEX_INVALID == lai))
    return;

  ladj = pool_elt_at_index (lisp_adj_pool, lai);

  u8 *lisp_data = (u8 *) vlib_buffer_get_current (b);

  /* skip IP header */
  if (is_v4_packet (lisp_data))
    lisp_data += sizeof (ip4_header_t);
  else
    lisp_data += sizeof (ip6_header_t);

  /* skip UDP header */
  lisp_data += sizeof (udp_header_t);
  /* TODO: skip TCP? */

  /* skip LISP GPE header */
  lisp_data += sizeof (lisp_gpe_header_t);

  /*
   * Temporarily advance the buffer to the inner packet so the EIDs can be
   * extracted from it, then put the buffer back as it was.
   */
  i16 saved_current_data = b->current_data;
  b->current_data = lisp_data - b->data;

  lisp_afi_e afi = lisp_afi_from_vnet_link_type (adj->ia_link);
  get_src_and_dst_eids_from_buffer (lcm, b, &src, &dst, afi);
  b->current_data = saved_current_data;
  di = gid_dictionary_sd_lookup (&lcm->mapping_index_by_gid, &dst, &src);
  if (PREDICT_FALSE (~0 == di))
    {
      clib_warning ("dst mapping not found (%U, %U)", format_gid_address,
		    &src, format_gid_address, &dst);
      return;
    }

  feip = hash_get (lcm->fwd_entry_by_mapping_index, di);
  if (PREDICT_FALSE (!feip))
    return;

  /* the key is hashed by memory, so zero it fully before filling it in */
  lisp_stats_key_t key;
  clib_memset (&key, 0, sizeof (key));
  key.fwd_entry_index = feip[0];
  key.tunnel_index = ladj->tunnel_index;

  uword *p = hash_get_mem (lgm->lisp_stats_index_by_key, &key);
  ASSERT (p);

  /* as above: do not dereference a failed lookup in release images */
  if (PREDICT_FALSE (!p))
    return;

  /* compute payload length starting after GPE */
  u32 bytes = b->current_length - (lisp_data - b->data - b->current_data);
  vlib_increment_combined_counter (&lgm->counters, vlib_get_thread_index (),
				   p[0], 1, bytes);
}

/**
 * @brief Midchain fixup function for LISP-GPE adjacencies.
 *
 * Updates the LISP statistics when they are enabled, then fixes up the
 * checksum and length fields of the IP/UDP encap that was applied at the
 * midchain node.
 */
static void
lisp_gpe_fixup (vlib_main_t * vm,
		ip_adjacency_t * adj, vlib_buffer_t * b, const void *data)
{
  lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
  int outer_is_ip4;

  if (0 != (lcm->flags & LISP_FLAG_STATS_ENABLED))
    {
      lisp_gpe_increment_stats_counters (lcm, adj, b);
    }

  /* the outer header's version selects the v4 or v6 variant of the fixup */
  outer_is_ip4 = is_v4_packet (vlib_buffer_get_current (b));

  ip_udp_fixup_one (vm, b, outer_is_ip4);
}

/**
 * @brief The LISP-GPE interface's update-adjacency function; converts the
 * adjacency into a midchain with the tunnel's rewrite and stacks it.
 *
 * The LISP adjacency for {next-hop, sw_if_index} must already exist in the
 * DB; this function only looks it up (and ASSERTs that it was found).
 *
 * @param vnm         vnet main (unused)
 * @param sw_if_index the LISP interface the adjacency is on
 * @param ai          the adjacency to update
 */
void
lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
{
  const lisp_gpe_tunnel_t *tunnel;
  lisp_gpe_adjacency_t *ladj;
  ip_address_t next_hop;
  vnet_link_t link;
  adj_flags_t flags;
  index_t lai;

  ip46_address_to_ip_address (&adj_get (ai)->sub_type.nbr.next_hop,
			      &next_hop);

  lai = lisp_adj_find (&next_hop, sw_if_index);
  ASSERT (INDEX_INVALID != lai);

  ladj = lisp_gpe_adjacency_get_i (lai);
  tunnel = lisp_gpe_tunnel_get (ladj->tunnel_index);
  link = adj_get_link_type (ai);

  /* ethernet link adjacencies additionally get the no-count flag */
  flags = ((VNET_LINK_ETHERNET == link) ?
	   (ADJ_FLAG_MIDCHAIN_IP_STACK | ADJ_FLAG_MIDCHAIN_NO_COUNT) :
	   ADJ_FLAG_MIDCHAIN_IP_STACK);

  adj_nbr_midchain_update_rewrite
    (ai, lisp_gpe_fixup, NULL, flags,
     lisp_gpe_tunnel_build_rewrite
     (tunnel, ladj, lisp_gpe_adj_proto_from_vnet_link_type (link)));

  lisp_gpe_adj_stack_one (ladj, ai);
}

/**
 * @brief Build-rewrite function for the LISP-GPE interface.
 *
 * Not expected to be invoked: it ASSERTs unconditionally and returns no
 * rewrite.
 */
u8 *
lisp_gpe_build_rewrite (vnet_main_t * vnm,
			u32 sw_if_index,
			vnet_link_t link_type, const void *dst_address)
{
  u8 *rewrite = NULL;

  ASSERT (0);

  return (rewrite);
}

/**
 * @brief Find the LISP adjacency for a locator pair, creating it when it
 * does not exist yet, and take a lock on it.
 *
 * @param pair             local and remote RLOCs
 * @param overlay_table_id passed to the sub-interface find-or-create
 * @param vni              virtual network identifier
 * @return the pool index of the (now locked) adjacency
 */
index_t
lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
					    u32 overlay_table_id, u32 vni)
{
  const lisp_gpe_sub_interface_t *sub_itf;
  lisp_gpe_adjacency_t *ladj;
  index_t sub_itf_index, lai;

  /*
   * The L3 sub-interface matching the local RLOC and the VNI comes first;
   * the find takes a lock on it.
   */
  sub_itf_index =
    lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc,
						    overlay_table_id, vni);
  sub_itf = lisp_gpe_sub_interface_get (sub_itf_index);

  lai = lisp_adj_find (&pair->rmt_loc, sub_itf->sw_if_index);

  if (INDEX_INVALID != lai)
    {
      /*
       * The adjacency exists and holds its own lock on the sub-interface;
       * give back the one taken by the find above.
       */
      lisp_gpe_sub_interface_unlock (sub_itf_index);
      ladj = lisp_gpe_adjacency_get_i (lai);
    }
  else
    {
      const lisp_gpe_tunnel_t *tunnel;

      pool_get (lisp_adj_pool, ladj);
      clib_memset (ladj, 0, sizeof (*ladj));
      lai = (ladj - lisp_adj_pool);

      ip_address_copy (&ladj->remote_rloc, &pair->rmt_loc);
      ladj->vni = vni;
      /* the sub-interface lock taken by the find now belongs to the adj */
      ladj->lisp_l3_sub_index = sub_itf_index;
      ladj->sw_if_index = sub_itf->sw_if_index;

      /* always lisp-gpe (not legacy) mode; the I flag for a non-zero vni */
      ladj->flags = LISP_GPE_FLAGS_P;
      if (0 != ladj->vni)
	ladj->flags |= LISP_GPE_FLAGS_I;

      /*
       * The tunnel provides the underlying transport and hence the
       * rewrite. The RLOC FIB index is the default table - always.
       */
      ladj->tunnel_index = lisp_gpe_tunnel_find_or_create_and_lock (pair, 0);
      tunnel = lisp_gpe_tunnel_get (ladj->tunnel_index);

      /*
       * Become a child of the RLOC's FIB entry so that we are told when
       * its reachability changes and can re-stack the midchains.
       */
      ladj->fib_entry_child_index =
	fib_entry_child_add (tunnel->fib_entry_index,
			     FIB_NODE_TYPE_LISP_ADJ, lai);

      lisp_adj_insert (&ladj->remote_rloc, ladj->sw_if_index, lai);
    }

  ladj->locks++;

  return (lai);
}

/**
 * @brief Get a pointer to a tunnel from a pointer to a FIB node
 */
static lisp_gpe_adjacency_t *
lisp_gpe_adjacency_from_fib_node (const fib_node_t * node)
{
  return ((lisp_gpe_adjacency_t *)
	  ((char *) node -
	   STRUCT_OFFSET_OF (lisp_gpe_adjacency_t, fib_node)));
}

/**
 * @brief Destroy an adjacency once its last lock has been released.
 *
 * Removes the adjacency from the DB, detaches it from the tunnel's FIB
 * entry, releases the locks it holds on the tunnel and the sub-interface
 * and finally returns it to the pool.
 */
static void
lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj)
{
  const lisp_gpe_tunnel_t *lgt;

  /*
   * remove the adjacency from the DB so that it can no longer be found
   */
  lisp_adj_remove (&ladj->remote_rloc, ladj->sw_if_index);

  /*
   * unlock the resources this adj holds.
   * the tunnel is fetched before it is unlocked since its FIB entry index
   * is needed to remove the child relationship.
   */
  lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);

  fib_entry_child_remove (lgt->fib_entry_index, ladj->fib_entry_child_index);

  lisp_gpe_tunnel_unlock (ladj->tunnel_index);
  lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index);

  /* ladj must not be used after this point */
  pool_put (lisp_adj_pool, ladj);
}

/**
 * @brief Release one lock on the adjacency at pool index lai; the adjacency
 * is destroyed when the lock count reaches zero.
 */
void
lisp_gpe_adjacency_unlock (index_t lai)
{
  lisp_gpe_adjacency_t *ladj = lisp_gpe_adjacency_get_i (lai);

  ladj->locks--;

  if (0 != ladj->locks)
    return;

  lisp_gpe_adjacency_last_lock_gone (ladj);
}

/**
 * @brief Get a read-only pointer to the adjacency at pool index lai.
 */
const lisp_gpe_adjacency_t *
lisp_gpe_adjacency_get (index_t lai)
{
  const lisp_gpe_adjacency_t *ladj = lisp_gpe_adjacency_get_i (lai);

  return (ladj);
}


/**
 * @brief LISP GPE adjacency back walk
 *
 * Invoked through the FIB node virtual function table. The adjacency that
 * embeds the node re-stacks its midchains.
 *
 * @return always FIB_NODE_BACK_WALK_CONTINUE
 */
static fib_node_back_walk_rc_t
lisp_gpe_adjacency_back_walk (fib_node_t * node,
			      fib_node_back_walk_ctx_t * ctx)
{
  lisp_gpe_adjacency_t *ladj = lisp_gpe_adjacency_from_fib_node (node);

  lisp_gpe_adj_stack (ladj);

  return (FIB_NODE_BACK_WALK_CONTINUE);
}

/**
 * @brief Get the FIB node embedded in the adjacency at the given pool index.
 */
static fib_node_t *
lisp_gpe_adjacency_get_fib_node (fib_node_index_t index)
{
  return (&lisp_gpe_adjacency_get_i (index)->fib_node);
}

/**
 * @brief FIB node call back for the last lock on the node going away;
 * destroys the adjacency that embeds the node.
 */
static void
lisp_gpe_adjacency_last_fib_lock_gone (fib_node_t * node)
{
  lisp_gpe_adjacency_t *ladj = lisp_gpe_adjacency_from_fib_node (node);

  lisp_gpe_adjacency_last_lock_gone (ladj);
}

/**
 * Virtual function table registered with the FIB for nodes of type
 * FIB_NODE_TYPE_LISP_ADJ (see lisp_gpe_adj_module_init).
 *
 * The storage-class specifier is placed first; any other position is an
 * obsolescent feature of the language.
 * The identifier's spelling is left as is since the module init function
 * refers to it by this name.
 */
static const fib_node_vft_t lisp_gpe_tuennel_vft = {
  .fnv_get = lisp_gpe_adjacency_get_fib_node,
  .fnv_back_walk = lisp_gpe_adjacency_back_walk,
  .fnv_last_lock = lisp_gpe_adjacency_last_fib_lock_gone,
};

/**
 * @brief Format function for a LISP-GPE adjacency.
 *
 * va_args:
 *  - lisp_gpe_adjacency_t *: the adjacency to format
 *  - lisp_gpe_adjacency_format_flags_t: with
 *    LISP_GPE_ADJ_FORMAT_FLAG_DETAIL set the index, the lock count, the
 *    sub-interface and the tunnel are formatted in full; otherwise only the
 *    sub-interface and tunnel indices are shown.
 *
 * @return the vector s with the formatted text appended
 */
u8 *
format_lisp_gpe_adjacency (u8 * s, va_list * args)
{
  lisp_gpe_adjacency_t *ladj = va_arg (*args, lisp_gpe_adjacency_t *);
  lisp_gpe_adjacency_format_flags_t flags =
    va_arg (*args, lisp_gpe_adjacency_format_flags_t);

  if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL)
    {
      /*
       * A pointer difference is wider than the int that %d consumes;
       * narrow it explicitly so the argument matches the conversion.
       */
      s = format (s, "index %d locks:%d\n",
		  (int) (ladj - lisp_adj_pool), ladj->locks);
    }

  s = format (s, " vni: %d,", ladj->vni);
  s = format (s, " remote-RLOC: %U,", format_ip_address, &ladj->remote_rloc);

  if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL)
    {
      s = format (s, " %U\n",
		  format_lisp_gpe_sub_interface,
		  lisp_gpe_sub_interface_get (ladj->lisp_l3_sub_index));
      s = format (s, " %U\n",
		  format_lisp_gpe_tunnel,
		  lisp_gpe_tunnel_get (ladj->tunnel_index));
    }
  else
    {
      s = format (s, " LISP L3 sub-interface index: %d,",
		  ladj->lisp_l3_sub_index);
      s = format (s, " LISP tunnel index: %d", ladj->tunnel_index);
    }

  return (s);
}

/**
 * @brief CLI handler for "show gpe adjacency".
 *
 * With a numeric argument the adjacency at that pool index is shown in
 * detail; without one every adjacency in the pool is listed in brief.
 *
 * The index comes straight from the user, so it is checked against the pool
 * before being used to fetch an element.
 */
static clib_error_t *
lisp_gpe_adjacency_show (vlib_main_t * vm,
			 unformat_input_t * input, vlib_cli_command_t * cmd)
{
  lisp_gpe_adjacency_t *ladj;
  index_t index;

  if (pool_elts (lisp_adj_pool) == 0)
    vlib_cli_output (vm, "No lisp-gpe Adjacencies");

  if (unformat (input, "%d", &index))
    {
      /* out of range and free indices are both rejected */
      if (pool_is_free_index (lisp_adj_pool, index))
	{
	  vlib_cli_output (vm, "No lisp-gpe Adjacency with index %u", index);
	  return 0;
	}

      ladj = lisp_gpe_adjacency_get_i (index);
      vlib_cli_output (vm, "%U", format_lisp_gpe_adjacency, ladj,
		       LISP_GPE_ADJ_FORMAT_FLAG_DETAIL);
    }
  else
    {
      /* *INDENT-OFF* */
      pool_foreach (ladj, lisp_adj_pool,
      ({
	/* narrow the pointer difference to the int that %d consumes */
	vlib_cli_output (vm, "[%d] %U\n",
			 (int) (ladj - lisp_adj_pool),
			 format_lisp_gpe_adjacency, ladj,
			 LISP_GPE_ADJ_FORMAT_FLAG_NONE);
      }));
      /* *INDENT-ON* */
    }

  return 0;
}

/*
 * CLI registration for "show gpe adjacency"; the optional argument is the
 * pool index of a single adjacency to show in detail.
 */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
{
  .path = "show gpe adjacency",
  .short_help = "show gpe adjacency [<index>]",
  .function = lisp_gpe_adjacency_show,
};
/* *INDENT-ON* */

#define LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (256)
#define LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (1<<20)

/**
 * @brief Module initialisation for LISP-GPE adjacencies.
 *
 * Creates the adjacency lookup table (lisp_adj_db) with the default bucket
 * count and memory size, then registers the FIB_NODE_TYPE_LISP_ADJ node
 * type together with its virtual function table.
 *
 * @param vm vlib main (unused)
 * @return always NULL (no error)
 */
static clib_error_t *
lisp_gpe_adj_module_init (vlib_main_t * vm)
{
  clib_error_t *error = NULL;

  /* the DB that maps adjacency keys to adjacency objects */
  BV (clib_bihash_init) (&lisp_adj_db, "Adjacency Neighbour table",
			 LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
			 LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);

  /* make the adjacency type known to the FIB graph */
  fib_node_register_type (FIB_NODE_TYPE_LISP_ADJ, &lisp_gpe_tuennel_vft);

  return (error);
}

/* Hook lisp_gpe_adj_module_init into vlib start-up via VLIB_INIT_FUNCTION. */
VLIB_INIT_FUNCTION (lisp_gpe_adj_module_init)
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
hread_index; init_nat_i2o_kv (&kv0, s, thread_index, s - nm->per_thread_data[thread_index].sessions); if (clib_bihash_add_or_overwrite_stale_8_8 ( &nm->in2out, &kv0, nat44_i2o_is_idle_session_cb, &ctx0)) nat_elog_notice (nm, "in2out key add failed"); init_nat_o2i_kv (&kv0, s, thread_index, s - nm->per_thread_data[thread_index].sessions); if (clib_bihash_add_or_overwrite_stale_8_8 ( &nm->out2in, &kv0, nat44_o2i_is_idle_session_cb, &ctx0)) nat_elog_notice (nm, "out2in key add failed"); /* log NAT event */ nat_ipfix_logging_nat44_ses_create (thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->nat_proto, s->in2out.port, s->out2in.port, s->in2out.fib_index); nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index, &s->in2out.addr, s->in2out.port, &s->out2in.addr, s->out2in.port, s->nat_proto); nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr, s->out2in.port, &s->ext_host_addr, s->ext_host_port, &s->ext_host_nat_addr, s->ext_host_nat_port, s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0); return s; } #ifndef CLIB_MARCH_VARIANT static_always_inline nat44_ei_out2in_error_t icmp_get_key (vlib_buffer_t *b, ip4_header_t *ip0, ip4_address_t *addr, u16 *port, nat_protocol_t *nat_proto) { icmp46_header_t *icmp0; icmp_echo_header_t *echo0, *inner_echo0 = 0; ip4_header_t *inner_ip0; void *l4_header = 0; icmp46_header_t *inner_icmp0; icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); if (!icmp_type_is_error_message (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { *nat_proto = NAT_PROTOCOL_ICMP; *addr = ip0->dst_address; *port = vnet_buffer (b)->ip.reass.l4_src_port; } else { inner_ip0 = (ip4_header_t *) (echo0 + 1); l4_header = ip4_next_header (inner_ip0); *nat_proto = ip_proto_to_nat_proto (inner_ip0->protocol); *addr = inner_ip0->src_address; switch (*nat_proto) { case NAT_PROTOCOL_ICMP: inner_icmp0 = (icmp46_header_t *) l4_header; inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); 
*port = inner_echo0->identifier; break; case NAT_PROTOCOL_UDP: case NAT_PROTOCOL_TCP: *port = ((tcp_udp_header_t *) l4_header)->src_port; break; default: return NAT44_EI_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL; } } return -1; /* success */ } /** * Get address and port values to be used for ICMP packet translation * and create session if needed * * @param[in,out] nm NAT main * @param[in,out] node NAT node runtime * @param[in] thread_index thread index * @param[in,out] b0 buffer containing packet to be translated * @param[in,out] ip0 ip header * @param[out] p_proto protocol used for matching * @param[out] p_value address and port after NAT translation * @param[out] p_dont_translate if packet should not be translated * @param d optional parameter * @param e optional parameter */ u32 nat44_ei_icmp_match_out2in_slow (vlib_node_runtime_t *node, u32 thread_index, vlib_buffer_t *b0, ip4_header_t *ip0, ip4_address_t *addr, u16 *port, u32 *fib_index, nat_protocol_t *proto, nat44_ei_session_t **p_s0, u8 *dont_translate) { nat44_ei_main_t *nm = &nat44_ei_main; nat44_ei_main_per_thread_data_t *tnm = &nm->per_thread_data[thread_index]; u32 sw_if_index0; nat44_ei_session_t *s0 = 0; clib_bihash_kv_8_8_t kv0, value0; u8 is_addr_only; u32 next0 = ~0; int err; u8 identity_nat; vlib_main_t *vm = vlib_get_main (); *dont_translate = 0; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; *fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); *proto = 0; err = icmp_get_key (b0, ip0, addr, port, proto); if (err != -1) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } ip4_address_t mapping_addr; u16 mapping_port; u32 mapping_fib_index; init_nat_k (&kv0, *addr, *port, *fib_index, *proto); if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ if (nat44_ei_static_mapping_match ( *addr, *port, *fib_index, 
*proto, &mapping_addr, &mapping_port, &mapping_fib_index, 1, &is_addr_only, &identity_nat)) { if (!nm->forwarding_enabled) { /* Don't NAT packet aimed at the intfc address */ if (PREDICT_FALSE (nat44_ei_is_interface_addr ( nm->ip4_main, node, sw_if_index0, ip0->dst_address.as_u32))) { *dont_translate = 1; goto out; } b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } else { *dont_translate = 1; goto out; } } if (PREDICT_FALSE (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request || !is_addr_only))) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } if (PREDICT_FALSE (identity_nat)) { *dont_translate = 1; goto out; } /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping ( nm, b0, mapping_addr, mapping_port, mapping_fib_index, *addr, *port, *fib_index, *proto, node, thread_index, vlib_time_now (vm)); if (!s0) { next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } } else { if (PREDICT_FALSE (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } s0 = pool_elt_at_index (tnm->sessions, nat_value_get_session_index (&value0)); } out: if (s0) { *addr = s0->in2out.addr; *port = s0->in2out.port; *fib_index = s0->in2out.fib_index; } if (p_s0) *p_s0 = s0; return next0; } #endif #ifndef CLIB_MARCH_VARIANT u32 nat44_ei_icmp_match_out2in_fast (vlib_node_runtime_t *node, u32 thread_index, vlib_buffer_t *b0, ip4_header_t *ip0, ip4_address_t *mapping_addr, u16 *mapping_port, u32 *mapping_fib_index, nat_protocol_t *proto, nat44_ei_session_t **p_s0, u8 *dont_translate) { nat44_ei_main_t *nm = &nat44_ei_main; u32 sw_if_index0; u32 rx_fib_index0; u8 is_addr_only; u32 next0 = ~0; int err; ip4_address_t addr; u16 port; *dont_translate = 0; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); err = icmp_get_key (b0, ip0, &addr, &port, proto); if (err != -1) { b0->error = node->errors[err]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } if (nat44_ei_static_mapping_match (addr, port, rx_fib_index0, *proto, mapping_addr, mapping_port, mapping_fib_index, 1, &is_addr_only, 0)) { /* Don't NAT packet aimed at the intfc address */ if (nat44_ei_is_interface_addr (nm->ip4_main, node, sw_if_index0, ip0->dst_address.as_u32)) { *dont_translate = 1; goto out; } b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } if (PREDICT_FALSE (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request || !is_addr_only) && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } out: return next0; } #endif u32 nat44_ei_icmp_out2in (vlib_buffer_t *b0, ip4_header_t *ip0, icmp46_header_t *icmp0, u32 sw_if_index0, u32 rx_fib_index0, vlib_node_runtime_t *node, u32 next0, u32 thread_index, nat44_ei_session_t **p_s0); #ifndef CLIB_MARCH_VARIANT u32 nat44_ei_icmp_out2in (vlib_buffer_t *b0, ip4_header_t *ip0, icmp46_header_t *icmp0, u32 sw_if_index0, u32 rx_fib_index0, vlib_node_runtime_t *node, u32 next0, u32 thread_index, nat44_ei_session_t **p_s0) { nat44_ei_main_t *nm = &nat44_ei_main; icmp_echo_header_t *echo0, *inner_echo0 = 0; ip4_header_t *inner_ip0 = 0; void *l4_header = 0; icmp46_header_t *inner_icmp0; u8 dont_translate; u32 new_addr0, old_addr0; u16 old_id0, new_id0; ip_csum_t sum0; u16 checksum0; u32 next0_tmp; vlib_main_t *vm = vlib_get_main (); ip4_address_t addr; u16 port; u32 fib_index; nat_protocol_t proto; echo0 = (icmp_echo_header_t *) (icmp0 + 1); if (PREDICT_TRUE (nm->pat)) { next0_tmp = nat44_ei_icmp_match_out2in_slow ( node, thread_index, b0, ip0, &addr, &port, &fib_index, &proto, p_s0, &dont_translate); } else { next0_tmp = nat44_ei_icmp_match_out2in_fast ( node, thread_index, b0, ip0, &addr, &port, &fib_index, &proto, p_s0, &dont_translate); } if (next0_tmp != ~0) next0 = next0_tmp; if (next0 == NAT44_EI_OUT2IN_NEXT_DROP || dont_translate) goto out; if (PREDICT_TRUE (!ip4_is_fragment (ip0))) { sum0 = ip_incremental_checksum_buffer (vm, b0, (u8 *) icmp0 - (u8 *) vlib_buffer_get_current (b0), ntohs (ip0->length) - ip4_header_bytes (ip0), 0); checksum0 = ~ip_csum_fold (sum0); if (checksum0 != 0 && checksum0 != 0xffff) { next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } } old_addr0 = ip0->dst_address.as_u32; new_addr0 = ip0->dst_address.as_u32 = addr.as_u32; vnet_buffer (b0)->sw_if_index[VLIB_TX] = fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, 
ip4_header_t, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { if (icmp0->checksum == 0) icmp0->checksum = 0xffff; if (!icmp_type_is_error_message (icmp0->type)) { new_id0 = port; if (PREDICT_FALSE (new_id0 != echo0->identifier)) { old_id0 = echo0->identifier; new_id0 = port; echo0->identifier = new_id0; sum0 = icmp0->checksum; sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, identifier /* changed member */ ); icmp0->checksum = ip_csum_fold (sum0); } } else { inner_ip0 = (ip4_header_t *) (echo0 + 1); l4_header = ip4_next_header (inner_ip0); if (!ip4_header_checksum_is_valid (inner_ip0)) { next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto out; } old_addr0 = inner_ip0->src_address.as_u32; inner_ip0->src_address = addr; new_addr0 = inner_ip0->src_address.as_u32; sum0 = icmp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */ ); icmp0->checksum = ip_csum_fold (sum0); switch (proto) { case NAT_PROTOCOL_ICMP: inner_icmp0 = (icmp46_header_t *) l4_header; inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); old_id0 = inner_echo0->identifier; new_id0 = port; inner_echo0->identifier = new_id0; sum0 = icmp0->checksum; sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, identifier); icmp0->checksum = ip_csum_fold (sum0); break; case NAT_PROTOCOL_UDP: case NAT_PROTOCOL_TCP: old_id0 = ((tcp_udp_header_t *) l4_header)->src_port; new_id0 = port; ((tcp_udp_header_t *) l4_header)->src_port = new_id0; sum0 = icmp0->checksum; sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, src_port); icmp0->checksum = ip_csum_fold (sum0); break; default: ASSERT (0); } } } out: return next0; } #endif static inline u32 nat44_ei_icmp_out2in_slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0, ip4_header_t *ip0, icmp46_header_t *icmp0, u32 sw_if_index0, u32 rx_fib_index0, vlib_node_runtime_t *node, u32 next0, f64 now, u32 
thread_index, nat44_ei_session_t **p_s0) { vlib_main_t *vm = vlib_get_main (); next0 = nat44_ei_icmp_out2in (b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, thread_index, p_s0); nat44_ei_session_t *s0 = *p_s0; if (PREDICT_TRUE (next0 != NAT44_EI_OUT2IN_NEXT_DROP && s0)) { /* Accounting */ nat44_ei_session_update_counters ( s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); /* Per-user LRU list maintenance */ nat44_ei_session_update_lru (nm, s0, thread_index); } return next0; } static int nat_out2in_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b, ip4_header_t *ip, u32 rx_fib_index) { clib_bihash_kv_8_8_t kv, value; nat44_ei_static_mapping_t *m; u32 old_addr, new_addr; ip_csum_t sum; init_nat_k (&kv, ip->dst_address, 0, 0, 0); if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value)) return 1; m = pool_elt_at_index (nm->static_mappings, value.value); old_addr = ip->dst_address.as_u32; new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; sum = ip->checksum; sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); ip->checksum = ip_csum_fold (sum); vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index; return 0; } VLIB_NODE_FN (nat44_ei_out2in_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { u32 n_left_from, *from; nat44_ei_main_t *nm = &nat44_ei_main; f64 now = vlib_time_now (vm); u32 thread_index = vm->thread_index; nat44_ei_main_per_thread_data_t *tnm = &nm->per_thread_data[thread_index]; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; vlib_get_buffers (vm, from, b, n_left_from); while (n_left_from >= 2) { vlib_buffer_t *b0, *b1; u32 next0 = NAT44_EI_OUT2IN_NEXT_LOOKUP; u32 next1 = NAT44_EI_OUT2IN_NEXT_LOOKUP; u32 sw_if_index0, sw_if_index1; ip4_header_t *ip0, *ip1; ip_csum_t sum0, sum1; u32 new_addr0, old_addr0; u16 new_port0, old_port0; u32 new_addr1, 
old_addr1; u16 new_port1, old_port1; udp_header_t *udp0, *udp1; tcp_header_t *tcp0, *tcp1; icmp46_header_t *icmp0, *icmp1; u32 rx_fib_index0, rx_fib_index1; u32 proto0, proto1; nat44_ei_session_t *s0 = 0, *s1 = 0; clib_bihash_kv_8_8_t kv0, kv1, value0, value1; u8 identity_nat0, identity_nat1; ip4_address_t sm_addr0, sm_addr1; u16 sm_port0, sm_port1; u32 sm_fib_index0, sm_fib_index1; b0 = *b; b++; b1 = *b; b++; /* Prefetch next iteration. */ if (PREDICT_TRUE (n_left_from >= 4)) { vlib_buffer_t *p2, *p3; p2 = *b; p3 = *(b + 1); vlib_prefetch_buffer_header (p2, LOAD); vlib_prefetch_buffer_header (p3, LOAD); clib_prefetch_load (p2->data); clib_prefetch_load (p3->data); } vnet_buffer (b0)->snat.flags = 0; vnet_buffer (b1)->snat.flags = 0; ip0 = vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); if (PREDICT_FALSE (ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = NAT44_EI_OUT2IN_NEXT_ICMP_ERROR; goto trace0; } proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { if (nat_out2in_sm_unknown_proto (nm, b0, ip0, rx_fib_index0)) { if (!nm->forwarding_enabled) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; } } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.other, thread_index, sw_if_index0, 1); goto trace0; } if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { next0 = nat44_ei_icmp_out2in_slow_path ( nm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, now, thread_index, &s0); vlib_increment_simple_counter (&nm->counters.slowpath.out2in.icmp, thread_index, sw_if_index0, 1); goto trace0; } init_nat_k (&kv0, 
ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0); if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ if (nat44_ei_static_mapping_match ( ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0, &sm_addr0, &sm_port0, &sm_fib_index0, 1, 0, &identity_nat0)) { /* * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP && (vnet_buffer (b0)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) { vnet_feature_next (&next0, b0); goto trace0; } if (!nm->forwarding_enabled) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; } goto trace0; } if (PREDICT_FALSE (identity_nat0)) goto trace0; /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping ( nm, b0, sm_addr0, sm_port0, sm_fib_index0, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0, node, thread_index, now); if (!s0) { next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto trace0; } } else s0 = pool_elt_at_index (tnm->sessions, nat_value_get_session_index (&value0)); old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; new_addr0 = ip0->dst_address.as_u32; vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; new_port0 = udp0->dst_port = s0->in2out.port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = 
ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); tcp0->checksum = ip_csum_fold (sum0); } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.tcp, thread_index, sw_if_index0, 1); } else { if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; new_port0 = udp0->dst_port = s0->in2out.port; if (PREDICT_FALSE (udp0->checksum)) { sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); udp0->checksum = ip_csum_fold (sum0); } } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.udp, thread_index, sw_if_index0, 1); } /* Accounting */ nat44_ei_session_update_counters ( s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); /* Per-user LRU list maintenance */ nat44_ei_session_update_lru (nm, s0, thread_index); trace0: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { nat44_ei_out2in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->sw_if_index = sw_if_index0; t->next_index = next0; t->session_index = ~0; if (s0) t->session_index = s0 - nm->per_thread_data[thread_index].sessions; } if (next0 == NAT44_EI_OUT2IN_NEXT_DROP) { vlib_increment_simple_counter (&nm->counters.slowpath.out2in.drops, thread_index, sw_if_index0, 1); } ip1 = vlib_buffer_get_current (b1); udp1 = ip4_next_header (ip1); tcp1 = (tcp_header_t *) udp1; icmp1 = (icmp46_header_t *) udp1; sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; rx_fib_index1 = vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index1); if (PREDICT_FALSE (ip1->ttl == 1)) { vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next1 = NAT44_EI_OUT2IN_NEXT_ICMP_ERROR; 
goto trace1; } proto1 = ip_proto_to_nat_proto (ip1->protocol); if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_OTHER)) { if (nat_out2in_sm_unknown_proto (nm, b1, ip1, rx_fib_index1)) { if (!nm->forwarding_enabled) { b1->error = node->errors[NAT44_EI_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; next1 = NAT44_EI_OUT2IN_NEXT_DROP; } } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.other, thread_index, sw_if_index1, 1); goto trace1; } if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_ICMP)) { next1 = nat44_ei_icmp_out2in_slow_path ( nm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, next1, now, thread_index, &s1); vlib_increment_simple_counter (&nm->counters.slowpath.out2in.icmp, thread_index, sw_if_index1, 1); goto trace1; } init_nat_k (&kv1, ip1->dst_address, vnet_buffer (b1)->ip.reass.l4_dst_port, rx_fib_index1, proto1); if (clib_bihash_search_8_8 (&nm->out2in, &kv1, &value1)) { /* Try to match static mapping by external address and port, destination address and port in packet */ if (nat44_ei_static_mapping_match ( ip1->dst_address, vnet_buffer (b1)->ip.reass.l4_dst_port, rx_fib_index1, proto1, &sm_addr1, &sm_port1, &sm_fib_index1, 1, 0, &identity_nat1)) { /* * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_UDP && (vnet_buffer (b1)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) { vnet_feature_next (&next1, b1); goto trace1; } if (!nm->forwarding_enabled) { b1->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION]; next1 = NAT44_EI_OUT2IN_NEXT_DROP; } goto trace1; } if (PREDICT_FALSE (identity_nat1)) goto trace1; /* Create session initiated by host from external network */ s1 = create_session_for_static_mapping ( nm, b1, sm_addr1, sm_port1, sm_fib_index1, ip1->dst_address, vnet_buffer (b1)->ip.reass.l4_dst_port, rx_fib_index1, proto1, node, thread_index, now); if (!s1) { next1 = NAT44_EI_OUT2IN_NEXT_DROP; goto trace1; } } else s1 = 
pool_elt_at_index (nm->per_thread_data[thread_index].sessions, nat_value_get_session_index (&value1)); old_addr1 = ip1->dst_address.as_u32; ip1->dst_address = s1->in2out.addr; new_addr1 = ip1->dst_address.as_u32; vnet_buffer (b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index; sum1 = ip1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ ); ip1->checksum = ip_csum_fold (sum1); if (PREDICT_TRUE (proto1 == NAT_PROTOCOL_TCP)) { if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; new_port1 = udp1->dst_port = s1->in2out.port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ ); sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t /* cheat */ , length /* changed member */ ); tcp1->checksum = ip_csum_fold (sum1); } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.tcp, thread_index, sw_if_index1, 1); } else { if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; new_port1 = udp1->dst_port = s1->in2out.port; if (PREDICT_FALSE (udp1->checksum)) { sum1 = udp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ ); sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t /* cheat */ , length /* changed member */ ); udp1->checksum = ip_csum_fold (sum1); } } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.udp, thread_index, sw_if_index1, 1); } /* Accounting */ nat44_ei_session_update_counters ( s1, now, vlib_buffer_length_in_chain (vm, b1), thread_index); /* Per-user LRU list maintenance */ nat44_ei_session_update_lru (nm, s1, thread_index); trace1: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b1->flags & VLIB_BUFFER_IS_TRACED))) { nat44_ei_out2in_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); t->sw_if_index = 
sw_if_index1; t->next_index = next1; t->session_index = ~0; if (s1) t->session_index = s1 - nm->per_thread_data[thread_index].sessions; } if (next1 == NAT44_EI_OUT2IN_NEXT_DROP) { vlib_increment_simple_counter (&nm->counters.slowpath.out2in.drops, thread_index, sw_if_index1, 1); } n_left_from -= 2; next[0] = next0; next[1] = next1; next += 2; } while (n_left_from > 0) { vlib_buffer_t *b0; u32 next0 = NAT44_EI_OUT2IN_NEXT_LOOKUP; u32 sw_if_index0; ip4_header_t *ip0; ip_csum_t sum0; u32 new_addr0, old_addr0; u16 new_port0, old_port0; udp_header_t *udp0; tcp_header_t *tcp0; icmp46_header_t *icmp0; u32 rx_fib_index0; u32 proto0; nat44_ei_session_t *s0 = 0; clib_bihash_kv_8_8_t kv0, value0; u8 identity_nat0; ip4_address_t sm_addr0; u16 sm_port0; u32 sm_fib_index0; b0 = *b; ++b; vnet_buffer (b0)->snat.flags = 0; ip0 = vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { if (nat_out2in_sm_unknown_proto (nm, b0, ip0, rx_fib_index0)) { if (!nm->forwarding_enabled) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; } } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.other, thread_index, sw_if_index0, 1); goto trace00; } if (PREDICT_FALSE (ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = NAT44_EI_OUT2IN_NEXT_ICMP_ERROR; goto trace00; } if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { next0 = nat44_ei_icmp_out2in_slow_path ( nm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, now, thread_index, &s0); vlib_increment_simple_counter (&nm->counters.slowpath.out2in.icmp, 
thread_index, sw_if_index0, 1); goto trace00; } init_nat_k (&kv0, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0); if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ if (nat44_ei_static_mapping_match ( ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0, &sm_addr0, &sm_port0, &sm_fib_index0, 1, 0, &identity_nat0)) { /* * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP && (vnet_buffer (b0)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) { vnet_feature_next (&next0, b0); goto trace00; } if (!nm->forwarding_enabled) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION]; next0 = NAT44_EI_OUT2IN_NEXT_DROP; } goto trace00; } if (PREDICT_FALSE (identity_nat0)) goto trace00; /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping ( nm, b0, sm_addr0, sm_port0, sm_fib_index0, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0, node, thread_index, now); if (!s0) { next0 = NAT44_EI_OUT2IN_NEXT_DROP; goto trace00; } } else s0 = pool_elt_at_index (nm->per_thread_data[thread_index].sessions, nat_value_get_session_index (&value0)); old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; new_addr0 = ip0->dst_address.as_u32; vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; new_port0 = udp0->dst_port = s0->in2out.port; sum0 = tcp0->checksum; sum0 = 
ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); tcp0->checksum = ip_csum_fold (sum0); } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.tcp, thread_index, sw_if_index0, 1); } else { if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; new_port0 = udp0->dst_port = s0->in2out.port; if (PREDICT_FALSE (udp0->checksum)) { sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); udp0->checksum = ip_csum_fold (sum0); } } vlib_increment_simple_counter (&nm->counters.slowpath.out2in.udp, thread_index, sw_if_index0, 1); } /* Accounting */ nat44_ei_session_update_counters ( s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); /* Per-user LRU list maintenance */ nat44_ei_session_update_lru (nm, s0, thread_index); trace00: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { nat44_ei_out2in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->sw_if_index = sw_if_index0; t->next_index = next0; t->session_index = ~0; if (s0) t->session_index = s0 - nm->per_thread_data[thread_index].sessions; } if (next0 == NAT44_EI_OUT2IN_NEXT_DROP) { vlib_increment_simple_counter (&nm->counters.slowpath.out2in.drops, thread_index, sw_if_index0, 1); } n_left_from--; next[0] = next0; next++; } vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, frame->n_vectors); return frame->n_vectors; } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (nat44_ei_out2in_node) = { .name = "nat44-ei-out2in", .vector_size = sizeof (u32), .format_trace = format_nat44_ei_out2in_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = 
ARRAY_LEN(nat44_ei_out2in_error_strings), .error_strings = nat44_ei_out2in_error_strings, .runtime_data_bytes = sizeof (nat44_ei_runtime_t), .n_next_nodes = NAT44_EI_OUT2IN_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [NAT44_EI_OUT2IN_NEXT_DROP] = "error-drop", [NAT44_EI_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", [NAT44_EI_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; /* *INDENT-ON* */ VLIB_NODE_FN (nat44_ei_out2in_fast_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { u32 n_left_from, *from; nat44_ei_main_t *nm = &nat44_ei_main; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; vlib_get_buffers (vm, from, b, n_left_from); while (n_left_from > 0) { vlib_buffer_t *b0; u32 next0 = NAT44_EI_OUT2IN_NEXT_DROP; u32 sw_if_index0; ip4_header_t *ip0; ip_csum_t sum0; u32 new_addr0, old_addr0; u16 new_port0, old_port0; udp_header_t *udp0; tcp_header_t *tcp0; icmp46_header_t *icmp0; u32 proto0; u32 rx_fib_index0; ip4_address_t sm_addr0; u16 sm_port0; u32 sm_fib_index0; b0 = *b; b++; ip0 = vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); vnet_feature_next (&next0, b0); if (PREDICT_FALSE (ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = NAT44_EI_OUT2IN_NEXT_ICMP_ERROR; goto trace00; } proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) goto trace00; if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { next0 = nat44_ei_icmp_out2in (b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, ~0, 0); goto trace00; } if (nat44_ei_static_mapping_match 
(ip0->dst_address, udp0->dst_port, rx_fib_index0, proto0, &sm_addr0, &sm_port0, &sm_fib_index0, 1, 0, 0)) { b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION]; goto trace00; } new_addr0 = sm_addr0.as_u32; new_port0 = sm_port0; vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm_fib_index0; old_addr0 = ip0->dst_address.as_u32; ip0->dst_address.as_u32 = new_addr0; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_FALSE (new_port0 != udp0->dst_port)) { old_port0 = udp0->dst_port; udp0->dst_port = new_port0; if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); tcp0->checksum = ip_csum_fold (sum0); } else if (udp0->checksum) { sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); udp0->checksum = ip_csum_fold (sum0); } } else { if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); tcp0->checksum = ip_csum_fold (sum0); } else if (udp0->checksum) { sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); udp0->checksum = ip_csum_fold (sum0); } } trace00: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { nat44_ei_out2in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->sw_if_index = sw_if_index0; t->next_index = next0; } if (next0 == NAT44_EI_OUT2IN_NEXT_DROP) { vlib_increment_simple_counter 
(&nm->counters.fastpath.out2in.drops, vm->thread_index, sw_if_index0, 1); } n_left_from--; next[0] = next0; next++; } vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, frame->n_vectors); return frame->n_vectors; } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (nat44_ei_out2in_fast_node) = { .name = "nat44-ei-out2in-fast", .vector_size = sizeof (u32), .format_trace = format_nat44_ei_out2in_fast_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(nat44_ei_out2in_error_strings), .error_strings = nat44_ei_out2in_error_strings, .runtime_data_bytes = sizeof (nat44_ei_runtime_t), .n_next_nodes = NAT44_EI_OUT2IN_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [NAT44_EI_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", [NAT44_EI_OUT2IN_NEXT_DROP] = "error-drop", [NAT44_EI_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; /* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */