aboutsummaryrefslogtreecommitdiffstats
path: root/src/vppinfra/elf_clib.c
blob: 7bb72ee3e3f90fe6459782be6e5376ea49faacb3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vppinfra/elf_clib.h>

#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>

/* Holder for a search path that has been split into its components. */
typedef struct
{
  /* Vector of component strings; path_search_free() releases each
     element and then the vector itself with vec_free. */
  char **path;
} path_search_t;

/* Release every component string held in P->path, then the vector of
   components itself. */
always_inline void
path_search_free (path_search_t * p)
{
  char **entry;

  vec_foreach (entry, p->path)
    vec_free (*entry);

  vec_free (p->path);
}

/*
 * Split STRING at every occurrence of DELIMITER.
 *
 * Returns a vector of newly built, NUL-terminated component vectors.
 * Delimiters are not included in the components; an empty field (and an
 * empty input string) produces an empty component.
 */
static char **
split_string (char *string, u8 delimiter)
{
  char **pieces = 0;
  char *cursor = string;

  for (;;)
    {
      char *begin = cursor;
      char *piece = 0;

      /* Advance to the next delimiter or to the end of the input. */
      while (*cursor != 0 && *cursor != delimiter)
	cursor++;

      /* Copy the field and terminate it. */
      vec_add (piece, begin, cursor - begin);
      vec_add1 (piece, 0);
      vec_add1 (pieces, piece);

      if (*cursor == 0)
	return pieces;

      /* Step over the delimiter. */
      cursor++;
    }
}

/*
 * Test whether DIR/FILE names a regular file with at least one execute
 * permission bit (user, group or other) set.
 *
 * Returns 1 when it does, 0 otherwise (including when stat fails).
 */
static int
file_exists_and_is_executable (char *dir, char *file)
{
  struct stat st;
  char *full_path;
  int ok = 0;

  /* Build a NUL-terminated "dir/file" string. */
  full_path = (char *) format (0, "%s/%s%c", dir, file, 0);

  if (stat (full_path, &st) >= 0
      && S_ISREG (st.st_mode)
      && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)
    ok = 1;

  vec_free (full_path);

  return ok;
}

/*
 * Resolve FILE to a path that can be opened.
 *
 * If FILE starts with '.' or '/', or the PATH environment variable is
 * not set, FILE itself is returned unchanged.  Otherwise each
 * ':'-separated PATH component is tried in order and the first one
 * containing FILE as an executable regular file wins.
 *
 * Returns FILE itself (caller must not free), or a newly built
 * NUL-terminated "dir/file" vector (caller releases it with vec_free),
 * or 0 when no PATH component contains FILE.
 */
static char *
path_search (char *file)
{
  path_search_t ps;
  uword i;
  char *path_env;
  char *result = 0;

  /* Relative or absolute path. */
  if (file[0] == '.' || file[0] == '/')
    return file;

  path_env = getenv ("PATH");
  if (path_env == 0)
    return file;

  ps.path = split_string (path_env, ':');

  for (i = 0; i < vec_len (ps.path); i++)
    if (file_exists_and_is_executable (ps.path[i], file))
      {
	/* The trailing 0 is the argument for %c: it NUL-terminates the
	   result so callers can use it as a C string. */
	result = (char *) format (0, "%s/%s%c", ps.path[i], file, 0);
	break;
      }

  path_search_free (&ps);

  return result;
}

/*
 * Map one ELF file, parse it, and record its CLIB sections and symbols.
 *
 * cem          - registry being filled in; a new elf_main_t is appended
 *                to cem->elf_mains for this file.
 * file_name    - path of the ELF file; it is opened and mapped read-only.
 * link_address - base added to each recorded section's exec_address and
 *                to every symbol value read from the file.
 *
 * Returns 0 on success, otherwise the error from the failing
 * open/fstat/mmap call or from elf_parse().  On error the new elf_main_t
 * is released with elf_main_free() (its slot in cem->elf_mains is not
 * removed).  The file descriptor and the mapping are released on every
 * path.
 */
static clib_error_t *
clib_elf_parse_file (clib_elf_main_t * cem,
		     char *file_name, void *link_address)
{
  elf_main_t *em;
  elf_section_t *s;
  int fd;
  struct stat fd_stat;
  uword mmap_length = 0;
  void *data = 0;
  clib_error_t *error = 0;

  vec_add2 (cem->elf_mains, em, 1);

  fd = open (file_name, 0);
  if (fd < 0)
    {
      error = clib_error_return_unix (0, "open `%s'", file_name);
      goto done;
    }

  if (fstat (fd, &fd_stat) < 0)
    {
      error = clib_error_return_unix (0, "fstat `%s'", file_name);
      goto done;
    }
  mmap_length = fd_stat.st_size;

  data = mmap (0, mmap_length, PROT_READ, MAP_SHARED, fd, /* offset */ 0);
  if (~pointer_to_uword (data) == 0)
    {
      error = clib_error_return_unix (0, "mmap `%s'", file_name);
      /* mmap failed, so nothing is mapped: clear the all-ones failure
         value so the cleanup below does not hand it to munmap. */
      data = 0;
      goto done;
    }

  error = elf_parse (em, data, mmap_length);
  if (error)
    goto done;

  /* Look for CLIB special sections. */
  {
    char *section_name_start = CLIB_ELF_SECTION_ADD_PREFIX ();
    uword section_name_start_len = strlen (section_name_start);

    vec_foreach (s, em->sections)
    {
      u8 *name = elf_section_name (em, s);
      uword *p;
      clib_elf_section_t *vs;
      clib_elf_section_bounds_t *b;

      /* Section name must begin with CLIB_ELF_SECTION key.  strncmp
         stops at the NUL of a name shorter than the prefix instead of
         reading past it. */
      if (strncmp ((char *) name, section_name_start,
		   section_name_start_len))
	continue;

      /* Sections are keyed by the part of the name after the prefix. */
      name += section_name_start_len;
      p = hash_get_mem (cem->section_by_name, name);
      if (p)
	vs = vec_elt_at_index (cem->sections, p[0]);
      else
	{
	  /* First time this name is seen: keep a private NUL-terminated
	     copy and register a new section entry under it. */
	  name = format (0, "%s%c", name, 0);
	  if (!cem->section_by_name)
	    cem->section_by_name = hash_create_string (0, sizeof (uword));
	  hash_set_mem (cem->section_by_name, name, vec_len (cem->sections));
	  vec_add2 (cem->sections, vs, 1);
	  vs->name = name;
	}

      /* Append this file's address range for the section. */
      vec_add2 (vs->bounds, b, 1);
      b->lo = link_address + s->header.exec_address;
      b->hi = b->lo + s->header.file_size;
    }
  }

  /* Parse symbols for this file. */
  {
    elf_symbol_table_t *t;
    elf64_symbol_t *sym;

    elf_parse_symbols (em);
    vec_foreach (t, em->symbol_tables)
    {
      vec_foreach (sym, t->symbols)
      {
	/* Offset each symbol value by the link address. */
	sym->value += pointer_to_uword (link_address);
      }
    }
  }

  /* No need to keep section contents around. */
  vec_foreach (s, em->sections)
  {
    if (s->header.type != ELF_SECTION_STRING_TABLE)
      vec_free (s->contents);
  }

done:
  if (error)
    elf_main_free (em);
  if (fd >= 0)
    close (fd);
  if (data)
    munmap (data, mmap_length);
  return error;
}

#define __USE_GNU
#include <link.h>

/*
 * dl_iterate_phdr callback: parse the ELF file behind one loaded object.
 *
 * OPAQUE is the clib_elf_main_t being filled in.  An object with an
 * empty dlpi_name is treated as the main program: it is handled only on
 * the first such call, its file is located with path_search() on
 * cem->exec_path, and it is parsed with a link address of 0.  Every
 * other object is parsed from dlpi_name at dlpi_addr.
 *
 * Parse errors are reported with clib_error_report.  Always returns 0.
 */
static int
add_section (struct dl_phdr_info *info, size_t size, void *opaque)
{
  clib_elf_main_t *cem = opaque;
  char *file_name = (char *) info->dlpi_name;
  void *load_base = (void *) info->dlpi_addr;
  uword main_program = (strlen (file_name) == 0);
  clib_error_t *err;

  if (main_program)
    {
      static int seen_main;

      /* Only do main program once. */
      if (seen_main++)
	return 0;

      file_name = path_search (cem->exec_path);
      if (file_name == 0)
	{
	  clib_error ("failed to find %s on PATH", cem->exec_path);
	  return 0;
	}
      load_base = 0;
    }

  err = clib_elf_parse_file (cem, file_name, load_base);
  if (err)
    clib_error_report (err);

  /* path_search may hand back exec_path itself; free only a string it
     built. */
  if (main_program && file_name != cem->exec_path)
    vec_free (file_name);

  return 0;
}

/* File-scope ELF state; functions in this file reach it through
   &clib_elf_main. */
static clib_elf_main_t clib_elf_main;

/*
 * Record EXEC_PATH in the file-scope ELF state, then run add_section
 * over the loaded objects via dl_iterate_phdr.
 */
void
clib_elf_main_init (char *exec_path)
{
  clib_elf_main.exec_path = exec_path;

  dl_iterate_phdr (add_section, &clib_elf_main);
}

clib_elf_section_bounds_t *
clib_elf_get_section_bounds (char *name)
{
  clib_elf_main_t *em = &clib_elf_main;
  uword *p = hash_get (em->section_by_name, name);
  return p ? vec_elt_at_index (em->sections, p[0])->bounds : 0;
}

/*
 * Search every symbol table of every parsed ELF file for one symbol.
 *
 * When BY_NAME is non-null the per-table symbol_by_name hash is consulted;
 * otherwise the table is scanned for a symbol whose [value, value + size)
 * range contains BY_ADDRESS.
 *
 * On a match the symbol is copied into S->symbol, S->elf_main_index and
 * S->symbol_table_index identify where it was found, and 1 is returned.
 * Returns 0 when nothing matches; the two index fields of S are still
 * overwritten while searching.
 */
static uword
symbol_by_address_or_name (char *by_name,
			   uword by_address, clib_elf_symbol_t * s)
{
  clib_elf_main_t *cem = &clib_elf_main;
  elf_main_t *em;

  vec_foreach (em, cem->elf_mains)
  {
    elf_symbol_table_t *table;

    s->elf_main_index = em - cem->elf_mains;

    vec_foreach (table, em->symbol_tables)
    {
      elf64_symbol_t *candidate;

      s->symbol_table_index = table - em->symbol_tables;

      if (by_name)
	{
	  uword *hit = hash_get (table->symbol_by_name, by_name);

	  /* Not in this table: move on to the next one. */
	  if (!hit)
	    continue;

	  s->symbol = vec_elt (table->symbols, hit[0]);
	  return 1;
	}

      /* FIXME linear search. */
      vec_foreach (candidate, table->symbols)
      {
	/* Skip symbols whose range does not cover the address. */
	if (by_address < candidate->value
	    || by_address >= candidate->value + candidate->size)
	  continue;

	s->symbol = candidate[0];
	return 1;
      }
    }
  }

  return 0;
}

/*
 * Find a symbol by name.  Returns 1 and fills in S on success, 0 otherwise.
 */
uword
clib_elf_symbol_by_name (char *by_name, clib_elf_symbol_t * s)
{
  /* The address argument is passed as 0 for name lookups. */
  uword unused_address = 0;

  return symbol_by_address_or_name (by_name, unused_address, s);
}

/*
 * Find the symbol whose address range contains BY_ADDRESS.  Returns 1 and
 * fills in S on success, 0 otherwise.
 */
uword
clib_elf_symbol_by_address (uword by_address, clib_elf_symbol_t * s)
{
  /* A null name selects the address search. */
  char *no_name = 0;

  return symbol_by_address_or_name (no_name, by_address, s);
}

/*
 * format() helper for a clib_elf_symbol_t.
 *
 * Consumes one clib_elf_symbol_t * from ARGS.  A null pointer prints just
 * the table headings; otherwise the symbol is printed by format_elf_symbol
 * with the ELF file and symbol table recorded in the symbol's
 * elf_main_index / symbol_table_index.
 *
 * Returns the (possibly reallocated) output vector S.
 */
u8 *
format_clib_elf_symbol (u8 * s, va_list * args)
{
  clib_elf_main_t *cem = &clib_elf_main;
  clib_elf_symbol_t *sym = va_arg (*args, clib_elf_symbol_t *);
  elf_main_t *em = 0;
  elf_symbol_table_t *t = 0;
  elf64_symbol_t *elf_sym = 0;

  if (sym)
    {
      em = vec_elt_at_index (cem->elf_mains, sym->elf_main_index);
      t = vec_elt_at_index (em->symbol_tables, sym->symbol_table_index);
      elf_sym = &sym->symbol;
    }

  /* Always hand format_elf_symbol three pointer-typed arguments.  For the
     headings case they are typed null pointers rather than bare integer
     zeros, so the variadic argument types match what the symbol case
     passes. */
  return format (s, "%U", format_elf_symbol, em, t, elf_sym);
}

/*
 * format() helper that prints an address symbolically.
 *
 * Consumes one uword address from ARGS.  When a symbol covering the address
 * is found the output is "<symbol name> + 0x<offset>"; otherwise the raw
 * address is printed as "0x<address>".
 *
 * Returns the (possibly reallocated) output vector S.
 */
u8 *
format_clib_elf_symbol_with_address (u8 * s, va_list * args)
{
  uword address = va_arg (*args, uword);
  clib_elf_main_t *cem = &clib_elf_main;
  clib_elf_symbol_t found;
  elf_main_t *em;
  elf_symbol_table_t *table;

  /* No symbol covers the address: fall back to plain hex. */
  if (!clib_elf_symbol_by_address (address, &found))
    return format (s, "0x%wx", address);

  em = vec_elt_at_index (cem->elf_mains, found.elf_main_index);
  table = vec_elt_at_index (em->symbol_tables, found.symbol_table_index);

  return format (s, "%s + 0x%wx",
		 elf_symbol_name (table, &found.symbol),
		 address - found.symbol.value);
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
an class="p">, }; const static char * const lb_dpo_l3dsr_ip4_port[] = {"lb4-l3dsr-port" , NULL}; const static char* const * const lb_dpo_l3dsr_port_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port, }; const static char * const lb_dpo_nat4_ip4_port[] = { "lb4-nat4-port" , NULL }; const static char* const * const lb_dpo_nat4_port_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port, }; const static char * const lb_dpo_nat6_ip6_port[] = { "lb6-nat6-port" , NULL }; const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port, }; u32 lb_hash_time_now(vlib_main_t * vm) { return (u32) (vlib_time_now(vm) + 10000); } u8 *format_lb_main (u8 * s, va_list * args) { vlib_thread_main_t *tm = vlib_get_thread_main(); lb_main_t *lbm = &lb_main; s = format(s, "lb_main"); s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address); s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address); s = format(s, " #vips: %u\n", pool_elts(lbm->vips)); s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1); u32 thread_index; for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) { lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht; if (h) { s = format(s, "core %d\n", thread_index); s = format(s, " timeout: %ds\n", h->timeout); s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h)); } } return s; } static char *lb_vip_type_strings[] = { [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6", [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4", [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6", [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4", [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr", [LB_VIP_TYPE_IP4_NAT4] = "ip4-nat4", [LB_VIP_TYPE_IP6_NAT6] = "ip6-nat6", }; u8 *format_lb_vip_type (u8 * s, va_list * args) { lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t); u32 i; for (i=0; i<LB_VIP_N_TYPES; i++) if (vipt == i) return format(s, lb_vip_type_strings[i]); return 
format(s, "_WRONG_TYPE_"); } uword unformat_lb_vip_type (unformat_input_t * input, va_list * args) { lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *); u32 i; for (i=0; i<LB_VIP_N_TYPES; i++) if (unformat(input, lb_vip_type_strings[i])) { *vipt = i; return 1; } return 0; } u8 *format_lb_vip (u8 * s, va_list * args) { lb_vip_t *vip = va_arg (*args, lb_vip_t *); s = format(s, "%U %U new_size:%u #as:%u%s", format_lb_vip_type, vip->type, format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY, vip->new_flow_table_mask + 1, pool_elts(vip->as_indexes), (vip->flags & LB_VIP_FLAGS_USED)?"":" removed"); if (vip->port != 0) { s = format(s, " protocol:%u port:%u ", vip->protocol, vip->port); } if (vip->type == LB_VIP_TYPE_IP4_L3DSR) { s = format(s, " dscp:%u", vip->encap_args.dscp); } else if ((vip->type == LB_VIP_TYPE_IP4_NAT4) || (vip->type == LB_VIP_TYPE_IP6_NAT6)) { s = format (s, " type:%s port:%u target_port:%u", (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip": "nodeport", ntohs(vip->port), ntohs(vip->encap_args.target_port)); } return s; } u8 *format_lb_as (u8 * s, va_list * args) { lb_as_t *as = va_arg (*args, lb_as_t *); return format(s, "%U %s", format_ip46_address, &as->address, IP46_TYPE_ANY, (as->flags & LB_AS_FLAGS_USED)?"used":"removed"); } u8 *format_lb_vip_detailed (u8 * s, va_list * args) { lb_main_t *lbm = &lb_main; lb_vip_t *vip = va_arg (*args, lb_vip_t *); u32 indent = format_get_indent (s); s = format(s, "%U %U [%lu] %U%s\n" "%U new_size:%u\n", format_white_space, indent, format_lb_vip_type, vip->type, vip - lbm->vips, format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY, (vip->flags & LB_VIP_FLAGS_USED)?"":" removed", format_white_space, indent, vip->new_flow_table_mask + 1); if (vip->port != 0) { s = format(s, "%U protocol:%u port:%u\n", format_white_space, indent, vip->protocol, vip->port); } if (vip->type == LB_VIP_TYPE_IP4_L3DSR) { s = format(s, "%U dscp:%u\n", format_white_space, indent, vip->encap_args.dscp); } 
else if ((vip->type == LB_VIP_TYPE_IP4_NAT4) || (vip->type == LB_VIP_TYPE_IP6_NAT6)) { s = format (s, "%U type:%s port:%u target_port:%u", format_white_space, indent, (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip": "nodeport", ntohs(vip->port), ntohs(vip->encap_args.target_port)); } //Print counters s = format(s, "%U counters:\n", format_white_space, indent); u32 i; for (i=0; i<LB_N_VIP_COUNTERS; i++) s = format(s, "%U %s: %d\n", format_white_space, indent, lbm->vip_counters[i].name, vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips)); s = format(s, "%U #as:%u\n", format_white_space, indent, pool_elts(vip->as_indexes)); //Let's count the buckets for each AS u32 *count = 0; vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much... lb_new_flow_entry_t *nfe; vec_foreach(nfe, vip->new_flow_table) count[nfe->as_index]++; lb_as_t *as; u32 *as_index; pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n", format_white_space, indent, format_ip46_address, &as->address, IP46_TYPE_ANY, count[as - lbm->ass], vlib_refcount_get(&lbm->as_refcount, as - lbm->ass), as->dpo.dpoi_index, (as->flags & LB_AS_FLAGS_USED)?"used":" removed"); }); vec_free(count); return s; } typedef struct { u32 as_index; u32 last; u32 skip; } lb_pseudorand_t; static int lb_pseudorand_compare(void *a, void *b) { lb_as_t *asa, *asb; lb_main_t *lbm = &lb_main; asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index]; asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index]; return memcmp(&asa->address, &asb->address, sizeof(asb->address)); } static void lb_vip_garbage_collection(lb_vip_t *vip) { lb_main_t *lbm = &lb_main; lb_snat4_key_t m_key4; clib_bihash_kv_8_8_t kv4, value4; lb_snat6_key_t m_key6; clib_bihash_kv_24_8_t kv6, value6; lb_snat_mapping_t *m = 0; ASSERT (lbm->writer_lock[0]); u32 now = (u32) vlib_time_now(vlib_get_main()); if (!clib_u32_loop_gt(now, vip->last_garbage_collection + 
LB_GARBAGE_RUN)) return; vip->last_garbage_collection = now; lb_as_t *as; u32 *as_index; pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; if (!(as->flags & LB_AS_FLAGS_USED) && //Not used clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0)) { //Not referenced if (lb_vip_is_nat4_port(vip)) { m_key4.addr = as->address.ip4; m_key4.port = vip->encap_args.target_port; m_key4.protocol = 0; m_key4.fib_index = 0; kv4.key = m_key4.as_u64; if(!clib_bihash_search_8_8(&lbm->mapping_by_as4, &kv4, &value4)) m = pool_elt_at_index (lbm->snat_mappings, value4.value); ASSERT (m); kv4.value = m - lbm->snat_mappings; clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0); pool_put (lbm->snat_mappings, m); } else if (lb_vip_is_nat6_port(vip)) { m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0]; m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1]; m_key6.port = vip->encap_args.target_port; m_key6.protocol = 0; m_key6.fib_index = 0; kv6.key[0] = m_key6.as_u64[0]; kv6.key[1] = m_key6.as_u64[1]; kv6.key[2] = m_key6.as_u64[2]; if (!clib_bihash_search_24_8 (&lbm->mapping_by_as6, &kv6, &value6)) m = pool_elt_at_index (lbm->snat_mappings, value6.value); ASSERT (m); kv6.value = m - lbm->snat_mappings; clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 0); pool_put (lbm->snat_mappings, m); } fib_entry_child_remove(as->next_hop_fib_entry_index, as->next_hop_child_index); fib_table_entry_delete_index(as->next_hop_fib_entry_index, FIB_SOURCE_RR); as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID; pool_put(vip->as_indexes, as_index); pool_put(lbm->ass, as); } }); } void lb_garbage_collection() { lb_main_t *lbm = &lb_main; lb_get_writer_lock(); lb_vip_t *vip; u32 *to_be_removed_vips = 0, *i; pool_foreach(vip, lbm->vips, { lb_vip_garbage_collection(vip); if (!(vip->flags & LB_VIP_FLAGS_USED) && (pool_elts(vip->as_indexes) == 0)) { vec_add1(to_be_removed_vips, vip - lbm->vips); } }); vec_foreach(i, 
to_be_removed_vips) { vip = &lbm->vips[*i]; pool_put(lbm->vips, vip); pool_free(vip->as_indexes); } vec_free(to_be_removed_vips); lb_put_writer_lock(); } static void lb_vip_update_new_flow_table(lb_vip_t *vip) { lb_main_t *lbm = &lb_main; lb_new_flow_entry_t *old_table; u32 i, *as_index; lb_new_flow_entry_t *new_flow_table = 0; lb_as_t *as; lb_pseudorand_t *pr, *sort_arr = 0; u32 count; ASSERT (lbm->writer_lock[0]); //We must have the lock //Check if some AS is configured or not i = 0; pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore i = 1; goto out; //Not sure 'break' works in this macro-loop } }); out: if (i == 0) { //Only the default. i.e. no AS vec_validate(new_flow_table, vip->new_flow_table_mask); for (i=0; i<vec_len(new_flow_table); i++) new_flow_table[i].as_index = 0; goto finished; } //First, let's sort the ASs sort_arr = 0; vec_alloc(sort_arr, pool_elts(vip->as_indexes)); i = 0; pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore continue; sort_arr[i].as_index = as - lbm->ass; i++; }); _vec_len(sort_arr) = i; vec_sort_with_function(sort_arr, lb_pseudorand_compare); //Now let's pseudo-randomly generate permutations vec_foreach(pr, sort_arr) { lb_as_t *as = &lbm->ass[pr->as_index]; u64 seed = clib_xxhash(as->address.as_u64[0] ^ as->address.as_u64[1]); /* We have 2^n buckets. * skip must be prime with 2^n. * So skip must be odd. * MagLev actually state that M should be prime, * but this has a big computation cost (% operation). * Using 2^n is more better (& operation). 
*/ pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask; pr->last = (seed >> 32) & vip->new_flow_table_mask; } //Let's create a new flow table vec_validate(new_flow_table, vip->new_flow_table_mask); for (i=0; i<vec_len(new_flow_table); i++) new_flow_table[i].as_index = ~0; u32 done = 0; while (1) { vec_foreach(pr, sort_arr) { while (1) { u32 last = pr->last; pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask; if (new_flow_table[last].as_index == ~0) { new_flow_table[last].as_index = pr->as_index; break; } } done++; if (done == vec_len(new_flow_table)) goto finished; } } vec_free(sort_arr); finished: //Count number of changed entries count = 0; for (i=0; i<vec_len(new_flow_table); i++) if (vip->new_flow_table == 0 || new_flow_table[i].as_index != vip->new_flow_table[i].as_index) count++; old_table = vip->new_flow_table; vip->new_flow_table = new_flow_table; vec_free(old_table); } int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, u32 per_cpu_sticky_buckets, u32 flow_timeout) { lb_main_t *lbm = &lb_main; if (!is_pow2(per_cpu_sticky_buckets)) return VNET_API_ERROR_INVALID_MEMORY_SIZE; lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self lbm->ip4_src_address = *ip4_address; lbm->ip6_src_address = *ip6_address; lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets; lbm->flow_timeout = flow_timeout; lb_put_writer_lock(); return 0; } static int lb_vip_port_find_index(ip46_address_t *prefix, u8 plen, u8 protocol, u16 port, lb_lkp_type_t lkp_type, u32 *vip_index) { lb_main_t *lbm = &lb_main; lb_vip_t *vip; ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned ip46_prefix_normalize(prefix, plen); pool_foreach(vip, lbm->vips, { if ((vip->flags & LB_AS_FLAGS_USED) && vip->plen == plen && vip->prefix.as_u64[0] == prefix->as_u64[0] && vip->prefix.as_u64[1] == prefix->as_u64[1]) { if((lkp_type == LB_LKP_SAME_IP_PORT && vip->protocol == protocol && vip->port == port) || (lkp_type 
== LB_LKP_ALL_PORT_IP && vip->port == 0) || (lkp_type == LB_LKP_DIFF_IP_PORT && (vip->protocol != protocol || vip->port != port) ) ) { *vip_index = vip - lbm->vips; return 0; } } }); return VNET_API_ERROR_NO_SUCH_ENTRY; } static int lb_vip_port_find_index_with_lock(ip46_address_t *prefix, u8 plen, u8 protocol, u16 port, u32 *vip_index) { return lb_vip_port_find_index(prefix, plen, protocol, port, LB_LKP_SAME_IP_PORT, vip_index); } static int lb_vip_port_find_all_port_vip(ip46_address_t *prefix, u8 plen, u32 *vip_index) { return lb_vip_port_find_index(prefix, plen, ~0, 0, LB_LKP_ALL_PORT_IP, vip_index); } /* Find out per-port-vip entry with different protocol and port */ static int lb_vip_port_find_diff_port(ip46_address_t *prefix, u8 plen, u8 protocol, u16 port, u32 *vip_index) { return lb_vip_port_find_index(prefix, plen, protocol, port, LB_LKP_DIFF_IP_PORT, vip_index); } int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol, u16 port, u32 *vip_index) { int ret; lb_get_writer_lock(); ret = lb_vip_port_find_index_with_lock(prefix, plen, protocol, port, vip_index); lb_put_writer_lock(); return ret; } static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index) { lb_main_t *lbm = &lb_main; ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned lb_as_t *as; u32 *asi; pool_foreach(asi, vip->as_indexes, { as = &lbm->ass[*asi]; if (as->vip_index == (vip - lbm->vips) && as->address.as_u64[0] == address->as_u64[0] && as->address.as_u64[1] == address->as_u64[1]) { *as_index = as - lbm->ass; return 0; } }); return -1; } int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n) { lb_main_t *lbm = &lb_main; lb_get_writer_lock(); lb_vip_t *vip; if (!(vip = lb_vip_get_by_index(vip_index))) { lb_put_writer_lock(); return VNET_API_ERROR_NO_SUCH_ENTRY; } ip46_type_t type = lb_encap_is_ip4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6; u32 *to_be_added = 0; u32 *to_be_updated = 0; u32 i; u32 *ip; lb_snat_mapping_t *m; //Sanity 
check while (n--) { if (!lb_as_find_index_vip(vip, &addresses[n], &i)) { if (lbm->ass[i].flags & LB_AS_FLAGS_USED) { vec_free(to_be_added); vec_free(to_be_updated); lb_put_writer_lock(); return VNET_API_ERROR_VALUE_EXIST; } vec_add1(to_be_updated, i); goto next; } if (ip46_address_type(&addresses[n]) != type) { vec_free(to_be_added); vec_free(to_be_updated); lb_put_writer_lock(); return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; } if (n) { u32 n2 = n; while(n2--) //Check for duplicates if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] && addresses[n2].as_u64[1] == addresses[n].as_u64[1]) goto next; } vec_add1(to_be_added, n); next: continue; } //Update reused ASs vec_foreach(ip, to_be_updated) { lbm->ass[*ip].flags = LB_AS_FLAGS_USED; } vec_free(to_be_updated); //Create those who have to be created vec_foreach(ip, to_be_added) { lb_as_t *as; u32 *as_index; pool_get(lbm->ass, as); as->address = addresses[*ip]; as->flags = LB_AS_FLAGS_USED; as->vip_index = vip_index; pool_get(vip->as_indexes, as_index); *as_index = as - lbm->ass; /* * become a child of the FIB entry * so we are informed when its forwarding changes */ fib_prefix_t nh = {}; if (lb_encap_is_ip4(vip)) { nh.fp_addr.ip4 = as->address.ip4; nh.fp_len = 32; nh.fp_proto = FIB_PROTOCOL_IP4; } else { nh.fp_addr.ip6 = as->address.ip6; nh.fp_len = 128; nh.fp_proto = FIB_PROTOCOL_IP6; } as->next_hop_fib_entry_index = fib_table_entry_special_add(0, &nh, FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); as->next_hop_child_index = fib_entry_child_add(as->next_hop_fib_entry_index, lbm->fib_node_type, as - lbm->ass); lb_as_stack(as); if ( lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip) ) { /* Add SNAT static mapping */ pool_get (lbm->snat_mappings, m); memset (m, 0, sizeof (*m)); if (lb_vip_is_nat4_port(vip)) { lb_snat4_key_t m_key4; clib_bihash_kv_8_8_t kv4; m_key4.addr = as->address.ip4; m_key4.port = vip->encap_args.target_port; m_key4.protocol = 0; m_key4.fib_index = 0; if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP) 
{ m->src_ip.ip4 = vip->prefix.ip4; } else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT) { m->src_ip.ip4 = lbm->ip4_src_address; } m->src_ip_is_ipv6 = 0; m->as_ip.ip4 = as->address.ip4; m->as_ip_is_ipv6 = 0; m->src_port = vip->port; m->target_port = vip->encap_args.target_port; m->vrf_id = 0; m->fib_index = 0; kv4.key = m_key4.as_u64; kv4.value = m - lbm->snat_mappings; clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 1); } else { lb_snat6_key_t m_key6; clib_bihash_kv_24_8_t kv6; m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0]; m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1]; m_key6.port = vip->encap_args.target_port; m_key6.protocol = 0; m_key6.fib_index = 0; if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP) { m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0]; m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1]; } else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT) { m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0]; m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1]; } m->src_ip_is_ipv6 = 1; m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0]; m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1]; m->as_ip_is_ipv6 = 1; m->src_port = vip->port; m->target_port = vip->encap_args.target_port; m->vrf_id = 0; m->fib_index = 0; kv6.key[0] = m_key6.as_u64[0]; kv6.key[1] = m_key6.as_u64[1]; kv6.key[2] = m_key6.as_u64[2]; kv6.value = m - lbm->snat_mappings; clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 1); } } } vec_free(to_be_added); //Recompute flows lb_vip_update_new_flow_table(vip); //Garbage collection maybe lb_vip_garbage_collection(vip); lb_put_writer_lock(); return 0; } int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n) { lb_main_t *lbm = &lb_main; u32 now = (u32) vlib_time_now(vlib_get_main()); u32 *ip = 0; u32 as_index = 0; lb_vip_t *vip; if (!(vip = lb_vip_get_by_index(vip_index))) { return VNET_API_ERROR_NO_SUCH_ENTRY; } u32 *indexes = NULL; while (n--) { if (lb_as_find_index_vip(vip, 
&addresses[n], &as_index)) { vec_free(indexes); return VNET_API_ERROR_NO_SUCH_ENTRY; } if (n) { //Check for duplicates u32 n2 = n - 1; while(n2--) { if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] && addresses[n2].as_u64[1] == addresses[n].as_u64[1]) goto next; } } vec_add1(indexes, as_index); next: continue; } //Garbage collection maybe lb_vip_garbage_collection(vip); if (indexes != NULL) { vec_foreach(ip, indexes) { lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED; lbm->ass[*ip].last_used = now; } //Recompute flows lb_vip_update_new_flow_table(vip); } vec_free(indexes); return 0; } int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n) { lb_get_writer_lock(); int ret = lb_vip_del_ass_withlock(vip_index, addresses, n); lb_put_writer_lock(); return ret; } static int lb_vip_prefix_index_alloc (lb_main_t *lbm) { /* * Check for dynamically allocaetd instance number. */ u32 bit; bit = clib_bitmap_first_clear (lbm->vip_prefix_indexes); lbm->vip_prefix_indexes = clib_bitmap_set(lbm->vip_prefix_indexes, bit, 1); return bit; } static int lb_vip_prefix_index_free (lb_main_t *lbm, u32 instance) { if (clib_bitmap_get (lbm->vip_prefix_indexes, instance) == 0) { return -1; } lbm->vip_prefix_indexes = clib_bitmap_set (lbm->vip_prefix_indexes, instance, 0); return 0; } /** * Add the VIP adjacency to the ip4 or ip6 fib */ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip, u32 *vip_prefix_index) { dpo_proto_t proto = 0; dpo_type_t dpo_type = 0; u32 vip_idx = 0; if (vip->port != 0) { /* for per-port vip, if VIP adjacency has been added, * no need to add adjacency. 
*/ if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen, vip->protocol, vip->port, &vip_idx)) { return; } /* Allocate an index for per-port vip */ *vip_prefix_index = lb_vip_prefix_index_alloc(lbm); } else { *vip_prefix_index = vip - lbm->vips; } dpo_id_t dpo = DPO_INVALID; fib_prefix_t pfx = {}; if (lb_vip_is_ip4(vip->type)) { pfx.fp_addr.ip4 = vip->prefix.ip4; pfx.fp_len = vip->plen - 96; pfx.fp_proto = FIB_PROTOCOL_IP4; proto = DPO_PROTO_IP4; } else { pfx.fp_addr.ip6 = vip->prefix.ip6; pfx.fp_len = vip->plen; pfx.fp_proto = FIB_PROTOCOL_IP6; proto = DPO_PROTO_IP6; } if (lb_vip_is_gre4(vip)) dpo_type = lbm->dpo_gre4_type; else if (lb_vip_is_gre6(vip)) dpo_type = lbm->dpo_gre6_type; else if (lb_vip_is_gre4_port(vip)) dpo_type = lbm->dpo_gre4_port_type; else if (lb_vip_is_gre6_port(vip)) dpo_type = lbm->dpo_gre6_port_type; else if (lb_vip_is_l3dsr(vip)) dpo_type = lbm->dpo_l3dsr_type; else if (lb_vip_is_l3dsr_port(vip)) dpo_type = lbm->dpo_l3dsr_port_type; else if(lb_vip_is_nat4_port(vip)) dpo_type = lbm->dpo_nat4_port_type; else if (lb_vip_is_nat6_port(vip)) dpo_type = lbm->dpo_nat6_port_type; dpo_set(&dpo, dpo_type, proto, *vip_prefix_index); fib_table_entry_special_dpo_add(0, &pfx, FIB_SOURCE_PLUGIN_HI, FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); dpo_reset(&dpo); } /** * Add the VIP filter entry */ static int lb_vip_add_port_filter(lb_main_t *lbm, lb_vip_t *vip, u32 vip_prefix_index, u32 vip_idx) { vip_port_key_t key; clib_bihash_kv_8_8_t kv; key.vip_prefix_index = vip_prefix_index; key.protocol = vip->protocol; key.port = clib_host_to_net_u16(vip->port); key.rsv = 0; kv.key = key.as_u64; kv.value = vip_idx; clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 1); return 0; } /** * Del the VIP filter entry */ static int lb_vip_del_port_filter(lb_main_t *lbm, lb_vip_t *vip) { vip_port_key_t key; clib_bihash_kv_8_8_t kv, value; lb_vip_t *m = 0; key.vip_prefix_index = vip->vip_prefix_index; key.protocol = vip->protocol; key.port = clib_host_to_net_u16(vip->port); 
kv.key = key.as_u64; if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) == 0) m = pool_elt_at_index (lbm->vips, value.value); ASSERT (m); kv.value = m - lbm->vips; clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 0); return 0; } /** * Deletes the adjacency associated with the VIP */ static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip) { fib_prefix_t pfx = {}; u32 vip_idx = 0; if (vip->port != 0) { /* If this vip adjacency is used by other per-port vip, * no need to del this adjacency. */ if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen, vip->protocol, vip->port, &vip_idx)) { lb_put_writer_lock(); return; } /* Return vip_prefix_index for per-port vip */ lb_vip_prefix_index_free(lbm, vip->vip_prefix_index); } if (lb_vip_is_ip4(vip->type)) { pfx.fp_addr.ip4 = vip->prefix.ip4; pfx.fp_len = vip->plen - 96; pfx.fp_proto = FIB_PROTOCOL_IP4; } else { pfx.fp_addr.ip6 = vip->prefix.ip6; pfx.fp_len = vip->plen; pfx.fp_proto = FIB_PROTOCOL_IP6; } fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI); } int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index) { lb_main_t *lbm = &lb_main; vlib_main_t *vm = vlib_get_main(); lb_vip_t *vip; lb_vip_type_t type = args.type; u32 vip_prefix_index = 0; lb_get_writer_lock(); ip46_prefix_normalize(&(args.prefix), args.plen); if (!lb_vip_port_find_index_with_lock(&(args.prefix), args.plen, args.protocol, args.port, vip_index)) { lb_put_writer_lock(); return VNET_API_ERROR_VALUE_EXIST; } /* Make sure we can't add a per-port VIP entry * when there already is an all-port VIP for the same prefix. */ if ((args.port != 0) && !lb_vip_port_find_all_port_vip(&(args.prefix), args.plen, vip_index)) { lb_put_writer_lock(); return VNET_API_ERROR_VALUE_EXIST; } /* Make sure we can't add a all-port VIP entry * when there already is an per-port VIP for the same prefix. 
*/ if ((args.port == 0) && !lb_vip_port_find_diff_port(&(args.prefix), args.plen, args.protocol, args.port, vip_index)) { lb_put_writer_lock(); return VNET_API_ERROR_VALUE_EXIST; } /* Make sure all VIP for a given prefix (using different ports) have the same type. */ if ((args.port != 0) && !lb_vip_port_find_diff_port(&(args.prefix), args.plen, args.protocol, args.port, vip_index) && (args.type != lbm->vips[*vip_index].type)) { lb_put_writer_lock(); return VNET_API_ERROR_INVALID_ARGUMENT; } if (!is_pow2(args.new_length)) { lb_put_writer_lock(); return VNET_API_ERROR_INVALID_MEMORY_SIZE; } if (ip46_prefix_is_ip4(&(args.prefix), args.plen) && !lb_vip_is_ip4(type)) { lb_put_writer_lock(); return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; } if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) && !lb_vip_is_ip6(type)) { lb_put_writer_lock(); return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; } if ((type == LB_VIP_TYPE_IP4_L3DSR) && (args.encap_args.dscp >= 64) ) { lb_put_writer_lock(); return VNET_API_ERROR_VALUE_EXIST; } //Allocate pool_get(lbm->vips, vip); //Init memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix)); vip->plen = args.plen; if (args.port != 0) { vip->protocol = args.protocol; vip->port = args.port; } else { vip->protocol = (u8)~0; vip->port = 0; } vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main()); vip->type = args.type; if (args.type == LB_VIP_TYPE_IP4_L3DSR) { vip->encap_args.dscp = args.encap_args.dscp; } else if ((args.type == LB_VIP_TYPE_IP4_NAT4) ||(args.type == LB_VIP_TYPE_IP6_NAT6)) { vip->encap_args.srv_type = args.encap_args.srv_type; vip->encap_args.target_port = clib_host_to_net_u16(args.encap_args.target_port); } vip->flags = LB_VIP_FLAGS_USED; vip->as_indexes = 0; //Validate counters u32 i; for (i = 0; i < LB_N_VIP_COUNTERS; i++) { vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips); vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips); } //Configure new flow table vip->new_flow_table_mask = 
args.new_length - 1; vip->new_flow_table = 0; //Update flow hash table lb_vip_update_new_flow_table(vip); //Create adjacency to direct traffic lb_vip_add_adjacency(lbm, vip, &vip_prefix_index); if ( (lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip)) && (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) ) { u32 key; uword * entry; //Create maping from nodeport to vip_index key = clib_host_to_net_u16(args.port); entry = hash_get_mem (lbm->vip_index_by_nodeport, &key); if (entry) { lb_put_writer_lock(); return VNET_API_ERROR_VALUE_EXIST; } hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips); /* receive packets destined to NodeIP:NodePort */ udp_register_dst_port (vm, args.port, lb4_nodeport_node.index, 1); udp_register_dst_port (vm, args.port, lb6_nodeport_node.index, 0); } *vip_index = vip - lbm->vips; //Create per-port vip filtering table if (args.port != 0) { lb_vip_add_port_filter(lbm, vip, vip_prefix_index, *vip_index); vip->vip_prefix_index = vip_prefix_index; } lb_put_writer_lock(); return 0; } int lb_vip_del(u32 vip_index) { lb_main_t *lbm = &lb_main; lb_vip_t *vip; /* Does not remove default vip, i.e. vip_index = 0 */ if (vip_index == 0) return 0; lb_get_writer_lock(); if (!(vip = lb_vip_get_by_index(vip_index))) { lb_put_writer_lock(); return VNET_API_ERROR_NO_SUCH_ENTRY; } //FIXME: This operation is actually not working //We will need to remove state before performing this. 
{ //Remove all ASs ip46_address_t *ass = 0; lb_as_t *as; u32 *as_index; pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; vec_add1(ass, as->address); }); if (vec_len(ass)) lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass)); vec_free(ass); } //Delete adjacency lb_vip_del_adjacency(lbm, vip); //Delete per-port vip filtering entry if (vip->port != 0) { lb_vip_del_port_filter(lbm, vip); } //Set the VIP as unused vip->flags &= ~LB_VIP_FLAGS_USED; lb_put_writer_lock(); return 0; } /* *INDENT-OFF* */ VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, .description = "Load Balancer", }; /* *INDENT-ON* */ u8 *format_lb_dpo (u8 * s, va_list * va) { index_t index = va_arg (*va, index_t); CLIB_UNUSED(u32 indent) = va_arg (*va, u32); lb_main_t *lbm = &lb_main; lb_vip_t *vip = pool_elt_at_index (lbm->vips, index); return format (s, "%U", format_lb_vip, vip); } static void lb_dpo_lock (dpo_id_t *dpo) {} static void lb_dpo_unlock (dpo_id_t *dpo) {} static fib_node_t * lb_fib_node_get_node (fib_node_index_t index) { lb_main_t *lbm = &lb_main; lb_as_t *as = pool_elt_at_index (lbm->ass, index); return (&as->fib_node); } static void lb_fib_node_last_lock_gone (fib_node_t *node) { } static lb_as_t * lb_as_from_fib_node (fib_node_t *node) { return ((lb_as_t*)(((char*)node) - STRUCT_OFFSET_OF(lb_as_t, fib_node))); } static void lb_as_stack (lb_as_t *as) { lb_main_t *lbm = &lb_main; lb_vip_t *vip = &lbm->vips[as->vip_index]; dpo_type_t dpo_type = 0; if (lb_vip_is_gre4(vip)) dpo_type = lbm->dpo_gre4_type; else if (lb_vip_is_gre6(vip)) dpo_type = lbm->dpo_gre6_type; else if (lb_vip_is_gre4_port(vip)) dpo_type = lbm->dpo_gre4_port_type; else if (lb_vip_is_gre6_port(vip)) dpo_type = lbm->dpo_gre6_port_type; else if (lb_vip_is_l3dsr(vip)) dpo_type = lbm->dpo_l3dsr_type; else if (lb_vip_is_l3dsr_port(vip)) dpo_type = lbm->dpo_l3dsr_port_type; else if(lb_vip_is_nat4_port(vip)) dpo_type = lbm->dpo_nat4_port_type; else if (lb_vip_is_nat6_port(vip)) dpo_type = 
lbm->dpo_nat6_port_type; dpo_stack(dpo_type, lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6, &as->dpo, fib_entry_contribute_ip_forwarding( as->next_hop_fib_entry_index)); } static fib_node_back_walk_rc_t lb_fib_node_back_walk_notify (fib_node_t *node, fib_node_back_walk_ctx_t *ctx) { lb_as_stack(lb_as_from_fib_node(node)); return (FIB_NODE_BACK_WALK_CONTINUE); } int lb_nat4_interface_add_del (u32 sw_if_index, int is_del) { if (is_del) { vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out", sw_if_index, 0, 0, 0); } else { vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out", sw_if_index, 1, 0, 0); } return 0; } int lb_nat6_interface_add_del (u32 sw_if_index, int is_del) { if (is_del) { vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out", sw_if_index, 0, 0, 0); } else { vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out", sw_if_index, 1, 0, 0); } return 0; } clib_error_t * lb_init (vlib_main_t * vm) { vlib_thread_main_t *tm = vlib_get_thread_main (); lb_main_t *lbm = &lb_main; lbm->vnet_main = vnet_get_main (); lbm->vlib_main = vm; lb_vip_t *default_vip; lb_as_t *default_as; fib_node_vft_t lb_fib_node_vft = { .fnv_get = lb_fib_node_get_node, .fnv_last_lock = lb_fib_node_last_lock_gone, .fnv_back_walk = lb_fib_node_back_walk_notify, }; dpo_vft_t lb_vft = { .dv_lock = lb_dpo_lock, .dv_unlock = lb_dpo_unlock, .dv_format = format_lb_dpo, }; //Allocate and init default VIP. 
lbm->vips = 0; pool_get(lbm->vips, default_vip); default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL; default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL; default_vip->protocol = ~0; default_vip->port = 0; default_vip->flags = LB_VIP_FLAGS_USED; lbm->per_cpu = 0; vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1); lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); lbm->writer_lock[0] = 0; lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS; lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT; lbm->ip4_src_address.as_u32 = 0xffffffff; lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL; lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL; lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes); lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes); lbm->dpo_gre4_port_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_port_nodes); lbm->dpo_gre6_port_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_port_nodes); lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft, lb_dpo_l3dsr_nodes); lbm->dpo_l3dsr_port_type = dpo_register_new_type(&lb_vft, lb_dpo_l3dsr_port_nodes); lbm->dpo_nat4_port_type = dpo_register_new_type(&lb_vft, lb_dpo_nat4_port_nodes); lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft, lb_dpo_nat6_port_nodes); lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft); //Init AS reference counters vlib_refcount_init(&lbm->as_refcount); //Allocate and init default AS. 
lbm->ass = 0; pool_get(lbm->ass, default_as); default_as->flags = 0; default_as->dpo.dpoi_next_node = LB_NEXT_DROP; default_as->vip_index = ~0; default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL; default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL; lbm->vip_index_by_nodeport = hash_create_mem (0, sizeof(u16), sizeof (uword)); clib_bihash_init_8_8 (&lbm->vip_index_per_port, "vip_index_per_port", LB_VIP_PER_PORT_BUCKETS, LB_VIP_PER_PORT_MEMORY_SIZE); clib_bihash_init_8_8 (&lbm->mapping_by_as4, "mapping_by_as4", LB_MAPPING_BUCKETS, LB_MAPPING_MEMORY_SIZE); clib_bihash_init_24_8 (&lbm->mapping_by_as6, "mapping_by_as6", LB_MAPPING_BUCKETS, LB_MAPPING_MEMORY_SIZE); #define _(a,b,c) lbm->vip_counters[c].name = b; lb_foreach_vip_counter #undef _ return NULL; } VLIB_INIT_FUNCTION (lb_init);