summaryrefslogtreecommitdiffstats
path: root/src/vnet/fib/mpls_fib.h
blob: e7ea5d583d1c30eda75f767ea95f3f8434906258 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
 * mpls_fib.h: The Label/MPLS FIB
 *
 * Copyright (c) 2012 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __MPLS_FIB_TABLE_H__
#define __MPLS_FIB_TABLE_H__

#include <vnet/vnet.h>
#include <vnet/mpls/mpls.h>
#include <vnet/fib/fib_types.h>
#include <vnet/dpo/dpo.h>
#include <vnet/mpls/mpls.h>
#include <vnet/fib/fib_table.h>

#define MPLS_FIB_DEFAULT_TABLE_ID 0

/**
 * Type exposure is to allow the DP fast/inlined access
 */
#define MPLS_FIB_KEY_SIZE 21
/*
 * NOTE(review): the forwarding key (see mpls_fib_table_forwarding_lookup)
 * is (label << 1) | eos, i.e. up to MPLS_FIB_KEY_SIZE bits wide, yet this
 * sizes mf_lbs[] at only 1 << (MPLS_FIB_KEY_SIZE-1) entries. Looks like
 * only half the key space is addressable — confirm the intended key width.
 */
#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))

/**
 * There are no options for controlling the MPLS flow hash,
 * but since it mostly entails using IP data to create one, use that.
 */
#define MPLS_FLOW_HASH_DEFAULT IP_FLOW_HASH_DEFAULT

/**
 * An MPLS FIB table instance. The type is exposed (not opaque) so the
 * data-plane can perform inlined lookups (see
 * mpls_fib_table_forwarding_lookup below).
 */
typedef struct mpls_fib_t_
{
  /**
   * Required for pool_get_aligned
   */
  CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);

  /**
   * A hash table of entries. 21 bit key
   * Hash table for reduced memory footprint
   * (control-plane DB: maps (label,eos) key -> FIB entry)
   */
  uword * mf_entries;

  /**
   * The load-balance indices keyed by 21 bit label+eos bit.
   * A flat array for maximum lookup performance.
   * (data-plane DB: indexed directly by the forwarding key)
   */
  index_t mf_lbs[MPLS_FIB_DB_SIZE];
} mpls_fib_t;

/**
 * @brief Get an MPLS FIB table from the global pool by its index.
 */
static inline mpls_fib_t*
mpls_fib_get (fib_node_index_t index)
{
    mpls_fib_t *mf;

    mf = pool_elt_at_index(mpls_main.mpls_fibs, index);

    return (mf);
}

/**
 * @brief Find the MPLS FIB with @a table_id, creating it if it does not
 *        exist, and add a lock attributed to source @a src.
 * @return the FIB's index.
 */
extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id,
                                                  fib_source_t src);
/**
 * @brief Create a new MPLS FIB and lock it on behalf of @a src.
 * @return the new FIB's index.
 */
extern u32 mpls_fib_table_create_and_lock(fib_source_t src);
// extern mpls_fib_t * mpls_fib_find(u32 table_id);
/**
 * @brief Map a table-id to the index of the corresponding MPLS FIB.
 */
extern u32 mpls_fib_index_from_table_id(u32 table_id);

/** @brief clib format function for an MPLS FIB table's name. */
extern u8 *format_mpls_fib_table_name(u8 * s, va_list * args);

/**
 * @brief Add an entry for (label, eos) in table @a table_id whose
 *        forwarding is derived from the given IP FIB entry.
 * @return the index of the MPLS FIB entry added.
 */
extern fib_node_index_t mpls_fib_table_entry_add_from_ip_fib_entry (
    u32 table_id,
    mpls_label_t label,
    mpls_eos_bit_t eos,
    fib_node_index_t fib_entry_index);


/**
 * @brief Control-plane lookup of a (label, eos) key in FIB @a mf.
 * @return the matching FIB entry's index.
 */
extern fib_node_index_t mpls_fib_table_lookup(const mpls_fib_t *mf,
					      mpls_label_t label,
					      mpls_eos_bit_t eos);

/** @brief Remove the entry keyed by (label, eos) from @a mf. */
extern void mpls_fib_table_entry_remove(mpls_fib_t *mf,
					mpls_label_t label,
					mpls_eos_bit_t eos);
/** @brief Insert entry @a fei keyed by (label, eos) into @a mf. */
extern void mpls_fib_table_entry_insert(mpls_fib_t *mf,
					mpls_label_t label,
					mpls_eos_bit_t eos,
					fib_node_index_t fei);
/** @brief Destroy the MPLS FIB with the given index. */
extern void mpls_fib_table_destroy(u32 fib_index);


/**
 * @brief Point the data-plane forwarding slot for (label, eos) at @a dpo.
 */
extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf,
					     mpls_label_t label,
					     mpls_eos_bit_t eos,
					     const dpo_id_t *dpo);
/** @brief Reset the data-plane forwarding slot for (label, eos). */
extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf,
					    mpls_label_t label,
					    mpls_eos_bit_t eos);

/**
 * @brief Walk all entries in a FIB table
 * N.B.: This is NOT safe with respect to deletes. If you need to delete,
 * walk the whole table and store elements in a vector, then delete the
 * elements
 */
extern void mpls_fib_table_walk(mpls_fib_t *fib,
                                fib_table_walk_fn_t fn,
                                void *ctx);

/** @brief clib format function for MPLS FIB memory usage. */
extern u8 *format_mpls_fib_table_memory(u8 * s, va_list * args);

/**
 * @brief
 *  Lookup a label and EOS bit in the MPLS_FIB table to retrieve the
 *  load-balance index to be used for packet forwarding.
 */
static inline index_t
mpls_fib_table_forwarding_lookup (u32 mpls_fib_index,
				  const mpls_unicast_header_t *hdr)
{
    mpls_label_t host_hdr;
    u32 lb_key;

    /* The header word arrives in network order */
    host_hdr = clib_net_to_host_u32(hdr->label_exp_s_ttl);

    /* Key layout: label in the upper bits, EOS bit in the LSB */
    lb_key = ((vnet_mpls_uc_get_label(host_hdr) << 1) |
              vnet_mpls_uc_get_s(host_hdr));

    return (mpls_fib_get(mpls_fib_index)->mf_lbs[lb_key]);
}

/**
 * @brief Return the MPLS FIB index bound to the given sw interface.
 * The interface MUST already have an entry in the per-interface table.
 */
static inline u32
mpls_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
{
    const mpls_main_t *mm = &mpls_main;

    ASSERT(sw_if_index < vec_len(mm->fib_index_by_sw_if_index));

    return (mm->fib_index_by_sw_if_index[sw_if_index]);
}

#endif /* __MPLS_FIB_TABLE_H__ */
; /* * format_all_svm_regions * Maps / unmaps regions. Do NOT call from client code! */ u8 *format_all_svm_regions (u8 *s, va_list * args) { int verbose = va_arg (*args, int); svm_region_t *root_rp = svm_get_root_rp(); svm_main_region_t *mp; svm_subregion_t *subp; svm_region_t *rp; svm_map_region_args_t *a = 0; u8 ** svm_names=0; u8 *name=0; int i; ASSERT(root_rp); pthread_mutex_lock (&root_rp->mutex); s = format (s, "%U", format_svm_region, root_rp, verbose); mp = root_rp->data_base; /* * Snapshoot names, can't hold root rp mutex across * find_or_create. */ pool_foreach (subp, mp->subregions, ({ name = vec_dup (subp->subregion_name); vec_add1(svm_names, name); })); pthread_mutex_unlock (&root_rp->mutex); for (i = 0; i < vec_len(svm_names); i++) { vec_validate(a, 0); a->name = (char *) svm_names[i]; rp = svm_region_find_or_create (a); if (rp) { pthread_mutex_lock (&rp->mutex); s = format (s, "%U", format_svm_region, rp, verbose); pthread_mutex_unlock (&rp->mutex); svm_region_unmap (rp); vec_free(svm_names[i]); } vec_free (a); } vec_free(svm_names); return (s); } void show (char *chroot_path, int verbose) { svm_map_region_args_t *a = 0; vec_validate (a, 0); svm_region_init_chroot(chroot_path); fformat(stdout, "My pid is %d\n", getpid()); fformat(stdout, "%U", format_all_svm_regions, verbose); svm_region_exit (); vec_free (a); } static void *svm_map_region_nolock (svm_map_region_args_t *a) { int svm_fd; svm_region_t *rp; int deadman=0; u8 *shm_name; ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size); shm_name = shm_name_from_svm_map_region_args (a); svm_fd = shm_open((char *)shm_name, O_RDWR, 0777); if (svm_fd < 0) { perror("svm_region_map(mmap open)"); return (0); } vec_free (shm_name); rp = mmap(0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); if (rp == (svm_region_t *) MAP_FAILED) { close(svm_fd); clib_warning("mmap"); return (0); } /* * We lost the footrace to create this region; make sure * the winner has crossed the finish line. 
*/ while (rp->version == 0 && deadman++ < 5) { sleep(1); } /* * <bleep>-ed? */ if (rp->version == 0) { clib_warning("rp->version %d not %d", rp->version, SVM_VERSION); return (0); } /* Remap now that the region has been placed */ a->baseva = rp->virtual_base; a->size = rp->virtual_size; munmap(rp, MMAP_PAGESIZE); rp = (void *) mmap ((void *)a->baseva, a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword)rp == (uword)MAP_FAILED) { clib_unix_warning ("mmap"); return (0); } if ((uword) rp != rp->virtual_base) { clib_warning("mmap botch"); } if (pthread_mutex_trylock(&rp->mutex)) { clib_warning ("rp->mutex LOCKED by pid %d, tag %d, cleared...", rp->mutex_owner_pid, rp->mutex_owner_tag); memset(&rp->mutex, 0, sizeof (rp->mutex)); } else { clib_warning ("mutex OK...\n"); pthread_mutex_unlock(&rp->mutex); } return ((void *) rp); } /* * rnd_pagesize * Round to a pagesize multiple, presumably 4k works */ static unsigned int rnd_pagesize(unsigned int size) { unsigned int rv; rv = (size + (MMAP_PAGESIZE-1)) & ~(MMAP_PAGESIZE-1); return(rv); } #define MUTEX_DEBUG always_inline void region_lock(svm_region_t *rp, int tag) { pthread_mutex_lock(&rp->mutex); #ifdef MUTEX_DEBUG rp->mutex_owner_pid = getpid(); rp->mutex_owner_tag = tag; #endif } always_inline void region_unlock(svm_region_t *rp) { #ifdef MUTEX_DEBUG rp->mutex_owner_pid = 0; rp->mutex_owner_tag = 0; #endif pthread_mutex_unlock(&rp->mutex); } static void *svm_existing_region_map_nolock (void *root_arg, svm_map_region_args_t *a) { svm_region_t *root_rp = root_arg; svm_main_region_t *mp; svm_region_t *rp; void *oldheap; uword *p; a->size += MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE; a->size = rnd_pagesize(a->size); region_lock (root_rp, 4); oldheap = svm_push_pvt_heap(root_rp); mp = root_rp->data_base; ASSERT(mp); p = hash_get_mem (mp->name_hash, a->name); if (p) { rp = svm_map_region_nolock (a); region_unlock(root_rp); svm_pop_heap (oldheap); return rp; } return 0; } static void trace (char 
*chroot_path, char *name, int enable_disable) { svm_map_region_args_t *a = 0; svm_region_t *db_rp; void *oldheap; vec_validate (a, 0); svm_region_init_chroot(chroot_path); a->name = name; a->size = 1<<20; a->flags = SVM_FLAGS_MHEAP; db_rp = svm_region_find_or_create (a); ASSERT(db_rp); region_lock (db_rp, 20); oldheap = svm_push_data_heap (db_rp); mheap_trace (db_rp->data_heap, enable_disable); svm_pop_heap (oldheap); region_unlock (db_rp); svm_region_unmap ((void *)db_rp); svm_region_exit (); vec_free (a); } static void subregion_repair(char *chroot_path) { int i; svm_main_region_t *mp; svm_map_region_args_t a; svm_region_t *root_rp; svm_region_t *rp; svm_subregion_t *subp; u8 *name=0; u8 ** svm_names=0; svm_region_init_chroot(chroot_path); root_rp = svm_get_root_rp(); pthread_mutex_lock (&root_rp->mutex); mp = root_rp->data_base; /* * Snapshoot names, can't hold root rp mutex across * find_or_create. */ pool_foreach (subp, mp->subregions, ({ name = vec_dup (subp->subregion_name); vec_add1(svm_names, name); })); pthread_mutex_unlock (&root_rp->mutex); for (i = 0; i < vec_len(svm_names); i++) { memset (&a, 0, sizeof (a)); a.root_path = chroot_path; a.name = (char *) svm_names[i]; fformat(stdout, "Checking %s region...\n", a.name); rp = svm_existing_region_map_nolock (root_rp, &a); if (rp) { svm_region_unmap (rp); vec_free(svm_names[i]); } } vec_free(svm_names); } void repair (char *chroot_path, int crash_root_region) { svm_region_t *root_rp = 0; svm_map_region_args_t *a = 0; void *svm_map_region (svm_map_region_args_t *a); int svm_fd; u8 *shm_name; fformat(stdout, "our pid: %d\n", getpid()); vec_validate (a, 0); a->root_path = chroot_path; a->name = SVM_GLOBAL_REGION_NAME; a->baseva = SVM_GLOBAL_REGION_BASEVA; a->size = SVM_GLOBAL_REGION_SIZE; a->flags = SVM_FLAGS_NODATA; shm_name = shm_name_from_svm_map_region_args (a); svm_fd = shm_open ((char *)shm_name, O_RDWR, 0777); if (svm_fd < 0) { perror("svm_region_map(mmap open)"); goto out; } vec_free(shm_name); root_rp 
= mmap(0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); if (root_rp == (svm_region_t *) MAP_FAILED) { close(svm_fd); clib_warning("mmap"); goto out; } /* Remap now that the region has been placed */ clib_warning ("remap to 0x%x", root_rp->virtual_base); a->baseva = root_rp->virtual_base; a->size = root_rp->virtual_size; munmap(root_rp, MMAP_PAGESIZE); root_rp = (void *) mmap ((void *)a->baseva, a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword)root_rp == (uword)MAP_FAILED) { clib_unix_warning ("mmap"); goto out; } close(svm_fd); if ((uword) root_rp != root_rp->virtual_base) { clib_warning("mmap botch"); goto out; } if (pthread_mutex_trylock(&root_rp->mutex)) { clib_warning ("root_rp->mutex LOCKED by pid %d, tag %d, cleared...", root_rp->mutex_owner_pid, root_rp->mutex_owner_tag); memset(&root_rp->mutex, 0, sizeof (root_rp->mutex)); goto out; } else { clib_warning ("root_rp->mutex OK...\n"); pthread_mutex_unlock(&root_rp->mutex); } out: vec_free (a); /* * Now that the root region is known to be OK, * fix broken subregions */ subregion_repair(chroot_path); if (crash_root_region) { clib_warning ("Leaving root region locked on purpose..."); pthread_mutex_lock(&root_rp->mutex); root_rp->mutex_owner_pid = getpid(); root_rp->mutex_owner_tag = 99; } svm_region_exit (); } int main (int argc, char **argv) { unformat_input_t input; int parsed =0; char *name; char *chroot_path = 0; u8 *chroot_u8; unformat_init_command_line (&input, argv); while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) { if (unformat(&input, "show-verbose")) { show (chroot_path, 1); parsed++; } else if (unformat (&input, "show")) { show (chroot_path, 0); parsed++; } else if (unformat (&input, "client-scan")) { svm_client_scan(chroot_path); parsed++; } else if (unformat (&input, "repair")) { repair(chroot_path, 0 /* fix it */); parsed++; } else if (unformat (&input, "crash")) { repair (chroot_path, 1 /* crash it */); parsed++; } else if (unformat 
(&input, "trace-on %s", &name)) { trace (chroot_path, name, 1); parsed++; } else if (unformat (&input, "trace-off %s", &name)) { trace (chroot_path, name, 0); parsed++; } else if (unformat (&input, "chroot %s", &chroot_u8)) { chroot_path = (char *) chroot_u8; } else { break; } } unformat_free (&input); if (!parsed) { fformat(stdout, "%s: show | show-verbose | client-scan | trace-on <region-name>\n", argv[0]); fformat(stdout, " trace-off <region-name>\n"); } exit (0); }