/* * Copyright (c) 2016 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_VALUE_U24 0xffffff /* mapping timer control constants (in seconds) */ #define TIME_UNTIL_REFETCH_OR_DELETE 20 #define MAPPING_TIMEOUT (((m->ttl) * 60) - TIME_UNTIL_REFETCH_OR_DELETE) lisp_cp_main_t lisp_control_main; u8 *format_lisp_cp_input_trace (u8 * s, va_list * args); static void *send_map_request_thread_fn (void *arg); typedef enum { LISP_CP_INPUT_NEXT_DROP, LISP_CP_INPUT_N_NEXT, } lisp_cp_input_next_t; typedef struct { u8 is_resend; gid_address_t seid; gid_address_t deid; u8 smr_invoked; } map_request_args_t; u8 vnet_lisp_get_map_request_mode (void) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); return lcm->map_request_mode; } static u16 auth_data_len_by_key_id (lisp_key_type_t key_id) { switch (key_id) { case HMAC_SHA_1_96: return SHA1_AUTH_DATA_LEN; case HMAC_SHA_256_128: return SHA256_AUTH_DATA_LEN; default: clib_warning ("unsupported key type: %d!", key_id); return (u16) ~ 0; } return (u16) ~ 0; } static const EVP_MD * get_encrypt_fcn (lisp_key_type_t key_id) { switch (key_id) { case HMAC_SHA_1_96: return EVP_sha1 (); case HMAC_SHA_256_128: return EVP_sha256 (); default: clib_warning ("unsupported encryption key type: %d!", key_id); break; } return 0; } static int queue_map_request (gid_address_t * seid, gid_address_t * deid, u8 
smr_invoked, u8 is_resend); ip_interface_address_t * ip_interface_get_first_interface_address (ip_lookup_main_t * lm, u32 sw_if_index, u8 loop) { vnet_main_t *vnm = vnet_get_main (); vnet_sw_interface_t *swif = vnet_get_sw_interface (vnm, sw_if_index); if (loop && swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) sw_if_index = swif->unnumbered_sw_if_index; u32 ia = (vec_len ((lm)->if_address_pool_index_by_sw_if_index) > (sw_if_index)) ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index, (sw_if_index)) : (u32) ~ 0; return pool_elt_at_index ((lm)->if_address_pool, ia); } void * ip_interface_get_first_address (ip_lookup_main_t * lm, u32 sw_if_index, u8 version) { ip_interface_address_t *ia; ia = ip_interface_get_first_interface_address (lm, sw_if_index, 1); if (!ia) return 0; return ip_interface_address_get_address (lm, ia); } int ip_interface_get_first_ip_address (lisp_cp_main_t * lcm, u32 sw_if_index, u8 version, ip_address_t * result) { ip_lookup_main_t *lm; void *addr; lm = (version == IP4) ? 
&lcm->im4->lookup_main : &lcm->im6->lookup_main; addr = ip_interface_get_first_address (lm, sw_if_index, version); if (!addr) return 0; ip_address_set (result, addr, version); return 1; } /** * convert from a LISP address to a FIB prefix */ void ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix) { if (addr->version == IP4) { prefix->fp_len = 32; prefix->fp_proto = FIB_PROTOCOL_IP4; memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad)); memcpy (&prefix->fp_addr.ip4, &addr->ip, sizeof (prefix->fp_addr.ip4)); } else { prefix->fp_len = 128; prefix->fp_proto = FIB_PROTOCOL_IP6; memcpy (&prefix->fp_addr.ip6, &addr->ip, sizeof (prefix->fp_addr.ip6)); } } /** * convert from a LISP to a FIB prefix */ void ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix, fib_prefix_t * fib_prefix) { ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix); fib_prefix->fp_len = ip_prefix->len; } /** * Find the sw_if_index of the interface that would be used to egress towards * dst. */ u32 ip_fib_get_egress_iface_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst) { fib_node_index_t fei; fib_prefix_t prefix; ip_address_to_fib_prefix (dst, &prefix); fei = fib_table_lookup (0, &prefix); return (fib_entry_get_resolving_interface (fei)); } /** * Find first IP of the interface that would be used to egress towards dst. * Returns 1 if the address is found 0 otherwise. */ int ip_fib_get_first_egress_ip_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst, ip_address_t * result) { u32 si; ip_lookup_main_t *lm; void *addr = 0; u8 ipver; ASSERT (result != 0); ipver = ip_addr_version (dst); lm = (ipver == IP4) ? 
&lcm->im4->lookup_main : &lcm->im6->lookup_main; si = ip_fib_get_egress_iface_for_dst (lcm, dst); if ((u32) ~ 0 == si) return 0; /* find the first ip address */ addr = ip_interface_get_first_address (lm, si, ipver); if (0 == addr) return 0; ip_address_set (result, addr, ipver); return 1; } static int dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_l2, u8 is_add, u8 with_default_route) { uword *dp_table; if (!is_l2) { dp_table = hash_get (lcm->table_id_by_vni, vni); if (!dp_table) { clib_warning ("vni %d not associated to a vrf!", vni); return VNET_API_ERROR_INVALID_VALUE; } } else { dp_table = hash_get (lcm->bd_id_by_vni, vni); if (!dp_table) { clib_warning ("vni %d not associated to a bridge domain!", vni); return VNET_API_ERROR_INVALID_VALUE; } } /* enable/disable data-plane interface */ if (is_add) { if (is_l2) lisp_gpe_tenant_l2_iface_add_or_lock (vni, dp_table[0]); else lisp_gpe_tenant_l3_iface_add_or_lock (vni, dp_table[0], with_default_route); } else { if (is_l2) lisp_gpe_tenant_l2_iface_unlock (vni); else lisp_gpe_tenant_l3_iface_unlock (vni); } return 0; } static void dp_del_fwd_entry (lisp_cp_main_t * lcm, u32 dst_map_index) { vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a; fwd_entry_t *fe = 0; uword *feip = 0; memset (a, 0, sizeof (*a)); feip = hash_get (lcm->fwd_entry_by_mapping_index, dst_map_index); if (!feip) return; fe = pool_elt_at_index (lcm->fwd_entry_pool, feip[0]); /* delete dp fwd entry */ u32 sw_if_index; a->is_add = 0; a->locator_pairs = fe->locator_pairs; a->vni = gid_address_vni (&fe->reid); gid_address_copy (&a->rmt_eid, &fe->reid); if (fe->is_src_dst) gid_address_copy (&a->lcl_eid, &fe->leid); vnet_lisp_gpe_del_fwd_counters (a, feip[0]); vnet_lisp_gpe_add_del_fwd_entry (a, &sw_if_index); /* delete entry in fwd table */ hash_unset (lcm->fwd_entry_by_mapping_index, dst_map_index); vec_free (fe->locator_pairs); pool_put (lcm->fwd_entry_pool, fe); } /** * Finds first remote locator with best (lowest) priority that has a local * 
peer locator with an underlying route to it. * */ static u32 get_locator_pairs (lisp_cp_main_t * lcm, mapping_t * lcl_map, mapping_t * rmt_map, locator_pair_t ** locator_pairs) { u32 i, limitp = 0, li, found = 0, esi; locator_set_t *rmt_ls, *lcl_ls; ip_address_t _lcl_addr, *lcl_addr = &_lcl_addr; locator_t *lp, *rmt = 0; uword *checked = 0; locator_pair_t pair; rmt_ls = pool_elt_at_index (lcm->locator_set_pool, rmt_map->locator_set_index); lcl_ls = pool_elt_at_index (lcm->locator_set_pool, lcl_map->locator_set_index); if (!rmt_ls || vec_len (rmt_ls->locator_indices) == 0) return 0; while (1) { rmt = 0; /* find unvisited remote locator with best priority */ for (i = 0; i < vec_len (rmt_ls->locator_indices); i++) { if (0 != hash_get (checked, i)) continue; li = vec_elt (rmt_ls->locator_indices, i); lp = pool_elt_at_index (lcm->locator_pool, li); /* we don't support non-IP locators for now */ if (gid_address_type (&lp->address) != GID_ADDR_IP_PREFIX) continue; if ((found && lp->priority == limitp) || (!found && lp->priority >= limitp)) { rmt = lp; /* don't search for locators with lower priority and don't * check this locator again*/ limitp = lp->priority; hash_set (checked, i, 1); break; } } /* check if a local locator with a route to remote locator exists */ if (rmt != 0) { /* find egress sw_if_index for rmt locator */ esi = ip_fib_get_egress_iface_for_dst (lcm, &gid_address_ip (&rmt->address)); if ((u32) ~ 0 == esi) continue; for (i = 0; i < vec_len (lcl_ls->locator_indices); i++) { li = vec_elt (lcl_ls->locator_indices, i); locator_t *sl = pool_elt_at_index (lcm->locator_pool, li); /* found local locator with the needed sw_if_index */ if (sl->sw_if_index == esi) { /* and it has an address */ if (0 == ip_interface_get_first_ip_address (lcm, sl->sw_if_index, gid_address_ip_version (&rmt->address), lcl_addr)) continue; memset (&pair, 0, sizeof (pair)); ip_address_copy (&pair.rmt_loc, &gid_address_ip (&rmt->address)); ip_address_copy (&pair.lcl_loc, lcl_addr); 
pair.weight = rmt->weight; pair.priority = rmt->priority; vec_add1 (locator_pairs[0], pair); found = 1; } } } else break; } hash_free (checked); return found; } static void gid_address_sd_to_flat (gid_address_t * dst, gid_address_t * src, fid_address_t * fid) { ASSERT (GID_ADDR_SRC_DST == gid_address_type (src)); dst[0] = src[0]; switch (fid_addr_type (fid)) { case FID_ADDR_IP_PREF: gid_address_type (dst) = GID_ADDR_IP_PREFIX; gid_address_ippref (dst) = fid_addr_ippref (fid); break; case FID_ADDR_MAC: gid_address_type (dst) = GID_ADDR_MAC; mac_copy (gid_address_mac (dst), fid_addr_mac (fid)); break; default: clib_warning ("Unsupported fid type %d!", fid_addr_type (fid)); break; } } u8 vnet_lisp_map_register_state_get (void) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); return lcm->map_registering; } u8 vnet_lisp_rloc_probe_state_get (void) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); return lcm->rloc_probing; } static void dp_add_fwd_entry (lisp_cp_main_t * lcm, u32 src_map_index, u32 dst_map_index) { vnet_lisp_gpe_add_del_fwd_entry_args_t _a, *a = &_a; gid_address_t *rmt_eid, *lcl_eid; mapping_t *lcl_map, *rmt_map; u32 sw_if_index, **rmts, rmts_idx; uword *feip = 0, *dpid, *rmts_stored_idxp = 0; fwd_entry_t *fe; u8 type, is_src_dst = 0; int rv; memset (a, 0, sizeof (*a)); /* remove entry if it already exists */ feip = hash_get (lcm->fwd_entry_by_mapping_index, dst_map_index); if (feip) dp_del_fwd_entry (lcm, dst_map_index); /* * Determine local mapping and eid */ if (lcm->flags & LISP_FLAG_PITR_MODE) { if (lcm->pitr_map_index != ~0) lcl_map = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index); else { clib_warning ("no PITR mapping configured!"); return; } } else lcl_map = pool_elt_at_index (lcm->mapping_pool, src_map_index); lcl_eid = &lcl_map->eid; /* * Determine remote mapping and eid */ rmt_map = pool_elt_at_index (lcm->mapping_pool, dst_map_index); rmt_eid = &rmt_map->eid; /* * Build and insert data plane forwarding entry */ a->is_add = 1; if 
(MR_MODE_SRC_DST == lcm->map_request_mode) { if (GID_ADDR_SRC_DST == gid_address_type (rmt_eid)) { gid_address_sd_to_flat (&a->rmt_eid, rmt_eid, &gid_address_sd_dst (rmt_eid)); gid_address_sd_to_flat (&a->lcl_eid, rmt_eid, &gid_address_sd_src (rmt_eid)); } else { gid_address_copy (&a->rmt_eid, rmt_eid); gid_address_copy (&a->lcl_eid, lcl_eid); } is_src_dst = 1; } else gid_address_copy (&a->rmt_eid, rmt_eid); a->vni = gid_address_vni (&a->rmt_eid); a->is_src_dst = is_src_dst; /* get vrf or bd_index associated to vni */ type = gid_address_type (&a->rmt_eid); if (GID_ADDR_IP_PREFIX == type) { dpid = hash_get (lcm->table_id_by_vni, a->vni); if (!dpid) { clib_warning ("vni %d not associated to a vrf!", a->vni); return; } a->table_id = dpid[0]; } else if (GID_ADDR_MAC == type) { dpid = hash_get (lcm->bd_id_by_vni, a->vni); if (!dpid) { clib_warning ("vni %d not associated to a bridge domain !", a->vni); return; } a->bd_id = dpid[0]; } /* find best locator pair that 1) verifies LISP policy 2) are connected */ rv = get_locator_pairs (lcm, lcl_map, rmt_map, &a->locator_pairs); /* Either rmt mapping is negative or we can't find underlay path. 
* Try again with petr if configured */ if (rv == 0 && (lcm->flags & LISP_FLAG_USE_PETR)) { rmt_map = lisp_get_petr_mapping (lcm); rv = get_locator_pairs (lcm, lcl_map, rmt_map, &a->locator_pairs); } /* negative entry */ if (rv == 0) { a->is_negative = 1; a->action = rmt_map->action; } rv = vnet_lisp_gpe_add_del_fwd_entry (a, &sw_if_index); if (rv) { if (a->locator_pairs) vec_free (a->locator_pairs); return; } /* add tunnel to fwd entry table */ pool_get (lcm->fwd_entry_pool, fe); vnet_lisp_gpe_add_fwd_counters (a, fe - lcm->fwd_entry_pool); fe->locator_pairs = a->locator_pairs; gid_address_copy (&fe->reid, &a->rmt_eid); if (is_src_dst) gid_address_copy (&fe->leid, &a->lcl_eid); else gid_address_copy (&fe->leid, lcl_eid); fe->is_src_dst = is_src_dst; hash_set (lcm->fwd_entry_by_mapping_index, dst_map_index, fe - lcm->fwd_entry_pool); /* Add rmt mapping to the vector of adjacent mappings to lcl mapping */ rmts_stored_idxp = hash_get (lcm->lcl_to_rmt_adjs_by_lcl_idx, src_map_index); if (!rmts_stored_idxp) { pool_get (lcm->lcl_to_rmt_adjacencies, rmts); memset (rmts, 0, sizeof (*rmts)); rmts_idx = rmts - lcm->lcl_to_rmt_adjacencies; hash_set (lcm->lcl_to_rmt_adjs_by_lcl_idx, src_map_index, rmts_idx); } else { rmts_idx = (u32) (*rmts_stored_idxp); rmts = pool_elt_at_index (lcm->lcl_to_rmt_adjacencies, rmts_idx); } vec_add1 (rmts[0], dst_map_index); } typedef struct { u32 si; u32 di; } fwd_entry_mt_arg_t; static void * dp_add_fwd_entry_thread_fn (void *arg) { fwd_entry_mt_arg_t *a = arg; lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); dp_add_fwd_entry (lcm, a->si, a->di); return 0; } static int dp_add_fwd_entry_from_mt (u32 si, u32 di) { fwd_entry_mt_arg_t a; memset (&a, 0, sizeof (a)); a.si = si; a.di = di; vl_api_rpc_call_main_thread (dp_add_fwd_entry_thread_fn, (u8 *) & a, sizeof (a)); return 0; } /** * Returns vector of adjacencies. * * The caller must free the vector returned by this function. 
* * @param vni virtual network identifier * @return vector of adjacencies */ lisp_adjacency_t * vnet_lisp_adjacencies_get_by_vni (u32 vni) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); fwd_entry_t *fwd; lisp_adjacency_t *adjs = 0, adj; /* *INDENT-OFF* */ pool_foreach(fwd, lcm->fwd_entry_pool, ({ if (gid_address_vni (&fwd->reid) != vni) continue; gid_address_copy (&adj.reid, &fwd->reid); gid_address_copy (&adj.leid, &fwd->leid); vec_add1 (adjs, adj); })); /* *INDENT-ON* */ return adjs; } static lisp_msmr_t * get_map_server (ip_address_t * a) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); lisp_msmr_t *m; vec_foreach (m, lcm->map_servers) { if (!ip_address_cmp (&m->address, a)) { return m; } } return 0; } static lisp_msmr_t * get_map_resolver (ip_address_t * a) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); lisp_msmr_t *m; vec_foreach (m, lcm->map_resolvers) { if (!ip_address_cmp (&m->address, a)) { return m; } } return 0; } int vnet_lisp_add_del_map_server (ip_address_t * addr, u8 is_add) { u32 i; lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); lisp_msmr_t _ms, *ms = &_ms; if (vnet_lisp_enable_disable_status () == 0) { clib_warning ("LISP is disabled!"); return VNET_API_ERROR_LISP_DISABLED; } if (is_add) { if (get_map_server (addr)) { clib_warning ("map-server %U already exists!", format_ip_address, addr); return -1; } memset (ms, 0, sizeof (*ms)); ip_address_copy (&ms->address, addr); vec_add1 (lcm->map_servers, ms[0]); if (vec_len (lcm->map_servers) == 1) lcm->do_map_server_election = 1; } else { for (i = 0; i < vec_len (lcm->map_servers); i++) { ms = vec_elt_at_index (lcm->map_servers, i); if (!ip_address_cmp (&ms->address, addr)) { if (!ip_address_cmp (&ms->address, &lcm->active_map_server)) lcm->do_map_server_election = 1; vec_del1 (lcm->map_servers, i); break; } } } return 0; } /** * Add/remove mapping to/from map-cache. Overwriting not allowed. 
*/ int vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a, u32 * map_index_result) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); u32 mi, *map_indexp, map_index, i; u32 **rmts = 0, *remote_idxp, rmts_itr, remote_idx; uword *rmts_idxp; mapping_t *m, *old_map; u32 **eid_indexes; if (gid_address_type (&a->eid) == GID_ADDR_NSH) { if (gid_address_vni (&a->eid) != 0) { clib_warning ("Supported only default VNI for NSH!"); return VNET_API_ERROR_INVALID_ARGUMENT; } if (gid_address_nsh_spi (&a->eid) > MAX_VALUE_U24) { clib_warning ("SPI is greater than 24bit!"); return VNET_API_ERROR_INVALID_ARGUMENT; } } mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &a->eid); old_map = mi != ~0 ? pool_elt_at_index (lcm->mapping_pool, mi) : 0; if (a->is_add) { /* TODO check if overwriting and take appropriate actions */ if (mi != GID_LOOKUP_MISS && !gid_address_cmp (&old_map->eid, &a->eid)) { clib_warning ("eid %U found in the eid-table", format_gid_address, &a->eid); return VNET_API_ERROR_VALUE_EXIST; } pool_get (lcm->mapping_pool, m); gid_address_copy (&m->eid, &a->eid); m->locator_set_index = a->locator_set_index; m->ttl = a->ttl; m->action = a->action; m->local = a->local; m->is_static = a->is_static; m->key = vec_dup (a->key); m->key_id = a->key_id; map_index = m - lcm->mapping_pool; gid_dictionary_add_del (&lcm->mapping_index_by_gid, &a->eid, map_index, 1); if (pool_is_free_index (lcm->locator_set_pool, a->locator_set_index)) { clib_warning ("Locator set with index %d doesn't exist", a->locator_set_index); return VNET_API_ERROR_INVALID_VALUE; } /* add eid to list of eids supported by locator-set */ vec_validate (lcm->locator_set_to_eids, a->locator_set_index); eid_indexes = vec_elt_at_index (lcm->locator_set_to_eids, a->locator_set_index); vec_add1 (eid_indexes[0], map_index); if (a->local) { /* mark as local */ vec_add1 (lcm->local_mappings_indexes, map_index); } map_index_result[0] = map_index; } else { if (mi == GID_LOOKUP_MISS) { clib_warning ("eid %U 
not found in the eid-table", format_gid_address, &a->eid); return VNET_API_ERROR_INVALID_VALUE; } /* clear locator-set to eids binding */ eid_indexes = vec_elt_at_index (lcm->locator_set_to_eids, a->locator_set_index); for (i = 0; i < vec_len (eid_indexes[0]); i++) { map_indexp = vec_elt_at_index (eid_indexes[0], i); if (map_indexp[0] == mi) break; } vec_del1 (eid_indexes[0], i); /* remove local mark if needed */ m = pool_elt_at_index (lcm->mapping_pool, mi); if (m->local) { /* Remove adjacencies associated with the local mapping */ rmts_idxp = hash_get (lcm->lcl_to_rmt_adjs_by_lcl_idx, mi); if (rmts_idxp) { rmts = pool_elt_at_index (lcm->lcl_to_rmt_adjacencies, rmts_idxp[0]); vec_foreach (remote_idxp, rmts[0]) { dp_del_fwd_entry (lcm, remote_idxp[0]); } vec_free (rmts[0]); pool_put (lcm->lcl_to_rmt_adjacencies, rmts); hash_unset (lcm->lcl_to_rmt_adjs_by_lcl_idx, mi); } u32 k, *lm_indexp; for (k = 0; k < vec_len (lcm->local_mappings_indexes); k++) { lm_indexp = vec_elt_at_index (lcm->local_mappings_indexes, k); if (lm_indexp[0] == mi) break; } vec_del1 (lcm->local_mappings_indexes, k); } else { /* Remove remote (if present) from the vectors of lcl-to-rmts * TODO: Address this in a more efficient way. */ /* *INDENT-OFF* */ pool_foreach (rmts, lcm->lcl_to_rmt_adjacencies, ({ vec_foreach_index (rmts_itr, rmts[0]) { remote_idx = vec_elt (rmts[0], rmts_itr); if (mi == remote_idx) { vec_del1 (rmts[0], rmts_itr); break; } } })); /* *INDENT-ON* */ } /* remove mapping from dictionary */ gid_dictionary_add_del (&lcm->mapping_index_by_gid, &a->eid, 0, 0); gid_address_free (&m->eid); pool_put_index (lcm->mapping_pool, mi); } return 0; } /** * Add/update/delete mapping to/in/from map-cache. 
*/ int vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a, u32 * map_index_result) { uword *dp_table = 0; u32 vni; u8 type; lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); if (vnet_lisp_enable_disable_status () == 0) { clib_warning ("LISP is disabled!"); return VNET_API_ERROR_LISP_DISABLED; } vni = gid_address_vni (&a->eid); type = gid_address_type (&a->eid); if (GID_ADDR_IP_PREFIX == type) dp_table = hash_get (lcm->table_id_by_vni, vni); else if (GID_ADDR_MAC == type) dp_table = hash_get (lcm->bd_id_by_vni, vni); if (!dp_table && GID_ADDR_NSH != type) { clib_warning ("vni %d not associated to a %s!", vni, GID_ADDR_IP_PREFIX == type ? "vrf" : "bd"); return VNET_API_ERROR_INVALID_VALUE; } /* store/remove mapping from map-cache */ return vnet_lisp_map_cache_add_del (a, map_index_result); } static void add_l2_arp_bd (BVT (clib_bihash_kv) * kvp, void *arg) { u32 **ht = arg; u32 version = (u32) kvp->key[0]; if (IP6 == version) return; u32 bd = (u32) (kvp->key[0] >> 32); hash_set (ht[0], bd, 0); } u32 * vnet_lisp_l2_arp_bds_get (void) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); u32 *bds = 0; gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid, add_l2_arp_bd, &bds); return bds; } static void add_ndp_bd (BVT (clib_bihash_kv) * kvp, void *arg) { u32 **ht = arg; u32 version = (u32) kvp->key[0]; if (IP4 == version) return; u32 bd = (u32) (kvp->key[0] >> 32); hash_set (ht[0], bd, 0); } u32 * vnet_lisp_ndp_bds_get (void) { lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); u32 *bds = 0; gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid, add_ndp_bd, &bds); return bds; } typedef struct { void *vector; u32 bd; } lisp_add_l2_arp_ndp_args_t; static void add_l2_arp_entry (BVT (clib_bihash_kv) * kvp, void *arg) { lisp_add_l2_arp_ndp_args_t *a = arg; lisp_api_l2_arp_entry_t **vector = a->vector, e; u32 version = (u32) kvp->key[0]; if (IP6 == version) return; u32 bd = (u32) (kvp->key[0] >> 32); if (bd == a->bd) { mac_copy (e.mac, (void *) 
&kvp->value); e.ip4 = (u32) kvp->key[1]; vec_add1 (vector[0], e); } } lisp_api_l2_arp_entry_t * vnet_lisp_l2_arp_entries_get_by_bd (u32 bd) { lisp_api_l2_arp_entry_t *entries = 0; lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); lisp_add_l2_arp_ndp_args_t a; a.vector = &entries; a.bd = bd; gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid, add_l2_arp_entry, &a); return entries; } static void add_ndp_entry (BVT (clib_bihash_kv) * kvp, void *arg) { lisp_add_l2_arp_ndp_args_t *a = arg; lisp_api_ndp_entry_t **vector = a->vector, e; u32 version = (u32) kvp->key[0]; if (IP4 == version) return; u32 bd = (u32) (kvp->key[0] >> 32); if (bd == a->bd) { mac_copy (e.mac, (void *) &kvp->value); clib_memcpy (e.ip6, &kvp->key[1], 16); vec_add1 (vector[0], e); } } lisp_api_ndp_entry_t * vnet_lisp_ndp_entries_get_by_bd (u32 bd) { lisp_api_ndp_entry_t *entries = 0; lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); lisp_add_l2_arp_ndp_args_t a; a.vector = &entries; a.bd = bd; gid_dict_foreach_l2_arp_ndp_entry (&lcm->mapping_index_by_gid, add_ndp_entry, &a); return entries; } int vnet_lisp_add_del_l2_arp_ndp_entry (gid_address_t * key, u8 * mac, u8 is_add) { if (vnet_lisp_enable_disable_status () == 0) { clib_warning ("LISP is disabled!"); return VNET_API_ERROR_LISP_DISABLED; } lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); int rc = 0; u64 res = gid_dictionary_lookup (&lcm->mapping_index_by_gid, key); if (is_add) { if (res != GID_LOOKUP_MISS_L2) { clib_warning ("Entry %U exists in DB!", format_gid_address, key); return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; } u64 val = mac_to_u64 (mac); gid_dictionary_add_del (&lcm->mapping_index_by_gid, key, val, 1 /* is_add */ ); } else { if (res == GID_LOOKUP_MISS_L2) { clib_warning ("ONE entry %U not found - cannot delete!", format_gid_address, key); return -1; } gid_dictionary_add_del (&lcm->mapping_index_by_gid, key, 0, 0 /* is_add */ ); } return rc; } int vnet_lisp_eid_table_map (u32 vni, u32 dp_id, u8 is_l2, u8 is_add) { lisp_cp_main_t 
*lcm = vnet_lisp_cp_get_main (); uword *dp_idp, *vnip, **dp_table_by_vni, **vni_by_dp_table; if (vnet_lisp_enable_disable_status () == 0) { clib_warning ("LISP is disabled!"); return VNET_API_ERROR_LISP_DISABLED; } dp_table_by_vni = is_l2 ? &lcm->bd_id_by_vni : &lcm->table_id_by_vni; vni_by_dp_table = is_l2 ? &lcm->vni_by_bd_id : &lcm->vni_by_table_id; if (!is_l2 && (vni == 0 || dp_id == 0)) { clib_warning ("can't add/del default vni-vrf mapping!"); return -1; } dp_idp = hash_get (dp_table_by_vni[0], vni); vnip = hash_get (vni_by_dp_table[0], dp_id); if (is_add) { if (dp_idp || vnip) { clib_warning ("vni %d or vrf %d already used in vrf/vni " "mapping!", vni, dp_id); return -1; } hash_set (dp_table_by_vni[0], vni, dp_id); hash_set (vni_by_dp_table[0], dp_id, vni); /* create dp iface */ dp_add_del_iface (lcm, vni, is_l2, 1 /* is_add */ , 1 /* with_default_route */ ); } else { if (!dp_idp || !vnip) { clib_warning ("vni %d or vrf %d not used in any vrf/vni! " "mapping!", vni, dp_id); return -1; } /* remove dp iface */ dp_add_del_iface (lcm, vni, is_l2, 0 /* is_add */ , 0 /* unused */ ); hash_unset (dp_table_by_vni[0], vni); hash_unset (vni_by_dp_table[0], dp_id); } return 0; } /* return 0 if the two locator sets are identical 1 otherwise */ static u8 compare_locators (lisp_cp_main_t * lcm, u32 * old_ls_indexes, locator_t * new_locators) { u32 i, old_li; locator_t *old_loc, *new_loc; if (vec_len (old_ls_indexes) != vec_len (new_locators)) return 1; for (i = 0; i < vec_len (new_locators); i++) { old_li = vec_elt (old_ls_indexes, i); old_loc = pool_elt_at_index (lcm->locator_pool, old_li); new_loc = vec_elt_at_index (new_locators, i); if (locator_cmp (old_loc, new_loc)) return 1; } return 0; } typedef struct { u8 is_negative; void *lcm; gid_address_t *eids_to_be_deleted; } remove_mapping_args_t; /** * Callback invoked when a sub-prefix is found */ static void remove_mapping_if_needed (u32 mi, void *arg) { u8 delete = 0; remove_mapping_args_t *a = arg; lisp_cp_main_t *lcm 
= a->lcm; mapping_t *m; locator_set_t *ls; m = pool_elt_at_index (lcm->mapping_pool, mi); if (!m) return; ls = pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index); if (a->is_negative) { if (0 != vec_len (ls->locator_indices)) delete = 1; } else { if (0 == vec_len (ls->locator_indices)) delete = 1; } if (delete) vec_add1 (a->eids_to_be_deleted, m->eid); } /** * This function searches map cache and looks for IP prefixes that are subset * of the provided one. If such prefix is found depending on 'is_negative' * it does follows: * * 1) if is_negative is true and found prefix points to positive mapping, * then the mapping is removed * 2) if is_negative is false and found prefix points to negative mapping, * then the mapping is removed */ static void remove_overlapping_sub_prefixes (lisp_cp_main_t * lcm, gid_address_t * eid, u8 is_negative) { gid_address_t *e; remove_mapping_args_t a; memset (&a, 0, sizeof (a)); /* do this only in src/dst mode ... */ if (MR_MODE_SRC_DST != lcm->map_request_mode) return; /* ... and only for IP prefix */ if (GID_ADDR_SRC_DST != gid_address_type (eid) || (FID_ADDR_IP_PREF != gid_address_sd_dst_type (eid))) return; a.is_negative = is_negative; a.lcm =
/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Intel and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
#include <stdint.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <inttypes.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/fib/fib_table.h>
#include <vnet/dpo/interface_tx_dpo.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <vnet/ppp/packet.h>
#include <pppoe/pppoe.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/adj/adj_mcast.h>

#include <vppinfra/hash.h>
#include <vppinfra/bihash_template.c>

/* Global PPPoE state. The functions in this file take its address to reach
 * the session pool (->sessions) and the table that maps a sw_if_index to a
 * session pool index (->session_index_by_sw_if_index). */
pppoe_main_t pppoe_main;

/**
 * Format callback that renders one PPPoE session as text.
 *
 * Consumes a single variadic argument, a pppoe_session_t pointer, and
 * appends: the session's offset within pppoe_main.sessions, its
 * sw-if-index, client IP and session id, then the encap interface and
 * decap FIB index, and finally the local and client MAC addresses.
 *
 * @param s    vector the text is appended to
 * @param args va_list holding the session pointer
 * @return the vector returned by the last format() call
 */
u8 *
format_pppoe_session (u8 * s, va_list * args)
{
  pppoe_session_t *session = va_arg (*args, pppoe_session_t *);
  pppoe_main_t *main_state = &pppoe_main;

  /* Identity of the session: pool offset, interface, peer address, id. */
  s = format (s, "[%d] sw-if-index %d client-ip %U session-id %d ",
	      session - main_state->sessions, session->sw_if_index,
	      format_ip46_address, &session->client_ip, IP46_TYPE_ANY,
	      session->session_id);

  /* Where the session encapsulates to and which FIB it decapsulates into. */
  s = format (s, "encap-if-index %d decap-fib-index %d\n",
	      session->encap_if_index, session->decap_fib_index);

  /* Second output line: both ends' ethernet addresses. */
  s = format (s, "    local-mac %U  client-mac %U",
	      format_ethernet_address, session->local_mac,
	      format_ethernet_address, session->client_mac);

  return s;
}

/**
 * Format callback producing the device name of a PPPoE session interface.
 *
 * @param s    vector the text is appended to
 * @param args va_list holding one u32, the device instance number
 * @return the vector returned by format(), with "pppoe_session<N>" appended
 */
static u8 *
format_pppoe_name (u8 * s, va_list * args)
{
  u32 instance = va_arg (*args, u32);

  s = format (s, "pppoe_session%d", instance);
  return s;
}

/**
 * Placeholder TX function for the PPPoE device class.
 *
 * It only logs a warning and reports every vector in the frame as
 * processed; the buffers themselves are not touched here (the warning
 * text itself says they are leaked).
 *
 * @param vm    unused
 * @param node  unused
 * @param frame frame whose vector count is returned
 * @return frame->n_vectors
 */
static uword
dummy_interface_tx (vlib_main_t * vm,
		    vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  uword n_reported = frame->n_vectors;

  clib_warning ("you shouldn't be here, leaking buffers...");

  return n_reported;
}

/**
 * Admin up/down handler for PPPoE interfaces: mirrors the admin state
 * onto the hardware link state.
 *
 * @param vnm         vnet main, passed through to the flag setter
 * @param hw_if_index hardware interface whose flags are updated
 * @param flags       software interface flags; only
 *                    VNET_SW_INTERFACE_FLAG_ADMIN_UP is examined
 * @return always 0 (no error)
 */
static clib_error_t *
pppoe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  u32 link_flags = 0;

  /* Admin up => link up; anything else clears the hardware flags. */
  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
    link_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;

  vnet_hw_interface_set_flags (vnm, hw_if_index, link_flags);

  return 0;
}

/*
 * Device class registration for PPPoE session interfaces.
 *  - format_device_name: names devices "pppoe_session<instance>".
 *  - tx_function: placeholder that only logs a warning.
 *  - admin_up_down_function: sets the hardware link-up flag when the
 *    interface is set admin up, and clears the flags otherwise.
 */
/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pppoe_device_class,static) = {
  .name = "PPPoE",
  .format_device_name = format_pppoe_name,
  .tx_function = dummy_interface_tx,
  .admin_up_down_function = pppoe_interface_admin_up_down,
};
/* *INDENT-ON* */

/**
 * Header format callback for the PPPoE hardware class. Header decoding
 * is not implemented; it only prints the u32 it reads from @a args.
 *
 * @param s    vector the text is appended to
 * @param args va_list from which one u32 is consumed
 * @return the vector returned by format()
 */
static u8 *
format_pppoe_header_with_length (u8 * s, va_list * args)
{
  u32 instance = va_arg (*args, u32);

  return format (s, "unimplemented dev %u", instance);
}

/**
 * Build the ethernet + PPPoE rewrite string for a session interface.
 *
 * The session is found through
 * pppoe_main.session_index_by_sw_if_index[sw_if_index]. The returned
 * vector holds an ethernet header (client MAC as destination, local MAC
 * as source, PPPoE-session ethertype) followed by a PPPoE header whose
 * length field is left at 0.
 *
 * @param vnm         unused
 * @param sw_if_index software interface index of the session interface
 * @param link_type   selects the PPP protocol for IP4/IP6; for any other
 *                    link type the ppp_proto field is not written here
 * @param dst_address unused
 * @return newly validated vector containing the rewrite bytes
 */
static u8 *
pppoe_build_rewrite (vnet_main_t * vnm,
		     u32 sw_if_index,
		     vnet_link_t link_type, const void *dst_address)
{
  pppoe_main_t *main_state = &pppoe_main;
  pppoe_session_t *session;
  ethernet_header_t *eth;
  pppoe_header_t *hdr;
  u32 session_index;
  u8 *rewrite = 0;
  int rewrite_len;

  session_index = main_state->session_index_by_sw_if_index[sw_if_index];
  session = pool_elt_at_index (main_state->sessions, session_index);

  /* Room for both headers, back to back. */
  rewrite_len = sizeof (pppoe_header_t) + sizeof (ethernet_header_t);
  vec_validate_aligned (rewrite, rewrite_len - 1, CLIB_CACHE_LINE_BYTES);

  /* Outer ethernet header: towards the client, from our local MAC. */
  eth = (ethernet_header_t *) rewrite;
  clib_memcpy (eth->dst_address, session->client_mac, 6);
  clib_memcpy (eth->src_address, session->local_mac, 6);
  eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_PPPOE_SESSION);

  /* PPPoE header sits immediately after the ethernet header. */
  hdr = (pppoe_header_t *) (eth + 1);
  hdr->ver_type = PPPOE_VER_TYPE;
  hdr->code = 0;
  hdr->session_id = clib_host_to_net_u16 (session->session_id);
  hdr->length = 0;		/* patched per packet at run-time */

  /* Only IP4 and IP6 links get a PPP protocol written. */
  if (VNET_LINK_IP4 == link_type)
    hdr->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip4);
  else if (VNET_LINK_IP6 == link_type)
    hdr->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip6);

  return rewrite;
}

/**
 * @brief Fix up the adjacency rewrite after encap: write the packet's
 * length into the PPPoE header and retarget TX at the encap interface.
 *
 * @param vm   vlib main, used to compute the buffer chain length
 * @param adj  unused
 * @param b0   buffer whose current data starts at the ethernet header
 * @param data the pppoe_session_t this adjacency was set up with
 */
static void
pppoe_fixup (vlib_main_t * vm,
	     ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
{
  const pppoe_session_t *session = data;
  pppoe_header_t *hdr;
  uword ppp_len;

  /* The PPPoE header follows the ethernet header in the rewrite. */
  hdr = vlib_buffer_get_current (b0) + sizeof (ethernet_header_t);

  /* Chain length minus both headers, plus the ppp_proto field that is
   * declared inside pppoe_header_t but counted in the length. */
  ppp_len = vlib_buffer_length_in_chain (vm, b0)
    - sizeof (pppoe_header_t)
    + sizeof (hdr->ppp_proto) - sizeof (ethernet_header_t);
  hdr->length = clib_host_to_net_u16 (ppp_len);

  /* Send the packet out of the encap interface, not the session one. */
  vnet_buffer (b0)->sw_if_index[VLIB_TX] = session->encap_if_index;
}

/**
 * @brief Adjacency update callback for PPPoE session interfaces.
 *
 * Installs a midchain rewrite (built by pppoe_build_rewrite, fixed up per
 * packet by pppoe_fixup) on adjacency @c ai and stacks the adjacency on an
 * interface-tx DPO for the session's encap interface.
 *
 * @param vnm          vnet main, forwarded to pppoe_build_rewrite
 * @param sw_if_index  sw_if_index of the PPPoE session interface
 * @param ai           adjacency index; must not be ADJ_INDEX_INVALID
 */
static void
pppoe_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
{
  pppoe_main_t *pem = &pppoe_main;
  dpo_id_t tx_dpo = DPO_INVALID;
  pppoe_session_t *session;
  ip_adjacency_t *adj;
  u32 session_index;

  ASSERT (ADJ_INDEX_INVALID != ai);

  adj = adj_get (ai);
  session_index = pem->session_index_by_sw_if_index[sw_if_index];
  session = pool_elt_at_index (pem->sessions, session_index);

  switch (adj->lookup_next_index)
    {
      /* adjacency types this callback is not expected to see */
    case IP_LOOKUP_NEXT_DROP:
    case IP_LOOKUP_NEXT_PUNT:
    case IP_LOOKUP_NEXT_LOCAL:
    case IP_LOOKUP_NEXT_REWRITE:
    case IP_LOOKUP_NEXT_MIDCHAIN:
    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
    case IP_LOOKUP_NEXT_ICMP_ERROR:
    case IP_LOOKUP_N_NEXT:
      ASSERT (0);
      break;

    case IP_LOOKUP_NEXT_MCAST:
      /*
       * Partial rewrite for the multicast case. No MAC fixup is
       * performed, hence the two trailing zero arguments.
       */
      adj_mcast_midchain_update_rewrite
	(ai, pppoe_fixup, session, ADJ_FLAG_NONE,
	 pppoe_build_rewrite (vnm, sw_if_index, adj->ia_link, NULL), 0, 0);
      break;

    case IP_LOOKUP_NEXT_ARP:
    case IP_LOOKUP_NEXT_GLEAN:
    case IP_LOOKUP_NEXT_BCAST:
      adj_nbr_midchain_update_rewrite
	(ai, pppoe_fixup, session, ADJ_FLAG_NONE,
	 pppoe_build_rewrite (vnm, sw_if_index, adj->ia_link, NULL));
      break;
    }

  /* stack on the encap interface's TX DPO, then drop our reference */
  interface_tx_dpo_add_or_lock (vnet_link_to_dpo_proto (adj->ia_link),
				session->encap_if_index, &tx_dpo);
  adj_nbr_midchain_stack (ai, &tx_dpo);
  dpo_reset (&tx_dpo);
}

/*
 * Hardware interface class for PPPoE session interfaces: wires in the
 * header formatter, the rewrite builder and the adjacency update callback
 * defined in this file, and flags the class as point-to-point.
 */
/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
{
  .name = "PPPoE",
  .format_header = format_pppoe_header_with_length,
  .build_rewrite = pppoe_build_rewrite,
  .update_adjacency = pppoe_update_adj,
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/* *INDENT-ON* */

/*
 * X-macro listing the fields that are copied by name between a source and
 * a destination. Users define _(x) as the per-field copy statement, expand
 * foreach_copy_field, then #undef _ again. The source must therefore expose
 * every listed name (as a struct member or a local variable).
 */
#define foreach_copy_field                      \
_(session_id)                                   \
_(encap_if_index)                               \
_(decap_fib_index)                              \
_(client_ip)

/**
 * @brief Range-check decap_fib_index against the IP input node.
 *
 * Picks ip4-input or ip6-input depending on @c is_ip6 and returns true when
 * @c decap_fib_index is smaller than that node's number of next nodes.
 *
 * NOTE(review): a FIB index is being compared against a node's next count;
 * this looks inherited from a "decap next index" check - confirm intent.
 *
 * @param pem              PPPoE main; supplies the vlib main
 * @param is_ip6           non-zero selects the ip6 input node
 * @param decap_fib_index  value to range-check
 * @return true if decap_fib_index < n_next_nodes of the chosen node
 */
static bool
pppoe_decap_next_is_valid (pppoe_main_t * pem, u32 is_ip6,
			   u32 decap_fib_index)
{
  vlib_node_runtime_t *rt;
  u32 node_index;

  if (is_ip6)
    node_index = ip6_input_node.index;
  else
    node_index = ip4_input_node.index;

  rt = vlib_node_get_runtime (pem->vlib_main, node_index);

  return decap_fib_index < rt->n_next_nodes;
}

/**
 * @brief Add or delete a PPPoE session together with its session interface
 *        and the host route towards the client.
 *
 * The encap interface is taken from the link table entry for the client MAC
 * (session id 0) and written back into @c a->encap_if_index, overriding
 * whatever the caller supplied.
 *
 * On add: a session is allocated from the pool, a hardware interface is
 * either recycled from free_pppoe_session_hw_if_indices (with its counters
 * zeroed) or newly registered, the interface is un-hidden and set admin-up,
 * and a host route for the client IP is added via the session interface.
 *
 * On delete: the interface is set down and hidden, its hw_if_index is pushed
 * on the free list, the session table entry is reset, the host route is
 * removed and the session is returned to the pool.
 *
 * @param a             session parameters (is_add, is_ip6, client_mac,
 *                      client_ip, session_id, decap_fib_index)
 * @param sw_if_indexp  if non-NULL, receives the session's sw_if_index
 * @return 0 on success, or
 *         VNET_API_ERROR_INVALID_SW_IF_INDEX if the link table yields no
 *         interface for the client MAC,
 *         VNET_API_ERROR_TUNNEL_EXIST when adding an existing session,
 *         VNET_API_ERROR_INVALID_DECAP_NEXT when the decap index check fails,
 *         VNET_API_ERROR_NO_SUCH_ENTRY when deleting an unknown session.
 */
int vnet_pppoe_add_del_session
  (vnet_pppoe_add_del_session_args_t * a, u32 * sw_if_indexp)
{
  pppoe_main_t *pem = &pppoe_main;
  pppoe_session_t *t = 0;
  vnet_main_t *vnm = pem->vnet_main;
  u32 hw_if_index = ~0;
  u32 sw_if_index = ~0;
  u32 is_ip6 = a->is_ip6;
  pppoe_entry_key_t cached_key;
  pppoe_entry_result_t cached_result;
  u32 bucket;
  pppoe_entry_key_t key;
  pppoe_entry_result_t result;
  vnet_hw_interface_t *hi;
  vnet_sw_interface_t *si;
  fib_prefix_t pfx;

  cached_key.raw = ~0;
  cached_result.raw = ~0;	/* warning be gone */
  memset (&pfx, 0, sizeof (pfx));

  /* host prefix (/32 or /128) for the client's reverse route */
  if (!is_ip6)
    {
      pfx.fp_addr.ip4.as_u32 = a->client_ip.ip4.as_u32;
      pfx.fp_len = 32;
      pfx.fp_proto = FIB_PROTOCOL_IP4;
    }
  else
    {
      pfx.fp_addr.ip6.as_u64[0] = a->client_ip.ip6.as_u64[0];
      pfx.fp_addr.ip6.as_u64[1] = a->client_ip.ip6.as_u64[1];
      pfx.fp_len = 128;
      pfx.fp_proto = FIB_PROTOCOL_IP6;
    }

  /* Get encap_if_index and local mac address from link_table */
  pppoe_lookup_1 (&pem->link_table, &cached_key, &cached_result,
		  a->client_mac, 0, &key, &bucket, &result);
  a->encap_if_index = result.fields.sw_if_index;

  if (a->encap_if_index == ~0)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  /* hardware interface behind the encap interface; source of local_mac */
  si = vnet_get_sw_interface (vnm, a->encap_if_index);
  hi = vnet_get_hw_interface (vnm, si->hw_if_index);

  /* lookup session_table */
  pppoe_lookup_1 (&pem->session_table, &cached_key, &cached_result,
		  a->client_mac, clib_host_to_net_u16 (a->session_id),
		  &key, &bucket, &result);

  /* learn client session */
  pppoe_learn_process (&pem->session_table, a->encap_if_index,
		       &key, &cached_key, &bucket, &result);

  if (a->is_add)
    {
      /* adding a session: session must not already exist */
      if (result.fields.session_index != ~0)
	return VNET_API_ERROR_TUNNEL_EXIST;

      /* reject a decap_fib_index that fails the input-node range check */
      if (!pppoe_decap_next_is_valid (pem, is_ip6, a->decap_fib_index))
	return VNET_API_ERROR_INVALID_DECAP_NEXT;

      pool_get_aligned (pem->sessions, t, CLIB_CACHE_LINE_BYTES);
      memset (t, 0, sizeof (*t));

      clib_memcpy (t->local_mac, hi->hw_address, 6);

      /* copy from arg structure */
#define _(x) t->x = a->x;
      foreach_copy_field;
#undef _

      clib_memcpy (t->client_mac, a->client_mac, 6);

      /* update pppoe fib with session_index */
      result.fields.session_index = t - pem->sessions;
      pppoe_update_1 (&pem->session_table,
		      a->client_mac, clib_host_to_net_u16 (a->session_id),
		      &key, &bucket, &result);

      /*
       * From here on hi refers to the session's own hardware interface,
       * no longer to the encap interface's hardware.
       */
      if (vec_len (pem->free_pppoe_session_hw_if_indices) > 0)
	{
	  vnet_interface_main_t *im = &vnm->interface_main;
	  hw_if_index = pem->free_pppoe_session_hw_if_indices
	    [vec_len (pem->free_pppoe_session_hw_if_indices) - 1];
	  _vec_len (pem->free_pppoe_session_hw_if_indices) -= 1;

	  hi = vnet_get_hw_interface (vnm, hw_if_index);
	  hi->dev_instance = t - pem->sessions;
	  hi->hw_instance = hi->dev_instance;

	  /* clear old stats of freed session before reuse */
	  sw_if_index = hi->sw_if_index;
	  vnet_interface_counter_lock (im);
	  vlib_zero_combined_counter
	    (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
	     sw_if_index);
	  vlib_zero_combined_counter (&im->combined_sw_if_counters
				      [VNET_INTERFACE_COUNTER_RX],
				      sw_if_index);
	  vlib_zero_simple_counter (&im->sw_if_counters
				    [VNET_INTERFACE_COUNTER_DROP],
				    sw_if_index);
	  vnet_interface_counter_unlock (im);
	}
      else
	{
	  hw_if_index = vnet_register_interface
	    (vnm, pppoe_device_class.index, t - pem->sessions,
	     pppoe_hw_class.index, t - pem->sessions);
	  hi = vnet_get_hw_interface (vnm, hw_if_index);
	}

      t->hw_if_index = hw_if_index;
      t->sw_if_index = sw_if_index = hi->sw_if_index;

      vec_validate_init_empty (pem->session_index_by_sw_if_index, sw_if_index,
			       ~0);
      pem->session_index_by_sw_if_index[sw_if_index] = t - pem->sessions;

      /* un-hide the session interface and bring it up */
      si = vnet_get_sw_interface (vnm, sw_if_index);
      si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
      vnet_sw_interface_set_flags (vnm, sw_if_index,
				   VNET_SW_INTERFACE_FLAG_ADMIN_UP);

      /* add reverse route for client ip */
      fib_table_entry_path_add (a->decap_fib_index, &pfx,
				FIB_SOURCE_PLUGIN_HI, FIB_ENTRY_FLAG_NONE,
				fib_proto_to_dpo (pfx.fp_proto),
				&pfx.fp_addr, sw_if_index, ~0,
				1, NULL, FIB_ROUTE_PATH_FLAG_NONE);

    }
  else
    {
      /* deleting a session: session must exist */
      if (result.fields.session_index == ~0)
	return VNET_API_ERROR_NO_SUCH_ENTRY;

      t = pool_elt_at_index (pem->sessions, result.fields.session_index);
      sw_if_index = t->sw_if_index;

      /* take the session interface down and hide it */
      vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */ );
      si = vnet_get_sw_interface (vnm, t->sw_if_index);
      si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;

      /* keep the hardware interface around for reuse by a later add */
      vec_add1 (pem->free_pppoe_session_hw_if_indices, t->hw_if_index);

      pem->session_index_by_sw_if_index[t->sw_if_index] = ~0;

      /* update pppoe fib with session_index = ~0 */
      result.fields.session_index = ~0;
      pppoe_update_1 (&pem->session_table,
		      a->client_mac, clib_host_to_net_u16 (a->session_id),
		      &key, &bucket, &result);


      /* delete reverse route for client ip */
      fib_table_entry_path_remove (a->decap_fib_index, &pfx,
				   FIB_SOURCE_PLUGIN_HI,
				   fib_proto_to_dpo (pfx.fp_proto),
				   &pfx.fp_addr,
				   sw_if_index, ~0, 1,
				   FIB_ROUTE_PATH_FLAG_NONE);

      pool_put (pem->sessions, t);
    }

  if (sw_if_indexp)
    *sw_if_indexp = sw_if_index;

  return 0;
}

/**
 * @brief CLI handler for "create pppoe session": add or delete a session.
 *
 * Accepted tokens, in any order: "client-ip <ip4|ip6>", "session-id <n>",
 * "client-mac <mac>", "decap-vrf-id <n>" and "del". client-ip and
 * client-mac are mandatory.
 *
 * session-id is parsed into a 32-bit temporary and range-checked before it
 * is narrowed to u16, because "%d" stores a full 32-bit value. The
 * decap-vrf-id is resolved to a FIB index only after the whole line has
 * been parsed, so the address family of client-ip is known regardless of
 * token order.
 *
 * @param vm     vlib main, used for CLI output
 * @param input  CLI input; one line is consumed
 * @param cmd    unused here
 * @return NULL on success, otherwise a clib error describing the failure
 */
static clib_error_t *
pppoe_add_del_session_command_fn (vlib_main_t * vm,
				  unformat_input_t * input,
				  vlib_cli_command_t * cmd)
{
  unformat_input_t _line_input, *line_input = &_line_input;
  u16 session_id = 0;
  ip46_address_t client_ip;
  u8 is_add = 1;
  u8 client_ip_set = 0;
  u8 ipv4_set = 0;
  u8 ipv6_set = 0;
  u32 encap_if_index = 0;
  u32 decap_fib_index = 0;
  u32 decap_vrf_id = 0;
  u8 decap_vrf_id_set = 0;
  u8 client_mac[6] = { 0 };
  u8 client_mac_set = 0;
  int rv;
  u32 tmp;
  vnet_pppoe_add_del_session_args_t _a, *a = &_a;
  u32 session_sw_if_index;
  clib_error_t *error = NULL;

  /* Cant "universally zero init" (={0}) due to GCC bug 53119 */
  memset (&client_ip, 0, sizeof client_ip);

  /* Get a line of input. */
  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "del"))
	{
	  is_add = 0;
	}
      else if (unformat (line_input, "session-id %d", &tmp))
	{
	  /* "%d" writes 32 bits: never point it at the u16 directly */
	  if (tmp > 0xffff)
	    {
	      error =
		clib_error_return (0, "session-id %d out of range (0-65535)",
				   tmp);
	      goto done;
	    }
	  session_id = tmp;
	}
      else if (unformat (line_input, "client-ip %U",
			 unformat_ip4_address, &client_ip.ip4))
	{
	  client_ip_set = 1;
	  ipv4_set = 1;
	}
      else if (unformat (line_input, "client-ip %U",
			 unformat_ip6_address, &client_ip.ip6))
	{
	  client_ip_set = 1;
	  ipv6_set = 1;
	}
      else if (unformat (line_input, "decap-vrf-id %d", &tmp))
	{
	  /* resolved below, once the address family is known */
	  decap_vrf_id = tmp;
	  decap_vrf_id_set = 1;
	}
      else
	if (unformat
	    (line_input, "client-mac %U", unformat_ethernet_address,
	     client_mac))
	client_mac_set = 1;
      else
	{
	  error = clib_error_return (0, "parse error: '%U'",
				     format_unformat_error, line_input);
	  goto done;
	}
    }

  if (client_ip_set == 0)
    {
      error =
	clib_error_return (0, "session client ip address not specified");
      goto done;
    }

  if (ipv4_set && ipv6_set)
    {
      error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
      goto done;
    }

  if (client_mac_set == 0)
    {
      error = clib_error_return (0, "session client mac not specified");
      goto done;
    }

  if (decap_vrf_id_set)
    {
      decap_fib_index =
	fib_table_find (ipv6_set ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4,
			decap_vrf_id);

      if (decap_fib_index == ~0)
	{
	  error =
	    clib_error_return (0, "nonexistent decap fib id %d",
			       decap_vrf_id);
	  goto done;
	}
    }

  memset (a, 0, sizeof (*a));

  a->is_add = is_add;
  a->is_ip6 = ipv6_set;

  /* copies the like-named locals (session_id, client_ip, ...) into *a */
#define _(x) a->x = x;
  foreach_copy_field;
#undef _

  clib_memcpy (a->client_mac, client_mac, 6);

  rv = vnet_pppoe_add_del_session (a, &session_sw_if_index);

  switch (rv)
    {
    case 0:
      if (is_add)
	vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
			 vnet_get_main (), session_sw_if_index);
      break;

    case VNET_API_ERROR_TUNNEL_EXIST:
      error = clib_error_return (0, "session already exists...");
      goto done;

    case VNET_API_ERROR_NO_SUCH_ENTRY:
      error = clib_error_return (0, "session does not exist...");
      goto done;

    default:
      error = clib_error_return
	(0, "vnet_pppoe_add_del_session returned %d", rv);
      goto done;
    }

done:
  unformat_free (line_input);

  return error;
}

/*?
 * Add or delete a PPPoE Session.
 *
 * The client IP address and client MAC address are mandatory. The
 * optional <em>decap-vrf-id</em> must name an existing FIB table, and
 * appending <em>del</em> deletes the session instead of creating it.
 *
 * @cliexpar
 * Example of how to create a PPPoE Session:
 * @cliexcmd{create pppoe session client-ip 10.0.3.1 session-id 13
 *             client-mac 00:01:02:03:04:05 }
 * Example of how to delete a PPPoE Session:
 * @cliexcmd{create pppoe session client-ip 10.0.3.1 session-id 13
 *             client-mac 00:01:02:03:04:05 del }
 ?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pppoe_session_command, static) = {
  .path = "create pppoe session",
  .short_help =
  "create pppoe session client-ip <client-ip> session-id <nn>"
  " client-mac <client-mac> [decap-vrf-id <nn>] [del]",
  .function = pppoe_add_del_session_command_fn,
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
/**
 * @brief CLI handler for "show pppoe session".
 *
 * Prints one entry per session in the pool via format_pppoe_session, or a
 * single notice when the pool holds no sessions.
 *
 * @param vm     vlib main, used for CLI output
 * @param input  unused here
 * @param cmd    unused here
 * @return always 0 (no error)
 */
static clib_error_t *
show_pppoe_session_command_fn (vlib_main_t * vm,
			       unformat_input_t * input,
			       vlib_cli_command_t * cmd)
{
  pppoe_main_t *pem = &pppoe_main;
  pppoe_session_t *session;

  /* nothing to walk: report and leave */
  if (pool_elts (pem->sessions) == 0)
    {
      vlib_cli_output (vm, "No pppoe sessions configured...");
      return 0;
    }

  pool_foreach (session, pem->sessions,
		({
		    vlib_cli_output (vm, "%U", format_pppoe_session, session);
		}));

  return 0;
}
/* *INDENT-ON* */

/*?
 * Display all the PPPoE Session entries. When no session is configured a
 * single "No pppoe sessions configured..." line is printed instead.
 *
 * @cliexpar
 * Example of how to display the PPPoE Session entries:
 * @cliexstart{show pppoe session}
 * [0] client-ip 10.0.3.1 session_id 13 encap-if-index 0 decap-vrf-id 13 sw_if_index 5
 *     local-mac a0:b0:c0:d0:e0:f0 client-mac 00:01:02:03:04:05
 * @cliexend
 ?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pppoe_session_command, static) = {
    .path = "show pppoe session",
    .short_help = "show pppoe session",
    .function = show_pppoe_session_command_fn,
};
/* *INDENT-ON* */

/** State carried through the session-table walk of "show pppoe fib". */
typedef struct pppoe_show_walk_ctx_t_
{
  /* vlib main used for CLI output */
  vlib_main_t *vm;
  /* non-zero until the column header has been printed */
  u8 first_entry;
  /* number of key/value pairs visited so far */
  u32 total_entries;
} pppoe_show_walk_ctx_t;

/**
 * @brief Per-entry callback for the "show pppoe fib" table walk.
 *
 * Emits the column header ahead of the first entry, then one row with the
 * MAC address, session id (converted to host byte order), sw_if_index and
 * session_index of the entry; unset (~0) indices are shown as -1.
 *
 * @param kvp  bihash key/value pair being visited
 * @param arg  the walk's pppoe_show_walk_ctx_t
 */
static void
pppoe_show_walk_cb (BVT (clib_bihash_kv) * kvp, void *arg)
{
  pppoe_show_walk_ctx_t *walk = arg;
  pppoe_entry_key_t entry_key;
  pppoe_entry_result_t entry_val;

  /* column titles go out exactly once, before the first row */
  if (walk->first_entry)
    {
      vlib_cli_output (walk->vm,
		       "%=19s%=12s%=13s%=14s",
		       "Mac-Address", "session_id", "sw_if_index",
		       "session_index");
      walk->first_entry = 0;
    }

  /* reinterpret the raw bihash words as key and result */
  entry_val.raw = kvp->value;
  entry_key.raw = kvp->key;

  vlib_cli_output (walk->vm,
		   "%=19U%=12d%=13d%=14d",
		   format_ethernet_address, entry_key.fields.mac,
		   clib_net_to_host_u16 (entry_key.fields.session_id),
		   entry_val.fields.sw_if_index == ~0
		   ? -1 : entry_val.fields.sw_if_index,
		   entry_val.fields.session_index == ~0
		   ? -1 : entry_val.fields.session_index);

  walk->total_entries++;
}

/**
 * @brief CLI handler for "show pppoe fib": display the PPPoE session table.
 *
 * Walks every key/value pair of the session table with pppoe_show_walk_cb,
 * which prints one row per entry, then prints the number of entries seen.
 *
 * @param vm     vlib main, used for CLI output
 * @param input  unused here
 * @param cmd    unused here
 * @return always 0 (no error)
 */
static clib_error_t *
show_pppoe_fib_command_fn (vlib_main_t * vm,
			   unformat_input_t * input, vlib_cli_command_t * cmd)
{
  pppoe_main_t *pem = &pppoe_main;
  /* total_entries is not named here and therefore starts at zero */
  pppoe_show_walk_ctx_t ctx = {
    .first_entry = 1,
    .vm = vm,
  };

  BV (clib_bihash_foreach_key_value_pair)
    (&pem->session_table, pppoe_show_walk_cb, &ctx);

  if (ctx.total_entries == 0)
    vlib_cli_output (vm, "no pppoe fib entries");
  else
    /* total_entries is a u32: the conversion must be 32-bit, not %lld */
    vlib_cli_output (vm, "%u pppoe fib entries", ctx.total_entries);

  return 0;
}

/*?
 * This command displays the entries of the PPPoE FIB (session) table.
 * Each entry is shown with its MAC address, session id, sw_if_index and
 * session index, followed by the total number of entries.
 *
 * @cliexpar
 * Example of how to display the entries in the PPPoE FIB table:
 * @cliexstart{show pppoe fib}
 *     Mac-Address     session_id  sw_if_index session_index
 *  52:54:00:53:18:33      1            2            0
 *  52:54:00:53:18:55      2            3            1
 * 2 pppoe fib entries
 * @cliexend
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pppoe_fib_command, static) = {
    .path = "show pppoe fib",
    .short_help = "show pppoe fib",
    .function = show_pppoe_fib_command_fn,
};
/* *INDENT-ON* */

clib_error_t *
pppoe_init (vlib_main_t * vm)
{
  pppoe_main_t *pem = &pppoe_main;

  pem->vnet_main = vnet_get_main ();
  pem->vlib_main = vm;

  /* Create the hash table  */
  BV (clib_bihash_init) (&pem->link_table, "pppoe link table",
			 PPPOE_NUM_BUCKETS, PPPOE_MEMORY_SIZE);

  BV (clib_bihash_init) (&pem->session_table, "pppoe session table",
			 PPPOE_NUM_BUCKE