/* * Copyright (c) 2020 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include vlib_log_class_t lcp_itf_pair_logger; /** * Pool of LIP objects */ lcp_itf_pair_t *lcp_itf_pair_pool = NULL; u32 lcp_itf_num_pairs (void) { return pool_elts (lcp_itf_pair_pool); } /** * DBs of interface-pair objects: * - key'd by VIF (linux ID) * - key'd by VPP's physical interface * - number of shared uses of VPP's tap/host interface */ static uword *lip_db_by_vif; index_t *lip_db_by_phy; u32 *lip_db_by_host; /** * vector of virtual function table */ static lcp_itf_pair_vft_t *lcp_itf_vfts = NULL; void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft) { vec_add1 (lcp_itf_vfts, *lcp_itf_vft); } u8 * format_lcp_itf_pair (u8 *s, va_list *args) { vnet_main_t *vnm = vnet_get_main (); lcp_itf_pair_t *lip = va_arg (*args, lcp_itf_pair_t *); vnet_sw_interface_t *swif_phy; vnet_sw_interface_t *swif_host; s = format (s, "itf-pair: [%d]", lip - lcp_itf_pair_pool); swif_phy = vnet_get_sw_interface_or_null (vnm, lip->lip_phy_sw_if_index); if (!swif_phy) s = format (s, " "); else s = format (s, " %U", format_vnet_sw_interface_name, vnm, swif_phy); swif_host = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index); if (!swif_host) s = format (s, " "); else s = format (s, " %U", format_vnet_sw_interface_name, vnm, swif_host); s = format (s, " %v %d type %s", lip->lip_host_name, lip->lip_vif_index, (lip->lip_host_type == LCP_ITF_HOST_TAP) ? "tap" : "tun"); if (lip->lip_namespace) s = format (s, " netns %s", lip->lip_namespace); return s; } static walk_rc_t lcp_itf_pair_walk_show_cb (index_t api, void *ctx) { vlib_main_t *vm; lcp_itf_pair_t *lip; lip = lcp_itf_pair_get (api); if (!lip) return WALK_STOP; vm = vlib_get_main (); vlib_cli_output (vm, "%U\n", format_lcp_itf_pair, lip); return WALK_CONTINUE; } void lcp_itf_pair_show (u32 phy_sw_if_index) { vlib_main_t *vm; u8 *ns; index_t api; vm = vlib_get_main (); ns = lcp_get_default_ns (); vlib_cli_output (vm, "lcp default netns '%s'\n", ns ? (char *) ns : ""); vlib_cli_output (vm, "lcp lcp-auto-subint %s\n", lcp_auto_subint () ? "on" : "off"); vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? "on" : "off"); vlib_cli_output (vm, "lcp del-static-on-link-down %s\n", lcp_get_del_static_on_link_down () ? "on" : "off"); vlib_cli_output (vm, "lcp del-dynamic-on-link-down %s\n", lcp_get_del_dynamic_on_link_down () ? "on" : "off"); if (phy_sw_if_index == ~0) { lcp_itf_pair_walk (lcp_itf_pair_walk_show_cb, 0); } else { api = lcp_itf_pair_find_by_phy (phy_sw_if_index); if (api != INDEX_INVALID) lcp_itf_pair_walk_show_cb (api, 0); } } lcp_itf_pair_t * lcp_itf_pair_get (u32 index) { if (!lcp_itf_pair_pool) return NULL; if (index == INDEX_INVALID) return NULL; return pool_elt_at_index (lcp_itf_pair_pool, index); } index_t lcp_itf_pair_find_by_vif (u32 vif_index) { uword *p; p = hash_get (lip_db_by_vif, vif_index); if (p) return p[0]; return INDEX_INVALID; } const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = { [LCP_ITF_HOST_TAP] = { [AF_IP4] = "linux-cp-xc-ip4", [AF_IP6] = "linux-cp-xc-ip6", }, [LCP_ITF_HOST_TUN] = { [AF_IP4] = "linux-cp-xc-l3-ip4", [AF_IP6] = "linux-cp-xc-l3-ip6", }, }; const fib_route_path_flags_t lcp_itf_route_path_flags[N_LCP_ITF_HOST] = { [LCP_ITF_HOST_TAP] = FIB_ROUTE_PATH_DVR, [LCP_ITF_HOST_TUN] = FIB_ROUTE_PATH_FLAG_NONE, }; static void lcp_itf_unset_adjs (lcp_itf_pair_t *lip) { adj_unlock (lip->lip_phy_adjs.adj_index[AF_IP4]); adj_unlock (lip->lip_phy_adjs.adj_index[AF_IP6]); } static void lcp_itf_set_adjs (lcp_itf_pair_t *lip) { if (lip->lip_host_type == LCP_ITF_HOST_TUN) { lip->lip_phy_adjs.adj_index[AF_IP4] = adj_nbr_add_or_lock ( FIB_PROTOCOL_IP4, VNET_LINK_IP4, &zero_addr, lip->lip_phy_sw_if_index); lip->lip_phy_adjs.adj_index[AF_IP6] = adj_nbr_add_or_lock ( FIB_PROTOCOL_IP6, VNET_LINK_IP6, &zero_addr, lip->lip_phy_sw_if_index); } else { lip->lip_phy_adjs.adj_index[AF_IP4] = adj_mcast_add_or_lock ( FIB_PROTOCOL_IP4, VNET_LINK_IP4, lip->lip_phy_sw_if_index); lip->lip_phy_adjs.adj_index[AF_IP6] = adj_mcast_add_or_lock ( FIB_PROTOCOL_IP6, VNET_LINK_IP6, lip->lip_phy_sw_if_index); } ip_adjacency_t *adj; adj = adj_get (lip->lip_phy_adjs.adj_index[AF_IP4]); lip->lip_rewrite_len = adj->rewrite_header.data_bytes; } int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name, u32 host_index, lip_host_type_t host_type, u8 *ns) { index_t lipi; lcp_itf_pair_t *lip; if (host_sw_if_index == ~0) { LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host"); return VNET_API_ERROR_INVALID_SW_IF_INDEX; } lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index); if (lipi != INDEX_INVALID) return VNET_API_ERROR_VALUE_EXIST; LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s", format_vnet_sw_if_index_name, vnet_get_main (), host_sw_if_index, format_vnet_sw_if_index_name, vnet_get_main (), phy_sw_if_index, host_name, host_index, ns); /* * Create a new pair. */ pool_get (lcp_itf_pair_pool, lip); lipi = lip - lcp_itf_pair_pool; vec_validate_init_empty (lip_db_by_phy, phy_sw_if_index, INDEX_INVALID); vec_validate_init_empty (lip_db_by_host, host_sw_if_index, INDEX_INVALID); lip_db_by_phy[phy_sw_if_index] = lipi; lip_db_by_host[host_sw_if_index] = lipi; if (clib_strcmp ((char *) ns, (char *) lcp_get_default_ns ()) == 0) { hash_set (lip_db_by_vif, host_index, lipi); } lip->lip_host_sw_if_index = host_sw_if_index; lip->lip_phy_sw_if_index = phy_sw_if_index; lip->lip_host_name = vec_dup (host_name); lip->lip_host_type = host_type; lip->lip_vif_index = host_index; lip->lip_namespace = vec_dup (ns); /* * First use of this host interface. * Enable the x-connect feature on the host to send * all packets to the phy. */ ip_address_family_t af; FOR_EACH_IP_ADDRESS_FAMILY (af) ip_feature_enable_disable (af, N_SAFI, IP_FEATURE_INPUT, lcp_itf_l3_feat_names[lip->lip_host_type][af], lip->lip_host_sw_if_index, 1, NULL, 0); /* * Configure passive punt to the host interface. */ fib_route_path_t *rpaths = NULL, rpath = { .frp_flags = lcp_itf_route_path_flags[lip->lip_host_type], .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = lip->lip_host_sw_if_index, .frp_weight = 1, .frp_fib_index = ~0, }; vec_add1 (rpaths, rpath); ip4_punt_redirect_add_paths (lip->lip_phy_sw_if_index, rpaths); rpaths[0].frp_proto = DPO_PROTO_IP6; ip6_punt_redirect_add_paths (lip->lip_phy_sw_if_index, rpaths); vec_free (rpaths); lcp_itf_set_adjs (lip); /* enable ARP feature node for broadcast interfaces */ if (lip->lip_host_type != LCP_ITF_HOST_TUN) { vnet_feature_enable_disable ("arp", "linux-cp-arp-phy", lip->lip_phy_sw_if_index, 1, NULL, 0); vnet_feature_enable_disable ("arp", "linux-cp-arp-host", lip->lip_host_sw_if_index, 1, NULL, 0); } else { if (hash_elts (lip_db_by_vif) == 1) { vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL, 0); vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL, 0); } } /* invoke registered callbacks for pair addition */ lcp_itf_pair_vft_t *vft; vec_foreach (vft, lcp_itf_vfts) { if (vft->pair_add_fn) vft->pair_add_fn (lip); } /* set timestamp when pair entered service */ lip->lip_create_ts = vlib_time_now (vlib_get_main ()); return 0; } static clib_error_t * lcp_netlink_add_link_vlan (int parent, u32 vlan, u16 proto, const char *name) { struct rtnl_link *link; struct nl_sock *sk; int err; sk = nl_socket_alloc (); if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0) { LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: connect error: %s", nl_geterror (err)); return clib_error_return (NULL, "Unable to connect socket: %d", err); } link = rtnl_link_vlan_alloc (); rtnl_link_set_link (link, parent); rtnl_link_set_name (link, name); rtnl_link_vlan_set_id (link, vlan); rtnl_link_vlan_set_protocol (link, htons (proto)); if ((err = rtnl_link_add (sk, link, NLM_F_CREATE)) < 0) { LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: link add error: %s", nl_geterror (err)); return clib_error_return (NULL, "Unable to add link %s: %d", name, err); } rtnl_link_put (link); nl_close (sk); return NULL; } static clib_error_t * lcp_netlink_del_link (const char *name) { struct rtnl_link *link; struct nl_sock *sk; int err; sk = nl_socket_alloc (); if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0) return clib_error_return (NULL, "Unable to connect socket: %d", err); link = rtnl_link_alloc (); rtnl_link_set_name (link, name); if ((err = rtnl_link_delete (sk, link)) < 0) return clib_error_return (NULL, "Unable to del link %s: %d", name, err); rtnl_link_put (link); nl_close (sk); return NULL; } int lcp_itf_pair_del (u32 phy_sw_if_index) { ip_address_family_t af; lcp_itf_pair_t *lip; u32 lipi; lcp_itf_pair_vft_t *vft; lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index); if (lipi == INDEX_INVALID) return VNET_API_ERROR_INVALID_SW_IF_INDEX; lip = lcp_itf_pair_get (lipi); LCP_ITF_PAIR_NOTICE ( "pair_del: host:%U phy:%U host_if:%v vif:%d ns:%v", format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index, format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index, lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace); /* invoke registered callbacks for pair deletion */ vec_foreach (vft, lcp_itf_vfts) { if (vft->pair_del_fn) vft->pair_del_fn (lip); } FOR_EACH_IP_ADDRESS_FAMILY (af) ip_feature_enable_disable (af, N_SAFI, IP_FEATURE_INPUT, lcp_itf_l3_feat_names[lip->lip_host_type][af], lip->lip_host_sw_if_index, 0, NULL, 0); lcp_itf_unset_adjs (lip); ip4_punt_redirect_del (lip->lip_phy_sw_if_index); ip6_punt_redirect_del (lip->lip_phy_sw_if_index); /* disable ARP feature node for broadcast interfaces */ if (lip->lip_host_type != LCP_ITF_HOST_TUN) { vnet_feature_enable_disable ("arp", "linux-cp-arp-phy", lip->lip_phy_sw_if_index, 0, NULL, 0); vnet_feature_enable_disable ("arp", "linux-cp-arp-host", lip->lip_host_sw_if_index, 0, NULL, 0); } else { if (hash_elts (lip_db_by_vif) == 1) { vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0, NULL, 0); vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0, NULL, 0); } } lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID; lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID; hash_unset (lip_db_by_vif, lip->lip_vif_index); vec_free (lip->lip_host_name); vec_free (lip->lip_namespace); pool_put (lcp_itf_pair_pool, lip); return 0; } static void lcp_itf_pair_delete_by_index (index_t lipi) { u32 host_sw_if_index; lcp_itf_pair_t *lip; u8 *host_name, *ns; lip = lcp_itf_pair_get (lipi); host_name = vec_dup (lip->lip_host_name); host_sw_if_index = lip->lip_host_sw_if_index; ns = vec_dup (lip->lip_namespace); lcp_itf_pair_del (lip->lip_phy_sw_if_index); if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index)) { int curr_ns_fd = -1; int vif_ns_fd = -1; if (ns) { curr_ns_fd = clib_netns_open (NULL /* self */); vif_ns_fd = clib_netns_open ((u8 *) ns); if (vif_ns_fd != -1) clib_setns (vif_ns_fd); } lcp_netlink_del_link ((const char *) host_name); if (vif_ns_fd != -1) close (vif_ns_fd); if (curr_ns_fd != -1) { clib_setns (curr_ns_fd); close (curr_ns_fd); } vnet_delete_sub_interface (host_sw_if_index); } else tap_delete_if (vlib_get_main (), host_sw_if_index); vec_free (host_name); vec_free (ns); } int lcp_itf_pair_delete (u32 phy_sw_if_index) { index_t lipi; lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index); if (lipi == INDEX_INVALID) return VNET_API_ERROR_INVALID_SW_IF_INDEX; lcp_itf_pair_delete_by_index (lipi); return 0; } /** * lcp_itf_interface_add_del * * Registered to receive interface Add and delete notifications */ static clib_error_t * lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add) { if (!is_add) /* remove any interface pair we have for this interface */ lcp_itf_pair_delete (sw_if_index); return (NULL); } VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del); void lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx) { u32 api; pool_foreach_index (api, lcp_itf_pair_pool) { if (!cb (api, ctx)) break; }; } static clib_error_t * lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input) { u8 *default_ns; u32 tmp; default_ns = NULL; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "default netns %v", &default_ns)) { vec_add1 (default_ns, 0); if (lcp_set_default_ns (default_ns) < 0) { return clib_error_return (0, "linux-cp default namespace must" " be less than %d characters", LCP_NS_LEN); } } else if (unformat (input, "lcp-auto-subint")) lcp_set_auto_subint (1 /* is_auto */); else if (unformat (input, "lcp-sync")) lcp_set_sync (1 /* is_auto */); else if (unformat (input, "del-static-on-link-down")) lcp_set_del_static_on_link_down (1 /* is_del */); else if (unformat (input, "del-dynamic-on-link-down")) lcp_set_del_dynamic_on_link_down (1 /* is_del */); else if (unformat (input, "num-rx-queues %d", &tmp)) lcp_set_default_num_queues (tmp, 0 /* is_tx */); else if (unformat (input, "num-tx-queues %d", &tmp)) lcp_set_default_num_queues (tmp, 1 /* is_tx */); else return clib_error_return (0, "interfaces not found"); } vec_free (default_ns); return NULL; } VLIB_EARLY_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-cp"); /* * Returns 1 if the tap name is valid. * Returns 0 if the tap name is invalid. */ static int lcp_validate_if_name (u8 *name) { int len; char *p; p = (char *) name; len = clib_strnlen (p, IFNAMSIZ); if (len >= IFNAMSIZ) return 0; for (; *p; ++p) { if (isalnum (*p)) continue; switch (*p) { case '-': case '_': case '%': case '@': case ':': case '.': continue; } return 0; } return 1; } void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state) { int curr_ns_fd, vif_ns_fd; if (!lip) return; curr_ns_fd = vif_ns_fd = -1; if (lip->lip_namespace) { curr_ns_fd = clib_netns_open (NULL /* self */); vif_ns_fd = clib_netns_open (lip->lip_namespace); if (vif_ns_fd != -1) clib_setns (vif_ns_fd); } /* Set the same link state on the netlink interface */ vnet_netlink_set_link_state (lip->lip_vif_index, state); if (vif_ns_fd != -1) close (vif_ns_fd); if (curr_ns_fd != -1) { clib_setns (curr_ns_fd); close (curr_ns_fd); } return; } void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip) { ip4_main_t *im4 = &ip4_main; ip6_main_t *im6 = &ip6_main; ip_lookup_main_t *lm4 = &im4->lookup_main; ip_lookup_main_t *lm6 = &im6->lookup_main; ip_interface_address_t *ia = 0; int vif_ns_fd = -1; int curr_ns_fd = -1; if (!lip) return; if (lip->lip_namespace) { curr_ns_fd = clib_netns_open (NULL /* self */); vif_ns_fd = clib_netns_open (lip->lip_namespace); if (vif_ns_fd != -1) clib_setns (vif_ns_fd); } /* Sync any IP4 addressing info into LCP */ foreach_ip_interface_address ( lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({ ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia); LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d", format_lcp_itf_pair, lip, format_ip4_address, r4, ia->address_length); vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length); })); /* Sync any IP6 addressing info into LCP */ foreach_ip_interface_address ( lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({ ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia); LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d", format_lcp_itf_pair, lip, format_ip6_address, r6, ia->address_length); vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length); })); if (vif_ns_fd != -1) close (vif_ns_fd); if (curr_ns_fd != -1) { clib_setns (curr_ns_fd); close (curr_ns_fd); } } typedef struct { u32 vlan; bool dot1ad; u32 matched_sw_if_index; } lcp_itf_match_t; static walk_rc_t lcp_itf_pair_find_walk (vnet_main_t *vnm, u32 sw_if_index, void *arg) { lcp_itf_match_t *match = arg; const vnet_sw_interface_t *sw; sw = vnet_get_sw_interface (vnm, sw_if_index); if (sw && (sw->sub.eth.inner_vlan_id == 0) && (sw->sub.eth.outer_vlan_id == match->vlan) && (sw->sub.eth.flags.dot1ad == match->dot1ad)) { LCP_ITF_PAIR_DBG ("find_walk: found match outer %d dot1ad %d " "inner-dot1q %d: interface %U", sw->sub.eth.outer_vlan_id, sw->sub.eth.flags.dot1ad, sw->sub.eth.inner_vlan_id, format_vnet_sw_if_index_name, vnet_get_main (), sw->sw_if_index); match->matched_sw_if_index = sw->sw_if_index; return WALK_STOP; } return WALK_CONTINUE; } /* Return the index of the sub-int on the phy that has the given vlan and * proto, */ static index_t lcp_itf_pair_find_by_outer_vlan (u32 sup_if_index, u16 vlan, bool dot1ad) { lcp_itf_match_t match; const vnet_hw_interface_t *hw; match.vlan = vlan; match.dot1ad = dot1ad; match.matched_sw_if_index = INDEX_INVALID; hw = vnet_get_sup_hw_interface (vnet_get_main (), sup_if_index); vnet_hw_interface_walk_sw (vnet_get_main (), hw->hw_if_index, lcp_itf_pair_find_walk, &match); if (match.matched_sw_if_index >= vec_len (lip_db_by_phy)) return INDEX_INVALID; return lip_db_by_phy[match.matched_sw_if_index]; } static clib_error_t *lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags); int lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name, lip_host_type_t host_if_type, u8 *ns, u32 *host_sw_if_indexp) { vlib_main_t *vm; vnet_main_t *vnm; u32 vif_index = 0, host_sw_if_index = ~0; const vnet_sw_interface_t *sw; const vnet_hw_interface_t *hw; const lcp_itf_pair_t *lip; index_t lipi; lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index); if (lipi != INDEX_INVALID) { LCP_ITF_PAIR_ERR ("pair_create: already created"); return VNET_API_ERROR_VALUE_EXIST; } if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index)) { LCP_ITF_PAIR_ERR ("pair_create: invalid phy index %u", phy_sw_if_index); return VNET_API_ERROR_INVALID_SW_IF_INDEX; } if (!lcp_validate_if_name (host_if_name)) { LCP_ITF_PAIR_ERR ("pair_create: invalid host-if-name '%s'", host_if_name); return VNET_API_ERROR_INVALID_ARGUMENT; } vnm = vnet_get_main (); sw = vnet_get_sw_interface (vnm, phy_sw_if_index); hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index); if (!sw || !hw) { LCP_ITF_PAIR_ERR ("pair_create: invalid interface"); return VNET_API_ERROR_INVALID_SW_IF_INDEX; } if (hw->hw_class_index != ethernet_hw_interface_class.index && host_if_type == LCP_ITF_HOST_TAP) { LCP_ITF_PAIR_ERR ( "pair_create: don't create TAP for non-eth interface; use tun"); return VNET_API_ERROR_INVALID_ARGUMENT; } /* * Use interface-specific netns if supplied. * Otherwise, use netns if defined, otherwise use the OS default. */ if (ns == 0 || ns[0] == 0) ns = lcp_get_default_ns (); /* sub interfaces do not need a tap created */ if (vnet_sw_interface_is_sub (vnm, phy_sw_if_index)) { index_t parent_if_index; int orig_ns_fd, ns_fd; clib_error_t *err; u16 outer_vlan, inner_vlan; u16 outer_proto, inner_proto; u16 vlan, proto; u32 parent_vif_index; err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index); if (err) { LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a " "sub-interface without exact-match set"); return VNET_API_ERROR_INVALID_ARGUMENT; } outer_vlan = sw->sub.eth.outer_vlan_id; inner_vlan = sw->sub.eth.inner_vlan_id; outer_proto = inner_proto = ETH_P_8021Q; if (1 == sw->sub.eth.flags.dot1ad) outer_proto = ETH_P_8021AD; LCP_ITF_PAIR_INFO ("pair_create: subif: dot1%s outer %d inner %d on %U", sw->sub.eth.flags.dot1ad ? "ad" : "q", outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm, hw->sw_if_index); parent_if_index = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index); if (INDEX_INVALID == parent_if_index) { LCP_ITF_PAIR_ERR ("pair_create: can't find LCP for %U", format_vnet_sw_if_index_name, vnet_get_main (), sw->sup_sw_if_index); return VNET_API_ERROR_INVALID_SW_IF_INDEX; } lip = lcp_itf_pair_get (parent_if_index); if (!lip) { LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a " "sub-interface without an LCP on the parent"); return VNET_API_ERROR_INVALID_ARGUMENT; } LCP_ITF_PAIR_DBG ("pair_create: parent %U", format_lcp_itf_pair, lip); parent_vif_index = lip->lip_vif_index; /* * see if the requested host interface has already been created */ orig_ns_fd = ns_fd = -1; err = NULL; if (ns && ns[0] != 0) { orig_ns_fd = clib_netns_open (NULL /* self */); ns_fd = clib_netns_open (ns); if (orig_ns_fd == -1 || ns_fd == -1) goto socket_close; clib_setns (ns_fd); } vif_index = if_nametoindex ((const char *) host_if_name); if (!vif_index) { /* * no existing host interface, create it now */ /* * Find the parent tap: * - if this is an outer VLAN, use the pair from the parent phy * - if this is an inner VLAN, find the pair from the outer sub-int, * which must exist. */ if (inner_vlan) { index_t linux_parent_if_index; const lcp_itf_pair_t *llip; vlan = inner_vlan; proto = inner_proto; linux_parent_if_index = lcp_itf_pair_find_by_outer_vlan ( hw->sw_if_index, sw->sub.eth.outer_vlan_id, sw->sub.eth.flags.dot1ad); if (INDEX_INVALID == linux_parent_if_index || !(llip = lcp_itf_pair_get (linux_parent_if_index))) { LCP_ITF_PAIR_ERR ( "pair_create: can't find LCP for outer vlan %d " "proto %s on %U", outer_vlan, outer_proto == ETH_P_8021AD ? "dot1ad" : "dot1q", format_vnet_sw_if_index_name, vnm, hw->sw_if_index); err = clib_error_return (0, "parent pair not found"); goto socket_close; } LCP_ITF_PAIR_DBG ("pair_create: linux parent %U", format_lcp_itf_pair, llip); parent_vif_index = llip->lip_vif_index; } else { vlan = outer_vlan; proto = outer_proto; } err = lcp_netlink_add_link_vlan (parent_vif_index, vlan, proto, (const char *) host_if_name); if (err != 0) { LCP_ITF_PAIR_ERR ("pair_create: cannot create link " "outer(proto:0x%04x,vlan:%u).inner(proto:0x%" "04x,vlan:%u) name:'%s'", outer_proto, outer_vlan, inner_proto, inner_vlan, host_if_name); } if (!err) vif_index = if_nametoindex ((char *) host_if_name); } /* * create a sub-interface on the tap */ if (!err && vnet_create_sub_interface (lip->lip_host_sw_if_index, sw->sub.id, sw->sub.eth.raw_flags, inner_vlan, outer_vlan, &host_sw_if_index)) { LCP_ITF_PAIR_ERR ( "pair_create: failed to create tap subint: %d.%d on %U", outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm, lip->lip_host_sw_if_index); err = clib_error_return ( 0, "failed to create tap subint: %d.%d. on %U", outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm, lip->lip_host_sw_if_index); } socket_close: if (orig_ns_fd != -1) { clib_setns (orig_ns_fd); close (orig_ns_fd); } if (ns_fd != -1) close (ns_fd); if (err) return VNET_API_ERROR_INVALID_ARGUMENT; } else { tap_create_if_args_t args = { .num_rx_queues = clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)), .num_tx_queues = clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)), .id = ~0, .auto_id_offset = 4096, .sw_if_index = ~0, .rx_ring_sz = 256, .tx_ring_sz = 256, .host_if_name = host_if_name, .host_namespace = 0, .rv = 0, .error = NULL, }; ethernet_interface_t *ei; u32 host_sw_mtu_size; if (host_if_type == LCP_ITF_HOST_TUN) args.tap_flags |= TAP_FLAG_TUN; else { ei = pool_elt_at_index (ethernet_main.interfaces, hw->hw_instance); mac_address_copy (&args.host_mac_addr, &ei->address.mac); } /* * The TAP interface does copy forward the host MTU based on the VPP * interface's L3 MTU, but it should also ensure that the VPP tap * interface has an MTU that is greater-or-equal to those. Considering * users can set the interfaces at runtime (set interface mtu packet ...) * ensure that the tap MTU is large enough, taking the VPP interface L3 * if it's set, and otherwise a sensible default. */ host_sw_mtu_size = sw->mtu[VNET_MTU_L3]; if (host_sw_mtu_size) { args.host_mtu_set = 1; args.host_mtu_size = host_sw_mtu_size; } else host_sw_mtu_size = ETHERNET_MAX_PACKET_BYTES; if (ns && ns[0] != 0) args.host_namespace = ns; vm = vlib_get_main (); tap_create_if (vm, &args); if (args.rv < 0) { LCP_ITF_PAIR_ERR ("pair_create: could not create tap, retval:%d", args.rv); clib_error_free (args.error); return args.rv; } vnet_sw_interface_set_mtu (vnm, args.sw_if_index, host_sw_mtu_size); /* * get the hw and ethernet of the tap */ hw = vnet_get_sup_hw_interface (vnm, args.sw_if_index); virtio_main_t *mm = &virtio_main; virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); /* * Leave the TAP permanently up on the VPP side. * This TAP will be shared by many sub-interface. * Therefore we can't use it to manage admin state. * force the tap in promiscuous mode. */ if (host_if_type == LCP_ITF_HOST_TAP) { ei = pool_elt_at_index (ethernet_main.interfaces, hw->hw_instance); ei->flags |= ETHERNET_INTERFACE_FLAG_STATUS_L3; } vif_index = vif->ifindex; host_sw_if_index = args.sw_if_index; } if (!vif_index) { LCP_ITF_PAIR_INFO ("failed pair add (no vif index): {%U, %U, %s}", format_vnet_sw_if_index_name, vnet_get_main (), phy_sw_if_index, format_vnet_sw_if_index_name, vnet_get_main (), host_sw_if_index, host_if_name); return -1; } LCP_ITF_PAIR_INFO ("pair create: {%U, %U, %s}", format_vnet_sw_if_index_name, vnet_get_main (), phy_sw_if_index, format_vnet_sw_if_index_name, vnet_get_main (), host_sw_if_index, host_if_name); lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index, host_if_type, ns); /* * Copy the link state from VPP into the host side. * The TAP is shared by many interfaces, always keep it up. * This controls whether the host can RX/TX. */ sw = vnet_get_sw_interface (vnm, phy_sw_if_index); lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index)); LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u", format_lcp_itf_pair, lip, sw->flags, hw->flags); vnet_sw_interface_admin_up (vnm, host_sw_if_index); lcp_itf_set_link_state (lip, sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP); /* * Reflect current link state and link speed of the hardware interface on the * TAP interface. */ if (host_if_type == LCP_ITF_HOST_TAP && !vnet_sw_interface_is_sub (vnm, phy_sw_if_index)) { hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index); lcp_itf_pair_link_up_down (vnm, hw->hw_if_index, hw->flags); } if (host_sw_if_indexp) *host_sw_if_indexp = host_sw_if_index; return 0; } static walk_rc_t lcp_itf_pair_walk_mark (index_t lipi, void *ctx) { lcp_itf_pair_t *lip; lip = lcp_itf_pair_get (lipi); lip->lip_flags |= LIP_FLAG_STALE; return (WALK_CONTINUE); } int lcp_itf_pair_replace_begin (void) { lcp_itf_pair_walk (lcp_itf_pair_walk_mark, NULL); return (0); } typedef struct lcp_itf_pair_sweep_ctx_t_ { index_t *indicies; } lcp_itf_pair_sweep_ctx_t; static walk_rc_t lcp_itf_pair_walk_sweep (index_t lipi, void *arg) { lcp_itf_pair_sweep_ctx_t *ctx = arg; lcp_itf_pair_t *lip; lip = lcp_itf_pair_get (lipi); if (lip->lip_flags & LIP_FLAG_STALE) vec_add1 (ctx->indicies, lipi); return (WALK_CONTINUE); } int lcp_itf_pair_replace_end (void) { lcp_itf_pair_sweep_ctx_t ctx = { .indicies = NULL, }; index_t *lipi; lcp_itf_pair_walk (lcp_itf_pair_walk_sweep, &ctx); vec_foreach (lipi, ctx.indicies) lcp_itf_pair_delete_by_index (*lipi); vec_free (ctx.indicies); return (0); } static clib_error_t * lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags) { vnet_hw_interface_t *hi; vnet_sw_interface_t *si; index_t lipi; lcp_itf_pair_t *lip; hi = vnet_get_hw_interface_or_null (vnm, hw_if_index); if (!hi) return 0; lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index); if (lipi == INDEX_INVALID) return 0; lip = lcp_itf_pair_get (lipi); si = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index); if (!si) return 0; if (!lcp_main.test_mode) { tap_set_carrier (si->hw_if_index, (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)); if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP && hi->link_speed != UINT32_MAX) { tap_set_speed (si->hw_if_index, hi->link_speed / 1000); } } return 0; } VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down); static clib_error_t * lcp_interface_init (vlib_main_t *vm) { vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp"); /* punt IKE */ vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0], "linux-cp-punt"); vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP6_SPI_UDP_0], "linux-cp-punt"); /* punt all unknown ports */ udp_punt_unknown (vm, 0, 1); udp_punt_unknown (vm, 1, 1); tcp_punt_unknown (vm, 0, 1); tcp_punt_unknown (vm, 1, 1); lcp_itf_pair_logger = vlib_log_register_class ("linux-cp", "itf"); return NULL; } VLIB_INIT_FUNCTION (lcp_interface_init) = { .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"), }; /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */