From 57bdb26ba42c3fc6da601d007b27fa5c3f09dd96 Mon Sep 17 00:00:00 2001 From: arikachen Date: Fri, 12 Nov 2021 06:40:55 +0000 Subject: af_xdp: introduce to netns api In some situations, we deploy vpp once per host and handle packets in containers, so we use xdp to redirect the flow. Type: improvement Signed-off-by: arikachen Change-Id: Iab42d6a0abb2b330a284d519018a90aff2fa4371 --- src/plugins/af_xdp/af_xdp.api | 46 ++++++++++++++++++++ src/plugins/af_xdp/af_xdp.h | 3 ++ src/plugins/af_xdp/api.c | 32 ++++++++++++++ src/plugins/af_xdp/cli.c | 4 +- src/plugins/af_xdp/device.c | 97 +++++++++++++++++++++++++++++++++++++++---- src/plugins/af_xdp/test_api.c | 53 +++++++++++++++++++++++ src/plugins/af_xdp/unformat.c | 2 + 7 files changed, 227 insertions(+), 10 deletions(-) diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api index c6716123703..9fe6bc04939 100644 --- a/src/plugins/af_xdp/af_xdp.api +++ b/src/plugins/af_xdp/af_xdp.api @@ -60,6 +60,38 @@ define af_xdp_create option status="in_progress"; }; +/** \brief + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param host_if - Linux netdev interface name + @param name - new af_xdp interface name (optional) + @param rxq_num - number of receive queues. 
65535 can be used as special value to request all available queues (optional) + @param rxq_size - receive queue size (optional) + @param txq_size - transmit queue size (optional) + @param mode - operation mode (optional) + @param flags - flags (optional) + @param prog - eBPF program path (optional) + @param namespace - netns of nic (optional) +*/ + +define af_xdp_create_v2 +{ + u32 client_index; + u32 context; + + string host_if[64]; + string name[64]; + u16 rxq_num [default=1]; + u16 rxq_size [default=0]; + u16 txq_size [default=0]; + vl_api_af_xdp_mode_t mode [default=0]; + vl_api_af_xdp_flag_t flags [default=0]; + string prog[256]; + string namespace[64]; + option vat_help = " [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues ] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]"; + option status="in_progress"; +}; + /** \brief @param context - sender context, to match reply w/ request @param retval - return value for request @@ -74,6 +106,20 @@ define af_xdp_create_reply option status="in_progress"; }; +/** \brief + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param sw_if_index - software index for the new af_xdp interface +*/ + +define af_xdp_create_v2_reply +{ + u32 context; + i32 retval; + vl_api_interface_index_t sw_if_index; + option status="in_progress"; +}; + /** \brief @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/af_xdp/af_xdp.h b/src/plugins/af_xdp/af_xdp.h index 825a3fb29fd..85c8fee545d 100644 --- a/src/plugins/af_xdp/af_xdp.h +++ b/src/plugins/af_xdp/af_xdp.h @@ -113,6 +113,8 @@ typedef struct u8 rxq_num; + char *netns; + struct xsk_umem **umem; struct xsk_socket **xsk; @@ -149,6 +151,7 @@ typedef struct char *linux_ifname; char *name; char *prog; + char *netns; af_xdp_mode_t mode; af_xdp_create_flag_t flags; u32 rxq_size; diff --git 
a/src/plugins/af_xdp/api.c b/src/plugins/af_xdp/api.c index 1864c4c2ee9..45dab284e4c 100644 --- a/src/plugins/af_xdp/api.c +++ b/src/plugins/af_xdp/api.c @@ -86,6 +86,38 @@ vl_api_af_xdp_create_t_handler (vl_api_af_xdp_create_t * mp) /* *INDENT-ON* */ } +static void +vl_api_af_xdp_create_v2_t_handler (vl_api_af_xdp_create_v2_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + af_xdp_main_t *rm = &af_xdp_main; + vl_api_af_xdp_create_v2_reply_t *rmp; + af_xdp_create_if_args_t args; + int rv; + + clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t)); + + args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0; + args.name = mp->name[0] ? (char *) mp->name : 0; + args.prog = mp->prog[0] ? (char *) mp->prog : 0; + args.netns = mp->namespace[0] ? (char *) mp->namespace : 0; + args.mode = af_xdp_api_mode (mp->mode); + args.flags = af_xdp_api_flags (mp->flags); + args.rxq_size = ntohs (mp->rxq_size); + args.txq_size = ntohs (mp->txq_size); + args.rxq_num = ntohs (mp->rxq_num); + + af_xdp_create_if (vm, &args); + rv = args.rv; + + /* clang-format off */ + REPLY_MACRO2 (VL_API_AF_XDP_CREATE_V2_REPLY + rm->msg_id_base, + ({ + rmp->sw_if_index = ntohl (args.sw_if_index); + })); + /* clang-format on */ +} + static void vl_api_af_xdp_delete_t_handler (vl_api_af_xdp_delete_t * mp) { diff --git a/src/plugins/af_xdp/cli.c b/src/plugins/af_xdp/cli.c index 2f3deffaaee..660725813aa 100644 --- a/src/plugins/af_xdp/cli.c +++ b/src/plugins/af_xdp/cli.c @@ -40,6 +40,8 @@ af_xdp_create_command_fn (vlib_main_t * vm, unformat_input_t * input, vec_free (args.linux_ifname); vec_free (args.name); + vec_free (args.prog); + vec_free (args.netns); return args.error; } @@ -50,7 +52,7 @@ VLIB_CLI_COMMAND (af_xdp_create_command, static) = { .short_help = "create interface af_xdp [name ifname] " "[rx-queue-size size] [tx-queue-size size] [num-rx-queues ] " - "[prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]", + "[prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]", 
.function = af_xdp_create_command_fn, }; /* *INDENT-ON* */ diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c index 8365a716e97..0b39280c569 100644 --- a/src/plugins/af_xdp/device.c +++ b/src/plugins/af_xdp/device.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -89,6 +90,48 @@ af_xdp_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) return ~0; } +int +af_xdp_enter_netns (char *netns, int *fds) +{ + *fds = *(fds + 1) = -1; + if (netns != NULL) + { + *fds = clib_netns_open (NULL /* self */); + if ((*(fds + 1) = clib_netns_open ((u8 *) netns)) == -1) + return VNET_API_ERROR_SYSCALL_ERROR_8; + if (clib_setns (*(fds + 1)) == -1) + return VNET_API_ERROR_SYSCALL_ERROR_9; + } + return 0; +} + +void +af_xdp_cleanup_netns (int *fds) +{ + if (*fds != -1) + close (*fds); + + if (*(fds + 1) != -1) + close (*(fds + 1)); + + *fds = *(fds + 1) = -1; +} + +int +af_xdp_exit_netns (char *netns, int *fds) +{ + int ret = 0; + if (netns != NULL) + { + if (*fds != -1) + ret = clib_setns (*fds); + + af_xdp_cleanup_netns (fds); + } + + return ret; +} + void af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad) { @@ -118,7 +161,11 @@ af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad) if (ad->bpf_obj) { + int ns_fds[2]; + af_xdp_enter_netns (ad->netns, ns_fds); bpf_set_link_xdp_fd (ad->linux_ifindex, -1, 0); + af_xdp_exit_netns (ad->netns, ns_fds); + bpf_object__unload (ad->bpf_obj); } @@ -127,6 +174,9 @@ af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad) vec_free (ad->buffer_template); vec_free (ad->rxqs); vec_free (ad->txqs); + vec_free (ad->name); + vec_free (ad->linux_ifname); + vec_free (ad->netns); clib_error_free (ad->error); pool_put (axm->devices, ad); } @@ -392,7 +442,8 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) vnet_sw_interface_t *sw; vnet_hw_interface_t *hw; int rxq_num, txq_num, q_num; - int i; + int ns_fds[2]; + int i, ret; args->rxq_size = args->rxq_size 
? args->rxq_size : 2 * VLIB_FRAME_SIZE; args->txq_size = args->txq_size ? args->txq_size : 2 * VLIB_FRAME_SIZE; @@ -417,13 +468,22 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) goto err0; } + ret = af_xdp_enter_netns (args->netns, ns_fds); + if (ret) + { + args->rv = ret; + args->error = clib_error_return (0, "enter netns %s failed, ret %d", + args->netns, args->rv); + goto err0; + } + af_xdp_get_q_count (args->linux_ifname, &rxq_num, &txq_num); if (args->rxq_num > rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num) { args->rv = VNET_API_ERROR_INVALID_VALUE; args->error = clib_error_create ("too many rxq requested (%d > %d)", args->rxq_num, rxq_num); - goto err0; + goto err1; } rxq_num = clib_min (rxq_num, args->rxq_num); txq_num = clib_min (txq_num, tm->n_vlib_mains); @@ -437,8 +497,10 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) ad->linux_ifname = (char *) format (0, "%s", args->linux_ifname); vec_validate (ad->linux_ifname, IFNAMSIZ - 1); /* libbpf expects ifname to be at least IFNAMSIZ */ + ad->netns = (char *) format (0, "%s", args->netns); + if (args->prog && af_xdp_load_program (args, ad)) - goto err1; + goto err2; q_num = clib_max (rxq_num, txq_num); ad->rxq_num = rxq_num; @@ -476,7 +538,7 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num)) { ad->rxq_num = ad->txq_num = 0; - goto err1; /* failed creating requested rxq: fatal error, bailing + goto err2; /* failed creating requested rxq: fatal error, bailing out */ } @@ -487,6 +549,13 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) } } + if (af_xdp_exit_netns (args->netns, ns_fds)) + { + args->rv = VNET_API_ERROR_SYSCALL_ERROR_10; + args->error = clib_error_return (0, "exit netns failed"); + goto err2; + } + if (ad->txq_num < tm->n_vlib_mains) { /* initialize lock for shared txq */ @@ -501,8 +570,16 @@ af_xdp_create_if (vlib_main_t * vm, 
af_xdp_create_if_args_t * args) af_xdp_get_numa (ad->linux_ifname)); if (!args->name) - ad->name = - (char *) format (0, "%s/%d", ad->linux_ifname, ad->dev_instance); + { + char *ifname = ad->linux_ifname; + if (args->netns != NULL && strncmp (args->netns, "pid:", 4) == 0) + { + ad->name = + (char *) format (0, "%s/%u", ifname, atoi (args->netns + 4)); + } + else + ad->name = (char *) format (0, "%s/%d", ifname, ad->dev_instance); + } else ad->name = (char *) format (0, "%s", args->name); @@ -516,7 +593,7 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = clib_error_return (0, "ethernet_register_interface() failed"); - goto err1; + goto err2; } sw = vnet_get_hw_sw_interface (vnm, ad->hw_if_index); @@ -543,7 +620,7 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index, rxq->file_index); if (af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING)) - goto err1; + goto err2; } vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index); @@ -558,8 +635,10 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) return; -err1: +err2: af_xdp_delete_if (vm, ad); +err1: + af_xdp_cleanup_netns (ns_fds); err0: vlib_log_err (am->log_class, "%U", format_clib_error, args->error); } diff --git a/src/plugins/af_xdp/test_api.c b/src/plugins/af_xdp/test_api.c index 6dffa29bdd1..46ba6f100ee 100644 --- a/src/plugins/af_xdp/test_api.c +++ b/src/plugins/af_xdp/test_api.c @@ -91,6 +91,41 @@ api_af_xdp_create (vat_main_t * vam) return ret; } +/* af_xdp create v2 API */ +static int +api_af_xdp_create_v2 (vat_main_t *vam) +{ + vl_api_af_xdp_create_v2_t *mp; + af_xdp_create_if_args_t args; + int ret; + + if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args)) + { + clib_warning ("unknown input `%U'", format_unformat_error, vam->input); + return -99; + } + + M (AF_XDP_CREATE, mp); + + snprintf ((char *) 
mp->host_if, sizeof (mp->host_if), "%s", + args.linux_ifname ?: ""); + snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: ""); + snprintf ((char *) mp->namespace, sizeof (mp->namespace), "%s", + args.netns ?: ""); + mp->rxq_num = clib_host_to_net_u16 (args.rxq_num); + mp->rxq_size = clib_host_to_net_u16 (args.rxq_size); + mp->txq_size = clib_host_to_net_u16 (args.txq_size); + mp->mode = api_af_xdp_mode (args.mode); + if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK) + mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK; + snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: ""); + + S (mp); + W (ret); + + return ret; +} + /* af_xdp-create reply handler */ static void vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp) @@ -109,6 +144,24 @@ vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp) vam->regenerate_interface_table = 1; } +/* af_xdp-create v2 reply handler */ +static void +vl_api_af_xdp_create_v2_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp) +{ + vat_main_t *vam = af_xdp_test_main.vat_main; + i32 retval = ntohl (mp->retval); + + if (retval == 0) + { + fformat (vam->ofp, "created af_xdp with sw_if_index %d\n", + ntohl (mp->sw_if_index)); + } + + vam->retval = retval; + vam->result_ready = 1; + vam->regenerate_interface_table = 1; +} + /* af_xdp delete API */ static int api_af_xdp_delete (vat_main_t * vam) diff --git a/src/plugins/af_xdp/unformat.c b/src/plugins/af_xdp/unformat.c index bb4c3048d23..8c0482d83ff 100644 --- a/src/plugins/af_xdp/unformat.c +++ b/src/plugins/af_xdp/unformat.c @@ -46,6 +46,8 @@ unformat_af_xdp_create_if_args (unformat_input_t * input, va_list * vargs) ; else if (unformat (line_input, "prog %s", &args->prog)) ; + else if (unformat (line_input, "netns %s", &args->netns)) + ; else if (unformat (line_input, "no-zero-copy")) args->mode = AF_XDP_MODE_COPY; else if (unformat (line_input, "zero-copy")) -- cgit 1.2.3-korg