diff options
Diffstat (limited to 'src/plugins')
199 files changed, 23149 insertions, 4203 deletions
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index e52e82fcf28..fbd94761027 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -2845,6 +2845,17 @@ acl_set_aclplugin_interface_fn (vlib_main_t * vm, } \ } while (0) +#define vec_validate_macip_acl_rules(v, idx) \ + do \ + { \ + if (vec_len (v) < idx + 1) \ + { \ + vec_validate (v, idx); \ + v[idx].is_permit = 0x1; \ + } \ + } \ + while (0) + static clib_error_t * acl_set_aclplugin_acl_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -3062,6 +3073,160 @@ acl_show_aclplugin_macip_interface_fn (vlib_main_t * vm, return error; } +static clib_error_t * +acl_set_aclplugin_macip_acl_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vl_api_macip_acl_rule_t *rules = 0; + int rule_idx = 0; + int rv = 0; + u32 acl_index = ~0; + u32 action = 0; + u8 src_mac[6]; + u8 *tag = 0; + u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + ip_prefix_t src_ip; + + unformat_input_t _line_input, *line_input = &_line_input; + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + vec_validate_macip_acl_rules (rules, rule_idx); + if (unformat (line_input, "permit")) + { + rules[rule_idx].is_permit = 1; + } + else if (unformat (line_input, "deny")) + { + rules[rule_idx].is_permit = 0; + } + else if (unformat (line_input, "action %d", &action)) + { + rules[rule_idx].is_permit = action; + } + else if (unformat (line_input, "ip %U", unformat_ip_prefix, &src_ip)) + { + ip_prefix_encode2 (&src_ip, &rules[rule_idx].src_prefix); + } + else if (unformat (line_input, "src")) + { + /* Everything in MACIP is "source" but allow this verbosity */ + } + else if (unformat (line_input, "mac %U", unformat_mac_address, &src_mac)) + { + memcpy (rules[rule_idx].src_mac, &src_mac, + sizeof (rules[rule_idx].src_mac)); + memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, + 
sizeof (rules[rule_idx].src_mac_mask)); + } + else if (unformat (line_input, "mask %U", unformat_mac_address, + &src_mac)) + { + memcpy (rules[rule_idx].src_mac_mask, &src_mac, + sizeof (rules[rule_idx].src_mac_mask)); + } + else if (unformat (line_input, "tag %s", &tag)) + ; + else if (unformat (line_input, ",")) + { + rule_idx++; + } + else + break; + } + + if (!tag) + vec_add (tag, "cli", 4); + + rv = macip_acl_add_list (vec_len (rules), rules, &acl_index, tag); + vec_free (rules); + vec_free (tag); + + unformat_free (line_input); + if (rv) + return clib_error_return (0, "Failed to set MACIP ACL rule"); + + vlib_cli_output (vm, "ACL index:%u", acl_index); + return 0; +} + +static clib_error_t * +acl_macip_delete_aclplugin_acl_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + int rv; + u32 macip_acl_index = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "index %u", &macip_acl_index)) + { + /* operate on this acl index (which must exist) */ + } + else + break; + } + + if (macip_acl_index == ~0) + return (clib_error_return (0, "invalid acl index")); + + rv = macip_acl_del_list (macip_acl_index); + + unformat_free (line_input); + if (rv) + return (clib_error_return (0, "Failed to delete ACL index")); + + vlib_cli_output (vm, "Deleted ACL index:%u", macip_acl_index); + return 0; +} + +static clib_error_t * +acl_set_aclplugin_macip_interface_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + int rv = 0; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u32 is_add = 1; + unformat_input_t _line_input, *line_input = &_line_input; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", 
unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + ; + else if (unformat (line_input, "add")) + is_add = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "acl %u", &acl_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return (clib_error_return (0, "invalid interface")); + + if (acl_index == ~0) + return (clib_error_return (0, "invalid acl index")); + + rv = macip_acl_interface_add_del_acl (sw_if_index, is_add, acl_index); + + if (rv) + return (clib_error_return (0, "Failed to add acl rule to interface")); + + return 0; +} + static void acl_plugin_show_acl (acl_main_t * am, u32 acl_index) { @@ -3632,6 +3797,38 @@ VLIB_CLI_COMMAND (aclplugin_set_acl_command, static) = { }; /*? + * Create an MACIP Access Control List (ACL) + * A MACIP ACL is used to add L2-L3 ACL rules. + * A MACIP ACL can be added similar to ACL rules by using following command : + * + * @cliexcmd{set acl-plugin macip acl <permit|deny|action N> + * ip <PREFIX> mac <MAC> mask <int> [tag FOO] {use comma + * separated list for multiple rules}} + ?*/ +VLIB_CLI_COMMAND (aclplugin_macip_set_acl_command, static) = { + .path = "set acl-plugin macip acl ", + .short_help = "set acl-plugin macip acl <permit|deny|action N> " + "ip <PREFIX> mac <MAC> mask <int> [tag FOO] {use comma " + "separated list for multiple rules}", + .function = acl_set_aclplugin_macip_acl_fn, +}; + +/*? + * [un]Apply a MACIP ACL to an interface. + * The ACL being applied must already exist. + * + * @cliexpar + * <b><em> set acl-plugin macip interface <interface> <acl INDEX> [del] + </b></em> + * @cliexend + ?*/ +VLIB_CLI_COMMAND (aclplugin_macip_set_interface_command, static) = { + .path = "set acl-plugin macip interface", + .short_help = "set acl-plugin macip interface <interface> <acl INDEX> [del]", + .function = acl_set_aclplugin_macip_interface_fn, +}; + +/*? 
* Delete an Access Control List (ACL) * Removes an ACL at the specified index, which must exist but not in use by * any interface. @@ -3644,6 +3841,20 @@ VLIB_CLI_COMMAND (aclplugin_delete_acl_command, static) = { .function = acl_delete_aclplugin_acl_fn, }; +/*? + * Delete a MACIP Access Control List (ACL) + * Removes an MACIP ACL at the specified index, which must exist but not in + * use by + * any interface. + * + * @cliexcmd{delete acl-plugin macip acl index <idx>} + ?*/ +VLIB_CLI_COMMAND (aclplugin_macip_delete_acl_command, static) = { + .path = "delete acl-plugin macip acl", + .short_help = "delete acl-plugin macip acl index <idx>", + .function = acl_macip_delete_aclplugin_acl_fn, +}; + static clib_error_t * acl_plugin_config (vlib_main_t * vm, unformat_input_t * input) { diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c index 8404689dc06..98803a916cb 100644 --- a/src/plugins/acl/acl_test.c +++ b/src/plugins/acl/acl_test.c @@ -114,7 +114,7 @@ static void vl_api_acl_interface_list_details_t_handler int i; vat_main_t * vam = acl_test_main.vat_main; u8 *out = 0; - vl_api_acl_interface_list_details_t_endian(mp); + vl_api_acl_interface_list_details_t_endian (mp, 0 /* from network */); out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input); out = format(out, " input "); for(i=0; i<mp->count; i++) { @@ -141,7 +141,8 @@ static void vl_api_acl_interface_etype_whitelist_details_t_handler int i; vat_main_t * vam = acl_test_main.vat_main; u8 *out = 0; - vl_api_acl_interface_etype_whitelist_details_t_endian(mp); + vl_api_acl_interface_etype_whitelist_details_t_endian ( + mp, 0 /* from network */); out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input); out = format(out, " input "); for(i=0; i<mp->count; i++) { @@ -173,15 +174,15 @@ vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a) inet_ntop(af, &a->src_prefix.address.un, (void *)src, 
sizeof(src)); inet_ntop(af, &a->dst_prefix.address.un, (void *)dst, sizeof(dst)); - out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d mask %d", - a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit, - src, a->src_prefix.len, - dst, a->dst_prefix.len, - a->proto, - a->srcport_or_icmptype_first, a->srcport_or_icmptype_last, - a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last, - a->tcp_flags_value, a->tcp_flags_mask); - return(out); + out = format (out, + "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport " + "%d-%d tcpflags %d mask %d", + a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit, src, + a->src_prefix.len, dst, a->dst_prefix.len, a->proto, + a->srcport_or_icmptype_first, a->srcport_or_icmptype_last, + a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last, + a->tcp_flags_value, a->tcp_flags_mask); + return (out); } @@ -191,9 +192,10 @@ static void vl_api_acl_details_t_handler { int i; vat_main_t * vam = acl_test_main.vat_main; - vl_api_acl_details_t_endian(mp); - u8 *out = 0; - out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + vl_api_acl_details_t_endian (mp, 0 /* from network */); + u8 *out = 0; + out = format (0, "acl_index: %d, count: %d\n tag {%s}\n", + mp->acl_index, mp->count, mp->tag); for(i=0; i<mp->count; i++) { out = format(out, " "); out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]); @@ -225,8 +227,9 @@ static void vl_api_macip_acl_details_t_handler { int i; vat_main_t * vam = acl_test_main.vat_main; - vl_api_macip_acl_details_t_endian(mp); - u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + vl_api_macip_acl_details_t_endian (mp, 0 /* from network */); + u8 *out = format (0, "MACIP acl_index: %d, count: %d\n tag {%s}\n", + mp->acl_index, mp->count, mp->tag); for(i=0; i<mp->count; i++) { out = format(out, " "); out = vl_api_macip_acl_rule_t_pretty_format(out, 
&mp->r[i]); diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api index 4c2908e2037..20aa20b4d7d 100644 --- a/src/plugins/af_xdp/af_xdp.api +++ b/src/plugins/af_xdp/af_xdp.api @@ -33,96 +33,6 @@ enumflag af_xdp_flag : u8 /** \brief @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param host_if - Linux netdev interface name - @param name - new af_xdp interface name (optional) - @param rxq_num - number of receive queues. 65535 can be used as special value to request all available queues (optional) - @param rxq_size - receive queue size (optional) - @param txq_size - transmit queue size (optional) - @param mode - operation mode (optional) - @param flags - flags (optional) - @param prog - eBPF program path (optional) -*/ - -define af_xdp_create -{ - u32 client_index; - u32 context; - - string host_if[64]; - string name[64]; - u16 rxq_num [default=1]; - u16 rxq_size [default=0]; - u16 txq_size [default=0]; - vl_api_af_xdp_mode_t mode [default=0]; - vl_api_af_xdp_flag_t flags [default=0]; - string prog[256]; - option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]"; - option deprecated; -}; - -/** \brief - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param host_if - Linux netdev interface name - @param name - new af_xdp interface name (optional) - @param rxq_num - number of receive queues. 
65535 can be used as special value to request all available queues (optional) - @param rxq_size - receive queue size (optional) - @param txq_size - transmit queue size (optional) - @param mode - operation mode (optional) - @param flags - flags (optional) - @param prog - eBPF program path (optional) - @param namespace - netns of nic (optional) -*/ - -define af_xdp_create_v2 -{ - u32 client_index; - u32 context; - - string host_if[64]; - string name[64]; - u16 rxq_num [default=1]; - u16 rxq_size [default=0]; - u16 txq_size [default=0]; - vl_api_af_xdp_mode_t mode [default=0]; - vl_api_af_xdp_flag_t flags [default=0]; - string prog[256]; - string namespace[64]; - option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]"; - option deprecated; -}; - -/** \brief - @param context - sender context, to match reply w/ request - @param retval - return value for request - @param sw_if_index - software index for the new af_xdp interface -*/ - -define af_xdp_create_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; - option deprecated; -}; - -/** \brief - @param context - sender context, to match reply w/ request - @param retval - return value for request - @param sw_if_index - software index for the new af_xdp interface -*/ - -define af_xdp_create_v2_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; - option deprecated; -}; - -/** \brief - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request @param sw_if_index - interface index */ diff --git a/src/plugins/af_xdp/api.c b/src/plugins/af_xdp/api.c index 3e9a3fe2578..9ead9856ff5 100644 --- a/src/plugins/af_xdp/api.c +++ b/src/plugins/af_xdp/api.c @@ -57,65 +57,6 @@ af_xdp_api_flags (vl_api_af_xdp_flag_t flags) } static void -vl_api_af_xdp_create_t_handler 
(vl_api_af_xdp_create_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - af_xdp_main_t *rm = &af_xdp_main; - vl_api_af_xdp_create_reply_t *rmp; - af_xdp_create_if_args_t args; - int rv; - - clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t)); - - args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0; - args.name = mp->name[0] ? (char *) mp->name : 0; - args.prog = mp->prog[0] ? (char *) mp->prog : 0; - args.mode = af_xdp_api_mode (mp->mode); - args.flags = af_xdp_api_flags (mp->flags); - args.rxq_size = ntohs (mp->rxq_size); - args.txq_size = ntohs (mp->txq_size); - args.rxq_num = ntohs (mp->rxq_num); - - af_xdp_create_if (vm, &args); - rv = args.rv; - - REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY, - ({ rmp->sw_if_index = ntohl (args.sw_if_index); })); -} - -static void -vl_api_af_xdp_create_v2_t_handler (vl_api_af_xdp_create_v2_t *mp) -{ - vlib_main_t *vm = vlib_get_main (); - af_xdp_main_t *rm = &af_xdp_main; - vl_api_af_xdp_create_v2_reply_t *rmp; - af_xdp_create_if_args_t args; - int rv; - - clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t)); - - args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0; - args.name = mp->name[0] ? (char *) mp->name : 0; - args.prog = mp->prog[0] ? (char *) mp->prog : 0; - args.netns = mp->namespace[0] ? 
(char *) mp->namespace : 0; - args.mode = af_xdp_api_mode (mp->mode); - args.flags = af_xdp_api_flags (mp->flags); - args.rxq_size = ntohs (mp->rxq_size); - args.txq_size = ntohs (mp->txq_size); - args.rxq_num = ntohs (mp->rxq_num); - - af_xdp_create_if (vm, &args); - rv = args.rv; - - /* clang-format off */ - REPLY_MACRO2 (VL_API_AF_XDP_CREATE_V2_REPLY, - ({ - rmp->sw_if_index = ntohl (args.sw_if_index); - })); - /* clang-format on */ -} - -static void vl_api_af_xdp_create_v3_t_handler (vl_api_af_xdp_create_v3_t *mp) { vlib_main_t *vm = vlib_get_main (); diff --git a/src/plugins/af_xdp/test_api.c b/src/plugins/af_xdp/test_api.c index 581697e341d..5f622adcb04 100644 --- a/src/plugins/af_xdp/test_api.c +++ b/src/plugins/af_xdp/test_api.c @@ -58,75 +58,7 @@ api_af_xdp_mode (af_xdp_mode_t mode) return ~0; } -/* af_xdp create API */ -static int -api_af_xdp_create (vat_main_t * vam) -{ - vl_api_af_xdp_create_t *mp; - af_xdp_create_if_args_t args; - int ret; - - if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args)) - { - clib_warning ("unknown input `%U'", format_unformat_error, vam->input); - return -99; - } - - M (AF_XDP_CREATE, mp); - - snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", - args.linux_ifname ? : ""); - snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ? : ""); - mp->rxq_num = clib_host_to_net_u16 (args.rxq_num); - mp->rxq_size = clib_host_to_net_u16 (args.rxq_size); - mp->txq_size = clib_host_to_net_u16 (args.txq_size); - mp->mode = api_af_xdp_mode (args.mode); - if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK) - mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK; - snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ? 
: ""); - - S (mp); - W (ret); - - return ret; -} - -/* af_xdp create v2 API */ -static int -api_af_xdp_create_v2 (vat_main_t *vam) -{ - vl_api_af_xdp_create_v2_t *mp; - af_xdp_create_if_args_t args; - int ret; - - if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args)) - { - clib_warning ("unknown input `%U'", format_unformat_error, vam->input); - return -99; - } - - M (AF_XDP_CREATE, mp); - - snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", - args.linux_ifname ?: ""); - snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: ""); - snprintf ((char *) mp->namespace, sizeof (mp->namespace), "%s", - args.netns ?: ""); - mp->rxq_num = clib_host_to_net_u16 (args.rxq_num); - mp->rxq_size = clib_host_to_net_u16 (args.rxq_size); - mp->txq_size = clib_host_to_net_u16 (args.txq_size); - mp->mode = api_af_xdp_mode (args.mode); - if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK) - mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK; - snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: ""); - - S (mp); - W (ret); - - return ret; -} - -/* af_xdp create v2 API */ +/* af_xdp create v3 API */ static int api_af_xdp_create_v3 (vat_main_t *vam) { @@ -140,7 +72,7 @@ api_af_xdp_create_v3 (vat_main_t *vam) return -99; } - M (AF_XDP_CREATE, mp); + M (AF_XDP_CREATE_V3, mp); snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", args.linux_ifname ?: ""); @@ -160,45 +92,9 @@ api_af_xdp_create_v3 (vat_main_t *vam) return ret; } -/* af_xdp-create reply handler */ -static void -vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp) -{ - vat_main_t *vam = af_xdp_test_main.vat_main; - i32 retval = ntohl (mp->retval); - - if (retval == 0) - { - fformat (vam->ofp, "created af_xdp with sw_if_index %d\n", - ntohl (mp->sw_if_index)); - } - - vam->retval = retval; - vam->result_ready = 1; - vam->regenerate_interface_table = 1; -} - -/* af_xdp-create v2 reply handler */ -static void -vl_api_af_xdp_create_v2_reply_t_handler 
(vl_api_af_xdp_create_v2_reply_t *mp) -{ - vat_main_t *vam = af_xdp_test_main.vat_main; - i32 retval = ntohl (mp->retval); - - if (retval == 0) - { - fformat (vam->ofp, "created af_xdp with sw_if_index %d\n", - ntohl (mp->sw_if_index)); - } - - vam->retval = retval; - vam->result_ready = 1; - vam->regenerate_interface_table = 1; -} - /* af_xdp-create v3 reply handler */ static void -vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp) +vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v3_reply_t *mp) { vat_main_t *vam = af_xdp_test_main.vat_main; i32 retval = mp->retval; diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h index f6f79cf0e09..774aac0151b 100644 --- a/src/plugins/avf/avf.h +++ b/src/plugins/avf/avf.h @@ -180,6 +180,7 @@ typedef struct u8 int_mode; u8 buffer_pool_index; u32 queue_index; + u64 total_packets; } avf_rxq_t; typedef struct @@ -198,6 +199,8 @@ typedef struct avf_tx_desc_t *tmp_descs; u32 *tmp_bufs; u32 queue_index; + u64 total_packets; + u64 no_free_tx_count; } avf_txq_t; typedef struct diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c index 1618800c432..98169f0bcfe 100644 --- a/src/plugins/avf/device.c +++ b/src/plugins/avf/device.c @@ -288,6 +288,7 @@ avf_rxq_init (vlib_main_t * vm, avf_device_t * ad, u16 qid, u16 rxq_size) d->qword[0] = vlib_buffer_get_pa (vm, b); d++; } + rxq->total_packets = 0; return 0; } @@ -337,6 +338,9 @@ avf_txq_init (vlib_main_t * vm, avf_device_t * ad, u16 qid, u16 txq_size) vec_validate_aligned (txq->tmp_descs, txq->size, CLIB_CACHE_LINE_BYTES); vec_validate_aligned (txq->tmp_bufs, txq->size, CLIB_CACHE_LINE_BYTES); + txq->total_packets = 0; + txq->no_free_tx_count = 0; + return 0; } diff --git a/src/plugins/avf/format.c b/src/plugins/avf/format.c index 0a153a093d9..436f5b9fbf2 100644 --- a/src/plugins/avf/format.c +++ b/src/plugins/avf/format.c @@ -104,6 +104,7 @@ format_avf_device (u8 * s, va_list * args) u8 *a = 0; avf_rxq_t *rxq = vec_elt_at_index 
(ad->rxqs, 0); avf_txq_t *txq = vec_elt_at_index (ad->txqs, 0); + u32 idx = 0; s = format (s, "rx: queues %u, desc %u (min %u max %u)", ad->n_rx_queues, rxq->size, AVF_QUEUE_SZ_MIN, AVF_QUEUE_SZ_MAX); @@ -114,6 +115,22 @@ format_avf_device (u8 * s, va_list * args) format_avf_device_flags, ad); s = format (s, "\n%Ucapability flags: %U", format_white_space, indent, format_avf_vf_cap_flags, ad->cap_flags); + s = + format (s, "\n%U Rx Queue: Total Packets", format_white_space, indent + 4); + for (idx = 0; idx < ad->n_rx_queues; idx++) + { + rxq = vec_elt_at_index (ad->rxqs, idx); + s = format (s, "\n%U %8u : %llu", format_white_space, indent + 4, idx, + rxq->total_packets); + } + s = format (s, "\n%U Tx Queue: Total Packets\t Total Drops", + format_white_space, indent + 4); + for (idx = 0; idx < ad->n_tx_queues; idx++) + { + txq = vec_elt_at_index (ad->txqs, idx); + s = format (s, "\n%U %8u : %llu\t %llu", format_white_space, indent + 4, + idx, txq->total_packets, txq->no_free_tx_count); + } s = format (s, "\n%Unum-queue-pairs %d max-vectors %u max-mtu %u " "rss-key-size %u rss-lut-size %u", format_white_space, indent, diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c index 06007db540d..890259c88ab 100644 --- a/src/plugins/avf/input.c +++ b/src/plugins/avf/input.c @@ -539,6 +539,8 @@ done: else avf_rxq_refill (vm, node, rxq, 0 /* use_va_dma */ ); + rxq->total_packets += n_rx_packets; + return n_rx_packets; } diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c index daa86ae86b2..0952886aaee 100644 --- a/src/plugins/avf/output.c +++ b/src/plugins/avf/output.c @@ -510,6 +510,7 @@ retry: avf_tail_write (txq->qtx_tail, txq->next); txq->n_enqueued += n_desc; n_left -= n_enq; + txq->total_packets += n_enq; } if (n_left) @@ -522,6 +523,7 @@ retry: vlib_buffer_free (vm, buffers, n_left); vlib_error_count (vm, node->node_index, AVF_TX_ERROR_NO_FREE_SLOTS, n_left); + txq->no_free_tx_count += n_left; } if (tf->shared_queue) diff --git 
a/src/plugins/builtinurl/FEATURE.yaml b/src/plugins/builtinurl/FEATURE.yaml deleted file mode 100644 index ba8e3c7ea7b..00000000000 --- a/src/plugins/builtinurl/FEATURE.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -name: Builtin URL support for the static http or https server -maintainer: Dave Barach <dave@barachs.net> -features: - - Builtin URLs for the static http/https server -description: "The (builtinurl) plugin adds a set of URLs to the static http/https server. - Current URLs, all of which return data in .json fmt: - <root-url>/version.json - vpp version info - <root-url>/interface_list.json - list of interfaces - <root-url>/interface_stats - single interface via HTTP POST - <root-url>/interface_stats - all intfcs via HTTP GET." -state: development -properties: [API, CLI, MULTITHREAD] diff --git a/src/plugins/builtinurl/builtins.c b/src/plugins/builtinurl/builtins.c deleted file mode 100644 index b04e9dd5c7c..00000000000 --- a/src/plugins/builtinurl/builtins.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vnet/vnet.h> -#include <builtinurl/builtinurl.h> -#include <http_static/http_static.h> -#include <vpp/app/version.h> - -hss_url_handler_rc_t -handle_get_version (hss_url_handler_args_t *args) -{ - u8 *s = 0; - - /* Build some json bullshit */ - s = format (s, "{\"vpp_details\": {"); - s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER); - s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE); - - args->data = s; - args->data_len = vec_len (s); - args->free_vec_data = 1; - return HSS_URL_HANDLER_OK; -} - -void -trim_path_from_request (u8 * s, char *path) -{ - u8 *cp; - int trim_length = strlen (path) + 1 /* remove '?' */ ; - - /* Get rid of the path and question-mark */ - vec_delete (s, trim_length, 0); - - /* Tail trim irrelevant browser info */ - cp = s; - while ((cp - s) < vec_len (s)) - { - if (*cp == ' ') - { - /* - * Makes request a vector which happens to look - * like a c-string. - */ - *cp = 0; - vec_set_len (s, cp - s); - break; - } - cp++; - } -} - -hss_url_handler_rc_t -handle_get_interface_stats (hss_url_handler_args_t *args) -{ - u8 *s = 0, *stats = 0; - uword *p; - u32 *sw_if_indices = 0; - vnet_hw_interface_t *hi; - vnet_sw_interface_t *si; - char *q = "\""; - int i; - int need_comma = 0; - u8 *format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im, - vnet_sw_interface_t * si, int json); - vnet_main_t *vnm = vnet_get_main (); - vnet_interface_main_t *im = &vnm->interface_main; - - /* Get stats for a single interface via http POST */ - if (args->reqtype == HTTP_REQ_POST) - { - trim_path_from_request (args->request, "interface_stats.json"); - - /* Find the sw_if_index */ - p = hash_get (im->hw_interface_by_name, args->request); - if (!p) - { - s = format (s, "{\"interface_stats\": {[\n"); - s = format (s, " \"name\": \"%s\",", args->request); - s = format (s, " \"error\": \"%s\"", "UnknownInterface"); - s = format (s, "]}\n"); - goto out; - } - - vec_add1 (sw_if_indices, p[0]); - } - else /* default, 
HTTP_BUILTIN_METHOD_GET */ - { - pool_foreach (hi, im->hw_interfaces) - { - vec_add1 (sw_if_indices, hi->sw_if_index); - } - } - - s = format (s, "{%sinterface_stats%s: [\n", q, q); - - for (i = 0; i < vec_len (sw_if_indices); i++) - { - si = vnet_get_sw_interface (vnm, sw_if_indices[i]); - if (need_comma) - s = format (s, ",\n"); - - need_comma = 1; - - s = format (s, "{%sname%s: %s%U%s, ", q, q, q, - format_vnet_sw_if_index_name, vnm, sw_if_indices[i], q); - - stats = format_vnet_sw_interface_cntrs (stats, &vnm->interface_main, si, - 1 /* want json */ ); - if (vec_len (stats)) - s = format (s, "%v}", stats); - else - s = format (s, "%snone%s: %strue%s}", q, q, q, q); - vec_reset_length (stats); - } - - s = format (s, "]}\n"); - -out: - args->data = s; - args->data_len = vec_len (s); - args->free_vec_data = 1; - vec_free (sw_if_indices); - vec_free (stats); - return HSS_URL_HANDLER_OK; -} - -hss_url_handler_rc_t -handle_get_interface_list (hss_url_handler_args_t *args) -{ - u8 *s = 0; - int i; - vnet_main_t *vnm = vnet_get_main (); - vnet_interface_main_t *im = &vnm->interface_main; - vnet_hw_interface_t *hi; - u32 *hw_if_indices = 0; - int need_comma = 0; - - /* Construct vector of active hw_if_indexes ... */ - pool_foreach (hi, im->hw_interfaces) - { - /* No point in mentioning "local0"... 
*/ - if (hi - im->hw_interfaces) - vec_add1 (hw_if_indices, hi - im->hw_interfaces); - } - - /* Build answer */ - s = format (s, "{\"interface_list\": [\n"); - for (i = 0; i < vec_len (hw_if_indices); i++) - { - if (need_comma) - s = format (s, ",\n"); - hi = pool_elt_at_index (im->hw_interfaces, hw_if_indices[i]); - s = format (s, "\"%v\"", hi->name); - need_comma = 1; - } - s = format (s, "]}\n"); - vec_free (hw_if_indices); - - args->data = s; - args->data_len = vec_len (s); - args->free_vec_data = 1; - return HSS_URL_HANDLER_OK; -} - -void -builtinurl_handler_init (builtinurl_main_t * bm) -{ - - bm->register_handler (handle_get_version, "version.json", HTTP_REQ_GET); - bm->register_handler (handle_get_interface_list, "interface_list.json", - HTTP_REQ_GET); - bm->register_handler (handle_get_interface_stats, "interface_stats.json", - HTTP_REQ_GET); - bm->register_handler (handle_get_interface_stats, "interface_stats.json", - HTTP_REQ_POST); -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/builtinurl/builtinurl.api b/src/plugins/builtinurl/builtinurl.api deleted file mode 100644 index f292fd77a8e..00000000000 --- a/src/plugins/builtinurl/builtinurl.api +++ /dev/null @@ -1,43 +0,0 @@ -/* - * builtinurl.api - binary API skeleton - * - * Copyright (c) <current-year> <your-organization> - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file builtinurl.api - * @brief VPP control-plane API messages. - * - * This file defines VPP control-plane binary API messages which are generally - * called through a shared memory interface. - */ - -/* Version and type recitations */ - -option version = "1.0.0"; - -/** @brief API to enable / disable builtinurl on an interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param enable_disable - 1 to enable, 0 to disable the feature - @param sw_if_index - interface handle -*/ - -autoreply define builtinurl_enable { - /* Client identifier, set from api_main.my_client_index */ - u32 client_index; - - /* Arbitrary context, so client can match reply to request */ - u32 context; -}; diff --git a/src/plugins/builtinurl/builtinurl.c b/src/plugins/builtinurl/builtinurl.c deleted file mode 100644 index 749a2c93b8a..00000000000 --- a/src/plugins/builtinurl/builtinurl.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * builtinurl.c - skeleton vpp engine plug-in - * - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <vnet/vnet.h> -#include <vnet/plugin/plugin.h> -#include <builtinurl/builtinurl.h> - -#include <vlibapi/api.h> -#include <vlibmemory/api.h> -#include <vpp/app/version.h> -#include <stdbool.h> - -/* define message IDs */ -#include <builtinurl/builtinurl.api_enum.h> -#include <builtinurl/builtinurl.api_types.h> - -#define REPLY_MSG_ID_BASE bmp->msg_id_base -#include <vlibapi/api_helper_macros.h> - -builtinurl_main_t builtinurl_main; - -/* Action function shared between message handler and debug CLI */ - -int -builtinurl_enable (builtinurl_main_t * bmp) -{ - void (*fp) (void *, char *, int); - - if (bmp->initialized) - return 0; - - /* Look up the builtin URL registration handler */ - fp = vlib_get_plugin_symbol - ("http_static_plugin.so", "http_static_server_register_builtin_handler"); - - /* Most likely, the http_static plugin isn't loaded. Done. */ - if (fp == 0) - return VNET_API_ERROR_NO_SUCH_TABLE; - - bmp->register_handler = fp; - builtinurl_handler_init (bmp); - bmp->initialized = 1; - - return 0; -} - -static clib_error_t * -builtinurl_enable_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - builtinurl_main_t *bmp = &builtinurl_main; - - int rv; - - rv = builtinurl_enable (bmp); - - switch (rv) - { - case 0: - break; - - case VNET_API_ERROR_NO_SUCH_TABLE: - return clib_error_return - (0, "http_static_server_register_builtin_handler undefined"); - break; - - default: - return clib_error_return (0, "builtinurl_enable returned %d", rv); - } - return 0; -} - -VLIB_CLI_COMMAND (builtinurl_enable_command, static) = -{ - .path = "builtinurl enable", - .short_help = "Turn on builtin http/https GET and POST urls", - .function = builtinurl_enable_command_fn, -}; - -/* API message handler */ -static void vl_api_builtinurl_enable_t_handler - (vl_api_builtinurl_enable_t * mp) -{ - vl_api_builtinurl_enable_reply_t *rmp; - builtinurl_main_t *bmp = &builtinurl_main; - int rv; - - rv = builtinurl_enable (bmp); - - 
REPLY_MACRO (VL_API_BUILTINURL_ENABLE_REPLY); -} - -#include <builtinurl/builtinurl.api.c> -static clib_error_t * -builtinurl_init (vlib_main_t * vm) -{ - builtinurl_main_t *bmp = &builtinurl_main; - - bmp->vlib_main = vm; - bmp->vnet_main = vnet_get_main (); - - /* Ask for a correctly-sized block of API message decode slots */ - bmp->msg_id_base = setup_message_id_table (); - - return 0; -} - -VLIB_INIT_FUNCTION (builtinurl_init); - -VLIB_PLUGIN_REGISTER () = -{ - .version = VPP_BUILD_VER, - .description = "vpp built-in URL support", -}; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/builtinurl/builtinurl.h b/src/plugins/builtinurl/builtinurl.h deleted file mode 100644 index 91302c1eee5..00000000000 --- a/src/plugins/builtinurl/builtinurl.h +++ /dev/null @@ -1,57 +0,0 @@ - -/* - * builtinurl.h - built-in URLs for the http static server - * - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __included_builtinurl_h__ -#define __included_builtinurl_h__ - -#include <vnet/vnet.h> -#include <vnet/ip/ip.h> -#include <vnet/ethernet/ethernet.h> - -#include <vppinfra/hash.h> -#include <vppinfra/error.h> - -typedef struct -{ - /* API message ID base */ - u16 msg_id_base; - - /* GET / POST handler registration function */ - void (*register_handler) (void *, char *, int); - - /* Been there, done that */ - int initialized; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - ethernet_main_t *ethernet_main; -} builtinurl_main_t; - -extern builtinurl_main_t builtinurl_main; - -void builtinurl_handler_init (builtinurl_main_t * bm); - -#endif /* __included_builtinurl_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/builtinurl/builtinurl_test.c b/src/plugins/builtinurl/builtinurl_test.c deleted file mode 100644 index 9edfb81c525..00000000000 --- a/src/plugins/builtinurl/builtinurl_test.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * builtinurl.c - skeleton vpp-api-test plug-in - * - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include <vat/vat.h> -#include <vlibapi/api.h> -#include <vlibmemory/api.h> -#include <vppinfra/error.h> -#include <stdbool.h> - -uword unformat_sw_if_index (unformat_input_t * input, va_list * args); - -/* Declare message IDs */ -#include <builtinurl/builtinurl.api_enum.h> -#include <builtinurl/builtinurl.api_types.h> - -typedef struct -{ - /* API message ID base */ - u16 msg_id_base; - vat_main_t *vat_main; -} builtinurl_test_main_t; - -builtinurl_test_main_t builtinurl_test_main; - -#define __plugin_msg_base builtinurl_test_main.msg_id_base -#include <vlibapi/vat_helper_macros.h> - -static int -api_builtinurl_enable (vat_main_t * vam) -{ - vl_api_builtinurl_enable_t *mp; - int ret; - - /* Construct the API message */ - M (BUILTINURL_ENABLE, mp); - - /* send it... */ - S (mp); - - /* Wait for a reply... */ - W (ret); - return ret; -} - -#include <builtinurl/builtinurl.api_test.c> - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/crypto_native/FEATURE.yaml b/src/plugins/crypto_native/FEATURE.yaml index 06f26d4a8cf..d54816d673f 100644 --- a/src/plugins/crypto_native/FEATURE.yaml +++ b/src/plugins/crypto_native/FEATURE.yaml @@ -4,6 +4,9 @@ maintainer: Damjan Marion <damarion@cisco.com> features: - CBC(128, 192, 256) - GCM(128, 192, 256) + - CTR(128, 192, 256) + - SHA(224, 256) + - HMAC-SHA(224, 256) description: "An implementation of a native crypto-engine" state: production diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c index dd7ca3f1cf1..c981897783f 100644 --- a/src/plugins/crypto_native/aes_cbc.c +++ b/src/plugins/crypto_native/aes_cbc.c @@ -25,191 +25,40 @@ #pragma GCC optimize ("O3") #endif -#if defined(__VAES__) && defined(__AVX512F__) -#define u8xN u8x64 -#define u32xN u32x16 -#define u32xN_min_scalar u32x16_min_scalar -#define u32xN_is_all_zero u32x16_is_all_zero -#define u32xN_splat u32x16_splat -#elif 
defined(__VAES__) -#define u8xN u8x32 -#define u32xN u32x8 -#define u32xN_min_scalar u32x8_min_scalar -#define u32xN_is_all_zero u32x8_is_all_zero -#define u32xN_splat u32x8_splat -#else -#define u8xN u8x16 -#define u32xN u32x4 -#define u32xN_min_scalar u32x4_min_scalar -#define u32xN_is_all_zero u32x4_is_all_zero -#define u32xN_splat u32x4_splat -#endif +#define CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE 256 static_always_inline u32 aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops, aes_key_size_t ks) { crypto_native_main_t *cm = &crypto_native_main; - int rounds = AES_KEY_ROUNDS (ks); - u8 placeholder[8192]; - u32 i, j, count, n_left = n_ops; - u32xN placeholder_mask = { }; - u32xN len = { }; - vnet_crypto_key_index_t key_index[4 * N_AES_LANES]; - u8 *src[4 * N_AES_LANES] = {}; - u8 *dst[4 * N_AES_LANES] = {}; - u8xN r[4] = {}; - u8xN k[15][4] = {}; - - for (i = 0; i < 4 * N_AES_LANES; i++) - key_index[i] = ~0; - -more: - for (i = 0; i < 4 * N_AES_LANES; i++) - if (len[i] == 0) - { - if (n_left == 0) - { - /* no more work to enqueue, so we are enqueueing placeholder buffer */ - src[i] = dst[i] = placeholder; - len[i] = sizeof (placeholder); - placeholder_mask[i] = 0; - } - else - { - u8x16 t = aes_block_load (ops[0]->iv); - ((u8x16 *) r)[i] = t; - - src[i] = ops[0]->src; - dst[i] = ops[0]->dst; - len[i] = ops[0]->len; - placeholder_mask[i] = ~0; - if (key_index[i] != ops[0]->key_index) - { - aes_cbc_key_data_t *kd; - key_index[i] = ops[0]->key_index; - kd = (aes_cbc_key_data_t *) cm->key_data[key_index[i]]; - for (j = 0; j < rounds + 1; j++) - ((u8x16 *) k[j])[i] = kd->encrypt_key[j]; - } - ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED; - n_left--; - ops++; - } - } - - count = u32xN_min_scalar (len); - - ASSERT (count % 16 == 0); - - for (i = 0; i < count; i += 16) + u32 i, n_left = n_ops; + uword key_indices[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {}; + u8 *plaintext[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {}; + uword 
oplen[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {}; + u8 *iv[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {}; + u8 *ciphertext[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {}; + + while (n_left) { -#if defined(__VAES__) && defined(__AVX512F__) - r[0] = u8x64_xor3 (r[0], aes_block_load_x4 (src, i), k[0][0]); - r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src + 4, i), k[0][1]); - r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src + 8, i), k[0][2]); - r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src + 12, i), k[0][3]); - - for (j = 1; j < rounds; j++) + i = 0; + while (n_left && i < CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE) { - r[0] = aes_enc_round_x4 (r[0], k[j][0]); - r[1] = aes_enc_round_x4 (r[1], k[j][1]); - r[2] = aes_enc_round_x4 (r[2], k[j][2]); - r[3] = aes_enc_round_x4 (r[3], k[j][3]); + key_indices[i] = ops[0]->key_index; + plaintext[i] = ops[0]->src; + ciphertext[i] = ops[0]->dst; + oplen[i] = ops[0]->len; + iv[i] = ops[0]->iv; + ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED; + + ops++; + n_left--; + i++; } - r[0] = aes_enc_last_round_x4 (r[0], k[j][0]); - r[1] = aes_enc_last_round_x4 (r[1], k[j][1]); - r[2] = aes_enc_last_round_x4 (r[2], k[j][2]); - r[3] = aes_enc_last_round_x4 (r[3], k[j][3]); - - aes_block_store_x4 (dst, i, r[0]); - aes_block_store_x4 (dst + 4, i, r[1]); - aes_block_store_x4 (dst + 8, i, r[2]); - aes_block_store_x4 (dst + 12, i, r[3]); -#elif defined(__VAES__) - r[0] = u8x32_xor3 (r[0], aes_block_load_x2 (src, i), k[0][0]); - r[1] = u8x32_xor3 (r[1], aes_block_load_x2 (src + 2, i), k[0][1]); - r[2] = u8x32_xor3 (r[2], aes_block_load_x2 (src + 4, i), k[0][2]); - r[3] = u8x32_xor3 (r[3], aes_block_load_x2 (src + 6, i), k[0][3]); - - for (j = 1; j < rounds; j++) - { - r[0] = aes_enc_round_x2 (r[0], k[j][0]); - r[1] = aes_enc_round_x2 (r[1], k[j][1]); - r[2] = aes_enc_round_x2 (r[2], k[j][2]); - r[3] = aes_enc_round_x2 (r[3], k[j][3]); - } - r[0] = aes_enc_last_round_x2 (r[0], k[j][0]); - r[1] = aes_enc_last_round_x2 (r[1], k[j][1]); - r[2] = aes_enc_last_round_x2 
(r[2], k[j][2]); - r[3] = aes_enc_last_round_x2 (r[3], k[j][3]); - - aes_block_store_x2 (dst, i, r[0]); - aes_block_store_x2 (dst + 2, i, r[1]); - aes_block_store_x2 (dst + 4, i, r[2]); - aes_block_store_x2 (dst + 6, i, r[3]); -#else -#if __x86_64__ - r[0] = u8x16_xor3 (r[0], aes_block_load (src[0] + i), k[0][0]); - r[1] = u8x16_xor3 (r[1], aes_block_load (src[1] + i), k[0][1]); - r[2] = u8x16_xor3 (r[2], aes_block_load (src[2] + i), k[0][2]); - r[3] = u8x16_xor3 (r[3], aes_block_load (src[3] + i), k[0][3]); - - for (j = 1; j < rounds; j++) - { - r[0] = aes_enc_round_x1 (r[0], k[j][0]); - r[1] = aes_enc_round_x1 (r[1], k[j][1]); - r[2] = aes_enc_round_x1 (r[2], k[j][2]); - r[3] = aes_enc_round_x1 (r[3], k[j][3]); - } - - r[0] = aes_enc_last_round_x1 (r[0], k[j][0]); - r[1] = aes_enc_last_round_x1 (r[1], k[j][1]); - r[2] = aes_enc_last_round_x1 (r[2], k[j][2]); - r[3] = aes_enc_last_round_x1 (r[3], k[j][3]); - - aes_block_store (dst[0] + i, r[0]); - aes_block_store (dst[1] + i, r[1]); - aes_block_store (dst[2] + i, r[2]); - aes_block_store (dst[3] + i, r[3]); -#else - r[0] ^= aes_block_load (src[0] + i); - r[1] ^= aes_block_load (src[1] + i); - r[2] ^= aes_block_load (src[2] + i); - r[3] ^= aes_block_load (src[3] + i); - for (j = 0; j < rounds - 1; j++) - { - r[0] = vaesmcq_u8 (vaeseq_u8 (r[0], k[j][0])); - r[1] = vaesmcq_u8 (vaeseq_u8 (r[1], k[j][1])); - r[2] = vaesmcq_u8 (vaeseq_u8 (r[2], k[j][2])); - r[3] = vaesmcq_u8 (vaeseq_u8 (r[3], k[j][3])); - } - r[0] = vaeseq_u8 (r[0], k[j][0]) ^ k[rounds][0]; - r[1] = vaeseq_u8 (r[1], k[j][1]) ^ k[rounds][1]; - r[2] = vaeseq_u8 (r[2], k[j][2]) ^ k[rounds][2]; - r[3] = vaeseq_u8 (r[3], k[j][3]) ^ k[rounds][3]; - aes_block_store (dst[0] + i, r[0]); - aes_block_store (dst[1] + i, r[1]); - aes_block_store (dst[2] + i, r[2]); - aes_block_store (dst[3] + i, r[3]); -#endif -#endif + clib_aes_cbc_encrypt_multi ((aes_cbc_key_data_t **) cm->key_data, + key_indices, plaintext, oplen, iv, ks, + ciphertext, i); } - - len -= 
u32xN_splat (count); - - for (i = 0; i < 4 * N_AES_LANES; i++) - { - src[i] += count; - dst[i] += count; - } - - if (n_left > 0) - goto more; - - if (!u32xN_is_all_zero (len & placeholder_mask)) - goto more; - return n_ops; } diff --git a/src/plugins/crypto_native/sha2.c b/src/plugins/crypto_native/sha2.c index 459ce6d8e79..6787f629104 100644 --- a/src/plugins/crypto_native/sha2.c +++ b/src/plugins/crypto_native/sha2.c @@ -118,13 +118,25 @@ sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type) static int probe () { -#if defined(__SHA__) && defined(__x86_64__) +#if defined(__x86_64__) + +#if defined(__SHA__) && defined(__AVX512F__) + if (clib_cpu_supports_sha () && clib_cpu_supports_avx512f ()) + return 30; +#elif defined(__SHA__) && defined(__AVX2__) + if (clib_cpu_supports_sha () && clib_cpu_supports_avx2 ()) + return 20; +#elif defined(__SHA__) if (clib_cpu_supports_sha ()) - return 50; -#elif defined(__ARM_FEATURE_SHA2) + return 10; +#endif + +#elif defined(__aarch64__) +#if defined(__ARM_FEATURE_SHA2) if (clib_cpu_supports_sha2 ()) return 10; #endif +#endif return -1; } diff --git a/src/plugins/crypto_openssl/main.c b/src/plugins/crypto_openssl/main.c index b070cf336a5..c59b5d34a29 100644 --- a/src/plugins/crypto_openssl/main.c +++ b/src/plugins/crypto_openssl/main.c @@ -219,6 +219,17 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_t *op = ops[i]; int len = 0; + if (i + 2 < n_ops) + { + CLIB_PREFETCH (ops[i + 1]->src, 4 * CLIB_CACHE_PREFETCH_BYTES, LOAD); + CLIB_PREFETCH (ops[i + 1]->dst, 4 * CLIB_CACHE_PREFETCH_BYTES, + STORE); + + CLIB_PREFETCH (ops[i + 2]->src, 4 * CLIB_CACHE_PREFETCH_BYTES, LOAD); + CLIB_PREFETCH (ops[i + 2]->dst, 4 * CLIB_CACHE_PREFETCH_BYTES, + STORE); + } + ctx = ptd->evp_cipher_enc_ctx[op->key_index]; EVP_EncryptInit_ex (ctx, 0, 0, NULL, op->iv); if (op->aad_len) diff --git a/src/plugins/dev_armada/CMakeLists.txt b/src/plugins/dev_armada/CMakeLists.txt new file mode 100644 index 
00000000000..e755e7bdd46 --- /dev/null +++ b/src/plugins/dev_armada/CMakeLists.txt @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2022 Cisco Systems, Inc. + + +find_path(MUSDK_INCLUDE_DIR NAMES mv_std.h) +find_library(MUSDK_LIB NAMES libmusdk.a) + +if(NOT MUSDK_INCLUDE_DIR OR NOT MUSDK_LIB) + message(WARNING "Marvell MUSDK not found - dev_armada plugin disabled") + return() +endif() + +get_filename_component(MUSDK_LIB_DIR ${MUSDK_LIB} DIRECTORY) +set(MUSDK_LINK_FLAGS "-Wl,--whole-archive,${MUSDK_LIB_DIR}/libmusdk.a,--no-whole-archive") + +add_vpp_plugin(dev_armada + SOURCES + plugin.c + pp2/counters.c + pp2/init.c + pp2/format.c + pp2/port.c + pp2/queue.c + pp2/rx.c + pp2/tx.c + + LINK_FLAGS + ${MUSDK_LINK_FLAGS} +) +include_directories(${MUSDK_INCLUDE_DIR}) + diff --git a/src/plugins/dev_armada/README.rst b/src/plugins/dev_armada/README.rst new file mode 100644 index 00000000000..2c757d04a06 --- /dev/null +++ b/src/plugins/dev_armada/README.rst @@ -0,0 +1,61 @@ +Armada device plugin +===================== + +Overview +-------- + +This plugins provides native device support for Marvell PP2 network +device, found in Marvel Armada family of SOCs. +It uses Marvell Usermode SDK +(`MUSDK <https://github.com/MarvellEmbeddedProcessors/musdk-marvell>`__). + +Prerequisites +------------- + +Plugins depends on installed MUSDK and Marvell provided linux in Marvell SDK. 
+Following kernel modules from MUSDK must be loaded for plugin to work: +``musdk_cma.ko`` +``mv_pp_uio.ko`` + +Musdk 18.09.3 compilation steps +------------------------------- + +:: + + ./bootstrap + ./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no + sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c + sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c + make + sudo make install + +Usage +----- + +Interface Creation and Deletion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Interfaces are using new vnet dev APIs, CLIs or startup.conf to create and +delete interfaces. + +Sample startup.conf: + +:: + + devices { + dev platform/f2000000.ethernet { + port 1 { name ppio1 } + } + +Device identifier in this example is 'platform/f2000000.ethernet' where +'platform' is bus name and 'f2000000.ethernet' is linux platform bus +identifier for specific PP2. + +Platform identifier can be found in sysfs: + +:: + + $ ls /sys/bus/platform/devices | grep ethernet + f2000000.ethernet + + diff --git a/src/plugins/dev_armada/musdk.h b/src/plugins/dev_armada/musdk.h new file mode 100644 index 00000000000..aad2f4a1cef --- /dev/null +++ b/src/plugins/dev_armada/musdk.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#ifndef _MUSDK_H_ +#define _MUSDK_H_ + +#define MVCONF_DBG_LEVEL 0 +#define MVCONF_PP2_BPOOL_COOKIE_SIZE 32 +#define MVCONF_PP2_BPOOL_DMA_ADDR_SIZE 64 +#define MVCONF_DMA_PHYS_ADDR_T_SIZE 64 +#define MVCONF_SYS_DMA_UIO +#define MVCONF_TYPES_PUBLIC +#define MVCONF_DMA_PHYS_ADDR_T_PUBLIC + +#include <mv_std.h> +#include <env/mv_sys_dma.h> +#include <drivers/mv_pp2.h> +#include <drivers/mv_pp2_bpool.h> +#include <drivers/mv_pp2_ppio.h> + +#endif /* _MUSDK_H_ */ diff --git a/src/plugins/dev_armada/plugin.c b/src/plugins/dev_armada/plugin.c new file mode 100644 index 00000000000..1dc465c9a25 --- /dev/null +++ b/src/plugins/dev_armada/plugin.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Marvell Armada Drivers", +}; diff --git a/src/plugins/dev_armada/pp2/counters.c b/src/plugins/dev_armada/pp2/counters.c new file mode 100644 index 00000000000..a041138bc79 --- /dev/null +++ b/src/plugins/dev_armada/pp2/counters.c @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/bus/platform.h> +#include <vppinfra/ring.h> +#include <dev_armada/musdk.h> +#include <dev_armada/pp2/pp2.h> + +VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = { + .class_name = "armada", + .subclass_name = "pp2-counters", +}; + +typedef enum +{ + MVPP2_PORT_CTR_RX_BYTES, + MVPP2_PORT_CTR_RX_PACKETS, + MVPP2_PORT_CTR_RX_UCAST, + MVPP2_PORT_CTR_RX_ERRORS, + MVPP2_PORT_CTR_RX_FULLQ_DROPPED, + MVPP2_PORT_CTR_RX_BM_DROPPED, + MVPP2_PORT_CTR_RX_EARLY_DROPPED, + MVPP2_PORT_CTR_RX_FIFO_DROPPED, + MVPP2_PORT_CTR_RX_CLS_DROPPED, + + MVPP2_PORT_CTR_TX_BYTES, + MVPP2_PORT_CTR_TX_PACKETS, + MVPP2_PORT_CTR_TX_UCAST, + MVPP2_PORT_CTR_TX_ERRORS, +} mvpp2_port_counter_id_t; + +typedef enum +{ + MVPP2_RXQ_CTR_ENQ_DESC, + MVPP2_RXQ_CTR_DROP_FULLQ, + MVPP2_RXQ_CTR_DROP_EARLY, + MVPP2_RXQ_CTR_DROP_BM, +} mvpp2_rxq_counter_id_t; + +typedef enum +{ + MVPP2_TXQ_CTR_ENQ_DESC, + MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR, + MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR, + MVPP2_TXQ_CTR_DEQ_DESC, +} mvpp2_txq_counter_id_t; + +static vnet_dev_counter_t mvpp2_port_counters[] = { + VNET_DEV_CTR_RX_BYTES (MVPP2_PORT_CTR_RX_BYTES), + VNET_DEV_CTR_RX_PACKETS (MVPP2_PORT_CTR_RX_PACKETS), + VNET_DEV_CTR_RX_DROPS (MVPP2_PORT_CTR_RX_ERRORS), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_FULLQ_DROPPED, RX, PACKETS, + "fullq dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_BM_DROPPED, RX, PACKETS, + "bm dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_EARLY_DROPPED, RX, PACKETS, + "early dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_FIFO_DROPPED, RX, PACKETS, + "fifo dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_CLS_DROPPED, RX, PACKETS, + "cls dropped"), + + VNET_DEV_CTR_TX_BYTES (MVPP2_PORT_CTR_TX_BYTES), + VNET_DEV_CTR_TX_PACKETS (MVPP2_PORT_CTR_TX_PACKETS), + VNET_DEV_CTR_TX_DROPS 
(MVPP2_PORT_CTR_TX_ERRORS), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"), +}; + +static vnet_dev_counter_t mvpp2_rxq_counters[] = { + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_ENQ_DESC, RX, DESCRIPTORS, "enqueued"), + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_FULLQ, RX, PACKETS, "drop fullQ"), + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_EARLY, RX, PACKETS, "drop early"), + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_BM, RX, PACKETS, "drop BM"), +}; + +static vnet_dev_counter_t mvpp2_txq_counters[] = { + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_DESC, TX, DESCRIPTORS, "enqueued"), + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_DEQ_DESC, TX, PACKETS, "dequeued"), + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR, TX, BUFFERS, + "enq to DDR"), + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR, TX, DESCRIPTORS, + "enq to DDR"), +}; + +void +mvpp2_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_add_counters (vm, port, mvpp2_port_counters, + ARRAY_LEN (mvpp2_port_counters)); + + foreach_vnet_dev_port_rx_queue (q, port) + vnet_dev_rx_queue_add_counters (vm, q, mvpp2_rxq_counters, + ARRAY_LEN (mvpp2_rxq_counters)); + + foreach_vnet_dev_port_tx_queue (q, port) + vnet_dev_tx_queue_add_counters (vm, q, mvpp2_txq_counters, + ARRAY_LEN (mvpp2_txq_counters)); +} + +void +mvpp2_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + struct pp2_ppio_statistics stats; + pp2_ppio_get_statistics (mp->ppio, &stats, 1); +} + +void +mvpp2_rxq_clear_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *q) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (q->port); + struct pp2_ppio_inq_statistics stats; + pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 1); +} + +void +mvpp2_txq_clear_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *q) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (q->port); + struct pp2_ppio_inq_statistics stats; + pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, 
&stats, 1); +} + +vnet_dev_rv_t +mvpp2_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + struct pp2_ppio_statistics stats; + pp2_ppio_get_statistics (mp->ppio, &stats, 0); + + foreach_vnet_dev_counter (c, port->counter_main) + { + switch (c->user_data) + { + case MVPP2_PORT_CTR_RX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_bytes); + break; + case MVPP2_PORT_CTR_RX_PACKETS: + vnet_dev_counter_value_update (vm, c, stats.rx_packets); + break; + case MVPP2_PORT_CTR_RX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_unicast_packets); + break; + case MVPP2_PORT_CTR_RX_ERRORS: + vnet_dev_counter_value_update (vm, c, stats.rx_errors); + break; + case MVPP2_PORT_CTR_TX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.tx_bytes); + break; + case MVPP2_PORT_CTR_TX_PACKETS: + vnet_dev_counter_value_update (vm, c, stats.tx_packets); + break; + case MVPP2_PORT_CTR_TX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_unicast_packets); + break; + case MVPP2_PORT_CTR_TX_ERRORS: + vnet_dev_counter_value_update (vm, c, stats.tx_errors); + break; + case MVPP2_PORT_CTR_RX_FULLQ_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_fullq_dropped); + break; + case MVPP2_PORT_CTR_RX_BM_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_bm_dropped); + break; + case MVPP2_PORT_CTR_RX_EARLY_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_early_dropped); + break; + case MVPP2_PORT_CTR_RX_FIFO_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_fifo_dropped); + break; + case MVPP2_PORT_CTR_RX_CLS_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_cls_dropped); + break; + + default: + ASSERT (0); + } + } + + foreach_vnet_dev_port_rx_queue (q, port) + { + struct pp2_ppio_inq_statistics stats; + pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 0); + + foreach_vnet_dev_counter (c, q->counter_main) + { + switch (c->user_data) + { + case MVPP2_RXQ_CTR_ENQ_DESC: 
+ vnet_dev_counter_value_update (vm, c, stats.enq_desc); + break; + case MVPP2_RXQ_CTR_DROP_BM: + vnet_dev_counter_value_update (vm, c, stats.drop_bm); + break; + case MVPP2_RXQ_CTR_DROP_EARLY: + vnet_dev_counter_value_update (vm, c, stats.drop_early); + break; + case MVPP2_RXQ_CTR_DROP_FULLQ: + vnet_dev_counter_value_update (vm, c, stats.drop_fullq); + break; + default: + ASSERT (0); + } + } + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + struct pp2_ppio_outq_statistics stats; + pp2_ppio_outq_get_statistics (mp->ppio, q->queue_id, &stats, 0); + + foreach_vnet_dev_counter (c, q->counter_main) + { + switch (c->user_data) + { + case MVPP2_TXQ_CTR_ENQ_DESC: + vnet_dev_counter_value_update (vm, c, stats.enq_desc); + break; + case MVPP2_TXQ_CTR_DEQ_DESC: + vnet_dev_counter_value_update (vm, c, stats.deq_desc); + break; + case MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR: + vnet_dev_counter_value_update (vm, c, stats.enq_buf_to_ddr); + break; + case MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR: + vnet_dev_counter_value_update (vm, c, stats.enq_dec_to_ddr); + break; + default: + ASSERT (0); + } + } + } + + return VNET_DEV_OK; +} diff --git a/src/plugins/dev_armada/pp2/format.c b/src/plugins/dev_armada/pp2/format.c new file mode 100644 index 00000000000..42c4114c512 --- /dev/null +++ b/src/plugins/dev_armada/pp2/format.c @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/bus/platform.h> +#include <dev_armada/musdk.h> +#include <dev_armada/pp2/pp2.h> + +static inline u32 +mrvl_get_u32_bits (void *start, int offset, int first, int last) +{ + u32 value = *(u32 *) (((u8 *) start) + offset); + if ((last == 0) && (first == 31)) + return value; + value >>= last; + value &= (1 << (first - last + 1)) - 1; + return value; +} + +u8 * +format_pp2_ppio_link_info (u8 *s, va_list *args) +{ + struct pp2_ppio_link_info *li = va_arg (*args, struct pp2_ppio_link_info *); + + char *port_duplex[] = { + [MV_NET_LINK_DUPLEX_HALF] = "half", + [MV_NET_LINK_DUPLEX_FULL] = "full", + }; + + u32 port_speeds[] = { + [MV_NET_LINK_SPEED_10] = 10, [MV_NET_LINK_SPEED_100] = 100, + [MV_NET_LINK_SPEED_1000] = 1000, [MV_NET_LINK_SPEED_2500] = 2500, + [MV_NET_LINK_SPEED_10000] = 10000, + }; + + char *port_phy_modes[] = { + [MV_NET_PHY_MODE_NONE] = "NONE", + [MV_NET_PHY_MODE_MII] = "MII", + [MV_NET_PHY_MODE_GMII] = "GMII", + [MV_NET_PHY_MODE_SGMII] = "SGMII", + [MV_NET_PHY_MODE_TBI] = "TBI", + [MV_NET_PHY_MODE_REVMII] = "REVMII", + [MV_NET_PHY_MODE_RMII] = "RMII", + [MV_NET_PHY_MODE_RGMII] = "RGMII", + [MV_NET_PHY_MODE_RGMII_ID] = "RGMII_ID", + [MV_NET_PHY_MODE_RGMII_RXID] = "RGMII_RXID", + [MV_NET_PHY_MODE_RGMII_TXID] = "RGMII_TXID", + [MV_NET_PHY_MODE_RTBI] = "RTBI", + [MV_NET_PHY_MODE_SMII] = "SMII", + [MV_NET_PHY_MODE_XGMII] = "XGMII", + [MV_NET_PHY_MODE_MOCA] = "MOCA", + [MV_NET_PHY_MODE_QSGMII] = "QSGMII", + [MV_NET_PHY_MODE_XAUI] = "XAUI", + [MV_NET_PHY_MODE_RXAUI] = "RXAUI", + [MV_NET_PHY_MODE_KR] = "KR", + }; + + s = + format (s, "duplex %s speed %d up %d phy_mode %s", port_duplex[li->duplex], + port_speeds[li->speed], li->up, port_phy_modes[li->phy_mode]); + + return s; +} + +u8 * +format_mvpp2_port_status (u8 *s, va_list *args) +{ + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_port_t *port = va_arg 
(*args, vnet_dev_port_t *); + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + struct pp2_ppio_link_info li = {}; + + if (mp->ppio == 0 || pp2_ppio_get_link_info (mp->ppio, &li)) + return format (s, "link info not available"); + + return format (s, "%U", format_pp2_ppio_link_info, &li); +} + +u8 * +format_mvpp2_dev_info (u8 *s, va_list *args) +{ + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + mvpp2_device_t *md = vnet_dev_get_data (dev); + + format (s, "pp_id is %u", md->pp_id); + return s; +} + +#define foreach_pp2_rx_desc_field \ + _ (0x00, 6, 0, l3_offset) \ + _ (0x00, 12, 8, ip_hdlen) \ + _ (0x00, 14, 13, ec) \ + _ (0x00, 15, 15, es) \ + _ (0x00, 19, 16, pool_id) \ + _ (0x00, 21, 21, hwf_sync) \ + _ (0x00, 22, 22, l4_chk_ok) \ + _ (0x00, 23, 23, ip_frg) \ + _ (0x00, 24, 24, ipv4_hdr_err) \ + _ (0x00, 27, 25, l4_info) \ + _ (0x00, 30, 28, l3_info) \ + _ (0x00, 31, 31, buf_header) \ + _ (0x04, 5, 0, lookup_id) \ + _ (0x04, 8, 6, cpu_code) \ + _ (0x04, 9, 9, pppoe) \ + _ (0x04, 11, 10, l3_cast_info) \ + _ (0x04, 13, 12, l2_cast_info) \ + _ (0x04, 15, 14, vlan_info) \ + _ (0x04, 31, 16, byte_count) \ + _ (0x08, 11, 0, gem_port_id) \ + _ (0x08, 13, 12, color) \ + _ (0x08, 14, 14, gop_sop_u) \ + _ (0x08, 15, 15, key_hash_enable) \ + _ (0x08, 31, 16, l4chk) \ + _ (0x0c, 31, 0, timestamp) \ + _ (0x10, 31, 0, buf_phys_ptr_lo) \ + _ (0x14, 7, 0, buf_phys_ptr_hi) \ + _ (0x14, 31, 8, key_hash) \ + _ (0x18, 31, 0, buf_virt_ptr_lo) \ + _ (0x1c, 7, 0, buf_virt_ptr_hi) \ + _ (0x1c, 14, 8, buf_qset_no) \ + _ (0x1c, 15, 15, buf_type) \ + _ (0x1c, 21, 16, mod_dscp) \ + _ (0x1c, 24, 22, mod_pri) \ + _ (0x1c, 25, 25, mdscp) \ + _ (0x1c, 26, 26, mpri) \ + _ (0x1c, 27, 27, mgpid) \ + _ (0x1c, 31, 29, port_num) + +u8 * +format_mvpp2_rx_desc (u8 *s, va_list *args) + +{ + struct pp2_ppio_desc *d = va_arg (*args, struct pp2_ppio_desc *); + u32 indent = format_get_indent (s); + u32 r32; + 
+#define _(a, b, c, n) \ + r32 = mrvl_get_u32_bits (d, a, b, c); \ + if (r32 > 9) \ + s = format (s, "%s %u (0x%x)", #n, r32, r32); \ + else \ + s = format (s, "%s %u", #n, r32); \ + if (format_get_indent (s) > 72) \ + s = format (s, "\n%U", format_white_space, indent + 2); \ + else \ + s = format (s, " "); + + foreach_pp2_rx_desc_field; +#undef _ + return s; +} + +u8 * +format_mv_dsa_tag (u8 *s, va_list *args) +{ + mv_dsa_tag_t *tag = va_arg (*args, mv_dsa_tag_t *); + u32 cnt = 0; + +#define _(b, n) \ + if (#n[0] != '_') \ + s = format (s, "%s" #n " %u", cnt++ ? " " : "", tag->n); + foreach_mv_dsa_tag_field +#undef _ + return s; +} + +u8 * +format_mvpp2_rx_trace (u8 *s, va_list *args) +{ + vlib_main_t *vm = va_arg (*args, vlib_main_t *); + vlib_node_t *node = va_arg (*args, vlib_node_t *); + mvpp2_rx_trace_t *t = va_arg (*args, mvpp2_rx_trace_t *); + vnet_main_t *vnm = vnet_get_main (); + u32 indent = format_get_indent (s); + struct pp2_ppio_desc *d = &t->desc; + + if (t->sw_if_index != CLIB_U32_MAX) + s = format (s, "pp2: %U (%d) next-node %U", format_vnet_sw_if_index_name, + vnm, t->sw_if_index, t->sw_if_index, + format_vlib_next_node_name, vm, node->index, t->next_index); + else + s = format (s, "pp2: next-node %U", format_vlib_next_node_name, vm, + node->index, t->next_index); + + s = format (s, "\n%U%U", format_white_space, indent + 2, + format_mvpp2_rx_desc, d); + if (t->dsa_tag.as_u32) + s = format (s, "\n%Udsa tag: %U", format_white_space, indent + 2, + format_mv_dsa_tag, &t->dsa_tag); + + return s; +} diff --git a/src/plugins/dev_armada/pp2/init.c b/src/plugins/dev_armada/pp2/init.c new file mode 100644 index 00000000000..4333dbb352f --- /dev/null +++ b/src/plugins/dev_armada/pp2/init.c @@ -0,0 +1,421 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/bus/platform.h> +#include <vppinfra/ring.h> +#include <dev_armada/musdk.h> +#include <dev_armada/pp2/pp2.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +#include <linux/if.h> +#include <sys/ioctl.h> + +#define MV_SYS_DMA_MEM_SZ (2 << 20) + +VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = { + .class_name = "armada", + .subclass_name = "init", +}; + +static int num_pp2_in_use = 0; +static int dma_mem_initialized = 0; +static int global_pp2_initialized = 0; + +#define _(f, n, s, d) \ + { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s }, + +vlib_error_desc_t mvpp2_rx_node_counters[] = { foreach_mvpp2_rx_node_counter }; +vlib_error_desc_t mvpp2_tx_node_counters[] = { foreach_mvpp2_tx_node_counter }; +#undef _ + +vnet_dev_node_t mvpp2_rx_node = { + .error_counters = mvpp2_rx_node_counters, + .n_error_counters = ARRAY_LEN (mvpp2_rx_node_counters), + .format_trace = format_mvpp2_rx_trace, +}; + +vnet_dev_node_t mvpp2_tx_node = { + .error_counters = mvpp2_tx_node_counters, + .n_error_counters = ARRAY_LEN (mvpp2_tx_node_counters), +}; + +static u8 * +mvpp2_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info) +{ + vnet_dev_bus_platform_device_info_t *di = dev_info; + + if (clib_dt_node_is_compatible (di->node, "marvell,armada-7k-pp22")) + return format (0, "Marvell Armada Packet Processor v2.2"); + return 0; +} +static void +mvpp2_global_deinit (vlib_main_t *vm, vnet_dev_t *dev) +{ + mvpp2_device_t *md = vnet_dev_get_data (dev); + log_debug (dev, ""); + if (--num_pp2_in_use == 0) + { + if (global_pp2_initialized) + { + for (u32 i = 0; i < ARRAY_LEN (md->thread); i++) + if (md->thread[i].bpool) + { + pp2_bpool_deinit (md->thread[i].bpool); + md->thread[i].bpool = 0; + } + for (u32 i = 0; i < ARRAY_LEN (md->hif); i++) + if (md->hif[i]) + { + pp2_hif_deinit (md->hif[i]); + md->hif[i] = 
0; + } + + pp2_deinit (); + global_pp2_initialized = 0; + } + if (dma_mem_initialized) + { + mv_sys_dma_mem_destroy (); + log_debug (0, "mv_sys_dma_mem_destroy()"); + dma_mem_initialized = 0; + } + } +} + +static void +mvpp2_deinit (vlib_main_t *vm, vnet_dev_t *dev) +{ + log_debug (dev, ""); + mvpp2_global_deinit (vm, dev); +} + +static vnet_dev_rv_t +mvpp2_global_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + mvpp2_device_t *md = vnet_dev_get_data (dev); + vnet_dev_rv_t rv = VNET_DEV_OK; + int mrv; + u16 free_hifs, free_bpools; + u16 n_threads = vlib_get_n_threads (); + + struct pp2_init_params init_params = { + .hif_reserved_map = 0xf, + .bm_pool_reserved_map = 0x7, + }; + + if (num_pp2_in_use++) + return rv; + + mrv = mv_sys_dma_mem_init (MV_SYS_DMA_MEM_SZ); + if (mrv < 0) + { + log_err (0, "mv_sys_dma_mem_init failed, err %d", mrv); + rv = VNET_DEV_ERR_INIT_FAILED; + goto done; + } + + dma_mem_initialized = 1; + log_debug (0, "mv_sys_dma_mem_init(%u) ok", MV_SYS_DMA_MEM_SZ); + + if ((mrv = pp2_init (&init_params))) + { + log_err (dev, "pp2_init failed, err %d", mrv); + rv = VNET_DEV_ERR_INIT_FAILED; + goto done; + } + + log_debug (dev, "pp2_init() ok"); + + free_hifs = pow2_mask (MVPP2_NUM_HIFS) ^ init_params.hif_reserved_map; + free_bpools = + pow2_mask (MVPP2_NUM_BPOOLS) ^ init_params.bm_pool_reserved_map; + + if (n_threads > count_set_bits (free_hifs)) + { + log_err (dev, "no enough HIFs (needed %u available %u)", n_threads, + count_set_bits (free_hifs)); + rv = VNET_DEV_ERR_INIT_FAILED; + goto done; + } + + for (u32 i = 0; i < n_threads; i++) + { + char match[16]; + u8 index; + struct pp2_hif_params hif_params = { + .match = match, + .out_size = 2048, + }; + struct pp2_bpool_params bpool_params = { + .match = match, + .buff_len = vlib_buffer_get_default_data_size (vm), + }; + + index = get_lowest_set_bit_index (free_hifs); + free_hifs ^= 1 << index; + snprintf (match, sizeof (match), "hif-%u", index); + + mrv = pp2_hif_init (&hif_params, md->hif + i); + if 
(mrv < 0) + { + log_err (dev, "pp2_hif_init failed for hif %u thread %u, err %d", + index, i, mrv); + rv = VNET_DEV_ERR_INIT_FAILED; + goto done; + } + log_debug (dev, "pp2_hif_init(hif %u, thread %u) ok", index, i); + + index = get_lowest_set_bit_index (free_bpools); + free_bpools ^= 1 << index; + snprintf (match, sizeof (match), "pool-%u:%u", md->pp_id, index); + + mrv = pp2_bpool_init (&bpool_params, &md->thread[i].bpool); + if (mrv < 0) + { + log_err (dev, "pp2_bpool_init failed for bpool %u thread %u, err %d", + index, i, mrv); + rv = VNET_DEV_ERR_INIT_FAILED; + goto done; + } + log_debug (dev, "pp2_bpool_init(bpool %u, thread %u) pool-%u:%u ok", + index, i, md->thread[i].bpool->pp2_id, + md->thread[i].bpool->id); + for (u32 j = 0; j < ARRAY_LEN (md->thread[0].bre); j++) + md->thread[i].bre[j].bpool = md->thread[i].bpool; + } + +done: + return rv; +} + +static vnet_dev_rv_t +mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + mvpp2_device_t *md = vnet_dev_get_data (dev); + vnet_dev_rv_t rv = VNET_DEV_OK; + vnet_dev_bus_platform_device_data_t *dd = vnet_dev_get_bus_data (dev); + clib_dt_node_t *sc; + clib_dt_node_t *sw = 0; + int pp_id = -1; + + if (!clib_dt_node_is_compatible (dd->node, "marvell,armada-7k-pp22")) + return VNET_DEV_ERR_NOT_SUPPORTED; + + sc = clib_dt_dereference_node (dd->node, "marvell,system-controller"); + + if (sc && vec_len (sc->path) > strlen ("/cpX/")) + { + if (strncmp ((char *) sc->path, "/cp0/", 4) == 0) + pp_id = 0; + else if (strncmp ((char *) sc->path, "/cp1/", 4) == 0) + pp_id = 1; + } + + if (pp_id < 0) + return VNET_DEV_ERR_UNKNOWN_DEVICE; + + foreach_clib_dt_tree_node (n, clib_dt_get_root_node (sc)) + if (clib_dt_node_is_compatible (n, "marvell,mv88e6190") || + clib_dt_node_is_compatible (n, "marvell,mv88e6393x")) + { + clib_dt_node_t *ports; + sw = n; + log_debug (dev, "found mv88e6190 compatible switch at %v", n->path); + ports = clib_dt_get_child_node (sw, "ports"); + foreach_clib_dt_child_node (pn, ports) + { + u32 reg = 
CLIB_U32_MAX; + char *label = "(no label)"; + clib_dt_property_t *p; + clib_dt_node_t *n; + + p = clib_dt_get_node_property_by_name (pn, "reg"); + if (p) + reg = clib_dt_property_get_u32 (p); + p = clib_dt_get_node_property_by_name (pn, "label"); + if (p) + label = clib_dt_property_get_string (p); + + log_debug (dev, "port %u label %s", reg, label); + + n = clib_dt_dereference_node (pn, "phy-handle"); + if (n) + log_debug (dev, " phy is %v", n->path); + + n = clib_dt_dereference_node (pn, "sfp"); + if (n) + log_debug (dev, " sfp is %v", n->path); + + n = clib_dt_dereference_node (pn, "ethernet"); + if (n) + log_debug (dev, " connected to %v", n->path); + + p = clib_dt_get_node_property_by_name (pn, "phy-mode"); + if (p) + log_debug (dev, " phy mode is %s", + clib_dt_property_get_string (p)); + } + } + + if ((mvpp2_global_init (vm, dev)) != VNET_DEV_OK) + return rv; + + md->pp_id = pp_id; + + foreach_clib_dt_child_node (cn, dd->node) + { + clib_dt_property_t *p; + char netdev_name[IFNAMSIZ]; + struct ifreq s = {}; + u8 ppio_id; + int fd, srv; + + p = clib_dt_get_node_property_by_name (cn, "port-id"); + + if (!clib_dt_property_is_u32 (p)) + continue; + + ppio_id = clib_dt_property_get_u32 (p); + log_debug (dev, "found port with ppio id %u", ppio_id); + + if (pp2_ppio_available (md->pp_id, ppio_id) == 0) + continue; + + if (pp2_netdev_get_ifname (md->pp_id, ppio_id, netdev_name) < 0) + { + log_warn (dev, "failed to get ifname, skipping port %u ", ppio_id); + continue; + } + + srv = -1; + if ((fd = socket (PF_INET, SOCK_DGRAM, IPPROTO_IP)) >= 0) + { + strcpy (s.ifr_name, netdev_name); + srv = ioctl (fd, SIOCGIFHWADDR, &s); + close (fd); + } + + if (srv < 0) + { + log_warn (dev, "unable to get hw address, skipping port %u", + ppio_id); + continue; + } + + log_debug (dev, "adding ppio %u (netdev name %s, hwaddr %U)", ppio_id, + netdev_name, format_ethernet_address, s.ifr_addr.sa_data); + + mvpp2_port_t mvpp2_port = { + .ppio_id = ppio_id, + }; + + if (sw) + { + 
clib_dt_node_t *ports = clib_dt_get_child_node (sw, "ports"); + if (ports) + foreach_clib_dt_child_node (sp, ports) + { + clib_dt_node_t *eth; + + eth = clib_dt_dereference_node (sp, "ethernet"); + + if (cn != eth) + continue; + + mvpp2_port.is_dsa = 1; + mvpp2_port.switch_node = sw; + mvpp2_port.switch_port_node = sp; + log_debug (dev, "port is connected to switch port %v", + sp->path); + break; + } + } + + vnet_dev_port_add_args_t port_add_args = { + .port = { + .attr = { + .type = VNET_DEV_PORT_TYPE_ETHERNET, + .max_rx_queues = PP2_PPIO_MAX_NUM_INQS, + .max_tx_queues = PP2_PPIO_MAX_NUM_OUTQS, + .max_supported_rx_frame_size = 9216, + .caps.secondary_interfaces = mvpp2_port.is_dsa != 0, + }, + .ops = { + .init = mvpp2_port_init, + .deinit = mvpp2_port_deinit, + .start = mvpp2_port_start, + .stop = mvpp2_port_stop, + .add_sec_if = mvpp2_port_add_sec_if, + .del_sec_if = mvpp2_port_del_sec_if, + .config_change = mvpp2_port_cfg_change, + .config_change_validate = mvpp2_port_cfg_change_validate, + .format_status = format_mvpp2_port_status, + .clear_counters = mvpp2_port_clear_counters, + }, + .data_size = sizeof (mvpp2_port_t), + .initial_data = &mvpp2_port, + .sec_if_args = VNET_DEV_ARGS ( + VNET_DEV_ARG_UINT32 (MVPP2_SEC_IF_ARG_DSA_SWITCH, "dsa_switch", "DSA source switch ID", .max= 31), + VNET_DEV_ARG_UINT32 (MVPP2_SEC_IF_ARG_DSA_PORT, "dsa_port", "DSA source switch port ID", .max = 31) + ), + }, + .rx_node = &mvpp2_rx_node, + .tx_node = &mvpp2_tx_node, + .rx_queue = { + .config = { + .data_size = sizeof (mvpp2_rxq_t), + .default_size = 512, + .multiplier = 32, + .min_size = 32, + .max_size = 4096, + .size_is_power_of_two = 1, + }, + .ops = { + .clear_counters = mvpp2_rxq_clear_counters, + }, + }, + .tx_queue = { + .config = { + .data_size = sizeof (mvpp2_txq_t), + .default_size = 512, + .multiplier = 32, + .min_size = 32, + .max_size = 4096, + .size_is_power_of_two = 1, + }, + .ops = { + .alloc = mvpp2_txq_alloc, + .free = mvpp2_txq_free, + .clear_counters = 
mvpp2_txq_clear_counters, + }, + }, + }; + + vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr, + (u8 *) s.ifr_addr.sa_data); + + vnet_dev_port_add (vm, dev, ppio_id, &port_add_args); + } + + if (rv != VNET_DEV_OK) + mvpp2_deinit (vm, dev); + return rv; +} + +VNET_DEV_REGISTER_DRIVER (pp2) = { + .name = "mvpp2", + .bus = PLATFORM_BUS_NAME, + .device_data_sz = sizeof (mvpp2_device_t), + .ops = { + .init = mvpp2_init, + .deinit = mvpp2_deinit, + .probe = mvpp2_probe, + .format_info = format_mvpp2_dev_info, + }, +}; diff --git a/src/plugins/dev_armada/pp2/port.c b/src/plugins/dev_armada/pp2/port.c new file mode 100644 index 00000000000..63a212e80c2 --- /dev/null +++ b/src/plugins/dev_armada/pp2/port.c @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/bus/platform.h> +#include <vppinfra/ring.h> +#include <dev_armada/musdk.h> +#include <dev_armada/pp2/pp2.h> + +VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = { + .class_name = "armada", + .subclass_name = "pp2-port", +}; + +vnet_dev_rv_t +mvpp2_port_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + mvpp2_device_t *md = vnet_dev_get_data (dev); + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + vnet_dev_rv_t rv = VNET_DEV_OK; + vnet_dev_rx_queue_t *rxq0 = vnet_dev_get_port_rx_queue_by_id (port, 0); + struct pp2_ppio_link_info li; + char match[16]; + int mrv; + + log_debug (port->dev, ""); + + snprintf (match, sizeof (match), "ppio-%d:%d", md->pp_id, port->port_id); + + struct pp2_ppio_params ppio_params = { + .match = match, + .type = PP2_PPIO_T_NIC, + .eth_start_hdr = mp->is_dsa ? 
/* Port init: create and configure the MUSDK ppio for this port.
 * Sets up one traffic class fed by the RX thread's bpool, one out-queue
 * per configured TX queue, and pre-builds the descriptor pointer array
 * used by the RX path. Returns VNET_DEV_ERR_INIT_FAILED on any MUSDK
 * error (mvpp2_port_stop is invoked on the failure path). */
vnet_dev_rv_t
mvpp2_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_t *dev = port->dev;
  mvpp2_device_t *md = vnet_dev_get_data (dev);
  mvpp2_port_t *mp = vnet_dev_get_port_data (port);
  vnet_dev_rv_t rv = VNET_DEV_OK;
  vnet_dev_rx_queue_t *rxq0 = vnet_dev_get_port_rx_queue_by_id (port, 0);
  struct pp2_ppio_link_info li;
  char match[16];
  int mrv;

  log_debug (port->dev, "");

  /* MUSDK selects the ppio by a "ppio-<pp>:<port>" match string */
  snprintf (match, sizeof (match), "ppio-%d:%d", md->pp_id, port->port_id);

  struct pp2_ppio_params ppio_params = {
    .match = match,
    .type = PP2_PPIO_T_NIC,
    /* DSA-tagged start-of-frame when the port faces a switch */
    .eth_start_hdr = mp->is_dsa ? PP2_PPIO_HDR_ETH_DSA : PP2_PPIO_HDR_ETH,
    .inqs_params = {
      .num_tcs = 1,
      .tcs_params[0] = {
	.pkt_offset = 0,
	.num_in_qs = 1,
	.inqs_params = &(struct pp2_ppio_inq_params) { .size = rxq0->size },
	/* buffers are replenished from the RX thread's bpool */
	.pools[0][0] = md->thread[rxq0->rx_thread_index].bpool,
      },
    },
  };

  /* one MUSDK out-queue per vnet TX queue, equal weight */
  foreach_vnet_dev_port_tx_queue (q, port)
    {
      struct pp2_ppio_outqs_params *oqs = &ppio_params.outqs_params;
      oqs->outqs_params[q->queue_id].weight = 1;
      oqs->outqs_params[q->queue_id].size = q->size;
      oqs->num_outqs++;
    }

  mrv = pp2_ppio_init (&ppio_params, &mp->ppio);
  if (mrv)
    {
      rv = VNET_DEV_ERR_INIT_FAILED;
      log_err (dev, "port %u ppio '%s' init failed, rv %d", port->port_id,
	       match, mrv);
      goto done;
    }
  log_debug (dev, "port %u ppio '%s' init ok", port->port_id, match);

  mrv = pp2_ppio_get_link_info (mp->ppio, &li);
  if (mrv)
    {
      rv = VNET_DEV_ERR_INIT_FAILED;
      log_err (dev, "failed to get link info for port %u, rv %d",
	       port->port_id, mrv);
      goto done;
    }

  log_debug (dev, "port %u %U", port->port_id, format_pp2_ppio_link_info, &li);

  /* pp2_ppio_recv wants an array of descriptor pointers; build it once */
  for (u32 i = 0; i < VLIB_FRAME_SIZE; i++)
    mp->desc_ptrs[i] = mp->descs + i;

  mvpp2_port_add_counters (vm, port);

done:
  /* NOTE(review): on failure the ppio (if created) is left for
   * mvpp2_port_deinit to release; mvpp2_port_stop is a no-op here since
   * is_enabled was never set — confirm this is the intended cleanup */
  if (rv != VNET_DEV_OK)
    mvpp2_port_stop (vm, port);
  return rv;
}

/* Port deinit: release the MUSDK ppio created by mvpp2_port_init. */
void
mvpp2_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port)
{
  mvpp2_port_t *mp = vnet_dev_get_port_data (port);

  log_debug (port->dev, "");

  if (mp->ppio)
    {
      pp2_ppio_deinit (mp->ppio);
      mp->ppio = 0;
    }
}

/* Periodic poll (registered by mvpp2_port_start): publish link
 * state/duplex/speed changes to the dev framework and refresh stats. */
void
mvpp2_port_poll (vlib_main_t *vm, vnet_dev_port_t *port)
{
  mvpp2_port_t *mp = vnet_dev_get_port_data (port);
  vnet_dev_t *dev = port->dev;
  vnet_dev_port_state_changes_t changes = {};
  struct pp2_ppio_link_info li;
  int mrv;

  mrv = pp2_ppio_get_link_info (mp->ppio, &li);

  if (mrv)
    {
      log_debug (dev, "pp2_ppio_get_link_info: failed, rv %d", mrv);
      return;
    }

  if (mp->last_link_info.up != li.up)
    {
      changes.change.link_state = 1;
      changes.link_state = li.up != 0;
      log_debug (dev, "link state changed to %u", changes.link_state);
    }

  if (mp->last_link_info.duplex != li.duplex)
    {
      changes.change.link_duplex = 1;
      changes.full_duplex = li.duplex != 0;
      log_debug (dev, "link full duplex changed to %u", changes.full_duplex);
    }

  if (mp->last_link_info.speed != li.speed)
    {
      /* map MUSDK speed enum to the framework's unit
       * (values suggest kbps: 10 Mbps -> 10000 — confirm against
       * vnet_dev_port_state_changes_t docs) */
      u32 speeds[] = {
	[MV_NET_LINK_SPEED_AN] = 0,
	[MV_NET_LINK_SPEED_10] = 10000,
	[MV_NET_LINK_SPEED_100] = 100000,
	[MV_NET_LINK_SPEED_1000] = 1000000,
	[MV_NET_LINK_SPEED_2500] = 2500000,
	[MV_NET_LINK_SPEED_10000] = 10000000,
      };

      if (li.speed < ARRAY_LEN (speeds))
	{
	  changes.change.link_speed = 1;
	  changes.link_speed = speeds[li.speed];
	  log_debug (dev, "link speed changed to %u", changes.link_speed);
	}
    }

  if (changes.change.any)
    {
      mp->last_link_info = li;
      vnet_dev_port_state_change (vm, port, changes);
    }

  mvpp2_port_get_stats (vm, port);
}

/* Port start: enable the ppio and begin polling link state (2 Hz). */
vnet_dev_rv_t
mvpp2_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
{
  mvpp2_port_t *mp = vnet_dev_get_port_data (port);
  int mrv;

  log_debug (port->dev, "");

  mrv = pp2_ppio_enable (mp->ppio);
  if (mrv)
    {
      log_err (port->dev, "pp2_ppio_enable() failed, rv %d", mrv);
      return VNET_DEV_ERR_NOT_READY;
    }

  mp->is_enabled = 1;

  vnet_dev_poll_port_add (vm, port, 0.5, mvpp2_port_poll);

  return VNET_DEV_OK;
}

/* Port stop: cancel polling, disable the ppio and report link down.
 * Safe to call when the port was never started (is_enabled guard). */
void
mvpp2_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
{
  int rv;
  mvpp2_port_t *mp = vnet_dev_get_port_data (port);

  log_debug (port->dev, "");

  if (mp->is_enabled)
    {
      vnet_dev_poll_port_remove (vm, port, mvpp2_port_poll);

      rv = pp2_ppio_disable (mp->ppio);
      if (rv)
	log_err (port->dev, "pp2_ppio_disable() failed, rv %d", rv);

      vnet_dev_port_state_change (vm, port,
				  (vnet_dev_port_state_changes_t){
				    .change.link_state = 1,
				    .change.link_speed = 1,
				    .link_speed = 0,
				    .link_state = 0,
				  });
      mp->is_enabled = 0;
    }
}
+ vnet_dev_port_interface_t *sif = p; + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + u32 port_id = CLIB_U32_MAX, switch_id = 0, index; + + if (mp->is_dsa == 0) + return VNET_DEV_ERR_NOT_SUPPORTED; + + foreach_vnet_dev_args (a, sif) + { + switch (a->id) + { + case MVPP2_SEC_IF_ARG_DSA_PORT: + if (a->val_set) + port_id = vnet_dev_arg_get_uint32 (a); + break; + case MVPP2_SEC_IF_ARG_DSA_SWITCH: + switch_id = vnet_dev_arg_get_uint32 (a); + break; + default: + break; + } + } + + if (port_id == CLIB_U32_MAX) + { + log_err (port->dev, "missing dsa_port argument"); + return VNET_DEV_ERR_INVALID_ARG; + } + + log_debug (port->dev, "switch %u port %u", switch_id, port_id); + + mv_dsa_tag_t tag = { + .tag_type = MV_DSA_TAG_TYPE_FROM_CPU, + .src_port_or_lag = port_id, + .src_dev = switch_id, + }; + + index = switch_id << 5 | port_id; + + sif->user_data = tag.as_u32; + uword_bitmap_set_bits_at_index (mp->valid_dsa_src_bitmap, index, 1); + mp->dsa_to_sec_if[index] = sif->index; + return VNET_DEV_OK; +} + +vnet_dev_rv_t +mvpp2_port_del_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *p) +{ + vnet_dev_port_interface_t *sif = p; + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + mv_dsa_tag_t tag = { .as_u32 = sif->user_data }; + u32 index = tag.src_dev << 5 | tag.src_port_or_lag; + + log_debug (port->dev, "switch %u port %u", tag.src_dev, tag.src_port_or_lag); + + uword_bitmap_clear_bits_at_index (mp->valid_dsa_src_bitmap, index, 1); + return VNET_DEV_OK; +} + +vnet_dev_rv_t +mvpp2_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + switch (req->type) + { + case VNET_DEV_PORT_CFG_PROMISC_MODE: + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: + case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + break; + + default: + rv = VNET_DEV_ERR_NOT_SUPPORTED; + }; + + return rv; +} + +vnet_dev_rv_t +mvpp2_port_cfg_change (vlib_main_t *vm, 
vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + vnet_dev_rv_t rv = VNET_DEV_OK; + eth_addr_t addr; + int mrv; + + switch (req->type) + { + + case VNET_DEV_PORT_CFG_PROMISC_MODE: + mrv = pp2_ppio_set_promisc (mp->ppio, req->promisc); + if (mrv) + { + log_err (port->dev, "pp2_ppio_set_promisc: failed, rv %d", mrv); + rv = VNET_DEV_ERR_INTERNAL; + } + else + log_debug (port->dev, "pp2_ppio_set_promisc: promisc %u", + req->promisc); + break; + + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr)); + mrv = pp2_ppio_set_mac_addr (mp->ppio, addr); + if (mrv) + { + log_err (port->dev, "pp2_ppio_set_mac_addr: failed, rv %d", mrv); + rv = VNET_DEV_ERR_INTERNAL; + } + else + log_debug (port->dev, "pp2_ppio_set_mac_addr: %U added", + format_ethernet_address, &addr); + break; + + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: + clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr)); + mrv = pp2_ppio_add_mac_addr (mp->ppio, addr); + if (mrv) + { + log_err (port->dev, "pp2_ppio_add_mac_addr: failed, rv %d", mrv); + rv = VNET_DEV_ERR_INTERNAL; + } + else + log_debug (port->dev, "pp2_ppio_add_mac_addr: %U added", + format_ethernet_address, &addr); + break; + + case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr)); + mrv = pp2_ppio_remove_mac_addr (mp->ppio, addr); + if (mrv) + { + log_err (port->dev, "pp2_ppio_remove_mac_addr: failed, rv %d", mrv); + rv = VNET_DEV_ERR_INTERNAL; + } + else + log_debug (port->dev, "pp2_ppio_remove_mac_addr: %U added", + format_ethernet_address, &addr); + break; + + default: + return VNET_DEV_ERR_NOT_SUPPORTED; + }; + + return rv; +} diff --git a/src/plugins/dev_armada/pp2/pp2.h b/src/plugins/dev_armada/pp2/pp2.h new file mode 100644 index 00000000000..160bfd20c5c --- /dev/null +++ b/src/plugins/dev_armada/pp2/pp2.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: Apache-2.0 + * 
Copyright (c) 2023 Cisco Systems, Inc. + */ + +#ifndef _PP2_H_ +#define _PP2_H_ + +#include <vppinfra/clib.h> +#include <vppinfra/error_bootstrap.h> +#include <vppinfra/format.h> +#include <vppinfra/devicetree.h> +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> + +#define MVCONF_DBG_LEVEL 0 +#define MVCONF_PP2_BPOOL_COOKIE_SIZE 32 +#define MVCONF_PP2_BPOOL_DMA_ADDR_SIZE 64 +#define MVCONF_DMA_PHYS_ADDR_T_SIZE 64 +#define MVCONF_SYS_DMA_UIO +#define MVCONF_TYPES_PUBLIC +#define MVCONF_DMA_PHYS_ADDR_T_PUBLIC + +#include "mv_std.h" +#include "env/mv_sys_dma.h" +#include "drivers/mv_pp2.h" +#include <drivers/mv_pp2_bpool.h> +#include <drivers/mv_pp2_ppio.h> + +#define MVPP2_NUM_HIFS 9 +#define MVPP2_NUM_BPOOLS 16 +#define MVPP2_MAX_THREADS 4 +#define MRVL_PP2_BUFF_BATCH_SZ 32 +#define MV_DSA_N_SRC 32 + +#define foreach_mv_dsa_tag_field \ + _ (12, vid) \ + _ (1, _zero13) \ + _ (3, pri) \ + _ (1, cfi_dei) \ + _ (1, _unused17) \ + _ (1, src_is_lag) \ + _ (5, src_port_or_lag) \ + _ (5, src_dev) \ + _ (1, src_tagged) \ + _ (2, tag_type) + +typedef enum +{ + MV_DSA_TAG_TYPE_TO_CPU = 0, + MV_DSA_TAG_TYPE_FROM_CPU = 1, + MV_DSA_TAG_TYPE_TO_SNIFFER = 2, + MV_DSA_TAG_TYPE_FORWARD = 3 +} mv_dsa_tag_type_t; + +typedef enum +{ + MVPP2_SEC_IF_ARG_DSA_SWITCH, + MVPP2_SEC_IF_ARG_DSA_PORT +} mvpp2_sec_if_args_t; + +typedef union +{ + struct + { +#define _(b, n) u32 (n) : (b); + foreach_mv_dsa_tag_field +#undef _ + }; + u32 as_u32; +} mv_dsa_tag_t; + +STATIC_ASSERT_SIZEOF (mv_dsa_tag_t, 4); + +static_always_inline mv_dsa_tag_t +mv_dsa_tag_read (void *p) +{ + return (mv_dsa_tag_t){ .as_u32 = clib_net_to_host_u32 (*(u32u *) p) }; +} + +static_always_inline void +mv_dsa_tag_write (void *p, mv_dsa_tag_t tag) +{ + ((mv_dsa_tag_t *) p)->as_u32 = clib_host_to_net_u32 (tag.as_u32); +} + +typedef struct +{ + u8 pp_id; + struct pp2_hif *hif[MVPP2_NUM_HIFS]; + struct + { + struct pp2_bpool *bpool; + struct buff_release_entry bre[MRVL_PP2_BUFF_BATCH_SZ]; + } thread[MVPP2_NUM_BPOOLS]; + +} 
mvpp2_device_t; + +typedef struct +{ + u8 is_enabled : 1; + u8 is_dsa : 1; + struct pp2_ppio *ppio; + u8 ppio_id; + struct pp2_ppio_link_info last_link_info; + clib_dt_node_t *switch_node; + clib_dt_node_t *switch_port_node; + + struct pp2_ppio_desc descs[VLIB_FRAME_SIZE]; + struct pp2_ppio_desc *desc_ptrs[VLIB_FRAME_SIZE]; + uword valid_dsa_src_bitmap[1024 / uword_bits]; + u16 dsa_to_sec_if[1024]; +} mvpp2_port_t; + +typedef struct +{ + u16 next; + u16 n_enq; + u32 *buffers; +} mvpp2_txq_t; + +typedef struct +{ +} mvpp2_rxq_t; + +typedef struct +{ + struct pp2_ppio_desc desc; + u32 sw_if_index; + u16 next_index; + mv_dsa_tag_t dsa_tag; +} mvpp2_rx_trace_t; + +/* counters.c */ +void mvpp2_port_add_counters (vlib_main_t *, vnet_dev_port_t *); +void mvpp2_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +void mvpp2_rxq_clear_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +void mvpp2_txq_clear_counters (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rv_t mvpp2_port_get_stats (vlib_main_t *, vnet_dev_port_t *); + +/* format.c */ +format_function_t format_pp2_ppio_link_info; +format_function_t format_mvpp2_port_status; +format_function_t format_mvpp2_dev_info; +format_function_t format_mvpp2_rx_trace; +format_function_t format_mvpp2_rx_desc; +format_function_t format_mv_dsa_tag; + +/* port.c */ +vnet_dev_port_op_t mvpp2_port_init; +vnet_dev_port_op_no_rv_t mvpp2_port_deinit; +vnet_dev_port_op_t mvpp2_port_start; +vnet_dev_port_op_no_rv_t mvpp2_port_stop; +vnet_dev_port_op_with_ptr_t mvpp2_port_add_sec_if; +vnet_dev_port_op_with_ptr_t mvpp2_port_del_sec_if; +vnet_dev_rv_t mvpp2_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); +vnet_dev_rv_t +mvpp2_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); + +/* queue.c */ +vnet_dev_tx_queue_op_t mvpp2_txq_alloc; +vnet_dev_tx_queue_op_no_rv_t mvpp2_txq_free; + +/* inline funcs */ + +#define log_debug(dev, f, ...) 
\ + vlib_log (VLIB_LOG_LEVEL_DEBUG, mvpp2_log.class, "%U" f, \ + format_vnet_dev_log, (dev), \ + clib_string_skip_prefix (__func__, "mvpp2_"), ##__VA_ARGS__) +#define log_info(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_INFO, mvpp2_log.class, "%U" f, \ + format_vnet_dev_log, (dev), 0, ##__VA_ARGS__) +#define log_notice(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_NOTICE, mvpp2_log.class, "%U" f, \ + format_vnet_dev_log, (dev), 0, ##__VA_ARGS__) +#define log_warn(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_WARNING, mvpp2_log.class, "%U" f, \ + format_vnet_dev_log, (dev), 0, ##__VA_ARGS__) +#define log_err(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_ERR, mvpp2_log.class, "%U" f, format_vnet_dev_log, \ + (dev), 0, ##__VA_ARGS__) + +#define foreach_mvpp2_tx_node_counter \ + _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \ + _ (PPIO_SEND, ppio_semd, ERROR, "pp2_ppio_send errors") \ + _ (PPIO_GET_NUM_OUTQ_DONE, ppio_get_num_outq_done, ERROR, \ + "pp2_ppio_get_num_outq_done errors") + +typedef enum +{ +#define _(f, n, s, d) MVPP2_TX_NODE_CTR_##f, + foreach_mvpp2_tx_node_counter +#undef _ +} mvpp2_tx_node_counter_t; + +#define foreach_mvpp2_rx_node_counter \ + _ (PPIO_RECV, ppio_recv, ERROR, "pp2_ppio_recv error") \ + _ (BPOOL_GET_NUM_BUFFS, bpool_get_num_bufs, ERROR, \ + "pp2_bpool_get_num_buffs error") \ + _ (BPOOL_PUT_BUFFS, bpool_put_buffs, ERROR, "pp2_bpool_put_buffs error") \ + _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error") \ + _ (UNKNOWN_DSA_SRC, unknown_dsa_src, ERROR, "unknown DSA source") \ + _ (MAC_CE, mac_ce, ERROR, "MAC error (CRC error)") \ + _ (MAC_OR, mac_or, ERROR, "overrun error") \ + _ (MAC_RSVD, mac_rsvd, ERROR, "unknown MAC error") \ + _ (MAC_RE, mac_re, ERROR, "resource error") \ + _ (IP_HDR, ip_hdr, ERROR, "ip4 header error") + +typedef enum +{ +#define _(f, n, s, d) MVPP2_RX_NODE_CTR_##f, + foreach_mvpp2_rx_node_counter +#undef _ +} mvpp2_rx_node_counter_t; + +#endif /* _PP2_H_ */ diff --git a/src/plugins/dev_armada/pp2/queue.c 
b/src/plugins/dev_armada/pp2/queue.c new file mode 100644 index 00000000000..05015414816 --- /dev/null +++ b/src/plugins/dev_armada/pp2/queue.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/bus/platform.h> +#include <vppinfra/ring.h> +#include <dev_armada/musdk.h> +#include <dev_armada/pp2/pp2.h> + +VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = { + .class_name = "armada", + .subclass_name = "pp2-queue", +}; + +vnet_dev_rv_t +mvpp2_txq_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + mvpp2_txq_t *mtq = vnet_dev_get_tx_queue_data (txq); + log_debug (txq->port->dev, ""); + + ASSERT (mtq->buffers == 0); + if (mtq->buffers == 0) + { + u32 sz = sizeof (u32) * txq->size; + mtq->buffers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES); + clib_memset (mtq->buffers, 0, sz); + } + + return rv; +} + +void +mvpp2_txq_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + mvpp2_txq_t *mtq = vnet_dev_get_tx_queue_data (txq); + + log_debug (txq->port->dev, ""); + if (mtq->buffers) + { + clib_mem_free (mtq->buffers); + mtq->buffers = 0; + } +} diff --git a/src/plugins/dev_armada/pp2/rx.c b/src/plugins/dev_armada/pp2/rx.c new file mode 100644 index 00000000000..5b0e8d35000 --- /dev/null +++ b/src/plugins/dev_armada/pp2/rx.c @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Cisco Systems, Inc. 
/* SPDX-License-Identifier: Apache-2.0
 * Copyright (c) 2024 Cisco Systems, Inc.
 */

#include <vlib/vlib.h>
#include <vnet/dev/dev.h>
#include <vnet/ethernet/ethernet.h>
#include <vppinfra/vector/mask_compare.h>
#include <vppinfra/vector/compress.h>

#include <dev_armada/pp2/pp2.h>

/* The RX descriptor cookie holds the vlib buffer index (set at refill). */
static_always_inline vlib_buffer_t *
desc_to_vlib_buffer (vlib_main_t *vm, struct pp2_ppio_desc *d)
{
  return vlib_get_buffer (vm, pp2_ppio_inq_desc_get_cookie (d));
}

/* Deliver n_desc received descriptors to one (primary or secondary)
 * interface: apply the interface buffer template, set lengths
 * (len_adj compensates for DSA-tag stripping), trace, enqueue to the
 * interface's next node and bump RX counters. Returns total RX bytes. */
static_always_inline u64
mrvl_pp2_rx_one_if (vlib_main_t *vm, vlib_node_runtime_t *node,
		    vnet_dev_rx_queue_t *rxq,
		    vnet_dev_rx_queue_if_rt_data_t *if_rt_data,
		    struct pp2_ppio_desc **desc_ptrs, u32 n_desc,
		    i32 current_data, i32 len_adj, mv_dsa_tag_t tag)
{
  vnet_main_t *vnm = vnet_get_main ();
  u64 n_rx_bytes = 0;
  vlib_buffer_t *b0, *b1;
  u32 n_trace, n_left = n_desc;
  u32 buffer_indices[VLIB_FRAME_SIZE], *bi = buffer_indices;
  struct pp2_ppio_desc **dp = desc_ptrs;
  u32 next_index = if_rt_data->next_index;
  vlib_buffer_template_t bt = if_rt_data->buffer_template;
  u32 sw_if_index = if_rt_data->sw_if_index;

  bt.current_data = current_data;

  /* 2-at-a-time with prefetch of the next pair; requires >= 4 remaining */
  for (; n_left >= 4; dp += 2, bi += 2, n_left -= 2)
    {
      clib_prefetch_store (desc_to_vlib_buffer (vm, dp[2]));
      clib_prefetch_store (desc_to_vlib_buffer (vm, dp[3]));
      b0 = desc_to_vlib_buffer (vm, dp[0]);
      b1 = desc_to_vlib_buffer (vm, dp[1]);
      bi[0] = pp2_ppio_inq_desc_get_cookie (dp[0]);
      bi[1] = pp2_ppio_inq_desc_get_cookie (dp[1]);
      b0->template = bt;
      b1->template = bt;

      n_rx_bytes += b0->current_length =
	pp2_ppio_inq_desc_get_pkt_len (dp[0]) + len_adj;
      n_rx_bytes += b1->current_length =
	pp2_ppio_inq_desc_get_pkt_len (dp[1]) + len_adj;
    }

  /* remaining 0-3 descriptors */
  for (; n_left; dp++, bi++, n_left--)
    {
      b0 = desc_to_vlib_buffer (vm, dp[0]);
      bi[0] = pp2_ppio_inq_desc_get_cookie (dp[0]);
      b0->template = bt;

      n_rx_bytes += b0->current_length =
	pp2_ppio_inq_desc_get_pkt_len (dp[0]) + len_adj;
    }

  /* trace */
  n_trace = vlib_get_trace_count (vm, node);
  if (PREDICT_FALSE (n_trace > 0))
    {
      for (u32 i = 0; i < n_desc && n_trace > 0; i++)
	{
	  vlib_buffer_t *b = desc_to_vlib_buffer (vm, desc_ptrs[i]);
	  if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b,
					       /* follow_chain */ 0)))
	    {
	      mvpp2_rx_trace_t *tr;
	      tr = vlib_add_trace (vm, node, b, sizeof (*tr));
	      tr->desc = *desc_ptrs[i];
	      tr->next_index = next_index;
	      tr->sw_if_index = sw_if_index;
	      tr->dsa_tag = tag;
	      n_trace--;
	    }
	}
      vlib_set_trace_count (vm, node, n_trace);
    }
  vlib_buffer_enqueue_to_single_next (vm, node, buffer_indices, next_index,
				      n_desc);

  vlib_increment_combined_counter (
    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
    vm->thread_index, sw_if_index, n_desc, n_rx_bytes);

  return n_rx_bytes;
}

/* Receive up to one frame worth of descriptors from the ppio.
 * Non-DSA ports hand everything straight to the primary interface.
 * DSA ports read each frame's DSA tag (at offset 14), strip it by
 * shifting the MAC addresses, then group frames by identical tag and
 * dispatch each group to the matching secondary interface; frames from
 * unknown DSA sources are traced (as drops) and freed. */
static_always_inline uword
mrvl_pp2_rx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
		    vlib_frame_t *frame, vnet_dev_rx_queue_t *rxq)
{
  vnet_dev_port_t *port = rxq->port;
  mvpp2_port_t *mp = vnet_dev_get_port_data (port);
  mv_dsa_tag_t dsa_tags[VLIB_FRAME_SIZE];
  u16 n_desc = VLIB_FRAME_SIZE;
  vlib_buffer_t *b;
  u32 i;

  /* n_desc is in/out: capacity on entry, actual count on return */
  if (PREDICT_FALSE (
	pp2_ppio_recv (mp->ppio, 0, rxq->queue_id, mp->descs, &n_desc)))
    {
      vlib_error_count (vm, node->node_index, MVPP2_RX_NODE_CTR_PPIO_RECV, 1);
      return 0;
    }

  if (mp->is_dsa)
    {
      for (i = 0; i < n_desc; i++)
	{
	  b = desc_to_vlib_buffer (vm, mp->descs + i);
	  u8 *start = b->data;
	  /* 4-byte DSA tag sits after the two MAC addresses (offset 14;
	   * frame data appears to start at b->data + 2 — see the
	   * current_data=6/len_adj=-4 dispatch below) */
	  mv_dsa_tag_t tag = mv_dsa_tag_read (start + 14);
	  dsa_tags[i] = tag;
	  /* shift dst+src MACs right over the tag, overlapping move */
	  clib_memmove (start + 6, start + 2, 12);
	}

      vlib_frame_bitmap_t avail_bmp = {};
      vlib_frame_bitmap_init (avail_bmp, n_desc);
      u32 n_avail = n_desc;

      /* dispatch one tag-group per iteration */
      while (n_avail)
	{
	  vlib_frame_bitmap_t selected_bmp = {};
	  struct pp2_ppio_desc *sel_descs[VLIB_FRAME_SIZE];
	  mv_dsa_tag_t tag;
	  u32 n_sel, index;

	  tag = dsa_tags[vlib_frame_bitmap_find_first_set (avail_bmp)];
	  index = tag.src_dev << 5 | tag.src_port_or_lag;

	  /* select every not-yet-dispatched frame with this exact tag */
	  clib_mask_compare_u32 (tag.as_u32, (u32 *) dsa_tags, selected_bmp,
				 n_desc);
	  n_sel = vlib_frame_bitmap_count_set_bits (selected_bmp);
	  n_avail -= n_sel;

	  if (uword_bitmap_is_bit_set (mp->valid_dsa_src_bitmap, index))
	    {
	      /* gather selected descriptor pointers contiguously */
	      clib_compress_u64 ((uword *) sel_descs, (uword *) mp->desc_ptrs,
				 selected_bmp, n_desc);
	      mrvl_pp2_rx_one_if (vm, node, rxq,
				  vnet_dev_get_rx_queue_sec_if_rt_data (
				    rxq, mp->dsa_to_sec_if[index]),
				  sel_descs, n_sel, 6, -4, tag);
	    }
	  else
	    {
	      /* unknown DSA source: trace if requested, then drop */
	      u32 n_free = 0, buffer_indices[VLIB_FRAME_SIZE];

	      foreach_vlib_frame_bitmap_set_bit_index (i, selected_bmp)
		buffer_indices[n_free++] =
		  pp2_ppio_inq_desc_get_cookie (mp->descs + i);

	      u32 n_trace = vlib_get_trace_count (vm, node);
	      if (PREDICT_FALSE (n_trace > 0))
		{
		  foreach_vlib_frame_bitmap_set_bit_index (i, selected_bmp)
		    {
		      vlib_buffer_t *b =
			desc_to_vlib_buffer (vm, mp->descs + i);

		      if (PREDICT_TRUE (vlib_trace_buffer (
			    vm, node, VNET_DEV_ETH_RX_PORT_NEXT_DROP, b,
			    /* follow_chain */ 0)))
			{
			  mvpp2_rx_trace_t *tr;
			  tr = vlib_add_trace (vm, node, b, sizeof (*tr));
			  tr->desc = mp->descs[i];
			  tr->next_index = VNET_DEV_ETH_RX_PORT_NEXT_DROP;
			  tr->sw_if_index = CLIB_U32_MAX;
			  tr->dsa_tag = dsa_tags[i];
			  n_trace--;
			}
		      if (n_trace == 0)
			break;
		    }
		  vlib_set_trace_count (vm, node, n_trace);
		}

	      vlib_buffer_free (vm, buffer_indices, n_free);
	      vlib_error_count (vm, node->node_index,
				MVPP2_RX_NODE_CTR_UNKNOWN_DSA_SRC, 1);
	    }
	}
    }
  else
    {
      /* plain port: everything goes to the primary interface */
      mrvl_pp2_rx_one_if (vm, node, rxq,
			  vnet_dev_get_rx_queue_if_rt_data (rxq),
			  mp->desc_ptrs, n_desc, 2, 0, (mv_dsa_tag_t){});
    }

  return n_desc;
}

/* Top the thread's bpool back up to rxq->size buffers, releasing them
 * in batches of MRVL_PP2_BUFF_BATCH_SZ via the thread's HIF. */
static_always_inline void
mrvl_pp2_rx_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
		    vnet_dev_rx_queue_t *rxq)
{
  vnet_dev_port_t *port = rxq->port;
  vnet_dev_t *dev = port->dev;
  mvpp2_device_t *md = vnet_dev_get_data (dev);
  u32 thread_index = vm->thread_index;
  struct pp2_hif *hif = md->hif[thread_index];
  struct pp2_bpool *bpool = md->thread[thread_index].bpool;
  struct buff_release_entry *bre = md->thread[thread_index].bre;
  u32 n_bufs, *bi;

  if (PREDICT_FALSE (pp2_bpool_get_num_buffs (bpool, &n_bufs)))
    {
      vlib_error_count (vm, node->node_index,
			MVPP2_RX_NODE_CTR_BPOOL_GET_NUM_BUFFS, 1);
      return;
    }

  /* number of buffers missing from the pool */
  n_bufs = rxq->size - n_bufs;
  while (n_bufs >= MRVL_PP2_BUFF_BATCH_SZ)
    {
      u16 n_alloc, i;
      struct buff_release_entry *e = bre;
      u32 buffer_indices[MRVL_PP2_BUFF_BATCH_SZ];

      n_alloc = vlib_buffer_alloc (vm, buffer_indices, MRVL_PP2_BUFF_BATCH_SZ);

      if (PREDICT_FALSE (n_alloc == 0))
	{
	  vlib_error_count (vm, node->node_index,
			    MVPP2_RX_NODE_CTR_BUFFER_ALLOC, 1);
	  return;
	}

      for (i = n_alloc, bi = buffer_indices; i--; e++, bi++)
	{

	  vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
	  /* NOTE(review): DMA address is 64 bytes before the buffer data
	   * physical address — presumably headroom the HW writes into;
	   * confirm against the descriptor/pkt_offset configuration */
	  e->buff.addr = vlib_buffer_get_pa (vm, b) - 64;
	  e->buff.cookie = bi[0];
	}

      /* n_alloc is in/out: MUSDK may update it to the count released */
      if (PREDICT_FALSE (pp2_bpool_put_buffs (hif, bre, &n_alloc)))
	{
	  vlib_error_count (vm, node->node_index,
			    MVPP2_RX_NODE_CTR_BPOOL_PUT_BUFFS, 1);
	  vlib_buffer_free (vm, buffer_indices, n_alloc);
	  return;
	}

      n_bufs -= n_alloc;
    }
}

/* RX node: poll every assigned queue, then refill its bpool. */
VNET_DEV_NODE_FN (mvpp2_rx_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  u32 n_rx = 0;
  foreach_vnet_dev_rx_queue_runtime (rxq, node)
    {
      n_rx += mrvl_pp2_rx_inline (vm, node, frame, rxq);
      mrvl_pp2_rx_refill (vm, node, rxq);
    }
  return n_rx;
}
+ */ + +#include <vlib/vlib.h> +#include <vnet/dev/dev.h> +#include <vnet/ethernet/ethernet.h> + +#include <dev_armada/pp2/pp2.h> + +VNET_DEV_NODE_FN (mvpp2_tx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); + vnet_dev_instance_t *ins = vnet_dev_get_dev_instance (rt->dev_instance); + vnet_dev_tx_queue_t *txq = rt->tx_queue; + vnet_dev_port_t *port = txq->port; + vnet_dev_t *dev = port->dev; + mvpp2_txq_t *mtq = vnet_dev_get_tx_queue_data (txq); + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + mvpp2_device_t *md = vnet_dev_get_data (dev); + u8 qid = txq->queue_id; + u32 *buffers = vlib_frame_vector_args (frame); + u32 n_vectors = frame->n_vectors, n_left; + u16 n_sent; + struct pp2_ppio *ppio = mp->ppio; + struct pp2_hif *hif = md->hif[vm->thread_index]; + struct pp2_ppio_desc descs[VLIB_FRAME_SIZE], *d = descs; + u16 sz = txq->size; + u16 mask = sz - 1; + i16 len_adj = 0; + + if (ins->is_primary_if == 0) + { + vnet_dev_port_interface_t *sif = + vnet_dev_port_get_sec_if_by_index (port, ins->sec_if_index); + + mv_dsa_tag_t tag = { .as_u32 = sif->user_data }; + + for (u32 i = 0; i < n_vectors; i++) + { + vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]); + u8 *start = vlib_buffer_get_current (b); + clib_memmove (start - 4, start, 12); + mv_dsa_tag_write (start + 8, tag); + } + len_adj = 4; + } + + if (mtq->n_enq) + { + u16 n_done = 0; + if (PREDICT_FALSE (pp2_ppio_get_num_outq_done (ppio, hif, qid, &n_done))) + vlib_error_count (vm, node->node_index, + MVPP2_TX_NODE_CTR_PPIO_GET_NUM_OUTQ_DONE, 1); + + if (n_done) + { + vlib_buffer_free_from_ring ( + vm, mtq->buffers, (mtq->next - mtq->n_enq) & mask, sz, n_done); + mtq->n_enq -= n_done; + } + } + + n_sent = clib_min (n_vectors, sz - mtq->n_enq); + + for (d = descs, n_left = n_sent; n_left; d++, buffers++, n_left--) + { + vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]); + u64 paddr = vlib_buffer_get_pa (vm, 
b0); + + pp2_ppio_outq_desc_reset (d); + pp2_ppio_outq_desc_set_phys_addr (d, paddr + b0->current_data - len_adj); + pp2_ppio_outq_desc_set_pkt_offset (d, 0); + pp2_ppio_outq_desc_set_pkt_len (d, b0->current_length + len_adj); + } + + buffers = vlib_frame_vector_args (frame); + + if (pp2_ppio_send (ppio, hif, qid, descs, &n_sent)) + { + n_sent = 0; + vlib_error_count (vm, node->node_index, MVPP2_TX_NODE_CTR_PPIO_SEND, 1); + } + else if (n_sent) + { + vlib_buffer_copy_indices_to_ring (mtq->buffers, buffers, + mtq->next & mask, sz, n_sent); + mtq->next += n_sent; + mtq->n_enq += n_sent; + } + + /* free unsent buffers */ + if (PREDICT_FALSE (n_sent != n_vectors)) + { + vlib_buffer_free (vm, buffers + n_sent, n_vectors - n_sent); + vlib_error_count (vm, node->node_index, MVPP2_TX_NODE_CTR_NO_FREE_SLOTS, + n_vectors - n_sent); + } + + return n_sent; +} diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c index ead090839c7..ed5c47ed505 100644 --- a/src/plugins/dev_ena/ena.c +++ b/src/plugins/dev_ena/ena.c @@ -4,7 +4,7 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> +#include <vnet/dev/bus/pci.h> #include <dev_ena/ena.h> #include <dev_ena/ena_inlines.h> #include <vnet/ethernet/ethernet.h> diff --git a/src/plugins/dev_ena/port.c b/src/plugins/dev_ena/port.c index 2b26fefc5e3..95d8ff3a08c 100644 --- a/src/plugins/dev_ena/port.c +++ b/src/plugins/dev_ena/port.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <dev_ena/ena.h> #include <dev_ena/ena_inlines.h> #include <vnet/ethernet/ethernet.h> diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c index 41fc5b8c943..51c6dbce84c 100644 --- a/src/plugins/dev_ena/rx_node.c +++ b/src/plugins/dev_ena/rx_node.c @@ -251,7 +251,6 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_dev_rx_queue_t *rxq) { ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq); - vnet_dev_port_t *port = rxq->port; 
vnet_main_t *vnm = vnet_get_main (); vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b; ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8]; @@ -260,13 +259,13 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u16 *csi; uword n_rx_packets = 0, n_rx_bytes = 0; vlib_frame_bitmap_t head_bmp = {}; - u32 sw_if_index = port->intf.sw_if_index; - u32 hw_if_index = port->intf.hw_if_index; + u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); u32 n_trace, n_deq, n_left; u32 cq_next = q->cq_next; - u32 next_index = rxq->next_index; + u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); vlib_frame_t *next_frame; - vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 *bi; int maybe_chained; diff --git a/src/plugins/dev_iavf/adminq.c b/src/plugins/dev_iavf/adminq.c index c12dc8aa2f6..2072c697033 100644 --- a/src/plugins/dev_iavf/adminq.c +++ b/src/plugins/dev_iavf/adminq.c @@ -5,7 +5,7 @@ #include <ctype.h> #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> +#include <vnet/dev/bus/pci.h> #include <vnet/dev/counters.h> #include <dev_iavf/iavf.h> #include <dev_iavf/iavf_regs.h> diff --git a/src/plugins/dev_iavf/counters.c b/src/plugins/dev_iavf/counters.c index 6dcd01141f0..3ab463edb9a 100644 --- a/src/plugins/dev_iavf/counters.c +++ b/src/plugins/dev_iavf/counters.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <vnet/dev/counters.h> #include <dev_iavf/iavf.h> #include <dev_iavf/virtchnl.h> diff --git a/src/plugins/dev_iavf/format.c b/src/plugins/dev_iavf/format.c index 9a3dde47ee9..b4a29e4e20a 100644 --- a/src/plugins/dev_iavf/format.c +++ b/src/plugins/dev_iavf/format.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <vnet/dev/counters.h> #include <dev_iavf/iavf.h> #include 
<dev_iavf/virtchnl.h> diff --git a/src/plugins/dev_iavf/iavf.c b/src/plugins/dev_iavf/iavf.c index d1c2b9edc63..f13440f4161 100644 --- a/src/plugins/dev_iavf/iavf.c +++ b/src/plugins/dev_iavf/iavf.c @@ -4,7 +4,7 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> +#include <vnet/dev/bus/pci.h> #include <vnet/dev/counters.h> #include <vppinfra/ring.h> #include <dev_iavf/iavf.h> diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c index 982436d9b45..a0530822688 100644 --- a/src/plugins/dev_iavf/port.c +++ b/src/plugins/dev_iavf/port.c @@ -4,7 +4,7 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> +#include <vnet/dev/bus/pci.h> #include <vnet/dev/counters.h> #include <dev_iavf/iavf.h> #include <dev_iavf/iavf_regs.h> @@ -42,29 +42,35 @@ iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_t *dev = port->dev; iavf_port_t *ap = vnet_dev_get_port_data (port); virtchnl_vlan_caps_t vc; - vnet_dev_rv_t rv; + vnet_dev_rv_t rv = VNET_DEV_ERR_NOT_SUPPORTED; u32 outer, inner; const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100; - if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0) - return iavf_vc_op_disable_vlan_stripping (vm, dev); + if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) + { + if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc))) + return rv; - if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc))) - return rv; + outer = vc.offloads.stripping_support.outer; + inner = vc.offloads.stripping_support.inner; - outer = vc.offloads.stripping_support.outer; - inner = vc.offloads.stripping_support.inner; + outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0; + inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0; - outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0; - inner = inner & VIRTCHNL_VLAN_TOGGLE ? 
inner & mask : 0; + virtchnl_vlan_setting_t vs = { + .vport_id = ap->vsi_id, + .outer_ethertype_setting = outer, + .inner_ethertype_setting = inner, + }; - virtchnl_vlan_setting_t vs = { - .vport_id = ap->vsi_id, - .outer_ethertype_setting = outer, - .inner_ethertype_setting = inner, - }; + if ((rv = iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs))) + return rv; + } - return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs); + if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) + return iavf_vc_op_disable_vlan_stripping (vm, dev); + + return rv; } vnet_dev_rv_t @@ -85,7 +91,7 @@ iavf_port_init_rss (vlib_main_t *vm, vnet_dev_port_t *port) .key_len = keylen, }; - clib_memcpy (key->key, default_rss_key, sizeof (default_rss_key)); + clib_memcpy (key->key, default_rss_key, keylen); return iavf_vc_op_config_rss_key (vm, dev, key); } @@ -257,7 +263,7 @@ avf_msix_n_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line) iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (line), dyn_ctln_enabled.as_u32); vlib_node_set_interrupt_pending (vlib_get_main_by_index (line), - port->intf.rx_node_index); + vnet_dev_get_port_rx_node_index (port)); } vnet_dev_rv_t @@ -275,7 +281,12 @@ iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port) u64_bit_set (&ap->intr_mode_per_rxq_bitmap, q->queue_id, 1); if ((rv = iavf_port_vlan_strip_disable (vm, port))) - return rv; + { + if (rv == VNET_DEV_ERR_NOT_SUPPORTED) + log_warn (port->dev, "device doesn't support vlan stripping"); + else + return rv; + } if ((rv = iavf_port_init_rss (vm, port))) return rv; @@ -414,17 +425,20 @@ iavf_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port, int is_primary) { iavf_port_t *ap = vnet_dev_get_port_data (port); - virtchnl_ether_addr_list_t al = { + u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, 1)]; + virtchnl_ether_addr_list_t *al = (virtchnl_ether_addr_list_t *) buffer; + + *al = (virtchnl_ether_addr_list_t){ .vsi_id = ap->vsi_id, .num_elements = 1, .list[0].primary = is_primary ? 
1 : 0, .list[0].extra = is_primary ? 0 : 1, }; - clib_memcpy (al.list[0].addr, addr, sizeof (al.list[0].addr)); + clib_memcpy (al->list[0].addr, addr, sizeof (al->list[0].addr)); - return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, &al) : - iavf_vc_op_del_eth_addr (vm, port->dev, &al); + return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, al) : + iavf_vc_op_del_eth_addr (vm, port->dev, al); } static vnet_dev_rv_t diff --git a/src/plugins/dev_iavf/queue.c b/src/plugins/dev_iavf/queue.c index 113c0dbdfc7..51bf69a458a 100644 --- a/src/plugins/dev_iavf/queue.c +++ b/src/plugins/dev_iavf/queue.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <vnet/dev/counters.h> #include <vppinfra/ring.h> #include <dev_iavf/iavf.h> diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c index ee6d7e8def0..bf650f9bfb9 100644 --- a/src/plugins/dev_iavf/rx_node.c +++ b/src/plugins/dev_iavf/rx_node.c @@ -249,14 +249,14 @@ iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 thr_idx = vlib_get_thread_index (); iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm); iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq); - vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0; u16 n_tail_desc = 0; u64 or_qw1 = 0; u32 *bi, *to_next, n_left_to_next; - u32 next_index = rxq->next_index; - u32 sw_if_index = port->intf.sw_if_index; - u32 hw_if_index = port->intf.hw_if_index; + u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); + u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); u16 next = arq->next; u16 size = rxq->size; u16 mask = size - 1; diff --git a/src/plugins/dev_iavf/virtchnl.c b/src/plugins/dev_iavf/virtchnl.c index eca48106ce3..7e7715262c2 100644 --- a/src/plugins/dev_iavf/virtchnl.c +++ 
b/src/plugins/dev_iavf/virtchnl.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <vnet/dev/counters.h> #include <dev_iavf/iavf.h> #include <dev_iavf/virtchnl.h> diff --git a/src/plugins/dev_iavf/virtchnl_funcs.h b/src/plugins/dev_iavf/virtchnl_funcs.h index e7f3901e0ee..0d4ab2835f4 100644 --- a/src/plugins/dev_iavf/virtchnl_funcs.h +++ b/src/plugins/dev_iavf/virtchnl_funcs.h @@ -9,6 +9,10 @@ #include <vnet/dev/dev.h> #include <dev_iavf/iavf.h> +/* The "+ 1" fakes a trailing element, but the driver requires that. + * Using this "wrong" macro is the easiest solution, as long as + * port.c uses buffer sized by the same macro as the functions here. + */ #define VIRTCHNL_MSG_SZ(s, e, n) STRUCT_OFFSET_OF (s, e[(n) + 1]) typedef struct diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt index e8abf1a3389..6109de57a7d 100644 --- a/src/plugins/dev_octeon/CMakeLists.txt +++ b/src/plugins/dev_octeon/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright(c) 2022 Cisco Systems, Inc. -if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10") +if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10" AND NOT VPP_PLATFORM_NAME STREQUAL "octeon9") return() endif() @@ -21,6 +21,10 @@ endif() include_directories (${OCTEON_ROC_DIR}/) +if (VPP_PLATFORM_NAME STREQUAL "octeon9") + add_compile_definitions(PLATFORM_OCTEON9) +endif() + add_vpp_plugin(dev_octeon SOURCES init.c @@ -31,6 +35,8 @@ add_vpp_plugin(dev_octeon rx_node.c tx_node.c flow.c + counter.c + crypto.c MULTIARCH_SOURCES rx_node.c diff --git a/src/plugins/dev_octeon/counter.c b/src/plugins/dev_octeon/counter.c new file mode 100644 index 00000000000..6f57c1ee649 --- /dev/null +++ b/src/plugins/dev_octeon/counter.c @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2024 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <dev_octeon/octeon.h> +#include <dev_octeon/common.h> + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "oct", + .subclass_name = "counters", +}; + +typedef enum +{ + OCT_PORT_CTR_RX_BYTES, + OCT_PORT_CTR_TX_BYTES, + OCT_PORT_CTR_RX_PACKETS, + OCT_PORT_CTR_TX_PACKETS, + OCT_PORT_CTR_RX_DROPS, + OCT_PORT_CTR_TX_DROPS, + OCT_PORT_CTR_RX_DROP_BYTES, + OCT_PORT_CTR_RX_UCAST, + OCT_PORT_CTR_TX_UCAST, + OCT_PORT_CTR_RX_MCAST, + OCT_PORT_CTR_TX_MCAST, + OCT_PORT_CTR_RX_BCAST, + OCT_PORT_CTR_TX_BCAST, + OCT_PORT_CTR_RX_FCS, + OCT_PORT_CTR_RX_ERR, + OCT_PORT_CTR_RX_DROP_MCAST, + OCT_PORT_CTR_RX_DROP_BCAST, + OCT_PORT_CTR_RX_DROP_L3_MCAST, + OCT_PORT_CTR_RX_DROP_L3_BCAST, +} oct_port_counter_id_t; + +vnet_dev_counter_t oct_port_counters[] = { + VNET_DEV_CTR_RX_BYTES (OCT_PORT_CTR_RX_BYTES), + VNET_DEV_CTR_RX_PACKETS (OCT_PORT_CTR_RX_PACKETS), + VNET_DEV_CTR_RX_DROPS (OCT_PORT_CTR_RX_DROPS), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BYTES, RX, BYTES, "drop bytes"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_FCS, RX, PACKETS, "fcs"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_ERR, RX, PACKETS, "error"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_MCAST, RX, PACKETS, + "drop multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BCAST, RX, PACKETS, + "drop broadcast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_MCAST, RX, PACKETS, + "drop L3 multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_BCAST, RX, PACKETS, + "drop L3 broadcast"), + + VNET_DEV_CTR_TX_BYTES (OCT_PORT_CTR_TX_BYTES), + VNET_DEV_CTR_TX_PACKETS (OCT_PORT_CTR_TX_PACKETS), + 
VNET_DEV_CTR_TX_DROPS (OCT_PORT_CTR_TX_DROPS), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"), +}; + +typedef enum +{ + OCT_RXQ_CTR_BYTES, + OCT_RXQ_CTR_PKTS, + OCT_RXQ_CTR_DROPS, + OCT_RXQ_CTR_DROP_BYTES, + OCT_RXQ_CTR_ERR, +} oct_rxq_counter_id_t; + +vnet_dev_counter_t oct_rxq_counters[] = { + VNET_DEV_CTR_RX_BYTES (OCT_RXQ_CTR_BYTES), + VNET_DEV_CTR_RX_PACKETS (OCT_RXQ_CTR_PKTS), + VNET_DEV_CTR_RX_DROPS (OCT_RXQ_CTR_DROPS), + VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_DROP_BYTES, RX, BYTES, "drop bytes"), + VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_ERR, RX, PACKETS, "error"), +}; + +typedef enum +{ + OCT_TXQ_CTR_BYTES, + OCT_TXQ_CTR_PKTS, + OCT_TXQ_CTR_DROPS, + OCT_TXQ_CTR_DROP_BYTES, +} oct_txq_counter_id_t; + +vnet_dev_counter_t oct_txq_counters[] = { + VNET_DEV_CTR_TX_BYTES (OCT_TXQ_CTR_BYTES), + VNET_DEV_CTR_TX_PACKETS (OCT_TXQ_CTR_PKTS), + VNET_DEV_CTR_TX_DROPS (OCT_TXQ_CTR_DROPS), + VNET_DEV_CTR_VENDOR (OCT_TXQ_CTR_DROP_BYTES, TX, BYTES, "drop bytes"), +}; + +static vnet_dev_rv_t +oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) 
+{ + u8 *s = 0; + va_list va; + + va_start (va, fmt); + s = va_format (s, fmt, &va); + va_end (va); + + log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv); + + vec_free (s); + return VNET_DEV_ERR_INTERNAL; +} + +void +oct_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_add_counters (vm, port, oct_port_counters, + ARRAY_LEN (oct_port_counters)); + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + vnet_dev_rx_queue_add_counters (vm, rxq, oct_rxq_counters, + ARRAY_LEN (oct_rxq_counters)); + } + + foreach_vnet_dev_port_tx_queue (txq, port) + { + vnet_dev_tx_queue_add_counters (vm, txq, oct_txq_counters, + ARRAY_LEN (oct_txq_counters)); + } +} + +vnet_dev_rv_t +oct_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + struct roc_nix_stats stats; + + if ((rrv = roc_nix_stats_get (nix, &stats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_get() failed"); + + foreach_vnet_dev_counter (c, port->counter_main) + { + switch (c->user_data) + { + case OCT_PORT_CTR_RX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_octs); + break; + case OCT_PORT_CTR_TX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.tx_octs); + break; + case OCT_PORT_CTR_RX_PACKETS: + vnet_dev_counter_value_update ( + vm, c, stats.rx_ucast + stats.rx_bcast + stats.rx_mcast); + break; + case OCT_PORT_CTR_TX_PACKETS: + vnet_dev_counter_value_update ( + vm, c, stats.tx_ucast + stats.tx_bcast + stats.tx_mcast); + break; + case OCT_PORT_CTR_RX_DROPS: + vnet_dev_counter_value_update (vm, c, stats.rx_drop); + break; + case OCT_PORT_CTR_TX_DROPS: + vnet_dev_counter_value_update (vm, c, stats.tx_drop); + break; + case OCT_PORT_CTR_RX_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_octs); + break; + case OCT_PORT_CTR_RX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_ucast); + break; + case 
OCT_PORT_CTR_TX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_ucast); + break; + case OCT_PORT_CTR_RX_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_mcast); + break; + case OCT_PORT_CTR_TX_MCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_mcast); + break; + case OCT_PORT_CTR_RX_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_bcast); + break; + case OCT_PORT_CTR_TX_BCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_bcast); + break; + case OCT_PORT_CTR_RX_FCS: + vnet_dev_counter_value_update (vm, c, stats.rx_fcs); + break; + case OCT_PORT_CTR_RX_ERR: + vnet_dev_counter_value_update (vm, c, stats.rx_err); + break; + case OCT_PORT_CTR_RX_DROP_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_mcast); + break; + case OCT_PORT_CTR_RX_DROP_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_bcast); + break; + case OCT_PORT_CTR_RX_DROP_L3_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_mcast); + break; + case OCT_PORT_CTR_RX_DROP_L3_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_bcast); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_rxq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + struct roc_nix_stats_queue qstats; + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_get (nix, crq->rq.qid, 1, &qstats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed"); + + foreach_vnet_dev_counter (c, rxq->counter_main) + { + switch (c->user_data) + { + case OCT_RXQ_CTR_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.rx_octs); + break; + case OCT_RXQ_CTR_PKTS: + vnet_dev_counter_value_update (vm, c, qstats.rx_pkts); + break; + case OCT_RXQ_CTR_DROPS: + vnet_dev_counter_value_update (vm, c, qstats.rx_drop_pkts); + break; + case 
OCT_RXQ_CTR_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.rx_drop_octs); + break; + case OCT_RXQ_CTR_ERR: + vnet_dev_counter_value_update (vm, c, qstats.rx_error_pkts); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_txq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + struct roc_nix_stats_queue qstats; + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_get (nix, ctq->sq.qid, 0, &qstats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed"); + + foreach_vnet_dev_counter (c, txq->counter_main) + { + switch (c->user_data) + { + case OCT_TXQ_CTR_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.tx_octs); + break; + case OCT_TXQ_CTR_PKTS: + vnet_dev_counter_value_update (vm, c, qstats.tx_pkts); + break; + case OCT_TXQ_CTR_DROPS: + vnet_dev_counter_value_update (vm, c, qstats.tx_drop_pkts); + break; + case OCT_TXQ_CTR_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.tx_drop_octs); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +void +oct_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_reset (nix))) + oct_roc_err (dev, rrv, "roc_nix_stats_reset() failed"); +} + +void +oct_rxq_clear_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_t *dev = rxq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_reset (nix, crq->rq.qid, 1))) + oct_roc_err (dev, rrv, + "roc_nix_stats_queue_reset() failed for rx queue %u", + rxq->queue_id); +} + +void +oct_txq_clear_counters 
(vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_reset (nix, ctq->sq.qid, 0))) + oct_roc_err (dev, rrv, + "roc_nix_stats_queue_reset() failed for tx queue %u", + txq->queue_id); +} diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c new file mode 100644 index 00000000000..8796704edf4 --- /dev/null +++ b/src/plugins/dev_octeon/crypto.c @@ -0,0 +1,1754 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include <vnet/dev/dev.h> +#include <vnet/devices/devices.h> +#include <dev_octeon/octeon.h> +#include <dev_octeon/crypto.h> +#include <base/roc_api.h> +#include <common.h> + +oct_crypto_main_t oct_crypto_main; +oct_crypto_dev_t oct_crypto_dev; + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "octeon", + .subclass_name = "crypto", +}; + +static_always_inline void +oct_map_keyindex_to_session (oct_crypto_sess_t *sess, u32 key_index, u8 type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey; + + ckey = vec_elt_at_index (ocm->keys[type], key_index); + + ckey->sess = sess; + sess->key_index = key_index; +} + +static_always_inline oct_crypto_sess_t * +oct_crypto_session_alloc (vlib_main_t *vm, u8 type) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_sess_t *addr = NULL; + oct_crypto_main_t *ocm; + oct_crypto_dev_t *ocd; + u32 size; + + ocm = &oct_crypto_main; + ocd = ocm->crypto_dev[type]; + + size = sizeof (oct_crypto_sess_t); + + addr = oct_plt_init_param.oct_plt_zmalloc (size, CLIB_CACHE_LINE_BYTES); + if (addr == NULL) + { + log_err (ocd->dev, "Failed to allocate crypto session memory"); + return NULL; + } + + return addr; +} + +static_always_inline i32 +oct_crypto_session_create (vlib_main_t *vm, 
vnet_crypto_key_index_t key_index, + int op_type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_sess_t *session; + vnet_crypto_key_t *key; + oct_crypto_key_t *ckey; + + key = vnet_crypto_get_key (key_index); + + if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + { + /* + * Read crypto or integ key session. And map link key index to same. + */ + if (key->index_crypto != UINT32_MAX) + { + ckey = vec_elt_at_index (ocm->keys[op_type], key->index_crypto); + session = ckey->sess; + } + else if (key->index_integ != UINT32_MAX) + { + ckey = vec_elt_at_index (ocm->keys[op_type], key->index_integ); + session = ckey->sess; + } + else + return -1; + } + else + { + session = oct_crypto_session_alloc (vm, op_type); + if (session == NULL) + return -1; + } + + oct_map_keyindex_to_session (session, key_index, op_type); + return 0; +} + +void +oct_crypto_key_del_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey_linked; + oct_crypto_key_t *ckey; + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + if (ckey->sess) + { + /* + * If in case link algo is pointing to same sesison, reset the pointer. + */ + if (ckey->sess->key_index != key_index) + { + ckey_linked = vec_elt_at_index ( + ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], ckey->sess->key_index); + ckey_linked->sess = NULL; + } + oct_plt_init_param.oct_plt_free (ckey->sess); + ckey->sess = NULL; + } + + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + if (ckey->sess) + { + /* + * If in case link algo is pointing to same sesison, reset the pointer. 
+ */ + if (ckey->sess->key_index != key_index) + { + ckey_linked = vec_elt_at_index ( + ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], ckey->sess->key_index); + ckey_linked->sess = NULL; + } + + oct_plt_init_param.oct_plt_free (ckey->sess); + ckey->sess = NULL; + } +} + +void +oct_crypto_key_add_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey; + oct_crypto_dev_t *ocd = &oct_crypto_dev; + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + if (ckey->sess == NULL) + { + if (oct_crypto_session_create (vm, key_index, + VNET_CRYPTO_OP_TYPE_ENCRYPT)) + { + log_err (ocd->dev, "Unable to create crypto session"); + return; + } + } + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + if (ckey->sess == NULL) + { + if (oct_crypto_session_create (vm, key_index, + VNET_CRYPTO_OP_TYPE_DECRYPT)) + { + log_err (ocd->dev, "Unable to create crypto session"); + return; + } + } +} + +void +oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + + if (kop == VNET_CRYPTO_KEY_OP_DEL) + { + oct_crypto_key_del_handler (vm, idx); + return; + } + oct_crypto_key_add_handler (vm, idx); + + ocm->started = 1; +} + +static_always_inline void +oct_crypto_session_free (vlib_main_t *vm, oct_crypto_sess_t *sess) +{ + extern oct_plt_init_param_t oct_plt_init_param; + + oct_plt_init_param.oct_plt_free (sess); + return; +} + +#ifdef PLATFORM_OCTEON9 +static inline void +oct_cpt_inst_submit (struct cpt_inst_s *inst, uint64_t lmtline, + uint64_t io_addr) +{ + uint64_t lmt_status; + + do + { + /* Copy CPT command to LMTLINE */ + roc_lmt_mov64 ((void *) lmtline, inst); + + /* + * Make sure compiler does not reorder memcpy and ldeor. 
+ * LMTST transactions are always flushed from the write + * buffer immediately, a DMB is not required to push out + * LMTSTs. + */ + asm volatile ("dmb oshst" : : : "memory"); + lmt_status = roc_lmt_submit_ldeor (io_addr); + } + while (lmt_status == 0); +} +#endif + +static_always_inline void +oct_crypto_burst_submit (oct_crypto_dev_t *crypto_dev, struct cpt_inst_s *inst, + u32 n_left) +{ + u64 lmt_base; + u64 io_addr; + u32 count; + +#ifdef PLATFORM_OCTEON9 + lmt_base = crypto_dev->lf.lmt_base; + io_addr = crypto_dev->lf.io_addr; + + for (count = 0; count < n_left; count++) + oct_cpt_inst_submit (inst + count, lmt_base, io_addr); +#else + u64 *lmt_line[OCT_MAX_LMT_SZ]; + u64 lmt_arg, core_lmt_id; + + lmt_base = crypto_dev->lmtline.lmt_base; + io_addr = crypto_dev->lmtline.io_addr; + + ROC_LMT_CPT_BASE_ID_GET (lmt_base, core_lmt_id); + + for (count = 0; count < 16; count++) + { + lmt_line[count] = OCT_CPT_LMT_GET_LINE_ADDR (lmt_base, count); + } + + while (n_left > OCT_MAX_LMT_SZ) + { + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile ("dmb oshst" ::: "memory"); + + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id; + + for (count = 0; count < 16; count++) + { + roc_lmt_mov_seg ((void *) lmt_line[count], inst + count, + CPT_LMT_SIZE_COPY); + } + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (OCT_MAX_LMT_SZ - 1) << 12; + + roc_lmt_submit_steorl (lmt_arg, io_addr); + + inst += OCT_MAX_LMT_SZ; + n_left -= OCT_MAX_LMT_SZ; + } + + if (n_left > 0) + { + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. 
/*
 * Append the segments of an iovec onto a CN9K-style SG (sglist) component
 * array, starting at component index @i.
 *
 * @param list	       SG component array being filled (4 entries per comp).
 * @param i	       Current component index; returned updated.
 * @param from	       Source iovec (buffer list) to walk.
 * @param from_offset  Bytes to skip at the head of the iovec.
 * @param psize	       In: bytes still wanted; out: bytes NOT satisfied
 *		       (non-zero on return means the iovec was too short).
 * @param extra_buf    Optional buffer (e.g. AAD) to splice in at
 *		       @extra_offset into the data stream; NULL to disable.
 * @param extra_offset Byte offset at which @extra_buf is inserted.
 * @return	       Updated component index.
 *
 * Lengths/pointers are stored big-endian (clib_host_to_net_*) as the CPT
 * hardware expects for sglist mode.
 */
static_always_inline uint32_t
oct_crypto_fill_sg_comp_from_iov (struct roc_sglist_comp *list, uint32_t i,
                                  struct roc_se_iov_ptr *from,
                                  uint32_t from_offset, uint32_t *psize,
                                  struct roc_se_buf_ptr *extra_buf,
                                  uint32_t extra_offset)
{
  uint32_t extra_len = extra_buf ? extra_buf->size : 0;
  uint32_t size = *psize;
  int32_t j;

  for (j = 0; j < from->buf_cnt; j++)
    {
      /* Each roc_sglist_comp packs 4 (len, ptr) slots: comp = i >> 2,
       * slot = i % 4. */
      struct roc_sglist_comp *to = &list[i >> 2];
      uint32_t buf_sz = from->bufs[j].size;
      void *vaddr = from->bufs[j].vaddr;
      uint64_t e_vaddr;
      uint32_t e_len;

      if (PREDICT_FALSE (from_offset))
        {
          /* Skip whole leading buffers until the offset is consumed. */
          if (from_offset >= buf_sz)
            {
              from_offset -= buf_sz;
              continue;
            }
          e_vaddr = (uint64_t) vaddr + from_offset;
          e_len = clib_min ((buf_sz - from_offset), size);
          from_offset = 0;
        }
      else
        {
          e_vaddr = (uint64_t) vaddr;
          e_len = clib_min (buf_sz, size);
        }

      to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len);
      to->ptr[i % 4] = clib_host_to_net_u64 (e_vaddr);

      if (extra_len && (e_len >= extra_offset))
        {
          /* Break the data at given offset */
          uint32_t next_len = e_len - extra_offset;
          uint64_t next_vaddr = e_vaddr + extra_offset;

          if (!extra_offset)
            {
              /* Insertion point is at the start of this segment: the slot
               * written above will be overwritten by the extra buffer. */
              i--;
            }
          else
            {
              /* Truncate the current slot to end at the insertion point. */
              e_len = extra_offset;
              size -= e_len;
              to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len);
            }

          extra_len = clib_min (extra_len, size);
          /* Insert extra data ptr */
          if (extra_len)
            {
              i++;
              to = &list[i >> 2];
              to->u.s.len[i % 4] = clib_host_to_net_u16 (extra_len);
              to->ptr[i % 4] =
                clib_host_to_net_u64 ((uint64_t) extra_buf->vaddr);
              size -= extra_len;
            }

          next_len = clib_min (next_len, size);
          /* insert the rest of the data */
          if (next_len)
            {
              i++;
              to = &list[i >> 2];
              to->u.s.len[i % 4] = clib_host_to_net_u16 (next_len);
              to->ptr[i % 4] = clib_host_to_net_u64 (next_vaddr);
              size -= next_len;
            }
          extra_len = 0;
        }
      else
        {
          size -= e_len;
        }
      /* NOTE(review): subtracting the *remaining* size from extra_offset
       * mirrors the upstream cnxk driver; looks odd but matches it —
       * confirm against the reference implementation before changing. */
      if (extra_offset)
        extra_offset -= size;
      i++;

      if (PREDICT_FALSE (!size))
        break;
    }

  *psize = size;
  return (uint32_t) i;
}

/*
 * CN10K (sg2list) counterpart of oct_crypto_fill_sg_comp_from_iov.
 * Identical splicing logic, but each roc_sg2list_comp packs 3 slots
 * (comp = i / 3, slot = i % 3), values are stored in host byte order,
 * and valid_segs tracks how many slots of the component are in use.
 */
static_always_inline u32
oct_crypto_fill_sg2_comp_from_iov (struct roc_sg2list_comp *list, u32 i,
                                   struct roc_se_iov_ptr *from,
                                   u32 from_offset, u32 *psize,
                                   struct roc_se_buf_ptr *extra_buf,
                                   u32 extra_offset)
{
  u32 extra_len = extra_buf ? extra_buf->size : 0;
  u32 size = *psize, buf_sz, e_len, next_len;
  struct roc_sg2list_comp *to;
  u64 e_vaddr, next_vaddr;
  void *vaddr;
  i32 j;

  for (j = 0; j < from->buf_cnt; j++)
    {
      to = &list[i / 3];
      buf_sz = from->bufs[j].size;
      vaddr = from->bufs[j].vaddr;

      if (PREDICT_FALSE (from_offset))
        {
          if (from_offset >= buf_sz)
            {
              from_offset -= buf_sz;
              continue;
            }
          e_vaddr = (u64) vaddr + from_offset;
          e_len = clib_min ((buf_sz - from_offset), size);
          from_offset = 0;
        }
      else
        {
          e_vaddr = (u64) vaddr;
          e_len = clib_min (buf_sz, size);
        }

      to->u.s.len[i % 3] = (e_len);
      to->ptr[i % 3] = (e_vaddr);
      to->u.s.valid_segs = (i % 3) + 1;

      if (extra_len && (e_len >= extra_offset))
        {
          /* Break the data at given offset */
          next_len = e_len - extra_offset;
          next_vaddr = e_vaddr + extra_offset;

          if (!extra_offset)
            i--;
          else
            {
              e_len = extra_offset;
              size -= e_len;
              to->u.s.len[i % 3] = (e_len);
            }

          extra_len = clib_min (extra_len, size);
          /* Insert extra data ptr */
          if (extra_len)
            {
              i++;
              to = &list[i / 3];
              to->u.s.len[i % 3] = (extra_len);
              to->ptr[i % 3] = ((u64) extra_buf->vaddr);
              to->u.s.valid_segs = (i % 3) + 1;
              size -= extra_len;
            }

          next_len = clib_min (next_len, size);
          /* insert the rest of the data */
          if (next_len)
            {
              i++;
              to = &list[i / 3];
              to->u.s.len[i % 3] = (next_len);
              to->ptr[i % 3] = (next_vaddr);
              to->u.s.valid_segs = (i % 3) + 1;
              size -= next_len;
            }
          extra_len = 0;
        }
      else
        size -= e_len;

      if (extra_offset)
        extra_offset -= size;

      i++;

      if (PREDICT_FALSE (!size))
        break;
    }

  *psize = size;
  return (u32) i;
}

/* Append one (ptr, size) buffer as an SG component slot (CN9K layout,
 * big-endian fields). Returns the incremented slot index. */
static_always_inline uint32_t
oct_crypto_fill_sg_comp_from_buf (struct roc_sglist_comp *list, uint32_t i,
                                  struct roc_se_buf_ptr *from)
{
  struct roc_sglist_comp *to = &list[i >> 2];

  to->u.s.len[i % 4] = clib_host_to_net_u16 (from->size);
  to->ptr[i % 4] = clib_host_to_net_u64 ((uint64_t) from->vaddr);
  return ++i;
}

/* Append a raw (dma_addr, size) pair as an SG component slot (CN9K
 * layout, big-endian fields). Returns the incremented slot index. */
static_always_inline uint32_t
oct_crypto_fill_sg_comp (struct roc_sglist_comp *list, uint32_t i,
                         uint64_t dma_addr, uint32_t size)
{
  struct roc_sglist_comp *to = &list[i >> 2];

  to->u.s.len[i % 4] = clib_host_to_net_u16 (size);
  to->ptr[i % 4] = clib_host_to_net_u64 (dma_addr);
  return ++i;
}

/* Append a raw (dma_addr, size) pair as an SG2 component slot (CN10K
 * layout, host byte order, 3 slots per component). */
static_always_inline u32
oct_crypto_fill_sg2_comp (struct roc_sg2list_comp *list, u32 index,
                          u64 dma_addr, u32 size)
{
  struct roc_sg2list_comp *to = &list[index / 3];

  to->u.s.len[index % 3] = (size);
  to->ptr[index % 3] = (dma_addr);
  to->u.s.valid_segs = (index % 3) + 1;
  return ++index;
}

/* Append one (ptr, size) buffer as an SG2 component slot (CN10K layout). */
static_always_inline u32
oct_crypto_fill_sg2_comp_from_buf (struct roc_sg2list_comp *list, u32 index,
                                   struct roc_se_buf_ptr *from)
{
  struct roc_sg2list_comp *to = &list[index / 3];

  to->u.s.len[index % 3] = (from->size);
  to->ptr[index % 3] = ((u64) from->vaddr);
  to->u.s.valid_segs = (index % 3) + 1;
  return ++index;
}
/*
 * Build a CN9K (OCTEON 9) scatter-gather CPT instruction for a
 * flexi-crypto (FC) operation.
 *
 * Lays out, inside params->meta_buf:
 *   [offset control word | IV (8-aligned)] [SG header] [gather list]
 *   [scatter list]
 * then points inst->dptr at the SG header and sets w4.dlen.
 *
 * @param params	  FC parameters: src/dst iovecs, ctx, meta buffer,
 *			  optional AAD/MAC buffers.
 * @param inst		  CPT instruction being built (w4/dptr written).
 * @param offset_ctrl	  Pre-encoded (big-endian) encr/iv/auth offsets word.
 * @param iv_s / iv_len	  IV bytes copied after the offset control word.
 * @param pack_iv, pdcp_alg_type, pdcp_flag  Unused here (kept for parity
 *			  with the PDCP-capable reference API).
 * @param inputlen/outputlen  Total DMA input/output lengths incl. IV.
 * @param passthrough_len Bytes before AAD insertion point (AEAD case).
 * @param req_flags	  ROC_SE_VALID_* flags.
 * @param decrypt	  Non-zero for decrypt (MAC is gathered as input).
 * @return 0 on success, -1 if the iovecs are too short or the SG list
 *	   exceeds ROC_SG_MAX_DLEN_SIZE.
 */
static_always_inline int __attribute__ ((unused))
oct_crypto_sg_inst_prep (struct roc_se_fc_params *params,
                         struct cpt_inst_s *inst, uint64_t offset_ctrl,
                         const uint8_t *iv_s, int iv_len, uint8_t pack_iv,
                         uint8_t pdcp_alg_type, int32_t inputlen,
                         int32_t outputlen, uint32_t passthrough_len,
                         uint32_t req_flags, int pdcp_flag, int decrypt)
{
  struct roc_sglist_comp *gather_comp, *scatter_comp;
  void *m_vaddr = params->meta_buf.vaddr;
  struct roc_se_buf_ptr *aad_buf = NULL;
  uint32_t mac_len = 0, aad_len = 0;
  struct roc_se_ctx *se_ctx;
  uint32_t i, g_size_bytes;
  uint64_t *offset_vaddr;
  uint32_t s_size_bytes;
  uint8_t *in_buffer;
  uint32_t size;
  uint8_t *iv_d;
  int ret = 0;

  se_ctx = params->ctx;
  mac_len = se_ctx->mac_len;

  if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF))
    {
      /* We don't support both AAD and auth data separately */
      aad_len = params->aad_buf.size;
      aad_buf = &params->aad_buf;
    }

  /* save space for iv */
  offset_vaddr = m_vaddr;

  m_vaddr =
    (uint8_t *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8);

  inst->w4.s.opcode_major |= (uint64_t) ROC_DMA_MODE_SG;

  /* iv offset is 0 */
  *offset_vaddr = offset_ctrl;

  iv_d = ((uint8_t *) offset_vaddr + ROC_SE_OFF_CTRL_LEN);

  if (PREDICT_TRUE (iv_len))
    memcpy (iv_d, iv_s, iv_len);

  /* DPTR has SG list */

  /* TODO Add error check if space will be sufficient */
  gather_comp = (struct roc_sglist_comp *) ((uint8_t *) m_vaddr + 8);

  /*
   * Input Gather List
   */
  i = 0;

  /* Offset control word followed by iv */

  i = oct_crypto_fill_sg_comp (gather_comp, i, (uint64_t) offset_vaddr,
                               ROC_SE_OFF_CTRL_LEN + iv_len);

  /* Add input data */
  if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF))
    {
      /* Decrypt with separate MAC: gather payload, then the MAC to
       * verify, from the dedicated mac_buf. */
      size = inputlen - iv_len - mac_len;

      if (PREDICT_TRUE (size))
        {
          uint32_t aad_offset = aad_len ? passthrough_len : 0;
          i = oct_crypto_fill_sg_comp_from_iov (
            gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset);
          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer"
                            " space, size %d needed",
                            size);
              return -1;
            }
        }

      if (mac_len)
        i =
          oct_crypto_fill_sg_comp_from_buf (gather_comp, i, &params->mac_buf);
    }
  else
    {
      /* input data */
      size = inputlen - iv_len;
      if (size)
        {
          uint32_t aad_offset = aad_len ? passthrough_len : 0;
          i = oct_crypto_fill_sg_comp_from_iov (
            gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset);
          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer space,"
                            " size %d needed",
                            size);
              return -1;
            }
        }
    }

  /* SG header: [0][0][gather count][scatter count], big-endian u16s. */
  in_buffer = m_vaddr;
  ((uint16_t *) in_buffer)[0] = 0;
  ((uint16_t *) in_buffer)[1] = 0;
  ((uint16_t *) in_buffer)[2] = clib_host_to_net_u16 (i);

  g_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp);
  /*
   * Output Scatter List
   */

  i = 0;
  scatter_comp =
    (struct roc_sglist_comp *) ((uint8_t *) gather_comp + g_size_bytes);

  i = oct_crypto_fill_sg_comp (
    scatter_comp, i, (uint64_t) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len);

  /* Add output data */
  if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF))
    {
      size = outputlen - iv_len - mac_len;
      if (size)
        {

          uint32_t aad_offset = aad_len ? passthrough_len : 0;

          i = oct_crypto_fill_sg_comp_from_iov (
            scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset);
          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer space,"
                            " size %d needed",
                            size);
              return -1;
            }
        }

      /* mac data */
      if (mac_len)
        i =
          oct_crypto_fill_sg_comp_from_buf (scatter_comp, i, &params->mac_buf);
    }
  else
    {
      /* Output including mac */
      size = outputlen - iv_len;

      if (size)
        {
          uint32_t aad_offset = aad_len ? passthrough_len : 0;

          i = oct_crypto_fill_sg_comp_from_iov (
            scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset);

          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer space,"
                            " size %d needed",
                            size);
              return -1;
            }
        }
    }
  ((uint16_t *) in_buffer)[3] = clib_host_to_net_u16 (i);
  s_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp);

  size = g_size_bytes + s_size_bytes + ROC_SG_LIST_HDR_SIZE;

  /* This is DPTR len in case of SG mode */
  inst->w4.s.dlen = size;

  if (PREDICT_FALSE (size > ROC_SG_MAX_DLEN_SIZE))
    {
      clib_warning (
        "Cryptodev: Exceeds max supported components. Reduce segments");
      ret = -1;
    }

  inst->dptr = (uint64_t) in_buffer;
  return ret;
}
/*
 * Build a CN10K (OCTEON 10) SG2 CPT instruction for a flexi-crypto (FC)
 * operation. Same layout idea as oct_crypto_sg_inst_prep, but gather and
 * scatter lists are addressed separately through instruction words w5
 * (dptr + gather_sz) and w6 (rptr + scatter_sz) instead of a single
 * in-memory SG header, and dlen is the raw input length.
 *
 * Parameters mirror oct_crypto_sg_inst_prep; pack_iv, pdcp_alg_type and
 * pdcp_flag are likewise unused here.
 * @return 0 on success, -1 on short iovecs or when either list exceeds
 *	   15 components (sizes are 4-bit fields in w5/w6).
 */
static_always_inline int __attribute__ ((unused))
oct_crypto_sg2_inst_prep (struct roc_se_fc_params *params,
                          struct cpt_inst_s *inst, u64 offset_ctrl,
                          const u8 *iv_s, int iv_len, u8 pack_iv,
                          u8 pdcp_alg_type, i32 inputlen, i32 outputlen,
                          u32 passthrough_len, u32 req_flags, int pdcp_flag,
                          int decrypt)
{
  u32 mac_len = 0, aad_len = 0, size, index, g_size_bytes;
  struct roc_sg2list_comp *gather_comp, *scatter_comp;
  void *m_vaddr = params->meta_buf.vaddr;
  struct roc_se_buf_ptr *aad_buf = NULL;
  union cpt_inst_w5 cpt_inst_w5;
  union cpt_inst_w6 cpt_inst_w6;
  u16 scatter_sz, gather_sz;
  struct roc_se_ctx *se_ctx;
  u64 *offset_vaddr;
  int ret = 0;
  u8 *iv_d;

  se_ctx = params->ctx;
  mac_len = se_ctx->mac_len;

  if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF))
    {
      /* We don't support both AAD and auth data separately */
      aad_len = params->aad_buf.size;
      aad_buf = &params->aad_buf;
    }

  /* save space for iv */
  offset_vaddr = m_vaddr;

  m_vaddr = (u8 *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8);

  inst->w4.s.opcode_major |= (u64) ROC_DMA_MODE_SG;

  /* This is DPTR len in case of SG mode */
  inst->w4.s.dlen = inputlen + ROC_SE_OFF_CTRL_LEN;

  /* iv offset is 0 */
  *offset_vaddr = offset_ctrl;
  iv_d = ((u8 *) offset_vaddr + ROC_SE_OFF_CTRL_LEN);

  if (PREDICT_TRUE (iv_len))
    clib_memcpy (iv_d, iv_s, iv_len);

  /* DPTR has SG list */

  gather_comp = (struct roc_sg2list_comp *) ((u8 *) m_vaddr);

  /*
   * Input Gather List
   */
  index = 0;

  /* Offset control word followed by iv */

  index = oct_crypto_fill_sg2_comp (gather_comp, index, (u64) offset_vaddr,
                                    ROC_SE_OFF_CTRL_LEN + iv_len);

  /* Add input data */
  if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF))
    {
      size = inputlen - iv_len - mac_len;
      if (size)
        {
          /* input data only */
          u32 aad_offset = aad_len ? passthrough_len : 0;

          index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index,
                                                     params->src_iov, 0, &size,
                                                     aad_buf, aad_offset);

          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer"
                            " space, size %d needed",
                            size);
              return -1;
            }
        }

      /* mac data */
      if (mac_len)
        index = oct_crypto_fill_sg2_comp_from_buf (gather_comp, index,
                                                   &params->mac_buf);
    }
  else
    {
      /* input data */
      size = inputlen - iv_len;
      if (size)
        {
          u32 aad_offset = aad_len ? passthrough_len : 0;

          index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index,
                                                     params->src_iov, 0, &size,
                                                     aad_buf, aad_offset);
          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer space,"
                            " size %d needed",
                            size);
              return -1;
            }
        }
    }

  gather_sz = (index + 2) / 3;
  g_size_bytes = gather_sz * sizeof (struct roc_sg2list_comp);

  /*
   * Output Scatter List
   */

  index = 0;
  scatter_comp =
    (struct roc_sg2list_comp *) ((u8 *) gather_comp + g_size_bytes);

  index = oct_crypto_fill_sg2_comp (
    scatter_comp, index, (u64) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len);

  /* Add output data */
  if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF))
    {
      size = outputlen - iv_len - mac_len;
      if (size)
        {

          u32 aad_offset = aad_len ? passthrough_len : 0;

          index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index,
                                                     params->dst_iov, 0, &size,
                                                     aad_buf, aad_offset);
          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer space,"
                            " size %d needed",
                            size);
              return -1;
            }
        }

      /* mac data */
      if (mac_len)
        index = oct_crypto_fill_sg2_comp_from_buf (scatter_comp, index,
                                                   &params->mac_buf);
    }
  else
    {
      /* Output including mac */
      size = outputlen - iv_len;
      if (size)
        {
          u32 aad_offset = aad_len ? passthrough_len : 0;

          index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index,
                                                     params->dst_iov, 0, &size,
                                                     aad_buf, aad_offset);

          if (PREDICT_FALSE (size))
            {
              clib_warning ("Cryptodev: Insufficient buffer space,"
                            " size %d needed",
                            size);
              return -1;
            }
        }
    }

  scatter_sz = (index + 2) / 3;

  cpt_inst_w5.s.gather_sz = gather_sz;
  cpt_inst_w6.s.scatter_sz = scatter_sz;

  cpt_inst_w5.s.dptr = (u64) gather_comp;
  cpt_inst_w6.s.rptr = (u64) scatter_comp;

  inst->w5.u64 = cpt_inst_w5.u64;
  inst->w6.u64 = cpt_inst_w6.u64;

  /* gather_sz/scatter_sz are 4-bit fields; anything >= 16 would have
   * been truncated above, so reject it. */
  if (PREDICT_FALSE ((scatter_sz >> 4) || (gather_sz >> 4)))
    {
      clib_warning (
        "Cryptodev: Exceeds max supported components. Reduce segments");
      ret = -1;
    }

  return ret;
}
/*
 * Translate packed offsets/lengths and FC parameters into a complete CPT
 * flexi-crypto instruction: computes DMA input/output lengths, encodes
 * the offset control word, fills w4 (opcode, param1/param2, dlen) and
 * delegates SG list construction to the platform-specific prep routine.
 *
 * @param flags	     ROC_SE_VALID_* request flags.
 * @param d_offs     Packed offsets: encr offset in bits 16+, auth offset
 *		     in the low bits (ROC_SE_*_OFFSET extractors).
 * @param d_lens     Packed lengths: encr len in the high 32 bits, auth
 *		     len in the low 32 (ROC_SE_*_DLEN extractors).
 * @param fc_params  Buffers/context collected by the caller.
 * @param inst	     CPT instruction to fill.
 * @param is_decrypt Non-zero for the decrypt direction.
 * @return 0 on success, -1 on unsupported offsets or SG prep failure.
 */
static_always_inline int
oct_crypto_cpt_hmac_prep (u32 flags, u64 d_offs, u64 d_lens,
                          struct roc_se_fc_params *fc_params,
                          struct cpt_inst_s *inst, u8 is_decrypt)
{
  u32 encr_data_len, auth_data_len, aad_len = 0;
  i32 inputlen, outputlen, enc_dlen, auth_dlen;
  u32 encr_offset, auth_offset, iv_offset = 0;
  union cpt_inst_w4 cpt_inst_w4;
  u32 cipher_type;
  struct roc_se_ctx *se_ctx;
  u32 passthrough_len = 0;
  const u8 *src = NULL;
  u64 offset_ctrl;
  u8 iv_len = 16;
  u8 op_minor;
  u32 mac_len;
  int ret;

  encr_offset = ROC_SE_ENCR_OFFSET (d_offs);
  auth_offset = ROC_SE_AUTH_OFFSET (d_offs);
  encr_data_len = ROC_SE_ENCR_DLEN (d_lens);
  auth_data_len = ROC_SE_AUTH_DLEN (d_lens);

  if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF))
    {
      /* We don't support both AAD and auth data separately */
      auth_data_len = 0;
      auth_offset = 0;
      aad_len = fc_params->aad_buf.size;
    }

  se_ctx = fc_params->ctx;
  cipher_type = se_ctx->enc_cipher;
  mac_len = se_ctx->mac_len;
  cpt_inst_w4.u64 = se_ctx->template_w4.u64;
  op_minor = cpt_inst_w4.s.opcode_minor;

  if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF))
    {
      /*
       * When AAD is given, data above encr_offset is pass through
       * Since AAD is given as separate pointer and not as offset,
       * this is a special case as we need to fragment input data
       * into passthrough + encr_data and then insert AAD in between.
       */
      passthrough_len = encr_offset;
      auth_offset = passthrough_len + iv_len;
      encr_offset = passthrough_len + aad_len + iv_len;
      auth_data_len = aad_len + encr_data_len;
    }
  else
    {
      /* Offsets are relative to the start of DPTR, which begins with
       * the offset-control word + IV. */
      encr_offset += iv_len;
      auth_offset += iv_len;
    }

  auth_dlen = auth_offset + auth_data_len;
  enc_dlen = encr_data_len + encr_offset;

  cpt_inst_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC;

  if (is_decrypt)
    {
      cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT;

      /* Decrypt consumes the MAC as input; the larger of the auth/encr
       * spans defines the DMA window. */
      if (auth_dlen > enc_dlen)
        {
          inputlen = auth_dlen + mac_len;
          outputlen = auth_dlen;
        }
      else
        {
          inputlen = enc_dlen + mac_len;
          outputlen = enc_dlen;
        }
    }
  else
    {
      cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT;

      /* Round up to 16 bytes alignment */
      /* NOTE(review): comment says 16 but the code aligns to 8 — this
       * matches the cipher block size of DES3 and the upstream cnxk
       * driver; confirm before "fixing". */
      if (PREDICT_FALSE (encr_data_len & 0xf))
        {
          if (PREDICT_TRUE (cipher_type == ROC_SE_AES_CBC) ||
              (cipher_type == ROC_SE_DES3_CBC))
            enc_dlen = PLT_ALIGN_CEIL (encr_data_len, 8) + encr_offset;
        }

      /*
       * auth_dlen is larger than enc_dlen in Authentication cases
       * like AES GMAC Authentication
       */
      if (PREDICT_FALSE (auth_dlen > enc_dlen))
        {
          inputlen = auth_dlen;
          outputlen = auth_dlen + mac_len;
        }
      else
        {
          inputlen = enc_dlen;
          outputlen = enc_dlen + mac_len;
        }
    }

  if (op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST)
    outputlen = enc_dlen;

  cpt_inst_w4.s.param1 = encr_data_len;
  cpt_inst_w4.s.param2 = auth_data_len;

  /* Offset-control word fields are 16/8/8 bits wide. */
  if (PREDICT_FALSE ((encr_offset >> 16) || (iv_offset >> 8) ||
                     (auth_offset >> 8)))
    {
      clib_warning ("Cryptodev: Offset not supported");
      clib_warning (
        "Cryptodev: enc_offset: %d, iv_offset : %d, auth_offset: %d",
        encr_offset, iv_offset, auth_offset);
      return -1;
    }

  offset_ctrl = clib_host_to_net_u64 (
    ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset));

  src = fc_params->iv_buf;

  inst->w4.u64 = cpt_inst_w4.u64;

#ifdef PLATFORM_OCTEON9
  ret = oct_crypto_sg_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0,
                                 0, inputlen, outputlen, passthrough_len,
                                 flags, 0, is_decrypt);
#else
  ret = oct_crypto_sg2_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0,
                                  0, inputlen, outputlen, passthrough_len,
                                  flags, 0, is_decrypt);
#endif

  if (PREDICT_FALSE (ret))
    return -1;

  return 0;
}
/*
 * Gather everything needed for one async frame element into
 * roc_se_fc_params and hand off to oct_crypto_cpt_hmac_prep.
 *
 * Builds the source iovec from the (possibly chained) vlib buffer,
 * packs cipher/auth offsets and lengths into d_offs/d_lens, and wires
 * up IV/AAD/MAC pointers from the frame element.
 *
 * @param sess		  Initialised OCT crypto session.
 * @param inst		  CPT instruction to fill.
 * @param is_aead	  True for AEAD (GCM) requests.
 * @param aad_length	  AAD length in bytes (AEAD only).
 * @param payload	  Start of DMA data in the first buffer.
 * @param elts		  Frame element (iv/aad/tag/digest pointers).
 * @param mdata		  Per-element scatter-gather metadata area.
 * @param cipher_data_length/offset, auth_data_length/offset
 *			  Spans relative to @payload.
 * @param b / adj_len	  First vlib buffer and bytes already skipped.
 * @return 0 on success, -1 on failure (propagated from hmac_prep).
 *
 * NOTE(review): src[] must be large enough for buf_cnt chained buffers;
 * no bound check is done against SRC_IOV_SIZE here — confirm callers
 * cap chain length.
 */
static_always_inline int
oct_crypto_fill_fc_params (oct_crypto_sess_t *sess, struct cpt_inst_s *inst,
                           const bool is_aead, u8 aad_length, u8 *payload,
                           vnet_crypto_async_frame_elt_t *elts, void *mdata,
                           u32 cipher_data_length, u32 cipher_data_offset,
                           u32 auth_data_length, u32 auth_data_offset,
                           vlib_buffer_t *b, u16 adj_len)
{
  struct roc_se_fc_params fc_params = { 0 };
  struct roc_se_ctx *ctx = &sess->cpt_ctx;
  u64 d_offs = 0, d_lens = 0;
  vlib_buffer_t *buffer = b;
  u32 flags = 0, index = 0;
  u8 op_minor = 0, cpt_op;
  char src[SRC_IOV_SIZE];
  u32 *iv_buf;

  cpt_op = sess->cpt_op;

  if (is_aead)
    {
      flags |= ROC_SE_VALID_IV_BUF;
      /* GCM J0: set the 32-bit counter word of the IV block to 1. */
      iv_buf = (u32 *) elts->iv;
      iv_buf[3] = clib_host_to_net_u32 (0x1);
      fc_params.iv_buf = elts->iv;

      d_offs = cipher_data_offset;
      d_offs = d_offs << 16;

      d_lens = cipher_data_length;
      d_lens = d_lens << 32;

      fc_params.aad_buf.vaddr = elts->aad;
      fc_params.aad_buf.size = aad_length;
      flags |= ROC_SE_VALID_AAD_BUF;

      if (sess->cpt_ctx.mac_len)
        {
          flags |= ROC_SE_VALID_MAC_BUF;
          fc_params.mac_buf.size = sess->cpt_ctx.mac_len;
          fc_params.mac_buf.vaddr = elts->tag;
        }
    }
  else
    {
      op_minor = ctx->template_w4.s.opcode_minor;

      flags |= ROC_SE_VALID_IV_BUF;

      fc_params.iv_buf = elts->iv;

      d_offs = cipher_data_offset;
      d_offs = (d_offs << 16) | auth_data_offset;

      d_lens = cipher_data_length;
      d_lens = (d_lens << 32) | auth_data_length;

      if (PREDICT_TRUE (sess->cpt_ctx.mac_len))
        {
          /* With HMAC_FIRST the digest is part of the data stream, so
           * no separate MAC buffer is supplied. */
          if (!(op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST))
            {
              flags |= ROC_SE_VALID_MAC_BUF;
              fc_params.mac_buf.size = sess->cpt_ctx.mac_len;
              fc_params.mac_buf.vaddr = elts->digest;
            }
        }
    }

  fc_params.ctx = &sess->cpt_ctx;

  fc_params.src_iov = (void *) src;

  fc_params.src_iov->bufs[index].vaddr = payload;
  fc_params.src_iov->bufs[index].size = b->current_length - adj_len;
  index++;

  /* Walk the buffer chain; each segment becomes one iovec entry. */
  while (buffer->flags & VLIB_BUFFER_NEXT_PRESENT)
    {
      buffer = vlib_get_buffer (vlib_get_main (), buffer->next_buffer);
      fc_params.src_iov->bufs[index].vaddr =
        buffer->data + buffer->current_data;
      fc_params.src_iov->bufs[index].size = buffer->current_length;
      index++;
    }

  fc_params.src_iov->buf_cnt = index;

  /* In-place operation: destination iovec aliases the source. */
  fc_params.dst_iov = (void *) src;

  fc_params.meta_buf.vaddr = mdata;
  fc_params.meta_buf.size = OCT_SCATTER_GATHER_BUFFER_SIZE;

  return oct_crypto_cpt_hmac_prep (flags, d_offs, d_lens, &fc_params, inst,
                                   cpt_op);
}

/*
 * Compose CPT instruction word 7: context pointer (fctx of the session)
 * plus the IE engine group taken from the ROC CPT device.
 */
static_always_inline u64
oct_cpt_inst_w7_get (oct_crypto_sess_t *sess, struct roc_cpt *roc_cpt)
{
  union cpt_inst_w7 inst_w7;

  inst_w7.u64 = 0;
  inst_w7.s.cptr = (u64) &sess->cpt_ctx.se_ctx.fctx;
  /* Set the engine group */
  inst_w7.s.egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE];

  return inst_w7.u64;
}
/*
 * Program a session for a linked cipher+auth (e.g. AES-CBC + HMAC-SHA*)
 * VNET async algorithm: maps the VNET alg id to ROC cipher/auth types
 * and digest length, then installs cipher and auth keys into the ROC
 * security context.
 *
 * @param vm	    vlib main (unused here).
 * @param sess	    Session to program.
 * @param key_index VNET crypto linked-key index.
 * @param type	    VNET_CRYPTO_OP_TYPE_ENCRYPT or _DECRYPT.
 * @return 0 on success, -1 on unknown algorithm or key-set failure.
 */
static_always_inline i32
oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
                                u32 key_index, u8 type)
{
  vnet_crypto_key_t *crypto_key, *auth_key;
  roc_se_cipher_type enc_type = 0;
  roc_se_auth_type auth_type = 0;
  vnet_crypto_key_t *key;
  u32 digest_len = ~0;
  i32 rv = 0;

  key = vnet_crypto_get_key (key_index);

  switch (key->async_alg)
    {
    case VNET_CRYPTO_ALG_AES_128_CBC_SHA1_TAG12:
    case VNET_CRYPTO_ALG_AES_192_CBC_SHA1_TAG12:
    case VNET_CRYPTO_ALG_AES_256_CBC_SHA1_TAG12:
      enc_type = ROC_SE_AES_CBC;
      auth_type = ROC_SE_SHA1_TYPE;
      digest_len = 12;
      break;
    case VNET_CRYPTO_ALG_AES_128_CBC_SHA224_TAG14:
    case VNET_CRYPTO_ALG_AES_192_CBC_SHA224_TAG14:
    case VNET_CRYPTO_ALG_AES_256_CBC_SHA224_TAG14:
      enc_type = ROC_SE_AES_CBC;
      auth_type = ROC_SE_SHA2_SHA224;
      digest_len = 14;
      break;
    case VNET_CRYPTO_ALG_AES_128_CBC_SHA256_TAG16:
    case VNET_CRYPTO_ALG_AES_192_CBC_SHA256_TAG16:
    case VNET_CRYPTO_ALG_AES_256_CBC_SHA256_TAG16:
      enc_type = ROC_SE_AES_CBC;
      auth_type = ROC_SE_SHA2_SHA256;
      digest_len = 16;
      break;
    case VNET_CRYPTO_ALG_AES_128_CBC_SHA384_TAG24:
    case VNET_CRYPTO_ALG_AES_192_CBC_SHA384_TAG24:
    case VNET_CRYPTO_ALG_AES_256_CBC_SHA384_TAG24:
      enc_type = ROC_SE_AES_CBC;
      auth_type = ROC_SE_SHA2_SHA384;
      digest_len = 24;
      break;
    case VNET_CRYPTO_ALG_AES_128_CBC_SHA512_TAG32:
    case VNET_CRYPTO_ALG_AES_192_CBC_SHA512_TAG32:
    case VNET_CRYPTO_ALG_AES_256_CBC_SHA512_TAG32:
      enc_type = ROC_SE_AES_CBC;
      auth_type = ROC_SE_SHA2_SHA512;
      digest_len = 32;
      break;
    case VNET_CRYPTO_ALG_AES_128_CTR_SHA1_TAG12:
    case VNET_CRYPTO_ALG_AES_192_CTR_SHA1_TAG12:
    case VNET_CRYPTO_ALG_AES_256_CTR_SHA1_TAG12:
      enc_type = ROC_SE_AES_CTR;
      auth_type = ROC_SE_SHA1_TYPE;
      digest_len = 12;
      break;
    case VNET_CRYPTO_ALG_3DES_CBC_MD5_TAG12:
      enc_type = ROC_SE_DES3_CBC;
      auth_type = ROC_SE_MD5_TYPE;
      digest_len = 12;
      break;
    case VNET_CRYPTO_ALG_3DES_CBC_SHA1_TAG12:
      enc_type = ROC_SE_DES3_CBC;
      auth_type = ROC_SE_SHA1_TYPE;
      digest_len = 12;
      break;
    case VNET_CRYPTO_ALG_3DES_CBC_SHA224_TAG14:
      enc_type = ROC_SE_DES3_CBC;
      auth_type = ROC_SE_SHA2_SHA224;
      digest_len = 14;
      break;
    case VNET_CRYPTO_ALG_3DES_CBC_SHA256_TAG16:
      enc_type = ROC_SE_DES3_CBC;
      auth_type = ROC_SE_SHA2_SHA256;
      digest_len = 16;
      break;
    case VNET_CRYPTO_ALG_3DES_CBC_SHA384_TAG24:
      enc_type = ROC_SE_DES3_CBC;
      auth_type = ROC_SE_SHA2_SHA384;
      digest_len = 24;
      break;
    case VNET_CRYPTO_ALG_3DES_CBC_SHA512_TAG32:
      enc_type = ROC_SE_DES3_CBC;
      auth_type = ROC_SE_SHA2_SHA512;
      digest_len = 32;
      break;
    default:
      clib_warning (
        "Cryptodev: Undefined link algo %u specified. Key index %u",
        key->async_alg, key_index);
      return -1;
    }

  if (type == VNET_CRYPTO_OP_TYPE_ENCRYPT)
    sess->cpt_ctx.ciph_then_auth = true;
  else
    sess->cpt_ctx.auth_then_ciph = true;

  sess->iv_length = 16;
  sess->cpt_op = type;

  crypto_key = vnet_crypto_get_key (key->index_crypto);
  rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, crypto_key->data,
                            vec_len (crypto_key->data));
  if (rv)
    {
      clib_warning ("Cryptodev: Error in setting cipher key for enc type %u",
                    enc_type);
      return -1;
    }

  auth_key = vnet_crypto_get_key (key->index_integ);

  rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, auth_key->data,
                            vec_len (auth_key->data), digest_len);
  if (rv)
    {
      clib_warning ("Cryptodev: Error in setting auth key for auth type %u",
                    auth_type);
      return -1;
    }

  return 0;
}

/*
 * Program a session for an AEAD (AES-GCM) VNET async algorithm: fixes
 * IV/MAC geometry for GCM, installs the cipher key, and sets an empty
 * auth key so the context carries the 16-byte digest length.
 * @return 0 on success, -1 on unknown algorithm or key-set failure.
 */
static_always_inline i32
oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
                                u32 key_index, u8 type)
{
  vnet_crypto_key_t *key = vnet_crypto_get_key (key_index);
  roc_se_cipher_type enc_type = 0;
  roc_se_auth_type auth_type = 0;
  u32 digest_len = ~0;
  i32 rv = 0;

  switch (key->async_alg)
    {
    case VNET_CRYPTO_ALG_AES_128_GCM:
    case VNET_CRYPTO_ALG_AES_192_GCM:
    case VNET_CRYPTO_ALG_AES_256_GCM:
      enc_type = ROC_SE_AES_GCM;
      sess->aes_gcm = 1;
      sess->iv_offset = 0;
      sess->iv_length = 16;
      sess->cpt_ctx.mac_len = 16;
      sess->cpt_op = type;
      digest_len = 16;
      break;
    default:
      clib_warning (
        "Cryptodev: Undefined cipher algo %u specified. Key index %u",
        key->async_alg, key_index);
      return -1;
    }

  rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data,
                            vec_len (key->data));
  if (rv)
    {
      clib_warning ("Cryptodev: Error in setting cipher key for enc type %u",
                    enc_type);
      return -1;
    }

  rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, NULL, 0, digest_len);
  if (rv)
    {
      clib_warning ("Cryptodev: Error in setting auth key for auth type %u",
                    auth_type);
      return -1;
    }

  return 0;
}
/*
 * Lazily complete a session: dispatch to the linked or AEAD updater
 * based on the key type, attach the crypto device for @op_type, and
 * precompute instruction word 7. Frees the session on failure.
 * @return 0 on success, -1 on failure (session is freed).
 */
static_always_inline i32
oct_crypto_session_init (vlib_main_t *vm, oct_crypto_sess_t *session,
                         vnet_crypto_key_index_t key_index, int op_type)
{
  oct_crypto_main_t *ocm = &oct_crypto_main;
  vnet_crypto_key_t *key;
  oct_crypto_dev_t *ocd;
  i32 rv = 0;

  ocd = ocm->crypto_dev[op_type];

  key = vnet_crypto_get_key (key_index);

  if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
    rv = oct_crypto_link_session_update (vm, session, key_index, op_type);
  else
    rv = oct_crypto_aead_session_update (vm, session, key_index, op_type);

  if (rv)
    {
      oct_crypto_session_free (vm, session);
      return -1;
    }

  session->crypto_dev = ocd;

  session->cpt_inst_w7 =
    oct_cpt_inst_w7_get (session, session->crypto_dev->roc_cpt);

  session->initialised = 1;

  return 0;
}

/*
 * Mark frame elements [index, n_elts) with status @s. When the failure
 * happens before any element was submitted (index == 0) the whole frame
 * is returned to NOT_PROCESSED so the caller can retry it.
 */
static_always_inline void
oct_crypto_update_frame_error_status (vnet_crypto_async_frame_t *f, u32 index,
                                      vnet_crypto_op_status_t s)
{
  u32 i;

  for (i = index; i < f->n_elts; i++)
    f->elts[i].status = s;

  if (index == 0)
    f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
}
/*
 * Enqueue one async crypto frame to the CPT hardware.
 *
 * Builds one CPT instruction per frame element (AEAD or linked mode),
 * claims an inflight slot in this thread's pending queue, and submits
 * the whole burst via LMTST.
 *
 * @param vm	  vlib main.
 * @param frame	  Async frame to process.
 * @param is_aead 1 for AEAD (GCM), 0 for linked cipher+auth.
 * @param aad_len AAD length for AEAD requests.
 * @param type	  VNET_CRYPTO_OP_TYPE_ENCRYPT or _DECRYPT.
 * @return 0 on success; -1 with element statuses set on failure.
 */
static_always_inline int
oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
                            const u8 is_aead, u8 aad_len, const u8 type)
{
  u32 i, enq_tail, enc_auth_len, buffer_index, nb_infl_allowed;
  struct cpt_inst_s inst[VNET_CRYPTO_FRAME_SIZE];
  u32 crypto_start_offset, integ_start_offset;
  oct_crypto_main_t *ocm = &oct_crypto_main;
  vnet_crypto_async_frame_elt_t *elts;
  oct_crypto_dev_t *crypto_dev = NULL;
  oct_crypto_inflight_req_t *infl_req;
  oct_crypto_pending_queue_t *pend_q;
  u64 dptr_start_ptr, curr_ptr;
  oct_crypto_sess_t *sess;
  u32 crypto_total_length;
  oct_crypto_key_t *key;
  vlib_buffer_t *buffer;
  u16 adj_len;
  int ret;

  /* GCM packets having 8 bytes of aad and 8 bytes of iv */
  u8 aad_iv = 8 + 8;

  pend_q = &ocm->pend_q[vlib_get_thread_index ()];

  enq_tail = pend_q->enq_tail;

  nb_infl_allowed = pend_q->n_desc - pend_q->n_crypto_inflight;
  if (PREDICT_FALSE (nb_infl_allowed == 0))
    {
      oct_crypto_update_frame_error_status (
        frame, 0, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
      return -1;
    }

  infl_req = &pend_q->req_queue[enq_tail];
  infl_req->frame = frame;

  for (i = 0; i < frame->n_elts; i++)
    {
      elts = &frame->elts[i];
      buffer_index = frame->buffer_indices[i];
      key = vec_elt_at_index (ocm->keys[type], elts->key_index);

      if (PREDICT_FALSE (!key->sess))
        {
          oct_crypto_update_frame_error_status (
            frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
          return -1;
        }

      sess = key->sess;

      /* First use of this key on this direction: finish session setup.
       * NOTE(review): return value is ignored here; a failed init is
       * only caught later via the hardware path — confirm intended. */
      if (PREDICT_FALSE (!sess->initialised))
        oct_crypto_session_init (vm, sess, elts->key_index, type);

      crypto_dev = sess->crypto_dev;

      clib_memset (inst + i, 0, sizeof (struct cpt_inst_s));

      buffer = vlib_get_buffer (vm, buffer_index);

      if (is_aead)
        {
          /* DPTR starts aad_iv bytes before the cipher payload so the
           * ESP IV+AAD in front of it are DMA'd as passthrough. */
          dptr_start_ptr =
            (u64) (buffer->data + (elts->crypto_start_offset - aad_iv));
          curr_ptr = (u64) (buffer->data + buffer->current_data);
          adj_len = (u16) (dptr_start_ptr - curr_ptr);

          crypto_total_length = elts->crypto_total_length;
          crypto_start_offset = aad_iv;
          integ_start_offset = 0;

          ret = oct_crypto_fill_fc_params (
            sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts,
            (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i,
            crypto_total_length /* cipher_len */,
            crypto_start_offset /* cipher_offset */, 0 /* auth_len */,
            integ_start_offset /* auth_off */, buffer, adj_len);
          if (PREDICT_FALSE (ret < 0))
            {
              oct_crypto_update_frame_error_status (
                frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
              return -1;
            }
        }
      else
        {
          dptr_start_ptr = (u64) (buffer->data + elts->integ_start_offset);

          enc_auth_len = elts->crypto_total_length + elts->integ_length_adj;

          curr_ptr = (u64) (buffer->data + buffer->current_data);
          adj_len = (u16) (dptr_start_ptr - curr_ptr);

          crypto_total_length = elts->crypto_total_length;
          crypto_start_offset =
            elts->crypto_start_offset - elts->integ_start_offset;
          integ_start_offset = 0;

          ret = oct_crypto_fill_fc_params (
            sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts,
            (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i,
            crypto_total_length /* cipher_len */,
            crypto_start_offset /* cipher_offset */,
            enc_auth_len /* auth_len */, integ_start_offset /* auth_off */,
            buffer, adj_len);
          if (PREDICT_FALSE (ret < 0))
            {
              oct_crypto_update_frame_error_status (
                frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
              return -1;
            }
        }

      inst[i].w7.u64 = sess->cpt_inst_w7;
      inst[i].res_addr = (u64) &infl_req->res[i];
    }

  oct_crypto_burst_submit (crypto_dev, inst, frame->n_elts);

  infl_req->elts = frame->n_elts;
  OCT_MOD_INC (pend_q->enq_tail, pend_q->n_desc);
  pend_q->n_crypto_inflight++;

  return 0;
}

/* Enqueue wrapper: linked cipher+auth, encrypt direction. */
int
oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm,
                                   vnet_crypto_async_frame_t *frame)
{
  return oct_crypto_enqueue_enc_dec (
    vm, frame, 0 /* is_aead */, 0 /* aad_len */, VNET_CRYPTO_OP_TYPE_ENCRYPT);
}

/* Enqueue wrapper: linked cipher+auth, decrypt direction. */
int
oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm,
                                   vnet_crypto_async_frame_t *frame)
{
  return oct_crypto_enqueue_enc_dec (
    vm, frame, 0 /* is_aead */, 0 /* aad_len */, VNET_CRYPTO_OP_TYPE_DECRYPT);
}

/* Enqueue wrapper: AEAD encrypt with caller-supplied AAD length. */
int
oct_crypto_enqueue_aead_aad_enc (vlib_main_t *vm,
                                 vnet_crypto_async_frame_t *frame, u8 aad_len)
{
  return oct_crypto_enqueue_enc_dec (vm, frame, 1 /* is_aead */, aad_len,
                                     VNET_CRYPTO_OP_TYPE_ENCRYPT);
}
/* Enqueue wrapper: AEAD decrypt with caller-supplied AAD length. */
static_always_inline int
oct_crypto_enqueue_aead_aad_dec (vlib_main_t *vm,
                                 vnet_crypto_async_frame_t *frame, u8 aad_len)
{
  return oct_crypto_enqueue_enc_dec (vm, frame, 1 /* is_aead */, aad_len,
                                     VNET_CRYPTO_OP_TYPE_DECRYPT);
}

/* AAD-length-specific handlers registered with the VNET crypto engine. */
int
oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm,
                                   vnet_crypto_async_frame_t *frame)
{
  return oct_crypto_enqueue_aead_aad_enc (vm, frame, 8);
}

int
oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm,
                                    vnet_crypto_async_frame_t *frame)
{
  return oct_crypto_enqueue_aead_aad_enc (vm, frame, 12);
}

int
oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm,
                                   vnet_crypto_async_frame_t *frame)
{
  return oct_crypto_enqueue_aead_aad_dec (vm, frame, 8);
}

int
oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm,
                                    vnet_crypto_async_frame_t *frame)
{
  return oct_crypto_enqueue_aead_aad_dec (vm, frame, 12);
}

/*
 * Poll hardware completions for the oldest inflight frame of this
 * thread and return it when every element has completed.
 *
 * Returns NULL while any element is still pending (progress is saved in
 * infl_req->deq_elts so polling resumes where it left off). On
 * completion, maps per-element microcode status to VNET statuses
 * (ICV miscompare -> FAIL_BAD_HMAC), resets the result words, advances
 * the ring, and reports n_elts / enqueue thread to the caller.
 */
vnet_crypto_async_frame_t *
oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
                          u32 *enqueue_thread_idx)
{
  oct_crypto_main_t *ocm = &oct_crypto_main;
  u32 deq_head, status = VNET_CRYPTO_OP_STATUS_COMPLETED;
  vnet_crypto_async_frame_elt_t *fe = NULL;
  oct_crypto_inflight_req_t *infl_req;
  oct_crypto_pending_queue_t *pend_q;
  vnet_crypto_async_frame_t *frame;
  volatile union cpt_res_s *res;
  int i;

  pend_q = &ocm->pend_q[vlib_get_thread_index ()];

  if (!pend_q->n_crypto_inflight)
    return NULL;

  deq_head = pend_q->deq_head;
  infl_req = &pend_q->req_queue[deq_head];
  frame = infl_req->frame;

  fe = frame->elts;

  for (i = infl_req->deq_elts; i < infl_req->elts; ++i)
    {
      res = &infl_req->res[i];

      /* Hardware has not written this result word yet; frames complete
       * in order, so stop polling entirely. */
      if (PREDICT_FALSE (res->cn10k.compcode == CPT_COMP_NOT_DONE))
        return NULL;

      if (PREDICT_FALSE (res->cn10k.uc_compcode))
        {
          if (res->cn10k.uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE)
            status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
          else
            status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
        }

      infl_req->deq_elts++;
    }

  /* Clear result words so the slot can be reused for the next burst. */
  clib_memset ((void *) infl_req->res, 0,
               sizeof (union cpt_res_s) * VNET_CRYPTO_FRAME_SIZE);

  OCT_MOD_INC (pend_q->deq_head, pend_q->n_desc);
  pend_q->n_crypto_inflight--;

  frame->state = status == VNET_CRYPTO_OP_STATUS_COMPLETED ?
                   VNET_CRYPTO_FRAME_STATE_SUCCESS :
                   VNET_CRYPTO_FRAME_STATE_ELT_ERROR;

  *nb_elts_processed = frame->n_elts;
  *enqueue_thread_idx = frame->enqueue_thread_index;

  infl_req->deq_elts = 0;
  infl_req->elts = 0;

  return frame;
}
/*
 * Register this driver as a VNET async crypto engine: one enqueue
 * handler per supported AEAD (per AAD length) and linked algorithm,
 * plus the shared dequeue and key handlers.
 * @return 0 (registration calls have no checked failure path here).
 */
int
oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev)
{
  u32 engine_index;

  engine_index = vnet_crypto_register_engine (vm, "oct_cryptodev", 100,
                                              "OCT Cryptodev Engine");

/* AEAD algs: expand to ENC/DEC registrations per (alg, tag, aad). */
#define _(n, k, t, a)                                                         \
  vnet_crypto_register_enqueue_handler (                                      \
    vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC,             \
    oct_crypto_enqueue_aead_aad_##a##_enc);                                   \
  vnet_crypto_register_enqueue_handler (                                      \
    vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC,             \
    oct_crypto_enqueue_aead_aad_##a##_dec);
  foreach_oct_crypto_aead_async_alg
#undef _

/* Linked cipher+auth algs: all share the two linked enqueue handlers. */
#define _(c, h, k, d)                                                         \
  vnet_crypto_register_enqueue_handler (                                      \
    vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC,                \
    oct_crypto_enqueue_linked_alg_enc);                                       \
  vnet_crypto_register_enqueue_handler (                                      \
    vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC,                \
    oct_crypto_enqueue_linked_alg_dec);
    foreach_oct_crypto_link_async_alg;
#undef _

  vnet_crypto_register_dequeue_handler (vm, engine_index,
                                        oct_crypto_frame_dequeue);

  vnet_crypto_register_key_handler (vm, engine_index, oct_crypto_key_handler);

  return 0;
}
CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q == NULL) + { + log_err (dev, "Failed to allocate memory for crypto pending queue"); + return -1; + } + + /* + * Each pending queue will get number of cpt desc / number of cores. + * And that desc count is shared across inflight entries. + */ + n_inflight_req = + (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains) / VNET_CRYPTO_FRAME_SIZE; + + for (i = 0; i < tm->n_vlib_mains; ++i) + { + ocm->pend_q[i].n_desc = n_inflight_req; + + ocm->pend_q[i].req_queue = oct_plt_init_param.oct_plt_zmalloc ( + ocm->pend_q[i].n_desc * sizeof (oct_crypto_inflight_req_t), + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q[i].req_queue == NULL) + { + log_err (dev, + "Failed to allocate memory for crypto inflight request"); + goto free; + } + + for (j = 0; j <= ocm->pend_q[i].n_desc; ++j) + { + infl_req_queue = &ocm->pend_q[i].req_queue[j]; + + infl_req_queue->sg_data = oct_plt_init_param.oct_plt_zmalloc ( + OCT_SCATTER_GATHER_BUFFER_SIZE * VNET_CRYPTO_FRAME_SIZE, + CLIB_CACHE_LINE_BYTES); + if (infl_req_queue->sg_data == NULL) + { + log_err (dev, "Failed to allocate crypto scatter gather memory"); + goto free; + } + } + } + return 0; +free: + for (; i >= 0; i--) + { + if (ocm->pend_q[i].req_queue == NULL) + continue; + for (; j >= 0; j--) + { + infl_req_queue = &ocm->pend_q[i].req_queue[j]; + + if (infl_req_queue->sg_data == NULL) + continue; + + oct_plt_init_param.oct_plt_free (infl_req_queue->sg_data); + } + oct_plt_init_param.oct_plt_free (ocm->pend_q[i].req_queue); + } + oct_plt_init_param.oct_plt_free (ocm->pend_q); + + return -1; +} diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h new file mode 100644 index 00000000000..8d17980a55f --- /dev/null +++ b/src/plugins/dev_octeon/crypto.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2024 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _CRYPTO_H_ +#define _CRYPTO_H_ +#include <vnet/crypto/crypto.h> +#include <vnet/ip/ip.h> + +#define OCT_MAX_N_CPT_DEV 2 + +#define OCT_CPT_LF_MAX_NB_DESC 128000 + +/* CRYPTO_ID, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */ +#define foreach_oct_crypto_aead_async_alg \ + _ (AES_128_GCM, 16, 16, 8) \ + _ (AES_128_GCM, 16, 16, 12) \ + _ (AES_192_GCM, 24, 16, 8) \ + _ (AES_192_GCM, 24, 16, 12) \ + _ (AES_256_GCM, 32, 16, 8) \ + _ (AES_256_GCM, 32, 16, 12) + +/* CRYPTO_ID, INTEG_ID, KEY_LENGTH_IN_BYTES, DIGEST_LEN */ +#define foreach_oct_crypto_link_async_alg \ + _ (AES_128_CBC, SHA1, 16, 12) \ + _ (AES_192_CBC, SHA1, 24, 12) \ + _ (AES_256_CBC, SHA1, 32, 12) \ + _ (AES_128_CBC, SHA256, 16, 16) \ + _ (AES_192_CBC, SHA256, 24, 16) \ + _ (AES_256_CBC, SHA256, 32, 16) \ + _ (AES_128_CBC, SHA384, 16, 24) \ + _ (AES_192_CBC, SHA384, 24, 24) \ + _ (AES_256_CBC, SHA384, 32, 24) \ + _ (AES_128_CBC, SHA512, 16, 32) \ + _ (AES_192_CBC, SHA512, 24, 32) \ + _ (AES_256_CBC, SHA512, 32, 32) \ + _ (3DES_CBC, MD5, 24, 12) \ + _ (3DES_CBC, SHA1, 24, 12) \ + _ (3DES_CBC, SHA256, 24, 16) \ + _ (3DES_CBC, SHA384, 24, 24) \ + _ (3DES_CBC, SHA512, 24, 32) \ + _ (AES_128_CTR, SHA1, 16, 12) \ + _ (AES_192_CTR, SHA1, 24, 12) \ + _ (AES_256_CTR, SHA1, 32, 12) + +#define OCT_MOD_INC(i, l) ((i) == (l - 1) ? 
(i) = 0 : (i)++) + +#define OCT_SCATTER_GATHER_BUFFER_SIZE 1024 + +#define CPT_LMT_SIZE_COPY (sizeof (struct cpt_inst_s) / 16) +#define OCT_MAX_LMT_SZ 16 + +#define SRC_IOV_SIZE \ + (sizeof (struct roc_se_iov_ptr) + \ + (sizeof (struct roc_se_buf_ptr) * ROC_MAX_SG_CNT)) + +#define OCT_CPT_LMT_GET_LINE_ADDR(lmt_addr, lmt_num) \ + (void *) ((u64) (lmt_addr) + ((u64) (lmt_num) << ROC_LMT_LINE_SIZE_LOG2)) + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + struct roc_cpt *roc_cpt; + struct roc_cpt_lmtline lmtline; + struct roc_cpt_lf lf; + vnet_dev_t *dev; +} oct_crypto_dev_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** CPT opcode */ + u16 cpt_op : 4; + /** Flag for AES GCM */ + u16 aes_gcm : 1; + /** IV length in bytes */ + u8 iv_length; + /** Auth IV length in bytes */ + u8 auth_iv_length; + /** IV offset in bytes */ + u16 iv_offset; + /** Auth IV offset in bytes */ + u16 auth_iv_offset; + /** CPT inst word 7 */ + u64 cpt_inst_w7; + /* initialise as part of first packet */ + u8 initialised; + /* store link key index in case of linked algo */ + vnet_crypto_key_index_t key_index; + oct_crypto_dev_t *crypto_dev; + struct roc_se_ctx cpt_ctx; +} oct_crypto_sess_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + oct_crypto_sess_t *sess; + oct_crypto_dev_t *crypto_dev; +} oct_crypto_key_t; + +typedef struct oct_crypto_scatter_gather +{ + u8 buf[OCT_SCATTER_GATHER_BUFFER_SIZE]; +} oct_crypto_scatter_gather_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** Result data of all entries in the frame */ + volatile union cpt_res_s res[VNET_CRYPTO_FRAME_SIZE]; + /** Scatter gather data */ + void *sg_data; + /** Frame pointer */ + vnet_crypto_async_frame_t *frame; + /** Number of async elements in frame */ + u16 elts; + /** Next read entry in frame, when dequeue */ + u16 deq_elts; +} oct_crypto_inflight_req_t; + +typedef struct +{ + /** Array of pending request */ + oct_crypto_inflight_req_t *req_queue; 
+ /** Number of inflight operations in queue */ + u32 n_crypto_inflight; + /** Tail of queue to be used for enqueue */ + u16 enq_tail; + /** Head of queue to be used for dequeue */ + u16 deq_head; + /** Number of descriptors */ + u16 n_desc; +} oct_crypto_pending_queue_t; + +typedef struct +{ + oct_crypto_dev_t *crypto_dev[OCT_MAX_N_CPT_DEV]; + oct_crypto_key_t *keys[VNET_CRYPTO_ASYNC_OP_N_TYPES]; + oct_crypto_pending_queue_t *pend_q; + int n_cpt; + u8 started; +} oct_crypto_main_t; + +extern oct_crypto_main_t oct_crypto_main; + +void oct_crypto_key_del_handler (vlib_main_t *vm, + vnet_crypto_key_index_t key_index); + +void oct_crypto_key_add_handler (vlib_main_t *vm, + vnet_crypto_key_index_t key_index); + +void oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx); + +int oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +vnet_crypto_async_frame_t *oct_crypto_frame_dequeue (vlib_main_t *vm, + u32 *nb_elts_processed, + u32 *enqueue_thread_idx); +int oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev); +int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev); +#endif /* _CRYPTO_H_ */ diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c index 1c367a036ab..5bef25f5369 100644 --- a/src/plugins/dev_octeon/flow.c +++ b/src/plugins/dev_octeon/flow.c @@ -46,6 +46,8 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = { (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \ (f->type == 
VNET_FLOW_TYPE_IP4_GTPU)) +#define FLOW_IS_GENERIC_TYPE(f) (f->type == VNET_FLOW_TYPE_GENERIC) + #define OCT_FLOW_UNSUPPORTED_ACTIONS(f) \ ((f->actions == VNET_FLOW_ACTION_BUFFER_ADVANCE) || \ (f->actions == VNET_FLOW_ACTION_REDIRECT_TO_NODE)) @@ -71,6 +73,9 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = { _ (62, FLOW_KEY_TYPE_L3_DST, "l3-dst-only") \ _ (63, FLOW_KEY_TYPE_L3_SRC, "l3-src-only") +#define GTPU_PORT 2152 +#define VXLAN_PORT 4789 + typedef struct { u16 src_port; @@ -87,6 +92,27 @@ typedef struct u32 teid; } gtpu_header_t; +typedef struct +{ + u8 layer; + u16 nxt_proto; + vnet_dev_port_t *port; + struct roc_npc_item_info *items; + struct + { + u8 *spec; + u8 *mask; + u16 off; + } oct_drv; + struct + { + u8 *spec; + u8 *mask; + u16 off; + u16 len; + } generic; +} oct_flow_parse_state; + static void oct_flow_convert_rss_types (u64 *key, u64 rss_types) { @@ -163,6 +189,14 @@ oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions, npc = &oct_port->npc; + for (int i = 0; item_info[i].type != ROC_NPC_ITEM_TYPE_END; i++) + { + log_debug (port->dev, "Flow[%d] Item[%d] type %d spec 0x%U mask 0x%U", + flow->index, i, item_info[i].type, format_hex_bytes, + item_info[i].spec, item_info[i].size, format_hex_bytes, + item_info[i].mask, item_info[i].size); + } + npc_flow = roc_npc_flow_create (npc, &attr, item_info, actions, npc->pf_func, &rv); if (rv) @@ -183,6 +217,320 @@ oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions, return VNET_DEV_OK; } +static int +oct_parse_l2 (oct_flow_parse_state *pst) +{ + struct roc_npc_flow_item_eth *eth_spec = + (struct roc_npc_flow_item_eth *) &pst->oct_drv.spec[pst->oct_drv.off]; + struct roc_npc_flow_item_eth *eth_mask = + (struct roc_npc_flow_item_eth *) &pst->oct_drv.mask[pst->oct_drv.off]; + ethernet_header_t *eth_hdr_mask = + (ethernet_header_t *) &pst->generic.mask[pst->generic.off]; + ethernet_header_t *eth_hdr = + (ethernet_header_t *) &pst->generic.spec[pst->generic.off]; + u16 
tpid, etype; + + tpid = etype = clib_net_to_host_u16 (eth_hdr->type); + clib_memcpy_fast (eth_spec, eth_hdr, sizeof (ethernet_header_t)); + clib_memcpy_fast (eth_mask, eth_hdr_mask, sizeof (ethernet_header_t)); + eth_spec->has_vlan = 0; + + pst->items[pst->layer].spec = (void *) eth_spec; + pst->items[pst->layer].mask = (void *) eth_mask; + pst->items[pst->layer].size = sizeof (ethernet_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_ETH; + pst->generic.off += sizeof (ethernet_header_t); + pst->oct_drv.off += sizeof (struct roc_npc_flow_item_eth); + pst->layer++; + + /* Parse VLAN Tags if any */ + struct roc_npc_flow_item_vlan *vlan_spec = + (struct roc_npc_flow_item_vlan *) &pst->oct_drv.spec[pst->oct_drv.off]; + struct roc_npc_flow_item_vlan *vlan_mask = + (struct roc_npc_flow_item_vlan *) &pst->oct_drv.mask[pst->oct_drv.off]; + ethernet_vlan_header_t *vlan_hdr, *vlan_hdr_mask; + u8 vlan_cnt = 0; + + while (tpid == ETHERNET_TYPE_DOT1AD || tpid == ETHERNET_TYPE_VLAN) + { + if (pst->generic.off >= pst->generic.len) + break; + + vlan_hdr = + (ethernet_vlan_header_t *) &pst->generic.spec[pst->generic.off]; + vlan_hdr_mask = + (ethernet_vlan_header_t *) &pst->generic.mask[pst->generic.off]; + tpid = etype = clib_net_to_host_u16 (vlan_hdr->type); + clib_memcpy (&vlan_spec[vlan_cnt], vlan_hdr, + sizeof (ethernet_vlan_header_t)); + clib_memcpy (&vlan_mask[vlan_cnt], vlan_hdr_mask, + sizeof (ethernet_vlan_header_t)); + pst->items[pst->layer].spec = (void *) &vlan_spec[vlan_cnt]; + pst->items[pst->layer].mask = (void *) &vlan_mask[vlan_cnt]; + pst->items[pst->layer].size = sizeof (ethernet_vlan_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VLAN; + pst->generic.off += sizeof (ethernet_vlan_header_t); + pst->oct_drv.off += sizeof (struct roc_npc_flow_item_vlan); + pst->layer++; + vlan_cnt++; + } + + /* Inner most vlan tag */ + if (vlan_cnt) + vlan_spec[vlan_cnt - 1].has_more_vlan = 0; + + pst->nxt_proto = etype; + return 0; +} + +static int 
oct_parse_l3 (oct_flow_parse_state *pst)
{

  /* Nothing left in the pattern, or the previous layer did not hand over
     an ethertype: not an error, just nothing to emit. */
  if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0)
    return 0;

  if (pst->nxt_proto == ETHERNET_TYPE_MPLS)
    {
      /* Consume the whole MPLS label stack; bit 0 of byte 2 of each 4-byte
	 label entry is the bottom-of-stack flag. */
      int label_stack_bottom = 0;
      do
	{

	  u8 *mpls_spec = &pst->generic.spec[pst->generic.off];
	  u8 *mpls_mask = &pst->generic.mask[pst->generic.off];

	  label_stack_bottom = mpls_spec[2] & 1;
	  pst->items[pst->layer].spec = (void *) mpls_spec;
	  pst->items[pst->layer].mask = (void *) mpls_mask;
	  pst->items[pst->layer].size = sizeof (u32);
	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_MPLS;
	  pst->generic.off += sizeof (u32);
	  pst->layer++;
	}
      while (label_stack_bottom);

      /* MPLS payload type is unknown from the label stack alone. */
      pst->nxt_proto = 0;
      return 0;
    }
  else if (pst->nxt_proto == ETHERNET_TYPE_IP4)
    {
      /* IPv4 header is matched in place from the generic pattern. */
      ip4_header_t *ip4_spec =
	(ip4_header_t *) &pst->generic.spec[pst->generic.off];
      ip4_header_t *ip4_mask =
	(ip4_header_t *) &pst->generic.mask[pst->generic.off];
      pst->items[pst->layer].spec = (void *) ip4_spec;
      pst->items[pst->layer].mask = (void *) ip4_mask;
      pst->items[pst->layer].size = sizeof (ip4_header_t);
      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV4;
      pst->generic.off += sizeof (ip4_header_t);
      pst->layer++;
      pst->nxt_proto = ip4_spec->protocol;
    }
  else if (pst->nxt_proto == ETHERNET_TYPE_IP6)
    {
      /* IPv6 is copied into the driver-format item area (roc item layout
	 differs from the wire header). */
      struct roc_npc_flow_item_ipv6 *ip6_spec =
	(struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.spec[pst->oct_drv.off];
      struct roc_npc_flow_item_ipv6 *ip6_mask =
	(struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.mask[pst->oct_drv.off];
      ip6_header_t *ip6_hdr_mask =
	(ip6_header_t *) &pst->generic.mask[pst->generic.off];
      ip6_header_t *ip6_hdr =
	(ip6_header_t *) &pst->generic.spec[pst->generic.off];
      u8 nxt_hdr = ip6_hdr->protocol;

      clib_memcpy (ip6_spec, ip6_hdr, sizeof (ip6_header_t));
      clib_memcpy (ip6_mask, ip6_hdr_mask, sizeof (ip6_header_t));
      pst->items[pst->layer].spec = (void *) ip6_spec;
      pst->items[pst->layer].mask = (void *) ip6_mask;
      pst->items[pst->layer].size = sizeof (ip6_header_t);
      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6;
      pst->generic.off += sizeof (ip6_header_t);
      pst->oct_drv.off += sizeof (struct roc_npc_flow_item_ipv6);
      pst->layer++;

      /* Walk hop-by-hop / destination-options / routing extension headers,
	 emitting one item per header. */
      while (nxt_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS ||
	     nxt_hdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS ||
	     nxt_hdr == IP_PROTOCOL_IPV6_ROUTE)
	{
	  if (pst->generic.off >= pst->generic.len)
	    return 0;

	  ip6_ext_header_t *ip6_ext_spec =
	    (ip6_ext_header_t *) &pst->generic.spec[pst->generic.off];
	  ip6_ext_header_t *ip6_ext_mask =
	    (ip6_ext_header_t *) &pst->generic.mask[pst->generic.off];
	  nxt_hdr = ip6_ext_spec->next_hdr;

	  /* NOTE(review): items[].size is never set for IPV6_EXT entries,
	     unlike every other item type here — confirm roc_npc derives
	     the extension-header length on its own. */
	  pst->items[pst->layer].spec = (void *) ip6_ext_spec;
	  pst->items[pst->layer].mask = (void *) ip6_ext_mask;
	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_EXT;
	  pst->generic.off += ip6_ext_header_len (ip6_ext_spec);
	  pst->layer++;
	}

      if (pst->generic.off >= pst->generic.len)
	return 0;

      /* Fragment header is terminal among extension headers handled here. */
      if (nxt_hdr == IP_PROTOCOL_IPV6_FRAGMENTATION)
	{
	  ip6_frag_hdr_t *ip6_ext_frag_spec =
	    (ip6_frag_hdr_t *) &pst->generic.spec[pst->generic.off];
	  ip6_frag_hdr_t *ip6_ext_frag_mask =
	    (ip6_frag_hdr_t *) &pst->generic.mask[pst->generic.off];

	  pst->items[pst->layer].spec = (void *) ip6_ext_frag_spec;
	  pst->items[pst->layer].mask = (void *) ip6_ext_frag_mask;
	  pst->items[pst->layer].size = sizeof (ip6_frag_hdr_t);
	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_FRAG_EXT;
	  pst->generic.off += sizeof (ip6_frag_hdr_t);
	  pst->layer++;
	}

      pst->nxt_proto = nxt_hdr;
    }
  /* Unsupported L3.
*/ + else + return -1; + + return 0; +} + +static int +oct_parse_l4 (oct_flow_parse_state *pst) +{ + + if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0) + return 0; + +#define _(protocol_t, protocol_value, ltype) \ + if (pst->nxt_proto == protocol_value) \ + \ + { \ + \ + protocol_t *spec = (protocol_t *) &pst->generic.spec[pst->generic.off]; \ + protocol_t *mask = (protocol_t *) &pst->generic.mask[pst->generic.off]; \ + pst->items[pst->layer].spec = spec; \ + pst->items[pst->layer].mask = mask; \ + \ + pst->items[pst->layer].size = sizeof (protocol_t); \ + \ + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_##ltype; \ + pst->generic.off += sizeof (protocol_t); \ + pst->layer++; \ + return 0; \ + } + + _ (esp_header_t, IP_PROTOCOL_IPSEC_ESP, ESP) + _ (udp_header_t, IP_PROTOCOL_UDP, UDP) + _ (tcp_header_t, IP_PROTOCOL_TCP, TCP) + _ (sctp_header_t, IP_PROTOCOL_SCTP, SCTP) + _ (icmp46_header_t, IP_PROTOCOL_ICMP, ICMP) + _ (icmp46_header_t, IP_PROTOCOL_ICMP6, ICMP) + _ (igmp_header_t, IP_PROTOCOL_IGMP, IGMP) + _ (gre_header_t, IP_PROTOCOL_GRE, GRE) + + /* Unsupported L4. 
*/ + return -1; +} + +static int +oct_parse_tunnel (oct_flow_parse_state *pst) +{ + if (pst->generic.off >= pst->generic.len) + return 0; + + if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_GRE) + { + gre_header_t *gre_hdr = (gre_header_t *) pst->items[pst->layer - 1].spec; + pst->nxt_proto = clib_net_to_host_u16 (gre_hdr->protocol); + goto parse_l3; + } + + else if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_UDP) + { + udp_header_t *udp_h = (udp_header_t *) pst->items[pst->layer - 1].spec; + u16 dport = clib_net_to_host_u16 (udp_h->dst_port); + + if (dport == GTPU_PORT) + { + gtpu_header_t *gtpu_spec = + (gtpu_header_t *) &pst->generic.spec[pst->generic.off]; + gtpu_header_t *gtpu_mask = + (gtpu_header_t *) &pst->generic.mask[pst->generic.off]; + pst->items[pst->layer].spec = (void *) gtpu_spec; + pst->items[pst->layer].mask = (void *) gtpu_mask; + pst->items[pst->layer].size = sizeof (gtpu_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_GTPU; + pst->generic.off += sizeof (gtpu_header_t); + pst->layer++; + pst->nxt_proto = 0; + return 0; + } + else if (dport == VXLAN_PORT) + { + vxlan_header_t *vxlan_spec = + (vxlan_header_t *) &pst->generic.spec[pst->generic.off]; + vxlan_header_t *vxlan_mask = + (vxlan_header_t *) &pst->generic.spec[pst->generic.off]; + pst->items[pst->layer].spec = (void *) vxlan_spec; + pst->items[pst->layer].mask = (void *) vxlan_mask; + pst->items[pst->layer].size = sizeof (vxlan_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VXLAN; + pst->generic.off += sizeof (vxlan_header_t); + pst->layer++; + pst->nxt_proto = 0; + goto parse_l2; + } + } + /* No supported Tunnel detected. 
*/ + else + { + log_err (pst->port->dev, + "Partially parsed till offset %u, not able to parse further", + pst->generic.off); + return 0; + } +parse_l2: + if (oct_parse_l2 (pst)) + return -1; +parse_l3: + if (oct_parse_l3 (pst)) + return -1; + + return oct_parse_l4 (pst); +} + +static vnet_dev_rv_t +oct_flow_generic_pattern_parse (oct_flow_parse_state *pst) +{ + + if (oct_parse_l2 (pst)) + goto err; + + if (oct_parse_l3 (pst)) + goto err; + + if (oct_parse_l4 (pst)) + goto err; + + if (oct_parse_tunnel (pst)) + goto err; + + if (pst->generic.off < pst->generic.len) + { + log_err (pst->port->dev, + "Partially parsed till offset %u, not able to parse further", + pst->generic.off); + goto err; + } + + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_END; + return VNET_DEV_OK; + +err: + return VNET_DEV_ERR_NOT_SUPPORTED; +} + static vnet_dev_rv_t oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, uword *private_data) @@ -190,12 +538,22 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, struct roc_npc_item_info item_info[ROC_NPC_ITEM_TYPE_END] = {}; struct roc_npc_action actions[ROC_NPC_ITEM_TYPE_END] = {}; oct_port_t *oct_port = vnet_dev_get_port_data (port); + ethernet_header_t eth_spec = {}, eth_mask = {}; + sctp_header_t sctp_spec = {}, sctp_mask = {}; + gtpu_header_t gtpu_spec = {}, gtpu_mask = {}; + ip4_header_t ip4_spec = {}, ip4_mask = {}; + ip6_header_t ip6_spec = {}, ip6_mask = {}; + udp_header_t udp_spec = {}, udp_mask = {}; + tcp_header_t tcp_spec = {}, tcp_mask = {}; + esp_header_t esp_spec = {}, esp_mask = {}; u16 l4_src_port = 0, l4_dst_port = 0; u16 l4_src_mask = 0, l4_dst_mask = 0; struct roc_npc_action_rss rss_conf = {}; struct roc_npc_action_queue conf = {}; struct roc_npc_action_mark mark = {}; struct roc_npc *npc = &oct_port->npc; + u8 *flow_spec = 0, *flow_mask = 0; + u8 *drv_spec = 0, *drv_mask = 0; vnet_dev_rv_t rv = VNET_DEV_OK; int layer = 0, index = 0; u16 *queues = NULL; @@ -203,11 +561,52 @@ 
oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, u8 proto = 0; u16 action = 0; + if (FLOW_IS_GENERIC_TYPE (flow)) + { + unformat_input_t input; + int rc; + + unformat_init_string ( + &input, (const char *) flow->generic.pattern.spec, + strlen ((const char *) flow->generic.pattern.spec)); + unformat_user (&input, unformat_hex_string, &flow_spec); + unformat_free (&input); + + unformat_init_string ( + &input, (const char *) flow->generic.pattern.mask, + strlen ((const char *) flow->generic.pattern.mask)); + unformat_user (&input, unformat_hex_string, &flow_mask); + unformat_free (&input); + + vec_validate (drv_spec, 1024); + vec_validate (drv_mask, 1024); + oct_flow_parse_state pst = { + .nxt_proto = 0, + .port = port, + .items = item_info, + .oct_drv = { .spec = drv_spec, .mask = drv_mask }, + .generic = { .spec = flow_spec, + .mask = flow_mask, + .len = vec_len (flow_spec) }, + }; + + rc = oct_flow_generic_pattern_parse (&pst); + if (rc) + { + vec_free (flow_spec); + vec_free (flow_mask); + vec_free (drv_spec); + vec_free (drv_mask); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + goto parse_flow_actions; + } + if (FLOW_IS_ETHERNET_CLASS (flow)) { - ethernet_header_t eth_spec = { .type = clib_host_to_net_u16 ( - flow->ethernet.eth_hdr.type) }, - eth_mask = { .type = 0xFFFF }; + eth_spec.type = clib_host_to_net_u16 (flow->ethernet.eth_hdr.type); + eth_mask.type = 0xFFFF; item_info[layer].spec = (void *) ð_spec; item_info[layer].mask = (void *) ð_mask; @@ -220,10 +619,11 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, { vnet_flow_ip4_t *ip4_hdr = &flow->ip4; proto = ip4_hdr->protocol.prot; - ip4_header_t ip4_spec = { .src_address = ip4_hdr->src_addr.addr, - .dst_address = ip4_hdr->dst_addr.addr }, - ip4_mask = { .src_address = ip4_hdr->src_addr.mask, - .dst_address = ip4_hdr->dst_addr.mask }; + + ip4_spec.src_address = ip4_hdr->src_addr.addr; + ip4_spec.dst_address = ip4_hdr->dst_addr.addr; + ip4_mask.src_address = 
ip4_hdr->src_addr.mask; + ip4_mask.dst_address = ip4_hdr->dst_addr.mask; item_info[layer].spec = (void *) &ip4_spec; item_info[layer].mask = (void *) &ip4_mask; @@ -245,10 +645,11 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, { vnet_flow_ip6_t *ip6_hdr = &flow->ip6; proto = ip6_hdr->protocol.prot; - ip6_header_t ip6_spec = { .src_address = ip6_hdr->src_addr.addr, - .dst_address = ip6_hdr->dst_addr.addr }, - ip6_mask = { .src_address = ip6_hdr->src_addr.mask, - .dst_address = ip6_hdr->dst_addr.mask }; + + ip6_spec.src_address = ip6_hdr->src_addr.addr; + ip6_spec.dst_address = ip6_hdr->dst_addr.addr; + ip6_mask.src_address = ip6_hdr->src_addr.mask; + ip6_mask.dst_address = ip6_hdr->dst_addr.mask; item_info[layer].spec = (void *) &ip6_spec; item_info[layer].mask = (void *) &ip6_mask; @@ -273,16 +674,15 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, switch (proto) { case IP_PROTOCOL_UDP: - item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP; - - udp_header_t udp_spec = { .src_port = l4_src_port, - .dst_port = l4_dst_port }, - udp_mask = { .src_port = l4_src_mask, - .dst_port = l4_dst_mask }; + udp_spec.src_port = l4_src_port; + udp_spec.dst_port = l4_dst_port; + udp_mask.src_port = l4_src_mask; + udp_mask.dst_port = l4_dst_mask; item_info[layer].spec = (void *) &udp_spec; item_info[layer].mask = (void *) &udp_mask; item_info[layer].size = sizeof (udp_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP; layer++; if (FLOW_IS_L4_TUNNEL_TYPE (flow)) @@ -290,14 +690,13 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, switch (flow->type) { case VNET_FLOW_TYPE_IP4_GTPU: - item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU; - gtpu_header_t gtpu_spec = { .teid = clib_host_to_net_u32 ( - flow->ip4_gtpu.teid) }, - gtpu_mask = { .teid = 0XFFFFFFFF }; + gtpu_spec.teid = clib_host_to_net_u32 (flow->ip4_gtpu.teid); + gtpu_mask.teid = 0XFFFFFFFF; item_info[layer].spec = (void *) >pu_spec; 
item_info[layer].mask = (void *) >pu_mask; item_info[layer].size = sizeof (gtpu_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU; layer++; break; @@ -309,42 +708,39 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, break; case IP_PROTOCOL_TCP: - item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP; - - tcp_header_t tcp_spec = { .src_port = l4_src_port, - .dst_port = l4_dst_port }, - tcp_mask = { .src_port = l4_src_mask, - .dst_port = l4_dst_mask }; + tcp_spec.src_port = l4_src_port; + tcp_spec.dst_port = l4_dst_port; + tcp_mask.src_port = l4_src_mask; + tcp_mask.dst_port = l4_dst_mask; item_info[layer].spec = (void *) &tcp_spec; item_info[layer].mask = (void *) &tcp_mask; item_info[layer].size = sizeof (tcp_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP; layer++; break; case IP_PROTOCOL_SCTP: - item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP; - - sctp_header_t sctp_spec = { .src_port = l4_src_port, - .dst_port = l4_dst_port }, - sctp_mask = { .src_port = l4_src_mask, - .dst_port = l4_dst_mask }; + sctp_spec.src_port = l4_src_port; + sctp_spec.dst_port = l4_dst_port; + sctp_mask.src_port = l4_src_mask; + sctp_mask.dst_port = l4_dst_mask; item_info[layer].spec = (void *) &sctp_spec; item_info[layer].mask = (void *) &sctp_mask; item_info[layer].size = sizeof (sctp_header_t); + item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP; layer++; break; case IP_PROTOCOL_IPSEC_ESP: - item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP; - esp_header_t esp_spec = { .spi = clib_host_to_net_u32 ( - flow->ip4_ipsec_esp.spi) }, - esp_mask = { .spi = 0xFFFFFFFF }; + esp_spec.spi = clib_host_to_net_u32 (flow->ip4_ipsec_esp.spi); + esp_mask.spi = 0xFFFFFFFF; item_info[layer].spec = (void *) &esp_spec; item_info[layer].mask = (void *) &esp_mask; item_info[layer].size = sizeof (u32); + item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP; layer++; break; @@ -357,6 +753,7 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, end_item_info: 
item_info[layer].type = ROC_NPC_ITEM_TYPE_END; +parse_flow_actions: if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE) { conf.index = flow->redirect_queue; @@ -422,6 +819,11 @@ end_item_info: if (queues) clib_mem_free (queues); + vec_free (flow_spec); + vec_free (flow_mask); + vec_free (drv_spec); + vec_free (drv_mask); + return rv; } diff --git a/src/plugins/dev_octeon/format.c b/src/plugins/dev_octeon/format.c index e624b84f54e..d0f53013d99 100644 --- a/src/plugins/dev_octeon/format.c +++ b/src/plugins/dev_octeon/format.c @@ -25,7 +25,7 @@ format_oct_nix_rx_cqe_desc (u8 *s, va_list *args) typeof (d->sg0) *sg0 = &d->sg0; typeof (d->sg0) *sg1 = &d->sg1; - s = format (s, "hdr: cqe_type %u nude %u q %u tag 0x%x", h->cqe_type, + s = format (s, "hdr: cqe_type %u nude %u qid %u tag 0x%x", h->cqe_type, h->node, h->q, h->tag); s = format (s, "\n%Uparse:", format_white_space, indent); #define _(n, f) s = format (s, " " #n " " f, p->n) diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index 8c5ed95b062..99cadddfc24 100644 --- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -4,12 +4,13 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> +#include <vnet/dev/bus/pci.h> #include <vnet/dev/counters.h> #include <vnet/ethernet/ethernet.h> #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> #include <dev_octeon/octeon.h> +#include <dev_octeon/crypto.h> #include <base/roc_api.h> #include <common.h> @@ -51,9 +52,12 @@ static struct } _ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"), - _ (0xa0f8, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"), + _ (0xa064, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"), + _ (0xa0f8, LBK_VF, "Marvell Octeon Loopback Unit VF"), _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"), - _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"), + _ (0xa0f3, O10K_CPT_VF, + "Marvell Octeon-10 
Cryptographic Accelerator Unit VF"), + _ (0xa0fe, O9K_CPT_VF, "Marvell Octeon-9 Cryptographic Accelerator Unit VF"), #undef _ }; @@ -109,6 +113,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .reta_sz = ROC_NIX_RSS_RETA_SZ_256, .max_sqb_count = 512, .pci_dev = &cd->plt_pci_dev, + .hw_vlan_ins = true, }; if ((rrv = roc_nix_dev_init (cd->nix))) @@ -130,6 +135,9 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .rx_offloads = { .ip4_cksum = 1, }, + .tx_offloads = { + .ip4_cksum = 1, + }, }, .ops = { .init = oct_port_init, @@ -140,6 +148,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .config_change_validate = oct_port_cfg_change_validate, .format_status = format_oct_port_status, .format_flow = format_oct_port_flow, + .clear_counters = oct_port_clear_counters, }, .data_size = sizeof (oct_port_t), .initial_data = &oct_port, @@ -158,6 +167,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .alloc = oct_rx_queue_alloc, .free = oct_rx_queue_free, .format_info = format_oct_rxq_info, + .clear_counters = oct_rxq_clear_counters, }, }, .tx_queue = { @@ -172,6 +182,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .alloc = oct_tx_queue_alloc, .free = oct_tx_queue_free, .format_info = format_oct_txq_info, + .clear_counters = oct_txq_clear_counters, }, }, }; @@ -183,17 +194,113 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) return vnet_dev_port_add (vm, dev, 0, &port_add_args); } +static int +oct_conf_cpt (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd, + int nb_lf) +{ + struct roc_cpt *roc_cpt = ocd->roc_cpt; + int rrv; + + if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_SE)) < 0) + { + log_err (dev, "Could not add CPT SE engines"); + return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); + } + if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_IE)) < 0) + { + log_err (dev, "Could not add CPT IE engines"); + return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); + } + if (roc_cpt->eng_grp[CPT_ENG_TYPE_IE] != 
ROC_CPT_DFLT_ENG_GRP_SE_IE) + { + log_err (dev, "Invalid CPT IE engine group configuration"); + return -1; + } + if (roc_cpt->eng_grp[CPT_ENG_TYPE_SE] != ROC_CPT_DFLT_ENG_GRP_SE) + { + log_err (dev, "Invalid CPT SE engine group configuration"); + return -1; + } + if ((rrv = roc_cpt_dev_configure (roc_cpt, nb_lf, false, 0)) < 0) + { + log_err (dev, "could not configure crypto device %U", + format_vlib_pci_addr, roc_cpt->pci_dev->addr); + return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_configure"); + } + return 0; +} + +static vnet_dev_rv_t +oct_conf_cpt_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) +{ + struct roc_cpt *roc_cpt = ocd->roc_cpt; + struct roc_cpt_lmtline *cpt_lmtline; + struct roc_cpt_lf *cpt_lf; + int rrv; + + cpt_lf = &ocd->lf; + cpt_lmtline = &ocd->lmtline; + + cpt_lf->nb_desc = OCT_CPT_LF_MAX_NB_DESC; + cpt_lf->lf_id = 0; + if ((rrv = roc_cpt_lf_init (roc_cpt, cpt_lf)) < 0) + return cnx_return_roc_err (dev, rrv, "roc_cpt_lf_init"); + + roc_cpt_iq_enable (cpt_lf); + + if ((rrv = roc_cpt_lmtline_init (roc_cpt, cpt_lmtline, 0) < 0)) + return cnx_return_roc_err (dev, rrv, "roc_cpt_lmtline_init"); + + return 0; +} + static vnet_dev_rv_t oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev) { + oct_crypto_main_t *ocm = &oct_crypto_main; + extern oct_plt_init_param_t oct_plt_init_param; oct_device_t *cd = vnet_dev_get_data (dev); + oct_crypto_dev_t *ocd = NULL; int rrv; - struct roc_cpt cpt = { - .pci_dev = &cd->plt_pci_dev, - }; - if ((rrv = roc_cpt_dev_init (&cpt))) + if (ocm->n_cpt == OCT_MAX_N_CPT_DEV || ocm->started) + return VNET_DEV_ERR_NOT_SUPPORTED; + + ocd = oct_plt_init_param.oct_plt_zmalloc (sizeof (oct_crypto_dev_t), + CLIB_CACHE_LINE_BYTES); + + ocd->roc_cpt = oct_plt_init_param.oct_plt_zmalloc (sizeof (struct roc_cpt), + CLIB_CACHE_LINE_BYTES); + ocd->roc_cpt->pci_dev = &cd->plt_pci_dev; + + ocd->dev = dev; + + if ((rrv = roc_cpt_dev_init (ocd->roc_cpt))) return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init"); + + if ((rrv = 
oct_conf_cpt (vm, dev, ocd, 1))) + return rrv; + + if ((rrv = oct_conf_cpt_queue (vm, dev, ocd))) + return rrv; + + if (!ocm->n_cpt) + { + /* + * Initialize s/w queues, which are common across multiple + * crypto devices + */ + oct_conf_sw_queue (vm, dev); + + ocm->crypto_dev[0] = ocd; + } + + ocm->crypto_dev[1] = ocd; + + oct_init_crypto_engine_handlers (vm, dev); + + ocm->n_cpt++; + return VNET_DEV_OK; } @@ -244,10 +351,12 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev) { case OCT_DEVICE_TYPE_RVU_PF: case OCT_DEVICE_TYPE_RVU_VF: + case OCT_DEVICE_TYPE_LBK_VF: case OCT_DEVICE_TYPE_SDP_VF: return oct_init_nix (vm, dev); - case OCT_DEVICE_TYPE_CPT_VF: + case OCT_DEVICE_TYPE_O10K_CPT_VF: + case OCT_DEVICE_TYPE_O9K_CPT_VF: return oct_init_cpt (vm, dev); default: diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index 92ec953ed23..ccf8f62880d 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -12,6 +12,12 @@ #include <vnet/flow/flow.h> #include <vnet/udp/udp.h> #include <vnet/ipsec/esp.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/ip/igmp_packet.h> +#include <vnet/gre/packet.h> +#include <vxlan/vxlan.h> #include <base/roc_api.h> #include <dev_octeon/hw_defs.h> @@ -22,8 +28,10 @@ typedef enum OCT_DEVICE_TYPE_UNKNOWN = 0, OCT_DEVICE_TYPE_RVU_PF, OCT_DEVICE_TYPE_RVU_VF, + OCT_DEVICE_TYPE_LBK_VF, OCT_DEVICE_TYPE_SDP_VF, - OCT_DEVICE_TYPE_CPT_VF, + OCT_DEVICE_TYPE_O10K_CPT_VF, + OCT_DEVICE_TYPE_O9K_CPT_VF, } __clib_packed oct_device_type_t; typedef struct @@ -34,7 +42,6 @@ typedef struct u8 full_duplex : 1; u32 speed; struct plt_pci_device plt_pci_dev; - struct roc_cpt cpt; struct roc_nix *nix; } oct_device_t; @@ -95,7 +102,6 @@ typedef struct u64 aura_handle; u64 io_addr; void *lmt_addr; - oct_npa_batch_alloc_cl128_t *ba_buffer; u8 ba_first_cl; u8 ba_num_cl; @@ -140,6 +146,17 @@ vnet_dev_rv_t oct_flow_validate_params (vlib_main_t *, 
vnet_dev_port_t *, vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword, u64 *); +/* counter.c */ +void oct_port_add_counters (vlib_main_t *, vnet_dev_port_t *); +void oct_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +void oct_rxq_clear_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +void oct_txq_clear_counters (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rv_t oct_port_get_stats (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t oct_rxq_get_stats (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_rx_queue_t *); +vnet_dev_rv_t oct_txq_get_stats (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_tx_queue_t *); + #define log_debug(dev, f, ...) \ vlib_log (VLIB_LOG_LEVEL_DEBUG, oct_log.class, "%U: " f, \ format_vnet_dev_addr, (dev), ##__VA_ARGS__) @@ -162,7 +179,8 @@ vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword, _ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \ "aura batch alloc issue failed") \ _ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \ - "aura batch alloc not ready") + "aura batch alloc not ready") \ + _ (MTU_EXCEEDED, mtu_exceeded, ERROR, "mtu exceeded") typedef enum { diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c index d5f78301adf..528683fa3c7 100644 --- a/src/plugins/dev_octeon/port.c +++ b/src/plugins/dev_octeon/port.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <vnet/dev/counters.h> #include <dev_octeon/octeon.h> #include <dev_octeon/common.h> @@ -54,11 +53,83 @@ oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) 
} vnet_dev_rv_t +oct_port_pause_flow_control_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + struct roc_nix_fc_cfg fc_cfg; + struct roc_nix_sq *sq; + struct roc_nix_cq *cq; + struct roc_nix_rq *rq; + int rrv; + + /* pause flow control is not supported on SDP/LBK devices */ + if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix)) + { + log_notice (dev, + "pause flow control is not supported on SDP/LBK devices"); + return VNET_DEV_OK; + } + + fc_cfg.type = ROC_NIX_FC_RXCHAN_CFG; + fc_cfg.rxchan_cfg.enable = true; + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_RQ_CFG; + fc_cfg.rq_cfg.enable = true; + fc_cfg.rq_cfg.tc = 0; + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + + rq = &crq->rq; + cq = &crq->cq; + + fc_cfg.rq_cfg.rq = rq->qid; + fc_cfg.rq_cfg.cq_drop = cq->drop_thresh; + + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + } + + memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg)); + fc_cfg.type = ROC_NIX_FC_TM_CFG; + fc_cfg.tm_cfg.tc = 0; + fc_cfg.tm_cfg.enable = true; + + foreach_vnet_dev_port_tx_queue (txq, port) + { + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + + sq = &ctq->sq; + + fc_cfg.tm_cfg.sq = sq->qid; + rrv = roc_nix_fc_config_set (nix, &fc_cfg); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed"); + } + + /* By default, enable pause flow control */ + rrv = roc_nix_fc_mode_set (nix, ROC_NIX_FC_FULL); + if (rrv) + return oct_roc_err (dev, rrv, "roc_nix_fc_mode_set failed"); + + return VNET_DEV_OK; +} + +vnet_dev_rv_t oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) { vnet_dev_t *dev = port->dev; oct_device_t *cd = 
vnet_dev_get_data (dev); oct_port_t *cp = vnet_dev_get_port_data (port); + u8 mac_addr[PLT_ETHER_ADDR_LEN]; struct roc_nix *nix = cd->nix; vnet_dev_rv_t rv; int rrv; @@ -76,6 +147,22 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) } cp->lf_allocated = 1; + if (!roc_nix_is_vf_or_sdp (nix)) + { + if ((rrv = roc_nix_npc_mac_addr_get (nix, mac_addr))) + { + oct_port_deinit (vm, port); + return oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get failed"); + } + + /* Sync MAC address to CGX/RPM table */ + if ((rrv = roc_nix_mac_addr_set (nix, mac_addr))) + { + oct_port_deinit (vm, port); + return oct_roc_err (dev, rrv, "roc_nix_mac_addr_set failed"); + } + } + if ((rrv = roc_nix_tm_init (nix))) { oct_port_deinit (vm, port); @@ -124,6 +211,21 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) return rv; } + oct_port_add_counters (vm, port); + + if ((rrv = roc_nix_mac_mtu_set (nix, port->max_rx_frame_size))) + { + rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed"); + return rv; + } + + /* Configure pause frame flow control*/ + if ((rv = oct_port_pause_flow_control_init (vm, port))) + { + oct_port_deinit (vm, port); + return rv; + } + return VNET_DEV_OK; } @@ -172,7 +274,22 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_port_state_changes_t changes = {}; int rrv; - if (roc_nix_is_lbk (nix)) + if (oct_port_get_stats (vm, port)) + return; + + foreach_vnet_dev_port_rx_queue (q, port) + { + if (oct_rxq_get_stats (vm, port, q)) + return; + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + if (oct_txq_get_stats (vm, port, q)) + return; + } + + if (roc_nix_is_lbk (nix) || roc_nix_is_sdp (nix)) { link_info.status = 1; link_info.full_duplex = 1; @@ -203,7 +320,8 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) if (cd->speed != link_info.speed) { changes.change.link_speed = 1; - changes.link_speed = link_info.speed; + /* Convert to Kbps */ + changes.link_speed = link_info.speed * 1000; cd->speed = link_info.speed; } @@ -327,12 
+445,6 @@ oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port) ctq->n_enq = 0; } - if ((rrv = roc_nix_mac_mtu_set (nix, 9200))) - { - rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed"); - goto done; - } - if ((rrv = roc_nix_npc_rx_ena_dis (nix, true))) { rv = oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed"); @@ -376,6 +488,18 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) foreach_vnet_dev_port_tx_queue (q, port) oct_txq_stop (vm, q); + + vnet_dev_port_state_change (vm, port, + (vnet_dev_port_state_changes_t){ + .change.link_state = 1, + .change.link_speed = 1, + .link_speed = 0, + .link_state = 0, + }); + + /* Update the device status */ + cd->status = 0; + cd->speed = 0; } vnet_dev_rv_t @@ -385,7 +509,7 @@ oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable) oct_device_t *cd = vnet_dev_get_data (dev); struct roc_nix *nix = cd->nix; - if (roc_nix_is_vf_or_sdp (nix)) + if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix)) return VNET_DEV_ERR_UNSUPPORTED_DEVICE; return VNET_DEV_OK; @@ -405,6 +529,9 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable) return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed"); } + if (!roc_nix_is_pf (nix)) + return VNET_DEV_OK; + rv = roc_nix_mac_promisc_mode_enable (nix, enable); if (rv) { @@ -416,6 +543,61 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable) return VNET_DEV_OK; } +static vnet_dev_rv_t +oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_hw_addr_t *addr, int is_add, + int is_primary) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + i32 rrv; + + if (is_primary) + { + if (is_add) + { + /* Update mac address at NPC */ + rrv = roc_nix_npc_mac_addr_set (nix, (u8 *) addr); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_set() failed"); + + /* Update mac address at CGX for 
PFs only */ + if (!roc_nix_is_vf_or_sdp (nix)) + { + rrv = roc_nix_mac_addr_set (nix, (u8 *) addr); + if (rrv) + { + /* Rollback to previous mac address */ + roc_nix_npc_mac_addr_set (nix, + (u8 *) &port->primary_hw_addr); + rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed"); + } + } + } + } + + return rv; +} + +vnet_dev_rv_t +oct_op_config_max_rx_len (vlib_main_t *vm, vnet_dev_port_t *port, + u32 rx_frame_size) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + i32 rrv; + + rrv = roc_nix_mac_max_rx_len_set (nix, rx_frame_size); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_mac_max_rx_len_set() failed"); + + return rv; +} + vnet_dev_rv_t oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_cfg_change_req_t *req) @@ -465,6 +647,9 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, break; case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + rv = oct_port_add_del_eth_addr (vm, port, &req->addr, + /* is_add */ 1, + /* is_primary */ 1); break; case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: @@ -474,6 +659,7 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, break; case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + rv = oct_op_config_max_rx_len (vm, port, req->max_rx_frame_size); break; case VNET_DEV_PORT_CFG_ADD_RX_FLOW: diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c index d6ae794fb8d..58d391b8508 100644 --- a/src/plugins/dev_octeon/queue.c +++ b/src/plugins/dev_octeon/queue.c @@ -4,7 +4,6 @@ #include <vnet/vnet.h> #include <vnet/dev/dev.h> -#include <vnet/dev/pci.h> #include <vnet/dev/counters.h> #include <dev_octeon/octeon.h> #include <vnet/ethernet/ethernet.h> diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c index f10c2cb578b..c1166b654cf 100644 --- a/src/plugins/dev_octeon/roc_helper.c +++ b/src/plugins/dev_octeon/roc_helper.c @@ -49,6 +49,12 @@ 
oct_plt_get_thread_index (void) return __os_thread_index; } +static u64 +oct_plt_get_cache_line_size (void) +{ + return CLIB_CACHE_LINE_BYTES; +} + static void oct_drv_physmem_free (vlib_main_t *vm, void *mem) { @@ -69,13 +75,12 @@ oct_drv_physmem_alloc (vlib_main_t *vm, u32 size, u32 align) if (align) { - /* Force cache line alloc in case alignment is less than cache line */ - align = align < CLIB_CACHE_LINE_BYTES ? CLIB_CACHE_LINE_BYTES : align; + /* Force ROC align alloc in case alignment is less than ROC align */ + align = align < ROC_ALIGN ? ROC_ALIGN : align; mem = vlib_physmem_alloc_aligned_on_numa (vm, size, align, 0); } else - mem = - vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES, 0); + mem = vlib_physmem_alloc_aligned_on_numa (vm, size, ROC_ALIGN, 0); if (!mem) return NULL; @@ -178,4 +183,5 @@ oct_plt_init_param_t oct_plt_init_param = { .oct_plt_spinlock_unlock = oct_plt_spinlock_unlock, .oct_plt_spinlock_trylock = oct_plt_spinlock_trylock, .oct_plt_get_thread_index = oct_plt_get_thread_index, + .oct_plt_get_cache_line_size = oct_plt_get_cache_line_size, }; diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c index 997f1356199..b057c4d7047 100644 --- a/src/plugins/dev_octeon/rx_node.c +++ b/src/plugins/dev_octeon/rx_node.c @@ -104,7 +104,9 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, { oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); vlib_buffer_template_t bt = rxq->buffer_template; - u32 n_left; + u32 b0_err_flags = 0, b1_err_flags = 0; + u32 b2_err_flags = 0, b3_err_flags = 0; + u32 n_left, err_flags = 0; oct_nix_rx_cqe_desc_t *d = ctx->next_desc; vlib_buffer_t *b[4]; @@ -145,6 +147,13 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, oct_rx_attach_tail (vm, ctx, b[2], d + 2); oct_rx_attach_tail (vm, ctx, b[3], d + 3); } + + b0_err_flags = (d[0].parse.w[0] >> 20) & 0xFFF; + b1_err_flags = (d[1].parse.w[0] >> 20) & 0xFFF; + b2_err_flags = (d[2].parse.w[0] >> 20) & 0xFFF; + 
b3_err_flags = (d[3].parse.w[0] >> 20) & 0xFFF; + + err_flags |= b0_err_flags | b1_err_flags | b2_err_flags | b3_err_flags; } for (; n_left; d += 1, n_left -= 1, ctx->to_next += 1) @@ -157,14 +166,51 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, ctx->n_segs += 1; if (d[0].sg0.segs > 1) oct_rx_attach_tail (vm, ctx, b[0], d + 0); + + err_flags |= ((d[0].parse.w[0] >> 20) & 0xFFF); } plt_write64 ((crq->cq.wdata | n), crq->cq.door); ctx->n_rx_pkts += n; ctx->n_left_to_next -= n; + if (err_flags) + ctx->parse_w0_or = (err_flags << 20); + return n; } +#ifdef PLATFORM_OCTEON9 +static_always_inline u32 +oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill) +{ + u32 n_alloc, n_free; + u32 buffer_indices[n_refill]; + vlib_buffer_t *buffers[n_refill]; + u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq); + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + u64 aura = roc_npa_aura_handle_to_aura (crq->aura_handle); + const uint64_t addr = + roc_npa_aura_handle_to_base (crq->aura_handle) + NPA_LF_AURA_OP_FREE0; + + if (n_refill < 256) + return 0; + + n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_refill); + if (PREDICT_FALSE (n_alloc < n_refill)) + goto alloc_fail; + + vlib_get_buffers (vm, buffer_indices, (vlib_buffer_t **) buffers, n_alloc); + + for (n_free = 0; n_free < n_alloc; n_free++) + roc_store_pair ((u64) buffers[n_free], aura, addr); + + return n_alloc; + +alloc_fail: + vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi); + return 0; +} +#else static_always_inline void oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr, oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi, @@ -260,6 +306,7 @@ oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill) return n_enq; } +#endif static_always_inline void oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c index 0dbf8759d35..f42f18d989b 100644 --- 
a/src/plugins/dev_octeon/tx_node.c +++ b/src/plugins/dev_octeon/tx_node.c @@ -22,13 +22,54 @@ typedef struct u32 n_tx_bytes; u32 n_drop; vlib_buffer_t *drop[VLIB_FRAME_SIZE]; + u32 n_exd_mtu; + vlib_buffer_t *exd_mtu[VLIB_FRAME_SIZE]; u32 batch_alloc_not_ready; u32 batch_alloc_issue_fail; + int max_pkt_len; u16 lmt_id; u64 lmt_ioaddr; lmt_line_t *lmt_lines; } oct_tx_ctx_t; +#ifdef PLATFORM_OCTEON9 +static_always_inline u32 +oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + u16 off = ctq->hdr_off; + u64 ah = ctq->aura_handle; + u32 n_freed = 0, n; + + ah = ctq->aura_handle; + + if ((n = roc_npa_aura_op_available (ah)) >= 32) + { + u64 buffers[n]; + u32 bi[n]; + + n_freed = roc_npa_aura_op_bulk_alloc (ah, buffers, n, 0, 1); + vlib_get_buffer_indices_with_offset (vm, (void **) &buffers, bi, n_freed, + off); + vlib_buffer_free_no_next (vm, bi, n_freed); + } + + return n_freed; +} + +static_always_inline void +oct_lmt_copy (void *lmt_addr, u64 io_addr, void *desc, u64 dwords) +{ + u64 lmt_status; + + do + { + roc_lmt_mov_seg (lmt_addr, desc, dwords); + lmt_status = roc_lmt_submit_ldeor (io_addr); + } + while (lmt_status == 0); +} +#else static_always_inline u32 oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) { @@ -130,10 +171,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) return n_freed; } +#endif static_always_inline u8 oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, - lmt_line_t *line, u32 flags, int simple, int trace) + lmt_line_t *line, u32 flags, int simple, int trace, u32 *n, + u8 *dpl) { u8 n_dwords = 2; u32 total_len = 0; @@ -148,6 +191,17 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, }, }; + if (PREDICT_FALSE (vlib_buffer_length_in_chain (vm, b) > ctx->max_pkt_len)) + { + ctx->exd_mtu[ctx->n_exd_mtu++] = b; + return 0; + } + +#ifdef PLATFORM_OCTEON9 + /* Override line 
for Octeon9 */ + line = ctx->lmt_lines; +#endif + if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT) { u8 n_tail_segs = 0; @@ -159,7 +213,7 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, tail_segs[n_tail_segs++] = t; if (n_tail_segs > 5) { - ctx->drop[ctx->n_drop++] = t; + ctx->drop[ctx->n_drop++] = b; return 0; } } @@ -201,19 +255,18 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) { d.hdr_w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM; - d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset; - d.hdr_w1.ol4ptr = - vnet_buffer (b)->l3_hdr_offset + sizeof (ip4_header_t); + d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset - b->current_data; + d.hdr_w1.ol4ptr = d.hdr_w1.ol3ptr + sizeof (ip4_header_t); } if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) { d.hdr_w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM; - d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset; + d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; } else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) { d.hdr_w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM; - d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset; + d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; } } @@ -228,8 +281,15 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; } +#ifdef PLATFORM_OCTEON9 + oct_lmt_copy (line, ctx->lmt_ioaddr, &d, n_dwords); +#else for (u32 i = 0; i < n_dwords; i++) line->dwords[i] = d.as_u128[i]; +#endif + + *dpl = n_dwords; + *n = *n + 1; return n_dwords; } @@ -239,8 +299,9 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, vlib_buffer_t **b, u32 n_pkts, int trace) { u8 dwords_per_line[16], *dpl = dwords_per_line; - u64 lmt_arg, ioaddr, n_lines; - u32 n_left, or_flags_16 = 0; + u64 __attribute__ ((unused)) lmt_arg, ioaddr, n_lines; + u32 __attribute__ ((unused)) or_flags_16 = 0; + u32 n_left, n = 0; const u32 not_simple_flags = 
VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD; lmt_line_t *l = ctx->lmt_lines; @@ -248,7 +309,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, /* Data Store Memory Barrier - outer shareable domain */ asm volatile("dmb oshst" ::: "memory"); - for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8) + for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8) { u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0; vlib_prefetch_buffer_header (b[8], LOAD); @@ -269,49 +330,56 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, if ((or_f & not_simple_flags) == 0) { int simple = 1; - oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); - dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2; - dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2; + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } else { int simple = 0; - dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, 
simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } - dpl += 8; + dpl += n; + l += n; + n = 0; } - for (; n_left > 0; n_left -= 1, b += 1, l += 1) + for (; n_left > 0; n_left -= 1, b += 1) { u32 f0 = b[0]->flags; - dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace, &n, &dpl[n]); or_flags_16 |= f0; + dpl += n; + l += n; + n = 0; } lmt_arg = ctx->lmt_id; ioaddr = ctx->lmt_ioaddr; - n_lines = n_pkts; + n_lines = dpl - dwords_per_line; + + if (PREDICT_FALSE (!n_lines)) + return n_pkts; +#ifndef PLATFORM_OCTEON9 if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT)) { dpl = dwords_per_line; @@ -340,6 +408,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, } roc_lmt_submit_steorl (lmt_arg, ioaddr); +#endif return n_pkts; } @@ -350,11 +419,17 @@ VNET_DEV_NODE_FN (oct_tx_node) vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); 
vnet_dev_tx_queue_t *txq = rt->tx_queue; oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); u32 node_index = node->node_index; u32 *from = vlib_frame_vector_args (frame); u32 n, n_enq, n_left, n_pkts = frame->n_vectors; vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers; +#ifdef PLATFORM_OCTEON9 + u64 lmt_id = 0; +#else u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2; +#endif oct_tx_ctx_t ctx = { .node = node, @@ -363,6 +438,7 @@ VNET_DEV_NODE_FN (oct_tx_node) .sq = ctq->sq.qid, .sizem1 = 1, }, + .max_pkt_len = roc_nix_max_pkt_len (cd->nix), .lmt_id = lmt_id, .lmt_ioaddr = ctq->io_addr, .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2), @@ -396,7 +472,7 @@ VNET_DEV_NODE_FN (oct_tx_node) n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0); } - ctq->n_enq = n_enq + n; + ctq->n_enq = n_enq + n - ctx.n_drop - ctx.n_exd_mtu; if (n < n_pkts) { @@ -411,6 +487,10 @@ VNET_DEV_NODE_FN (oct_tx_node) vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG, ctx.n_drop); + if (PREDICT_FALSE (ctx.n_exd_mtu)) + vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_MTU_EXCEEDED, + ctx.n_exd_mtu); + if (ctx.batch_alloc_not_ready) vlib_error_count (vm, node_index, OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY, @@ -431,5 +511,13 @@ VNET_DEV_NODE_FN (oct_tx_node) n_pkts -= ctx.n_drop; } + if (PREDICT_FALSE (ctx.n_exd_mtu)) + { + u32 bi[VLIB_FRAME_SIZE]; + vlib_get_buffer_indices (vm, ctx.exd_mtu, bi, ctx.n_exd_mtu); + vlib_buffer_free (vm, bi, ctx.n_exd_mtu); + n_pkts -= ctx.n_exd_mtu; + } + return n_pkts; } diff --git a/src/plugins/dhcp/client.c b/src/plugins/dhcp/client.c index 8fa67c616b2..d81d2935577 100644 --- a/src/plugins/dhcp/client.c +++ b/src/plugins/dhcp/client.c @@ -1153,7 +1153,9 @@ dhcp_client_set_command_fn (vlib_main_t * vm, a->is_add = is_add; a->sw_if_index = sw_if_index; a->hostname = hostname; - a->client_identifier = format (0, 
"vpp 1.1%c", 0); + a->client_identifier = + format (0, "%U", format_ethernet_address, + vnet_sw_interface_get_hw_address (vnet_get_main (), sw_if_index)); a->set_broadcast_flag = set_broadcast_flag; /* diff --git a/src/plugins/dhcp/dhcp4_proxy_node.c b/src/plugins/dhcp/dhcp4_proxy_node.c index 2b49d49bb7f..740ae8043e0 100644 --- a/src/plugins/dhcp/dhcp4_proxy_node.c +++ b/src/plugins/dhcp/dhcp4_proxy_node.c @@ -321,7 +321,8 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, o->length += id_len + 5; } - len = o->length + 3; + /* 2 bytes for option header 82+len */ + len = o->length + 2; b0->current_length += len; /* Fix IP header length and checksum */ old_l0 = ip0->length; diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c index c838800deb4..77f9a27f97b 100644 --- a/src/plugins/dpdk/device/cli.c +++ b/src/plugins/dpdk/device/cli.c @@ -89,12 +89,18 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { clib_error_t *err = 0; - u32 pipe_max_size; int fds[2]; u8 *s = 0; int n, n_try; FILE *f; + /* + * XXX: Pipes on FreeBSD grow dynamically up to 64KB (FreeBSD 15), don't + * manually tweak this value on FreeBSD at the moment. 
+ */ +#ifdef __linux__ + u32 pipe_max_size; + err = clib_sysfs_read ("/proc/sys/fs/pipe-max-size", "%u", &pipe_max_size); if (err) @@ -112,6 +118,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input, err = clib_error_return_unix (0, "fcntl(F_SETPIPE_SZ)"); goto error; } +#endif /* __linux__ */ if (fcntl (fds[0], F_SETFL, O_NONBLOCK) == -1) { diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 88a4d9ff618..2440439989f 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -210,6 +210,8 @@ typedef struct struct rte_eth_stats last_stats; struct rte_eth_xstat *xstats; f64 time_last_stats_update; + vlib_simple_counter_main_t xstats_counters; + u32 *xstats_symlinks; /* mac address */ u8 *default_mac_address; @@ -240,6 +242,7 @@ typedef struct _ (num_rx_desc) \ _ (num_tx_desc) \ _ (max_lro_pkt_size) \ + _ (disable_rxq_int) \ _ (rss_fn) typedef enum diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h index cb7b185c112..794953da55e 100644 --- a/src/plugins/dpdk/device/dpdk_priv.h +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -47,28 +47,36 @@ dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val) xd->flags = val ? 
xd->flags | flag : xd->flags & ~flag; } +void dpdk_counters_xstats_init (dpdk_device_t *xd); + static inline void -dpdk_get_xstats (dpdk_device_t * xd) +dpdk_get_xstats (dpdk_device_t *xd, u32 thread_index) { - int len, ret; - + int ret; + int i; if (!(xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)) return; - len = rte_eth_xstats_get (xd->port_id, NULL, 0); - if (len < 0) - return; - - vec_validate (xd->xstats, len - 1); - - ret = rte_eth_xstats_get (xd->port_id, xd->xstats, len); - if (ret < 0 || ret > len) + ret = rte_eth_xstats_get (xd->port_id, xd->xstats, vec_len (xd->xstats)); + if (ret < 0) { - vec_set_len (xd->xstats, 0); + dpdk_log_warn ("rte_eth_xstats_get(%d) failed: %d", xd->port_id, ret); + return; + } + else if (ret != vec_len (xd->xstats)) + { + dpdk_log_warn ( + "rte_eth_xstats_get(%d) returned %d/%d stats. Resetting counters.", + xd->port_id, ret, vec_len (xd->xstats)); + dpdk_counters_xstats_init (xd); return; } - vec_set_len (xd->xstats, len); + vec_foreach_index (i, xd->xstats) + { + vlib_set_simple_counter (&xd->xstats_counters, thread_index, i, + xd->xstats[i].value); + } } #define DPDK_UPDATE_COUNTER(vnm, tidx, xd, stat, cnt) \ @@ -107,7 +115,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) DPDK_UPDATE_COUNTER (vnm, thread_index, xd, ierrors, VNET_INTERFACE_COUNTER_RX_ERROR); - dpdk_get_xstats (xd); + dpdk_get_xstats (xd, thread_index); } #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 2d038b907bf..ec9e6045de7 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -30,7 +30,7 @@ #include <dpdk/cryptodev/cryptodev.h> #include <vlib/pci/pci.h> #include <vlib/vmbus/vmbus.h> - +#include <vlib/stats/stats.h> #include <rte_ring.h> #include <rte_vect.h> @@ -226,6 +226,75 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di) return &dm->conf->default_devconf; } +/* + * Initialise the xstats counters for a device + */ +void 
+dpdk_counters_xstats_init (dpdk_device_t *xd) +{ + int len, ret, i; + struct rte_eth_xstat_name *xstats_names = 0; + + if (vec_len (xd->xstats_symlinks) > 0) + { + /* xstats already initialized. Reset counters */ + vec_foreach_index (i, xd->xstats_symlinks) + { + vlib_stats_remove_entry (xd->xstats_symlinks[i]); + } + } + else + { + xd->xstats_counters.stat_segment_name = + (char *) format (0, "/if/xstats/%d%c", xd->sw_if_index, 0); + xd->xstats_counters.counters = 0; + } + + len = rte_eth_xstats_get_names (xd->port_id, 0, 0); + if (len < 0) + { + dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d. DPDK xstats " + "not configured.", + xd->port_id, len); + return; + } + + vlib_validate_simple_counter (&xd->xstats_counters, len); + vlib_zero_simple_counter (&xd->xstats_counters, len); + + vec_validate (xstats_names, len - 1); + vec_validate (xd->xstats, len - 1); + vec_validate (xd->xstats_symlinks, len - 1); + + ret = rte_eth_xstats_get_names (xd->port_id, xstats_names, len); + if (ret >= 0 && ret <= len) + { + vec_foreach_index (i, xstats_names) + { + /* There is a bug in the ENA driver where the xstats names are not + * unique. */ + xd->xstats_symlinks[i] = vlib_stats_add_symlink ( + xd->xstats_counters.stats_entry_index, i, "/interfaces/%U/%s%c", + format_vnet_sw_if_index_name, vnet_get_main (), xd->sw_if_index, + xstats_names[i].name, 0); + if (xd->xstats_symlinks[i] == STAT_SEGMENT_INDEX_INVALID) + { + xd->xstats_symlinks[i] = vlib_stats_add_symlink ( + xd->xstats_counters.stats_entry_index, i, + "/interfaces/%U/%s_%d%c", format_vnet_sw_if_index_name, + vnet_get_main (), xd->sw_if_index, xstats_names[i].name, i, 0); + } + } + } + else + { + dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d. 
DPDK xstats " + "not configured.", + xd->port_id, ret); + } + vec_free (xstats_names); +} + static clib_error_t * dpdk_lib_init (dpdk_main_t * dm) { @@ -434,6 +503,14 @@ dpdk_lib_init (dpdk_main_t * dm) else if (dr && dr->n_tx_desc) xd->conf.n_tx_desc = dr->n_tx_desc; + if (xd->conf.n_tx_desc > di.tx_desc_lim.nb_max) + { + dpdk_log_warn ("[%u] Configured number of TX descriptors (%u) is " + "bigger than maximum supported (%u)", + port_id, xd->conf.n_tx_desc, di.tx_desc_lim.nb_max); + xd->conf.n_tx_desc = di.tx_desc_lim.nb_max; + } + dpdk_log_debug ( "[%u] n_rx_queues: %u n_tx_queues: %u n_rx_desc: %u n_tx_desc: %u", port_id, xd->conf.n_rx_queues, xd->conf.n_tx_queues, @@ -519,6 +596,9 @@ dpdk_lib_init (dpdk_main_t * dm) if (devconf->max_lro_pkt_size) xd->conf.max_lro_pkt_size = devconf->max_lro_pkt_size; + if (devconf->disable_rxq_int) + xd->conf.enable_rxq_int = 0; + dpdk_device_setup (xd); /* rss queues should be configured after dpdk_device_setup() */ @@ -532,6 +612,7 @@ dpdk_lib_init (dpdk_main_t * dm) if (vec_len (xd->errors)) dpdk_log_err ("[%u] setup failed Errors:\n %U", port_id, format_dpdk_device_errors, xd); + dpdk_counters_xstats_init (xd); } for (int i = 0; i < vec_len (dm->devices); i++) @@ -659,7 +740,8 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) ; /* Cisco VIC */ else if (d->vendor_id == 0x1137 && - (d->device_id == 0x0043 || d->device_id == 0x0071)) + (d->device_id == 0x0043 || d->device_id == 0x0071 || + d->device_id == 0x02b7)) ; /* Chelsio T4/T5 */ else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) @@ -936,6 +1018,10 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr, if (error) break; } + else if (unformat (input, "no-rx-interrupts")) + { + devconf->disable_rxq_int = 1; + } else if (unformat (input, "tso on")) { devconf->tso = DPDK_DEVICE_TSO_ON; @@ -1045,19 +1131,21 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) dpdk_main_t *dm = &dpdk_main; clib_error_t *error = 0; dpdk_config_main_t *conf 
= &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); dpdk_device_config_t *devconf; vlib_pci_addr_t pci_addr = { 0 }; vlib_vmbus_addr_t vmbus_addr = { 0 }; unformat_input_t sub_input; +#ifdef __linux + vlib_thread_main_t *tm = vlib_get_thread_main (); uword default_hugepage_sz, x; + u8 file_prefix = 0; +#endif /* __linux__ */ u8 *s, *tmp = 0; int ret, i; int num_whitelisted = 0; int eal_no_hugetlb = 0; u8 no_pci = 0; u8 no_vmbus = 0; - u8 file_prefix = 0; u8 *socket_mem = 0; u32 vendor, device, domain, bus, func; void *fmt_func; @@ -1217,6 +1305,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } foreach_eal_double_hyphen_predicate_arg #undef _ +#ifdef __linux__ #define _(a) \ else if (unformat(input, #a " %s", &s)) \ { \ @@ -1232,6 +1321,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } foreach_eal_double_hyphen_arg #undef _ +#endif /* __linux__ */ #define _(a,b) \ else if (unformat(input, #a " %s", &s)) \ { \ @@ -1258,6 +1348,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) { vec_add1 (conf->eal_init_args, (u8 *) "--in-memory"); +#ifdef __linux__ + /* + * FreeBSD performs huge page prealloc through a dedicated kernel mode + * this process is only required on Linux. 
+ */ default_hugepage_sz = clib_mem_get_default_hugepage_size (); clib_bitmap_foreach (x, tm->cpu_socket_bitmap) @@ -1272,6 +1367,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages))) clib_error_report (e); } +#endif /* __linux__ */ } /* on/off dpdk's telemetry thread */ @@ -1280,6 +1376,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_add1 (conf->eal_init_args, (u8 *) "--no-telemetry"); } +#ifdef __linux__ if (!file_prefix) { tmp = format (0, "--file-prefix%c", 0); @@ -1287,6 +1384,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) tmp = format (0, "vpp%c", 0); vec_add1 (conf->eal_init_args, tmp); } +#endif if (no_pci == 0 && geteuid () == 0) dpdk_bind_devices_to_uio (conf); diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c index 9781d0ed7f0..437cfbd230e 100644 --- a/src/plugins/dpdk/main.c +++ b/src/plugins/dpdk/main.c @@ -50,7 +50,7 @@ rte_delay_us_override (unsigned us) { /* Only suspend for the admin_down_process */ vlib_process_t *proc = vlib_get_current_process (vm); - if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || + if (proc->state != VLIB_PROCESS_STATE_RUNNING || (proc->node_runtime.node_index != admin_up_down_process_node.index)) return 0; diff --git a/src/plugins/flowprobe/flowprobe.c b/src/plugins/flowprobe/flowprobe.c index 58a7cfe22f1..ee0a8eb8a31 100644 --- a/src/plugins/flowprobe/flowprobe.c +++ b/src/plugins/flowprobe/flowprobe.c @@ -48,7 +48,7 @@ uword flowprobe_walker_process (vlib_main_t * vm, vlib_node_runtime_t * rt, VNET_FEATURE_INIT (flowprobe_input_ip4_unicast, static) = { .arc_name = "ip4-unicast", .node_name = "flowprobe-input-ip4", - .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_before = VNET_FEATURES ("ip4-lookup", "ip4-inacl"), }; VNET_FEATURE_INIT (flowprobe_input_ip4_multicast, static) = { .arc_name = "ip4-multicast", @@ -58,7 +58,7 @@ VNET_FEATURE_INIT (flowprobe_input_ip4_multicast, static) = { VNET_FEATURE_INIT 
(flowprobe_input_ip6_unicast, static) = { .arc_name = "ip6-unicast", .node_name = "flowprobe-input-ip6", - .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_before = VNET_FEATURES ("ip6-lookup", "ip6-inacl"), }; VNET_FEATURE_INIT (flowprobe_input_ip6_multicast, static) = { .arc_name = "ip6-multicast", diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt index 179c9c7a4c4..eae100949d4 100644 --- a/src/plugins/hs_apps/CMakeLists.txt +++ b/src/plugins/hs_apps/CMakeLists.txt @@ -21,8 +21,10 @@ add_vpp_plugin(hs_apps hs_apps.c http_cli.c http_client_cli.c + http_client.c http_tps.c proxy.c + test_builtins.c ) ############################################################################## diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c index d1443e75e80..d5edffbd02e 100644 --- a/src/plugins/hs_apps/echo_client.c +++ b/src/plugins/hs_apps/echo_client.c @@ -429,8 +429,11 @@ ec_init (vlib_main_t *vm) ecm->app_is_init = 1; + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; vlib_worker_thread_barrier_sync (vm); - vnet_session_enable_disable (vm, 1 /* turn on session and transports */); + vnet_session_enable_disable (vm, &args); /* Turn on the builtin client input nodes */ foreach_vlib_main () @@ -943,15 +946,16 @@ ec_connect_rpc (void *args) a->api_context = ci; if (needs_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = ecm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = ecm->ckpair_index; } rv = vnet_connect (a); if (needs_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); if (rv) { diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c 
index 0243252434a..b981e775b57 100644 --- a/src/plugins/hs_apps/echo_server.c +++ b/src/plugins/hs_apps/echo_server.c @@ -591,6 +591,7 @@ echo_server_listen () i32 rv; echo_server_main_t *esm = &echo_server_main; vnet_listen_args_t _args = {}, *args = &_args; + int needs_crypto; if ((rv = parse_uri (esm->server_uri, &args->sep_ext))) { @@ -598,11 +599,14 @@ echo_server_listen () } args->app_index = esm->app_index; args->sep_ext.port = hs_make_data_port (args->sep_ext.port); - if (echo_client_transport_needs_crypto (args->sep_ext.transport_proto)) + needs_crypto = + echo_client_transport_needs_crypto (args->sep_ext.transport_proto); + if (needs_crypto) { - session_endpoint_alloc_ext_cfg (&args->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - args->sep_ext.ext_cfg->crypto.ckpair_index = esm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &args->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = esm->ckpair_index; } if (args->sep_ext.transport_proto == TRANSPORT_PROTO_UDP) @@ -612,8 +616,8 @@ echo_server_listen () rv = vnet_listen (args); esm->listener_handle = args->handle; - if (args->sep_ext.ext_cfg) - clib_mem_free (args->sep_ext.ext_cfg); + if (needs_crypto) + session_endpoint_free_ext_cfgs (&args->sep_ext); return rv; } @@ -736,7 +740,10 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, goto cleanup; } - vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. 
*/ ); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); if (!server_uri_set) { diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c index 5d4d49c0fba..dfa90f9eced 100644 --- a/src/plugins/hs_apps/http_cli.c +++ b/src/plugins/hs_apps/http_cli.c @@ -17,12 +17,29 @@ #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> + +#define HCS_DEBUG 0 + +#if HCS_DEBUG +#define HCS_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define HCS_DBG(_fmt, _args...) +#endif + +typedef struct +{ + u32 handle; + u8 *uri; +} hcs_uri_map_t; typedef struct { u32 hs_index; u32 thread_index; u64 node_index; + u8 plain_text; u8 *buf; } hcs_cli_args_t; @@ -34,6 +51,7 @@ typedef struct u8 *tx_buf; u32 tx_offset; u32 vpp_session_index; + http_header_t *resp_headers; } hcs_session_t; typedef struct @@ -50,6 +68,16 @@ typedef struct u32 fifo_size; u8 *uri; vlib_main_t *vlib_main; + + /* hash table to store uri -> uri map pool index */ + uword *index_by_uri; + + /* pool of uri maps */ + hcs_uri_map_t *uri_map_pool; + + /* for appns */ + u8 *appns_id; + u64 appns_secret; } hcs_main_t; static hcs_main_t hcs_main; @@ -143,26 +171,48 @@ start_send_data (hcs_session_t *hs, http_status_code_t status) { http_msg_t msg; session_t *ts; + u8 *headers_buf = 0; int rv; + if (vec_len (hs->resp_headers)) + { + headers_buf = http_serialize_headers (hs->resp_headers); + vec_free (hs->resp_headers); + msg.data.headers_offset = 0; + msg.data.headers_len = vec_len (headers_buf); + } + else + { + msg.data.headers_offset = 0; + msg.data.headers_len = 0; + } + msg.type = HTTP_MSG_REPLY; msg.code = status; - msg.content_type = HTTP_CONTENT_TEXT_HTML; msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = vec_len (hs->tx_buf); + msg.data.body_len = vec_len 
(hs->tx_buf); + msg.data.body_offset = msg.data.headers_len; + msg.data.len = msg.data.body_len + msg.data.headers_len; ts = session_get (hs->vpp_session_index, hs->thread_index); rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); ASSERT (rv == sizeof (msg)); - if (!msg.data.len) + if (msg.data.headers_len) + { + rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf); + ASSERT (rv == msg.data.headers_len); + vec_free (headers_buf); + } + + if (!msg.data.body_len) goto done; rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (hs->tx_buf), hs->tx_buf); if (rv != vec_len (hs->tx_buf)) { - hs->tx_offset = rv; + hs->tx_offset = (rv > 0) ? rv : 0; svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); } else @@ -173,7 +223,7 @@ start_send_data (hcs_session_t *hs, http_status_code_t status) done: if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); } static void @@ -181,6 +231,7 @@ send_data_to_http (void *rpc_args) { hcs_cli_args_t *args = (hcs_cli_args_t *) rpc_args; hcs_session_t *hs; + http_content_type_t type = HTTP_CONTENT_TEXT_HTML; hs = hcs_session_get (args->thread_index, args->hs_index); if (!hs) @@ -190,6 +241,13 @@ send_data_to_http (void *rpc_args) } hs->tx_buf = args->buf; + if (args->plain_text) + type = HTTP_CONTENT_TEXT_PLAIN; + + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (type)); + start_send_data (hs, HTTP_STATUS_OK); cleanup: @@ -218,17 +276,9 @@ hcs_cli_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) { if (request[i] == '/') request[i] = ' '; - else if (request[i] == ' ') - { - /* vlib_cli_input is vector-based, no need for a NULL */ - vec_set_len (request, i); - break; - } i++; } - - /* Generate the html header */ - html = format (0, html_header_template, request /* title */ ); + HCS_DBG ("%v", request); /* Run the 
command */ unformat_init_vector (&input, vec_dup (request)); @@ -236,9 +286,17 @@ hcs_cli_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) unformat_free (&input); request = 0; - /* Generate the html page */ - html = format (html, "%v", reply); - html = format (html, html_footer); + if (args->plain_text) + { + html = format (0, "%v", reply); + } + else + { + /* Generate the html page */ + html = format (0, html_header_template, request /* title */); + html = format (html, "%v", reply); + html = format (html, html_footer); + } /* Send it */ rpc_args = clib_mem_alloc (sizeof (*args)); @@ -308,9 +366,11 @@ hcs_ts_rx_callback (session_t *ts) hcs_cli_args_t args = {}; hcs_session_t *hs; http_msg_t msg; - int rv; + int rv, is_encoded = 0; hs = hcs_session_get (ts->thread_index, ts->opaque); + hs->tx_buf = 0; + hs->resp_headers = 0; /* Read the http message header */ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); @@ -318,16 +378,66 @@ hcs_ts_rx_callback (session_t *ts) if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET) { - hs->tx_buf = 0; + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_ALLOW), + http_token_lit ("GET")); start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); - return 0; + goto done; + } + + if (msg.data.target_path_len == 0 || + msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) + { + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + goto done; } /* send the command to a new/recycled vlib process */ - vec_validate (args.buf, msg.data.len - 1); - rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf); - ASSERT (rv == msg.data.len); - vec_set_len (args.buf, rv); + vec_validate (args.buf, msg.data.target_path_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, + msg.data.target_path_len, args.buf); + ASSERT (rv == msg.data.target_path_len); + HCS_DBG ("%v", args.buf); + if (http_validate_abs_path_syntax (args.buf, &is_encoded)) + { + start_send_data (hs, 
HTTP_STATUS_BAD_REQUEST); + vec_free (args.buf); + goto done; + } + if (is_encoded) + { + u8 *decoded = http_percent_decode (args.buf, vec_len (args.buf)); + vec_free (args.buf); + args.buf = decoded; + } + + if (msg.data.headers_len) + { + u8 *headers = 0; + http_header_table_t *ht; + vec_validate (headers, msg.data.headers_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset, + msg.data.headers_len, headers); + ASSERT (rv == msg.data.headers_len); + if (http_parse_headers (headers, &ht)) + { + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + vec_free (args.buf); + vec_free (headers); + goto done; + } + const char *accept_value = + http_get_header (ht, http_header_name_str (HTTP_HEADER_ACCEPT)); + if (accept_value) + { + HCS_DBG ("client accept: %s", accept_value); + /* just for testing purpose, we don't care about precedence */ + if (strstr (accept_value, "text/plain")) + args.plain_text = 1; + } + http_free_header_table (ht); + vec_free (headers); + } args.hs_index = hs->session_index; args.thread_index = ts->thread_index; @@ -338,6 +448,9 @@ hcs_ts_rx_callback (session_t *ts) sizeof (args)); else alloc_cli_process (&args); + +done: + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len); return 0; } @@ -372,7 +485,7 @@ hcs_ts_tx_callback (session_t *ts) } if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); return 0; } @@ -488,6 +601,11 @@ hcs_attach () hcm->fifo_size ? 
hcm->fifo_size : 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos; + if (hcm->appns_id) + { + a->namespace_id = hcm->appns_id; + a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret; + } if (vnet_application_attach (a)) { @@ -522,15 +640,15 @@ hcs_listen () session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; hcs_main_t *hcm = &hcs_main; vnet_listen_args_t _a, *a = &_a; - char *uri = "tcp://0.0.0.0/80"; u8 need_crypto; int rv; + char *uri; clib_memset (a, 0, sizeof (*a)); a->app_index = hcm->app_index; - if (hcm->uri) - uri = (char *) hcm->uri; + uri = (char *) hcm->uri; + ASSERT (uri); if (parse_uri (uri, &sep)) return -1; @@ -542,15 +660,24 @@ hcs_listen () if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = hcm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hcm->ckpair_index; } rv = vnet_listen (a); + if (rv == 0) + { + hcs_uri_map_t *map; + pool_get_zero (hcm->uri_map_pool, map); + map->uri = vec_dup (uri); + map->handle = a->handle; + hash_set_mem (hcm->index_by_uri, map->uri, map - hcm->uri_map_pool); + } if (need_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } @@ -567,6 +694,43 @@ hcs_detach () } static int +hcs_unlisten () +{ + hcs_main_t *hcm = &hcs_main; + vnet_unlisten_args_t _a, *a = &_a; + char *uri; + int rv = 0; + uword *value; + + clib_memset (a, 0, sizeof (*a)); + a->app_index = hcm->app_index; + + uri = (char *) hcm->uri; + ASSERT (uri); + + value = hash_get_mem (hcm->index_by_uri, uri); + if (value) + { + hcs_uri_map_t *map = pool_elt_at_index (hcm->uri_map_pool, *value); + + a->handle = map->handle; + rv = vnet_unlisten (a); + if (rv == 0) + 
{ + hash_unset_mem (hcm->index_by_uri, uri); + vec_free (map->uri); + pool_put (hcm->uri_map_pool, map); + if (pool_elts (hcm->uri_map_pool) == 0) + hcs_detach (); + } + } + else + return -1; + + return rv; +} + +static int hcs_create (vlib_main_t *vm) { vlib_thread_main_t *vtm = vlib_get_thread_main (); @@ -599,6 +763,8 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input, hcs_main_t *hcm = &hcs_main; u64 seg_size; int rv; + u32 listener_add = ~0; + clib_error_t *error = 0; hcm->prealloc_fifos = 0; hcm->private_segment_size = 0; @@ -617,13 +783,32 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input, hcm->private_segment_size = seg_size; else if (unformat (line_input, "fifo-size %d", &hcm->fifo_size)) hcm->fifo_size <<= 10; - else if (unformat (line_input, "uri %s", &hcm->uri)) + else if (unformat (line_input, "uri %_%v%_", &hcm->uri)) ; + else if (unformat (line_input, "appns %_%v%_", &hcm->appns_id)) + ; + else if (unformat (line_input, "secret %lu", &hcm->appns_secret)) + ; + else if (unformat (line_input, "listener")) + { + if (unformat (line_input, "add")) + listener_add = 1; + else if (unformat (line_input, "del")) + listener_add = 0; + else + { + unformat_free (line_input); + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } else { unformat_free (line_input); - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; } } @@ -631,10 +816,43 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input, start_server: + if (hcm->uri == 0) + hcm->uri = format (0, "tcp://0.0.0.0/80"); + if (hcm->app_index != (u32) ~0) - return clib_error_return (0, "test http server is already running"); + { + if (hcm->appns_id && (listener_add != ~0)) + { + error = clib_error_return ( + 0, "appns must not be specified for listener add/del"); + goto 
done; + } + if (listener_add == 1) + { + if (hcs_listen ()) + error = + clib_error_return (0, "failed to start listening %v", hcm->uri); + goto done; + } + else if (listener_add == 0) + { + rv = hcs_unlisten (); + if (rv != 0) + error = clib_error_return ( + 0, "failed to stop listening %v, rv = %d", hcm->uri, rv); + goto done; + } + else + { + error = clib_error_return (0, "test http server is already running"); + goto done; + } + } - vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ ); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); rv = hcs_create (vm); switch (rv) @@ -642,16 +860,23 @@ start_server: case 0: break; default: - return clib_error_return (0, "server_create returned %d", rv); + { + error = clib_error_return (0, "server_create returned %d", rv); + goto done; + } } - return 0; +done: + vec_free (hcm->appns_id); + vec_free (hcm->uri); + return error; } VLIB_CLI_COMMAND (hcs_create_command, static) = { .path = "http cli server", .short_help = "http cli server [uri <uri>] [fifo-size <nbytes>] " - "[private-segment-size <nMG>] [prealloc-fifos <n>]", + "[private-segment-size <nMG>] [prealloc-fifos <n>] " + "[listener <add|del>] [appns <app-ns> secret <appns-secret>]", .function = hcs_create_command_fn, }; @@ -662,6 +887,7 @@ hcs_main_init (vlib_main_t *vm) hcs->app_index = ~0; hcs->vlib_main = vm; + hcs->index_by_uri = hash_create_vec (0, sizeof (u8), sizeof (uword)); return 0; } diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c new file mode 100644 index 00000000000..05a87ec7de8 --- /dev/null +++ b/src/plugins/hs_apps/http_client.c @@ -0,0 +1,743 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. 
+ */ + +#include <vnet/session/application.h> +#include <vnet/session/application_interface.h> +#include <vnet/session/session.h> +#include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> +#include <http/http_status_codes.h> +#include <vppinfra/unix.h> + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 session_index; + u32 thread_index; + u32 vpp_session_index; + u64 to_recv; + u8 is_closed; +} hc_session_t; + +typedef struct +{ + hc_session_t *sessions; + u32 thread_index; + vlib_main_t *vlib_main; +} hc_worker_t; + +typedef struct +{ + u32 app_index; + u32 cli_node_index; + u8 attached; + u8 *uri; + session_endpoint_cfg_t connect_sep; + u8 *target; + u8 *headers_buf; + u8 *data; + u64 data_offset; + hc_worker_t *wrk; + u8 *resp_headers; + u8 *http_response; + u8 *response_status; + http_header_ht_t *custom_header; + u8 is_file; + u8 use_ptr; + u8 *filename; + bool verbose; + f64 timeout; + http_req_method_t req_method; +} hc_main_t; + +typedef enum +{ + HC_CONNECT_FAILED = 1, + HC_TRANSPORT_CLOSED, + HC_REPLY_RECEIVED, +} hc_cli_signal_t; + +static hc_main_t hc_main; + +static inline hc_worker_t * +hc_worker_get (u32 thread_index) +{ + return &hc_main.wrk[thread_index]; +} + +static inline hc_session_t * +hc_session_get (u32 session_index, u32 thread_index) +{ + hc_worker_t *wrk = hc_worker_get (thread_index); + wrk->vlib_main = vlib_get_main_by_index (thread_index); + return pool_elt_at_index (wrk->sessions, session_index); +} + +static void +hc_ho_session_free (u32 hs_index) +{ + hc_worker_t *wrk = hc_worker_get (0); + pool_put_index (wrk->sessions, hs_index); +} + +static hc_session_t * +hc_session_alloc (hc_worker_t *wrk) +{ + hc_session_t *s; + + pool_get_zero (wrk->sessions, s); + s->session_index = s - wrk->sessions; + s->thread_index = wrk->thread_index; + + return s; +} + +static int +hc_session_connected_callback (u32 app_index, u32 hc_session_index, + session_t *s, session_error_t err) +{ + 
hc_main_t *hcm = &hc_main; + hc_session_t *hc_session, *new_hc_session; + hc_worker_t *wrk; + http_msg_t msg; + u64 to_send; + u32 n_enq; + u8 n_segs; + int rv; + http_header_ht_t *header; + http_header_t *req_headers = 0; + u32 new_hc_index; + + HTTP_DBG (1, "ho hc_index: %d", hc_session_index); + + if (err) + { + clib_warning ("hc_session_index[%d] connected error: %U", + hc_session_index, format_session_error, err); + vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, + HC_CONNECT_FAILED, 0); + return -1; + } + + hc_session = hc_session_get (hc_session_index, 0); + wrk = hc_worker_get (s->thread_index); + new_hc_session = hc_session_alloc (wrk); + new_hc_index = new_hc_session->session_index; + clib_memcpy_fast (new_hc_session, hc_session, sizeof (*hc_session)); + hc_session->vpp_session_index = s->session_index; + + new_hc_session->session_index = new_hc_index; + new_hc_session->thread_index = s->thread_index; + new_hc_session->vpp_session_index = s->session_index; + HTTP_DBG (1, "new hc_index: %d", new_hc_session->session_index); + s->opaque = new_hc_index; + + if (hcm->req_method == HTTP_REQ_POST) + { + if (hcm->is_file) + http_add_header ( + &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); + else + http_add_header ( + &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); + } + + vec_foreach (header, hcm->custom_header) + http_add_header (&req_headers, (const char *) header->name, + vec_len (header->name), (const char *) header->value, + vec_len (header->value)); + + hcm->headers_buf = http_serialize_headers (req_headers); + vec_free (req_headers); + + msg.method_type = hcm->req_method; + if (hcm->req_method == HTTP_REQ_POST) + msg.data.body_len = vec_len (hcm->data); + else + msg.data.body_len = 0; + + msg.type = HTTP_MSG_REQUEST; + /* request target */ + msg.data.target_form = 
HTTP_TARGET_ORIGIN_FORM; + msg.data.target_path_len = vec_len (hcm->target); + /* custom headers */ + msg.data.headers_len = vec_len (hcm->headers_buf); + /* total length */ + msg.data.len = + msg.data.target_path_len + msg.data.headers_len + msg.data.body_len; + + if (hcm->use_ptr) + { + uword target = pointer_to_uword (hcm->target); + uword headers = pointer_to_uword (hcm->headers_buf); + uword body = pointer_to_uword (hcm->data); + msg.data.type = HTTP_MSG_DATA_PTR; + svm_fifo_seg_t segs[4] = { + { (u8 *) &msg, sizeof (msg) }, + { (u8 *) &target, sizeof (target) }, + { (u8 *) &headers, sizeof (headers) }, + { (u8 *) &body, sizeof (body) }, + }; + + n_segs = (hcm->req_method == HTTP_REQ_GET) ? 3 : 4; + rv = svm_fifo_enqueue_segments (s->tx_fifo, segs, n_segs, + 0 /* allow partial */); + if (hcm->req_method == HTTP_REQ_POST) + ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers) + + sizeof (body))); + else + ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers))); + goto done; + } + + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.target_path_offset = 0; + msg.data.headers_offset = msg.data.target_path_len; + msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len; + + rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->target), hcm->target); + ASSERT (rv == vec_len (hcm->target)); + + rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->headers_buf), + hcm->headers_buf); + ASSERT (rv == msg.data.headers_len); + + if (hcm->req_method == HTTP_REQ_POST) + { + to_send = vec_len (hcm->data); + n_enq = clib_min (svm_fifo_size (s->tx_fifo), to_send); + + rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hcm->data); + if (rv < to_send) + { + hcm->data_offset = (rv > 0) ? 
rv : 0; + svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + } + } + +done: + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); + + return 0; +} + +static void +hc_session_disconnect_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + vnet_disconnect_args_t _a = { 0 }, *a = &_a; + int rv; + + a->handle = session_handle (s); + a->app_index = hcm->app_index; + if ((rv = vnet_disconnect_session (a))) + clib_warning ("warning: disconnect returned: %U", format_session_error, + rv); +} + +static void +hc_session_transport_closed_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, + HC_TRANSPORT_CLOSED, 0); +} + +static void +hc_ho_cleanup_callback (session_t *ts) +{ + HTTP_DBG (1, "ho hc_index: %d:", ts->opaque); + hc_ho_session_free (ts->opaque); +} + +static void +hc_session_reset_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + hc_session_t *hc_session; + vnet_disconnect_args_t _a = { 0 }, *a = &_a; + int rv; + + hc_session = hc_session_get (s->opaque, s->thread_index); + hc_session->is_closed = 1; + + a->handle = session_handle (s); + a->app_index = hcm->app_index; + if ((rv = vnet_disconnect_session (a))) + clib_warning ("warning: disconnect returned: %U", format_session_error, + rv); +} + +static int +hc_rx_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + hc_session_t *hc_session; + http_msg_t msg; + int rv; + + hc_session = hc_session_get (s->opaque, s->thread_index); + + if (hc_session->is_closed) + { + clib_warning ("hc_session_index[%d] is closed", s->opaque); + return -1; + } + + if (hc_session->to_recv == 0) + { + rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + if (msg.type != HTTP_MSG_REPLY) + { + clib_warning ("unexpected msg type %d", msg.type); + return -1; + } + + if (msg.data.headers_len) + { + http_header_table_t *ht; + vec_validate 
(hcm->resp_headers, msg.data.headers_len - 1); + rv = svm_fifo_peek (s->rx_fifo, msg.data.headers_offset, + msg.data.headers_len, hcm->resp_headers); + + ASSERT (rv == msg.data.headers_len); + HTTP_DBG (1, (char *) hcm->resp_headers); + + if (http_parse_headers (hcm->resp_headers, &ht)) + { + clib_warning ("invalid headers received"); + return -1; + } + http_free_header_table (ht); + + hcm->response_status = + format (0, "%U", format_http_status_code, msg.code); + } + + if (msg.data.body_len == 0) + { + svm_fifo_dequeue_drop_all (s->rx_fifo); + goto done; + } + + /* drop everything up to body */ + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.body_offset); + hc_session->to_recv = msg.data.body_len; + if (msg.code != HTTP_STATUS_OK && hc_session->to_recv == 0) + { + goto done; + } + vec_validate (hcm->http_response, msg.data.body_len - 1); + vec_reset_length (hcm->http_response); + } + + u32 max_deq = svm_fifo_max_dequeue (s->rx_fifo); + + u32 n_deq = clib_min (hc_session->to_recv, max_deq); + u32 curr = vec_len (hcm->http_response); + rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hcm->http_response + curr); + if (rv < 0) + { + clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv); + return -1; + } + + ASSERT (rv == n_deq); + vec_set_len (hcm->http_response, curr + n_deq); + ASSERT (hc_session->to_recv >= rv); + hc_session->to_recv -= rv; + +done: + if (hc_session->to_recv == 0) + { + hc_session_disconnect_callback (s); + vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, + HC_REPLY_RECEIVED, 0); + } + + return 0; +} + +static int +hc_tx_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + u64 to_send; + int rv; + + to_send = vec_len (hcm->data) - hcm->data_offset; + rv = svm_fifo_enqueue (s->tx_fifo, to_send, hcm->data + hcm->data_offset); + + if (rv <= 0) + { + svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + return 0; + } + + if (rv < to_send) + { + hcm->data_offset += rv; + svm_fifo_add_want_deq_ntf (s->tx_fifo, 
SVM_FIFO_WANT_DEQ_NOTIF); + } + + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); + + return 0; +} + +static session_cb_vft_t hc_session_cb_vft = { + .session_connected_callback = hc_session_connected_callback, + .session_disconnect_callback = hc_session_disconnect_callback, + .session_transport_closed_callback = hc_session_transport_closed_callback, + .session_reset_callback = hc_session_reset_callback, + .builtin_app_rx_callback = hc_rx_callback, + .builtin_app_tx_callback = hc_tx_callback, + .half_open_cleanup_callback = hc_ho_cleanup_callback, +}; + +static clib_error_t * +hc_attach () +{ + hc_main_t *hcm = &hc_main; + vnet_app_attach_args_t _a, *a = &_a; + u64 options[18]; + int rv; + + clib_memset (a, 0, sizeof (*a)); + clib_memset (options, 0, sizeof (options)); + + a->api_client_index = APP_INVALID_INDEX; + a->name = format (0, "http_client"); + a->session_cb_vft = &hc_session_cb_vft; + a->options = options; + a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; + + if ((rv = vnet_application_attach (a))) + return clib_error_return (0, "attach returned: %U", format_session_error, + rv); + + hcm->app_index = a->app_index; + vec_free (a->name); + hcm->attached = 1; + + return 0; +} + +static int +hc_connect_rpc (void *rpc_args) +{ + vnet_connect_args_t *a = rpc_args; + int rv; + + rv = vnet_connect (a); + if (rv > 0) + clib_warning (0, "connect returned: %U", format_session_error, rv); + + vec_free (a); + return rv; +} + +static void +hc_connect () +{ + hc_main_t *hcm = &hc_main; + vnet_connect_args_t *a = 0; + hc_worker_t *wrk; + hc_session_t *hc_session; + + vec_validate (a, 0); + clib_memset (a, 0, sizeof (a[0])); + + clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep)); + a->app_index = hcm->app_index; + + /* allocate http session on main thread */ + wrk = hc_worker_get (0); + hc_session = hc_session_alloc (wrk); + a->api_context = hc_session->session_index; + + 
session_send_rpc_evt_to_thread_force (transport_cl_thread (), hc_connect_rpc, + a); +} + +static clib_error_t * +hc_run (vlib_main_t *vm) +{ + hc_main_t *hcm = &hc_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + hc_worker_t *wrk; + uword event_type, *event_data = 0; + clib_error_t *err; + FILE *file_ptr; + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (hcm->wrk, num_threads - 1); + vec_foreach (wrk, hcm->wrk) + wrk->thread_index = wrk - hcm->wrk; + + if ((err = hc_attach ())) + return clib_error_return (0, "http client attach: %U", format_clib_error, + err); + + hc_connect (); + + vlib_process_wait_for_event_or_clock (vm, hcm->timeout); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case ~0: + err = clib_error_return (0, "error: timeout"); + break; + case HC_CONNECT_FAILED: + err = clib_error_return (0, "error: failed to connect"); + break; + case HC_TRANSPORT_CLOSED: + err = clib_error_return (0, "error: transport closed"); + break; + case HC_REPLY_RECEIVED: + if (hcm->filename) + { + file_ptr = + fopen ((char *) format (0, "/tmp/%v", hcm->filename), "w"); + if (file_ptr == NULL) + { + vlib_cli_output (vm, "couldn't open file %v", hcm->filename); + } + else + { + fprintf (file_ptr, "< %s\n< %s\n< %s", hcm->response_status, + hcm->resp_headers, hcm->http_response); + fclose (file_ptr); + vlib_cli_output (vm, "file saved (/tmp/%v)", hcm->filename); + } + } + if (hcm->verbose) + vlib_cli_output (vm, "< %v\n< %v", hcm->response_status, + hcm->resp_headers); + vlib_cli_output (vm, "<\n%v", hcm->http_response); + + break; + default: + err = clib_error_return (0, "error: unexpected event %d", event_type); + break; + } + + vec_free (event_data); + return err; +} + +static int +hc_detach () +{ + hc_main_t *hcm = &hc_main; + vnet_app_detach_args_t _da, *da = &_da; + int rv; + + if (!hcm->attached) + return 0; + + da->app_index = hcm->app_index; + da->api_client_index = 
APP_INVALID_INDEX; + rv = vnet_application_detach (da); + hcm->attached = 0; + hcm->app_index = APP_INVALID_INDEX; + + return rv; +} + +static void +hcc_worker_cleanup (hc_worker_t *wrk) +{ + pool_free (wrk->sessions); +} + +static void +hc_cleanup () +{ + hc_main_t *hcm = &hc_main; + hc_worker_t *wrk; + http_header_ht_t *header; + + vec_foreach (wrk, hcm->wrk) + hcc_worker_cleanup (wrk); + + vec_free (hcm->uri); + vec_free (hcm->target); + vec_free (hcm->headers_buf); + vec_free (hcm->data); + vec_free (hcm->resp_headers); + vec_free (hcm->http_response); + vec_free (hcm->response_status); + vec_free (hcm->wrk); + vec_free (hcm->filename); + vec_foreach (header, hcm->custom_header) + { + vec_free (header->name); + vec_free (header->value); + } + vec_free (hcm->custom_header); +} + +static clib_error_t * +hc_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + hc_main_t *hcm = &hc_main; + clib_error_t *err = 0; + unformat_input_t _line_input, *line_input = &_line_input; + u8 *path = 0; + u8 *file_data; + http_header_ht_t new_header; + u8 *name; + u8 *value; + int rv; + hcm->timeout = 10; + + if (hcm->attached) + return clib_error_return (0, "failed: already running!"); + + hcm->use_ptr = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "expected required arguments"); + + hcm->req_method = + (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) && + unformat (line_input, "post") ? 
+ HTTP_REQ_POST : + HTTP_REQ_GET; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "uri %s", &hcm->uri)) + ; + else if (unformat (line_input, "data %v", &hcm->data)) + hcm->is_file = 0; + else if (unformat (line_input, "target %s", &hcm->target)) + ; + else if (unformat (line_input, "file %s", &path)) + hcm->is_file = 1; + else if (unformat (line_input, "use-ptr")) + hcm->use_ptr = 1; + else if (unformat (line_input, "save-to %s", &hcm->filename)) + { + if (strstr ((char *) hcm->filename, "..") || + strchr ((char *) hcm->filename, '/')) + { + err = clib_error_return ( + 0, "illegal characters in filename '%v'", hcm->filename); + goto done; + } + } + else if (unformat (line_input, "header %v:%v", &name, &value)) + { + new_header.name = name; + new_header.value = value; + vec_add1 (hcm->custom_header, new_header); + } + else if (unformat (line_input, "verbose")) + hcm->verbose = true; + else if (unformat (line_input, "timeout %f", &hcm->timeout)) + ; + else + { + err = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!hcm->uri) + { + err = clib_error_return (0, "URI not defined"); + goto done; + } + if (!hcm->target) + { + err = clib_error_return (0, "target not defined"); + goto done; + } + if (!hcm->data && hcm->req_method == HTTP_REQ_POST) + { + if (path) + { + err = clib_file_contents ((char *) path, &file_data); + if (err) + goto done; + hcm->data = file_data; + } + else + { + err = clib_error_return (0, "data not defined"); + goto done; + } + } + + if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep))) + { + err = + clib_error_return (0, "URI parse error: %U", format_session_error, rv); + goto done; + } + + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vlib_worker_thread_barrier_sync (vm); + vnet_session_enable_disable (vm, &args); + vlib_worker_thread_barrier_release (vm); + + 
hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index; + + err = hc_run (vm); + + if ((rv = hc_detach ())) + { + /* don't override last error */ + if (!err) + err = clib_error_return (0, "detach returned: %U", + format_session_error, rv); + else + clib_warning ("warning: detach returned: %U", format_session_error, + rv); + } + +done: + vec_free (path); + hc_cleanup (); + unformat_free (line_input); + return err; +} + +VLIB_CLI_COMMAND (hc_command, static) = { + .path = "http client", + .short_help = "[post] uri http://<ip-addr> target <origin-form> " + "[data <form-urlencoded> | file <file-path>] [use-ptr] " + "[save-to <filename>] [header <Key:Value>] [verbose] " + "[timeout <seconds> (default = 10)]", + .function = hc_command_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +hc_main_init () +{ + hc_main_t *hcm = &hc_main; + hcm->app_index = APP_INVALID_INDEX; + return 0; +} + +VLIB_INIT_FUNCTION (hc_main_init); diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index 085a2b69bf7..861af7f03e2 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -13,11 +13,12 @@ * limitations under the License. 
*/ -#include <vnet/session/application.h> #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> -#include <hs_apps/http_cli.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> +#include <http/http_status_codes.h> #define HCC_DEBUG 0 @@ -34,14 +35,14 @@ typedef struct u32 thread_index; u32 rx_offset; u32 vpp_session_index; - u32 to_recv; + u64 to_recv; u8 is_closed; + http_header_t *req_headers; } hcc_session_t; typedef struct { hcc_session_t *sessions; - u8 *rx_buf; u32 thread_index; } hcc_worker_t; @@ -68,6 +69,8 @@ typedef struct typedef enum { HCC_REPLY_RECEIVED = 100, + HCC_TRANSPORT_CLOSED, + HCC_CONNECT_FAILED, } hcc_cli_signal_t; static hcc_main_t hcc_main; @@ -96,10 +99,10 @@ hcc_session_get (u32 hs_index, u32 thread_index) } static void -hcc_session_free (u32 thread_index, hcc_session_t *hs) +hcc_ho_session_free (u32 hs_index) { - hcc_worker_t *wrk = hcc_worker_get (thread_index); - pool_put (wrk->sessions, hs); + hcc_worker_t *wrk = hcc_worker_get (0); + pool_put_index (wrk->sessions, hs_index); } static int @@ -128,43 +131,68 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, hcc_session_t *hs, *new_hs; hcc_worker_t *wrk; http_msg_t msg; + u8 *headers_buf; + u32 new_hs_index; int rv; - HCC_DBG ("hc_index: %d", hc_index); + HCC_DBG ("ho hc_index: %d", hc_index); if (err) { clib_warning ("connected error: hc_index(%d): %U", hc_index, format_session_error, err); + vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index, + HCC_CONNECT_FAILED, 0); return -1; } - /* TODO delete half open session once the support is added in http layer */ hs = hcc_session_get (hc_index, 0); wrk = hcc_worker_get (as->thread_index); new_hs = hcc_session_alloc (wrk); + new_hs_index = new_hs->session_index; clib_memcpy_fast (new_hs, hs, sizeof (*hs)); - - hs->vpp_session_index = as->session_index; + new_hs->session_index = new_hs_index; + new_hs->thread_index = 
as->thread_index; + new_hs->vpp_session_index = as->session_index; + HCC_DBG ("new hc_index: %d", new_hs->session_index); + as->opaque = new_hs_index; + + http_add_header (&new_hs->req_headers, + http_header_name_token (HTTP_HEADER_ACCEPT), + http_content_type_token (HTTP_CONTENT_TEXT_HTML)); + headers_buf = http_serialize_headers (new_hs->req_headers); + vec_free (new_hs->req_headers); msg.type = HTTP_MSG_REQUEST; msg.method_type = HTTP_REQ_GET; - msg.content_type = HTTP_CONTENT_TEXT_HTML; + /* request target */ + msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; + msg.data.target_path_offset = 0; + msg.data.target_path_len = vec_len (hcm->http_query); + /* custom headers */ + msg.data.headers_offset = msg.data.target_path_len; + msg.data.headers_len = vec_len (headers_buf); + /* request body */ + msg.data.body_len = 0; + /* data type and total length */ msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = vec_len (hcm->http_query); + msg.data.len = + msg.data.target_path_len + msg.data.headers_len + msg.data.body_len; - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, - { hcm->http_query, vec_len (hcm->http_query) } }; + svm_fifo_seg_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, + { hcm->http_query, vec_len (hcm->http_query) }, + { headers_buf, vec_len (headers_buf) } }; - rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 2, 0 /* allow partial */); - if (rv < 0 || rv != sizeof (msg) + vec_len (hcm->http_query)) + rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 3, 0 /* allow partial */); + vec_free (headers_buf); + if (rv < 0 || rv != sizeof (msg) + msg.data.len) { clib_warning ("failed app enqueue"); return -1; } if (svm_fifo_set_event (as->tx_fifo)) - session_send_io_evt_to_thread (as->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (as->handle, SESSION_IO_EVT_TX); return 0; } @@ -219,23 +247,32 @@ hcc_ts_rx_callback (session_t *ts) if (hs->to_recv == 0) { + /* read the http message header */ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), 
(u8 *) &msg); ASSERT (rv == sizeof (msg)); - if (msg.type != HTTP_MSG_REPLY || msg.code != HTTP_STATUS_OK) + if (msg.type != HTTP_MSG_REPLY) { clib_warning ("unexpected msg type %d", msg.type); return 0; } - vec_validate (hcm->http_response, msg.data.len - 1); + /* drop everything up to body */ + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset); + hs->to_recv = msg.data.body_len; + if (msg.code != HTTP_STATUS_OK && hs->to_recv == 0) + { + hcm->http_response = format (0, "request failed, response code: %U", + format_http_status_code, msg.code); + goto done; + } + vec_validate (hcm->http_response, msg.data.body_len - 1); vec_reset_length (hcm->http_response); - hs->to_recv = msg.data.len; } u32 max_deq = svm_fifo_max_dequeue (ts->rx_fifo); u32 n_deq = clib_min (hs->to_recv, max_deq); - u32 curr = vec_len (hcm->http_response); + u64 curr = vec_len (hcm->http_response); rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, hcm->http_response + curr); if (rv < 0) { @@ -249,10 +286,12 @@ hcc_ts_rx_callback (session_t *ts) vec_set_len (hcm->http_response, curr + n_deq); ASSERT (hs->to_recv >= rv); hs->to_recv -= rv; - HCC_DBG ("app rcvd %d, remains %d", rv, hs->to_recv); + HCC_DBG ("app rcvd %d, remains %llu", rv, hs->to_recv); +done: if (hs->to_recv == 0) { + HCC_DBG ("all data received, going to disconnect"); hcc_session_disconnect (ts); vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index, HCC_REPLY_RECEIVED, 0); @@ -262,15 +301,21 @@ hcc_ts_rx_callback (session_t *ts) } static void -hcc_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf) +hcc_ts_transport_closed (session_t *s) { - hcc_session_t *hs; + hcc_main_t *hcm = &hcc_main; - hs = hcc_session_get (s->thread_index, s->opaque); - if (!hs) - return; + HCC_DBG ("transport closed"); - hcc_session_free (s->thread_index, hs); + vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index, + HCC_TRANSPORT_CLOSED, 0); +} + +static void +hcc_ho_cleanup_callback (session_t *ts) +{ + HCC_DBG ("ho 
hc_index: %d:", ts->opaque); + hcc_ho_session_free (ts->opaque); } static session_cb_vft_t hcc_session_cb_vft = { @@ -280,7 +325,8 @@ static session_cb_vft_t hcc_session_cb_vft = { .builtin_app_rx_callback = hcc_ts_rx_callback, .builtin_app_tx_callback = hcc_ts_tx_callback, .session_reset_callback = hcc_ts_reset_callback, - .session_cleanup_callback = hcc_ts_cleanup_callback, + .session_transport_closed_callback = hcc_ts_transport_closed, + .half_open_cleanup_callback = hcc_ho_cleanup_callback, }; static clib_error_t * @@ -335,6 +381,7 @@ hcc_connect_rpc (void *rpc_args) if (rv) clib_warning (0, "connect returned: %U", format_session_error, rv); + session_endpoint_free_ext_cfgs (&a->sep_ext); vec_free (a); return rv; } @@ -353,6 +400,7 @@ hcc_connect () hcc_main_t *hcm = &hcc_main; hcc_worker_t *wrk; hcc_session_t *hs; + transport_endpt_ext_cfg_t *ext_cfg; vec_validate (a, 0); clib_memset (a, 0, sizeof (a[0])); @@ -360,6 +408,11 @@ hcc_connect () clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep)); a->app_index = hcm->app_index; + /* set http (response) timeout to 10 seconds */ + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + ext_cfg->opaque = 10; + /* allocate http session on main thread */ wrk = hcc_worker_get (0); hs = hcc_session_alloc (wrk); @@ -380,7 +433,7 @@ hcc_run (vlib_main_t *vm, int print_output) hcc_worker_t *wrk; num_threads = 1 /* main thread */ + vtm->n_threads; - vec_validate (hcm->wrk, num_threads); + vec_validate (hcm->wrk, num_threads - 1); vec_foreach (wrk, hcm->wrk) { wrk->thread_index = wrk - hcm->wrk; @@ -409,7 +462,12 @@ hcc_run (vlib_main_t *vm, int print_output) case HCC_REPLY_RECEIVED: if (print_output) vlib_cli_output (vm, "%v", hcm->http_response); - vec_free (hcm->http_response); + break; + case HCC_TRANSPORT_CLOSED: + err = clib_error_return (0, "error, transport closed"); + break; + case HCC_CONNECT_FAILED: + err = clib_error_return (0, "failed 
to connect"); break; default: err = clib_error_return (0, "unexpected event %d", event_type); @@ -440,6 +498,28 @@ hcc_detach () return rv; } +static void +hcc_worker_cleanup (hcc_worker_t *wrk) +{ + pool_free (wrk->sessions); +} + +static void +hcc_cleanup () +{ + hcc_main_t *hcm = &hcc_main; + hcc_worker_t *wrk; + + vec_foreach (wrk, hcm->wrk) + hcc_worker_cleanup (wrk); + + vec_free (hcm->uri); + vec_free (hcm->http_query); + vec_free (hcm->http_response); + vec_free (hcm->appns_id); + vec_free (hcm->wrk); +} + static clib_error_t * hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) @@ -489,7 +569,6 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, } } - vec_free (hcm->appns_id); hcm->appns_id = appns_id; hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index; @@ -505,8 +584,11 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; vlib_worker_thread_barrier_sync (vm); - vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. 
*/); + vnet_session_enable_disable (vm, &args); vlib_worker_thread_barrier_release (vm); err = hcc_run (vm, print_output); @@ -520,8 +602,7 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, } done: - vec_free (hcm->uri); - vec_free (hcm->http_query); + hcc_cleanup (); unformat_free (line_input); return err; } diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c index 920f7ea731f..a40a31caf63 100644 --- a/src/plugins/hs_apps/http_tps.c +++ b/src/plugins/hs_apps/http_tps.c @@ -17,6 +17,10 @@ #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> + +#define HTS_RX_BUF_SIZE (64 << 10) typedef struct { @@ -26,6 +30,8 @@ typedef struct u64 data_len; u64 data_offset; u32 vpp_session_index; + u64 left_recv; + u64 total_recv; union { /** threshold after which connection is closed */ @@ -34,6 +40,8 @@ typedef struct u32 close_rate; }; u8 *uri; + u8 *rx_buf; + http_header_t *resp_headers; } hts_session_t; typedef struct hts_listen_cfg_ @@ -102,6 +110,8 @@ hts_session_free (hts_session_t *hs) if (htm->debug_level > 0) clib_warning ("Freeing session %u", hs->session_index); + vec_free (hs->rx_buf); + if (CLIB_DEBUG) clib_memset (hs, 0xfa, sizeof (*hs)); @@ -151,7 +161,7 @@ hts_session_tx_zc (hts_session_t *hs, session_t *ts) svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); } static void @@ -198,7 +208,7 @@ hts_session_tx_no_zc (hts_session_t *hs, session_t *ts) svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); } static inline void @@ -223,22 +233,46 @@ 
hts_start_send_data (hts_session_t *hs, http_status_code_t status) { http_msg_t msg; session_t *ts; + u8 *headers_buf = 0; + u32 n_segs = 1; + svm_fifo_seg_t seg[2]; int rv; + if (vec_len (hs->resp_headers)) + { + headers_buf = http_serialize_headers (hs->resp_headers); + vec_free (hs->resp_headers); + msg.data.headers_offset = 0; + msg.data.headers_len = vec_len (headers_buf); + seg[1].data = headers_buf; + seg[1].len = msg.data.headers_len; + n_segs = 2; + } + else + { + msg.data.headers_offset = 0; + msg.data.headers_len = 0; + } + msg.type = HTTP_MSG_REPLY; msg.code = status; - msg.content_type = HTTP_CONTENT_APP_OCTET_STREAM; msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = hs->data_len; + msg.data.body_len = hs->data_len; + msg.data.body_offset = msg.data.headers_len; + msg.data.len = msg.data.body_len + msg.data.headers_len; + seg[0].data = (u8 *) &msg; + seg[0].len = sizeof (msg); ts = session_get (hs->vpp_session_index, hs->thread_index); - rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); + rv = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs, + 0 /* allow partial */); + vec_free (headers_buf); + ASSERT (rv == (sizeof (msg) + msg.data.headers_len)); - if (!msg.data.len) + if (!msg.data.body_len) { if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); return; } @@ -246,7 +280,7 @@ hts_start_send_data (hts_session_t *hs, http_status_code_t status) } static int -try_test_file (hts_session_t *hs, u8 *request) +try_test_file (hts_session_t *hs, u8 *target) { char *test_str = "test_file"; hts_main_t *htm = &hts_main; @@ -254,10 +288,10 @@ try_test_file (hts_session_t *hs, u8 *request) uword file_size; int rc = 0; - if (memcmp (request, test_str, clib_strnlen (test_str, 9))) + if (memcmp (target, test_str, clib_strnlen (test_str, 9))) return -1; - unformat_init_vector (&input, vec_dup (request)); + 
unformat_init_vector (&input, vec_dup (target)); if (!unformat (&input, "test_file_%U", unformat_memory_size, &file_size)) { rc = -1; @@ -286,6 +320,10 @@ try_test_file (hts_session_t *hs, u8 *request) } } + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); + hts_start_send_data (hs, HTTP_STATUS_OK); done: @@ -294,39 +332,121 @@ done: return rc; } +static inline void +hts_session_rx_body (hts_session_t *hs, session_t *ts) +{ + hts_main_t *htm = &hts_main; + u32 n_deq; + int rv; + + n_deq = svm_fifo_max_dequeue (ts->rx_fifo); + if (!htm->no_zc) + { + svm_fifo_dequeue_drop_all (ts->rx_fifo); + } + else + { + n_deq = clib_min (n_deq, HTS_RX_BUF_SIZE); + rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, hs->rx_buf); + ASSERT (rv == n_deq); + } + hs->left_recv -= n_deq; + + if (hs->close_threshold > 0) + { + if ((f64) (hs->total_recv - hs->left_recv) / hs->total_recv > + hs->close_threshold) + hts_disconnect_transport (hs); + } + + if (hs->left_recv == 0) + { + hts_start_send_data (hs, HTTP_STATUS_OK); + vec_free (hs->rx_buf); + } +} + static int hts_ts_rx_callback (session_t *ts) { + hts_main_t *htm = &hts_main; hts_session_t *hs; - u8 *request = 0; + u8 *target = 0; http_msg_t msg; int rv; hs = hts_session_get (ts->thread_index, ts->opaque); - /* Read the http message header */ - rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET) + if (hs->left_recv == 0) { - hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); - goto done; - } + hs->data_len = 0; + hs->resp_headers = 0; + hs->rx_buf = 0; - if (!msg.data.len) - { - hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST); - goto done; - } + /* Read the http message header */ + rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + if (msg.type != HTTP_MSG_REQUEST) + { + 
hts_start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR); + goto done; + } + if (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST) + { + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_ALLOW), + http_token_lit ("GET, POST")); + hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); + goto done; + } - vec_validate (request, msg.data.len - 1); - rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request); + if (msg.data.target_path_len == 0 || + msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) + { + hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + goto done; + } - if (try_test_file (hs, request)) - hts_start_send_data (hs, HTTP_STATUS_NOT_FOUND); + vec_validate (target, msg.data.target_path_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, + msg.data.target_path_len, target); + ASSERT (rv == msg.data.target_path_len); -done: + if (htm->debug_level) + clib_warning ("%s request target: %v", + msg.method_type == HTTP_REQ_GET ? 
"GET" : "POST", + target); + + if (msg.method_type == HTTP_REQ_GET) + { + if (try_test_file (hs, target)) + hts_start_send_data (hs, HTTP_STATUS_NOT_FOUND); + vec_free (target); + } + else + { + vec_free (target); + if (!msg.data.body_len) + { + hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + goto done; + } + /* drop everything up to body */ + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset); + hs->left_recv = msg.data.body_len; + hs->total_recv = msg.data.body_len; + if (htm->no_zc) + vec_validate (hs->rx_buf, HTS_RX_BUF_SIZE - 1); + hts_session_rx_body (hs, ts); + return 0; + } + + done: + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len); + } + else + hts_session_rx_body (hs, ts); return 0; } @@ -354,6 +474,7 @@ hts_ts_accept_callback (session_t *ts) hs = hts_session_alloc (ts->thread_index); hs->vpp_session_index = ts->session_index; + hs->left_recv = 0; ts->opaque = hs->session_index; ts->session_state = SESSION_STATE_READY; @@ -520,15 +641,16 @@ hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri, if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = htm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = htm->ckpair_index; } rv = vnet_listen (a); if (need_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); if (rv) return rv; @@ -717,7 +839,10 @@ start_server: if (htm->app_index == (u32) ~0) { - vnet_session_enable_disable (vm, 1 /* is_enable */); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); if (hts_create (vm)) { diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c index e8fedf921a5..7079da5eb94 100644 --- 
a/src/plugins/hs_apps/proxy.c +++ b/src/plugins/hs_apps/proxy.c @@ -19,50 +19,145 @@ #include <vnet/session/application_interface.h> #include <hs_apps/proxy.h> #include <vnet/tcp/tcp.h> +#include <http/http.h> +#include <http/http_header_names.h> proxy_main_t proxy_main; #define TCP_MSS 1460 -typedef struct +static proxy_session_side_ctx_t * +proxy_session_side_ctx_alloc (proxy_worker_t *wrk) { - session_endpoint_cfg_t sep; - u32 app_index; - u32 api_context; -} proxy_connect_args_t; + proxy_session_side_ctx_t *ctx; + + pool_get_zero (wrk->ctx_pool, ctx); + ctx->sc_index = ctx - wrk->ctx_pool; + ctx->ps_index = ~0; + + return ctx; +} static void -proxy_cb_fn (void *data, u32 data_len) +proxy_session_side_ctx_free (proxy_worker_t *wrk, + proxy_session_side_ctx_t *ctx) { - proxy_connect_args_t *pa = (proxy_connect_args_t *) data; - vnet_connect_args_t a; + pool_put (wrk->ctx_pool, ctx); +} - clib_memset (&a, 0, sizeof (a)); - a.api_context = pa->api_context; - a.app_index = pa->app_index; - clib_memcpy (&a.sep_ext, &pa->sep, sizeof (pa->sep)); - vnet_connect (&a); - if (a.sep_ext.ext_cfg) - clib_mem_free (a.sep_ext.ext_cfg); +static proxy_session_side_ctx_t * +proxy_session_side_ctx_get (proxy_worker_t *wrk, u32 ctx_index) +{ + return pool_elt_at_index (wrk->ctx_pool, ctx_index); } static void -proxy_call_main_thread (vnet_connect_args_t * a) +proxy_send_http_resp (session_t *s, http_status_code_t sc, + http_header_t *resp_headers) { - if (vlib_get_thread_index () == 0) + http_msg_t msg; + int rv; + u8 *headers_buf = 0; + + if (vec_len (resp_headers)) { - vnet_connect (a); - if (a->sep_ext.ext_cfg) - clib_mem_free (a->sep_ext.ext_cfg); + headers_buf = http_serialize_headers (resp_headers); + msg.data.len = msg.data.headers_len = vec_len (headers_buf); } else + msg.data.len = msg.data.headers_len = 0; + + msg.type = HTTP_MSG_REPLY; + msg.code = sc; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.headers_offset = 0; + msg.data.body_len = 0; + msg.data.body_offset = 
0; + rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + if (msg.data.headers_len) + { + rv = svm_fifo_enqueue (s->tx_fifo, vec_len (headers_buf), headers_buf); + ASSERT (rv == vec_len (headers_buf)); + vec_free (headers_buf); + } + + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); +} + +static void +proxy_do_connect (vnet_connect_args_t *a) +{ + ASSERT (session_vlib_thread_is_cl_thread ()); + vnet_connect (a); + session_endpoint_free_ext_cfgs (&a->sep_ext); +} + +static void +proxy_handle_connects_rpc (void *args) +{ + u32 thread_index = pointer_to_uword (args), n_connects = 0, n_pending; + proxy_worker_t *wrk; + u32 max_connects; + + wrk = proxy_worker_get (thread_index); + + clib_spinlock_lock (&wrk->pending_connects_lock); + + n_pending = clib_fifo_elts (wrk->pending_connects); + max_connects = clib_min (32, n_pending); + vec_validate (wrk->burst_connects, max_connects); + + while (n_connects < max_connects) + clib_fifo_sub1 (wrk->pending_connects, wrk->burst_connects[n_connects++]); + + clib_spinlock_unlock (&wrk->pending_connects_lock); + + /* Do connects without locking pending_connects */ + n_connects = 0; + while (n_connects < max_connects) { - proxy_connect_args_t args; - args.api_context = a->api_context; - args.app_index = a->app_index; - clib_memcpy (&args.sep, &a->sep_ext, sizeof (a->sep_ext)); - vl_api_rpc_call_main_thread (proxy_cb_fn, (u8 *) & args, sizeof (args)); + proxy_do_connect (&wrk->burst_connects[n_connects]); + n_connects += 1; } + + /* More work to do, program rpc */ + if (max_connects < n_pending) + session_send_rpc_evt_to_thread_force ( + transport_cl_thread (), proxy_handle_connects_rpc, + uword_to_pointer ((uword) thread_index, void *)); +} + +static void +proxy_program_connect (vnet_connect_args_t *a) +{ + u32 connects_thread = transport_cl_thread (), thread_index, n_pending; + proxy_worker_t *wrk; + + thread_index = vlib_get_thread_index 
(); + + /* If already on first worker, handle request */ + if (thread_index == connects_thread) + { + proxy_do_connect (a); + return; + } + + /* If not on first worker, queue request */ + wrk = proxy_worker_get (thread_index); + + clib_spinlock_lock (&wrk->pending_connects_lock); + + clib_fifo_add1 (wrk->pending_connects, *a); + n_pending = clib_fifo_elts (wrk->pending_connects); + + clib_spinlock_unlock (&wrk->pending_connects_lock); + + if (n_pending == 1) + session_send_rpc_evt_to_thread_force ( + connects_thread, proxy_handle_connects_rpc, + uword_to_pointer ((uword) thread_index, void *)); } static proxy_session_t * @@ -85,16 +180,6 @@ proxy_session_get (u32 ps_index) return pool_elt_at_index (pm->sessions, ps_index); } -static inline proxy_session_t * -proxy_session_get_if_valid (u32 ps_index) -{ - proxy_main_t *pm = &proxy_main; - - if (pool_is_free_index (pm->sessions, ps_index)) - return 0; - return pool_elt_at_index (pm->sessions, ps_index); -} - static void proxy_session_free (proxy_session_t *ps) { @@ -115,7 +200,7 @@ proxy_session_postponed_free_rpc (void *arg) clib_spinlock_lock_if_init (&pm->sessions_lock); ps = proxy_session_get (ps_index); - segment_manager_dealloc_fifos (ps->server_rx_fifo, ps->server_tx_fifo); + segment_manager_dealloc_fifos (ps->po.rx_fifo, ps->po.tx_fifo); proxy_session_free (ps); clib_spinlock_unlock_if_init (&pm->sessions_lock); @@ -126,54 +211,79 @@ proxy_session_postponed_free_rpc (void *arg) static void proxy_session_postponed_free (proxy_session_t *ps) { - session_send_rpc_evt_to_thread (ps->po_thread_index, + /* Passive open session handle has been invalidated so we don't have thread + * index at this point */ + session_send_rpc_evt_to_thread (ps->po.rx_fifo->master_thread_index, proxy_session_postponed_free_rpc, uword_to_pointer (ps->ps_index, void *)); } static void +proxy_session_close_po (proxy_session_t *ps) +{ + vnet_disconnect_args_t _a = {}, *a = &_a; + proxy_main_t *pm = &proxy_main; + + ASSERT 
(!vlib_num_workers () || + CLIB_SPINLOCK_IS_LOCKED (&pm->sessions_lock)); + + a->handle = ps->po.session_handle; + a->app_index = pm->server_app_index; + vnet_disconnect_session (a); + + ps->po_disconnected = 1; +} + +static void +proxy_session_close_ao (proxy_session_t *ps) +{ + vnet_disconnect_args_t _a = {}, *a = &_a; + proxy_main_t *pm = &proxy_main; + + ASSERT (!vlib_num_workers () || + CLIB_SPINLOCK_IS_LOCKED (&pm->sessions_lock)); + + a->handle = ps->ao.session_handle; + a->app_index = pm->active_open_app_index; + vnet_disconnect_session (a); + + ps->ao_disconnected = 1; +} + +static void proxy_try_close_session (session_t * s, int is_active_open) { proxy_main_t *pm = &proxy_main; - proxy_session_t *ps = 0; - vnet_disconnect_args_t _a, *a = &_a; + proxy_session_side_ctx_t *sc; + proxy_session_t *ps; + proxy_worker_t *wrk; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); clib_spinlock_lock_if_init (&pm->sessions_lock); - ps = proxy_session_get (s->opaque); + ps = proxy_session_get (sc->ps_index); if (is_active_open) { - a->handle = ps->vpp_active_open_handle; - a->app_index = pm->active_open_app_index; - vnet_disconnect_session (a); - ps->ao_disconnected = 1; + proxy_session_close_ao (ps); if (!ps->po_disconnected) { - ASSERT (ps->vpp_server_handle != SESSION_INVALID_HANDLE); - a->handle = ps->vpp_server_handle; - a->app_index = pm->server_app_index; - vnet_disconnect_session (a); - ps->po_disconnected = 1; + ASSERT (ps->po.session_handle != SESSION_INVALID_HANDLE); + proxy_session_close_po (ps); } } else { - a->handle = ps->vpp_server_handle; - a->app_index = pm->server_app_index; - vnet_disconnect_session (a); - ps->po_disconnected = 1; + proxy_session_close_po (ps); if (!ps->ao_disconnected && !ps->active_open_establishing) { /* Proxy session closed before active open */ - if (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE) - { - a->handle = ps->vpp_active_open_handle; - a->app_index = 
pm->active_open_app_index; - vnet_disconnect_session (a); - } + if (ps->ao.session_handle != SESSION_INVALID_HANDLE) + proxy_session_close_ao (ps); ps->ao_disconnected = 1; } } @@ -181,29 +291,63 @@ proxy_try_close_session (session_t * s, int is_active_open) } static void +proxy_try_side_ctx_cleanup (session_t *s) +{ + proxy_main_t *pm = &proxy_main; + proxy_session_t *ps; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + if (sc->state == PROXY_SC_S_CREATED) + return; + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (sc->ps_index); + + if (!ps->po_disconnected) + proxy_session_close_po (ps); + + if (!ps->ao_disconnected) + proxy_session_close_ao (ps); + + clib_spinlock_unlock_if_init (&pm->sessions_lock); +} + +static void proxy_try_delete_session (session_t * s, u8 is_active_open) { proxy_main_t *pm = &proxy_main; proxy_session_t *ps = 0; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; + u32 ps_index; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + ps_index = sc->ps_index; + + proxy_session_side_ctx_free (wrk, sc); clib_spinlock_lock_if_init (&pm->sessions_lock); - ps = proxy_session_get (s->opaque); + ps = proxy_session_get (ps_index); if (is_active_open) { - ps->vpp_active_open_handle = SESSION_INVALID_HANDLE; + ps->ao.session_handle = SESSION_INVALID_HANDLE; /* Revert master thread index change on connect notification */ - ps->server_rx_fifo->master_thread_index = ps->po_thread_index; + ps->po.rx_fifo->master_thread_index = + ps->po.tx_fifo->master_thread_index; /* Passive open already cleaned up */ - if (ps->vpp_server_handle == SESSION_INVALID_HANDLE) + if (ps->po.session_handle == SESSION_INVALID_HANDLE) { - ASSERT (s->rx_fifo->refcnt == 1); - /* The two sides of the proxy on different threads */ - if (ps->po_thread_index != s->thread_index) + if 
(ps->po.tx_fifo->master_thread_index != s->thread_index) { /* This is not the right thread to delete the fifos */ s->rx_fifo = 0; @@ -211,14 +355,17 @@ proxy_try_delete_session (session_t * s, u8 is_active_open) proxy_session_postponed_free (ps); } else - proxy_session_free (ps); + { + ASSERT (s->rx_fifo->refcnt == 1); + proxy_session_free (ps); + } } } else { - ps->vpp_server_handle = SESSION_INVALID_HANDLE; + ps->po.session_handle = SESSION_INVALID_HANDLE; - if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE) + if (ps->ao.session_handle == SESSION_INVALID_HANDLE) { if (!ps->active_open_establishing) proxy_session_free (ps); @@ -275,16 +422,26 @@ static int proxy_accept_callback (session_t * s) { proxy_main_t *pm = &proxy_main; + proxy_session_side_ctx_t *sc; proxy_session_t *ps; + proxy_worker_t *wrk; + transport_proto_t tp = session_get_transport_proto (s); + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_alloc (wrk); + s->opaque = sc->sc_index; clib_spinlock_lock_if_init (&pm->sessions_lock); ps = proxy_session_alloc (); - ps->vpp_server_handle = session_handle (s); - ps->vpp_active_open_handle = SESSION_INVALID_HANDLE; - ps->po_thread_index = s->thread_index; - s->opaque = ps->ps_index; + ps->po.session_handle = session_handle (s); + ps->po.rx_fifo = s->rx_fifo; + ps->po.tx_fifo = s->tx_fifo; + + ps->ao.session_handle = SESSION_INVALID_HANDLE; + sc->ps_index = ps->ps_index; + sc->is_http = tp == TRANSPORT_PROTO_HTTP ? 
1 : 0; clib_spinlock_unlock_if_init (&pm->sessions_lock); @@ -325,92 +482,167 @@ proxy_transport_needs_crypto (transport_proto_t proto) return proto == TRANSPORT_PROTO_TLS; } -static int -proxy_rx_callback (session_t * s) +static void +proxy_session_start_connect (proxy_session_side_ctx_t *sc, session_t *s) { + int actual_transfer __attribute__ ((unused)); + vnet_connect_args_t _a = {}, *a = &_a; proxy_main_t *pm = &proxy_main; - u32 thread_index = vlib_get_thread_index (); - svm_fifo_t *ao_tx_fifo; + u32 max_dequeue, ps_index; proxy_session_t *ps; - - ASSERT (s->thread_index == thread_index); + transport_proto_t tp = session_get_transport_proto (s); clib_spinlock_lock_if_init (&pm->sessions_lock); - ps = proxy_session_get (s->opaque); + ps = proxy_session_get (sc->ps_index); - if (PREDICT_TRUE (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE)) + /* maybe we were already here */ + if (ps->active_open_establishing) { clib_spinlock_unlock_if_init (&pm->sessions_lock); + return; + } - ao_tx_fifo = s->rx_fifo; + ps->active_open_establishing = 1; + ps_index = ps->ps_index; - /* - * Send event for active open tx fifo - */ - if (svm_fifo_set_event (ao_tx_fifo)) + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + if (tp == TRANSPORT_PROTO_HTTP) + { + http_msg_t msg; + u8 *target_buf = 0; + http_uri_t target_uri; + http_header_t *resp_headers = 0; + session_endpoint_cfg_t target_sep = SESSION_ENDPOINT_CFG_NULL; + int rv; + + rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + if (msg.type != HTTP_MSG_REQUEST) + { + proxy_send_http_resp (s, HTTP_STATUS_INTERNAL_ERROR, 0); + return; + } + if (msg.method_type != HTTP_REQ_CONNECT) { - u32 ao_thread_index = ao_tx_fifo->master_thread_index; - u32 ao_session_index = ao_tx_fifo->shr->master_session_index; - if (session_send_io_evt_to_thread_custom (&ao_session_index, - ao_thread_index, - SESSION_IO_EVT_TX)) - clib_warning ("failed to enqueue tx evt"); + http_add_header 
(&resp_headers, + http_header_name_token (HTTP_HEADER_ALLOW), + http_token_lit ("CONNECT")); + proxy_send_http_resp (s, HTTP_STATUS_METHOD_NOT_ALLOWED, + resp_headers); + vec_free (resp_headers); + return; } - if (svm_fifo_max_enqueue (ao_tx_fifo) <= TCP_MSS) - svm_fifo_add_want_deq_ntf (ao_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + if (msg.data.target_form != HTTP_TARGET_AUTHORITY_FORM || + msg.data.target_path_len == 0) + { + proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); + return; + } + + /* read target uri */ + target_buf = vec_new (u8, msg.data.target_path_len); + rv = svm_fifo_peek (s->rx_fifo, msg.data.target_path_offset, + msg.data.target_path_len, target_buf); + ASSERT (rv == msg.data.target_path_len); + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.len); + rv = http_parse_authority_form_target (target_buf, &target_uri); + vec_free (target_buf); + if (rv) + { + proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); + return; + } + target_sep.is_ip4 = target_uri.is_ip4; + target_sep.ip = target_uri.ip; + target_sep.port = target_uri.port; + target_sep.transport_proto = TRANSPORT_PROTO_TCP; + clib_memcpy (&a->sep_ext, &target_sep, sizeof (target_sep)); } else { - vnet_connect_args_t _a, *a = &_a; - svm_fifo_t *tx_fifo, *rx_fifo; - u32 max_dequeue, ps_index; - int actual_transfer __attribute__ ((unused)); + max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo); + if (PREDICT_FALSE (max_dequeue == 0)) + return; - rx_fifo = s->rx_fifo; - tx_fifo = s->tx_fifo; + max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue); + actual_transfer = + svm_fifo_peek (s->rx_fifo, 0 /* relative_offset */, max_dequeue, + pm->rx_buf[s->thread_index]); - ASSERT (rx_fifo->master_thread_index == thread_index); - ASSERT (tx_fifo->master_thread_index == thread_index); + /* Expectation is that here actual data just received is parsed and based + * on its contents, the destination and parameters of the connect to the + * upstream are decided + */ - max_dequeue = 
svm_fifo_max_dequeue_cons (s->rx_fifo); + clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep)); + } - if (PREDICT_FALSE (max_dequeue == 0)) - { - clib_spinlock_unlock_if_init (&pm->sessions_lock); - return 0; - } + a->api_context = ps_index; + a->app_index = pm->active_open_app_index; - max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue); - actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ , - max_dequeue, pm->rx_buf[thread_index]); + if (proxy_transport_needs_crypto (a->sep.transport_proto)) + { + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = pm->ckpair_index; + } - /* $$$ your message in this space: parse url, etc. */ + proxy_program_connect (a); +} - clib_memset (a, 0, sizeof (*a)); +static int +proxy_rx_callback (session_t *s) +{ + proxy_session_side_ctx_t *sc; + svm_fifo_t *ao_tx_fifo; + proxy_session_t *ps; + proxy_worker_t *wrk; - ps->server_rx_fifo = rx_fifo; - ps->server_tx_fifo = tx_fifo; - ps->active_open_establishing = 1; - ps_index = ps->ps_index; + ASSERT (s->thread_index == vlib_get_thread_index ()); - clib_spinlock_unlock_if_init (&pm->sessions_lock); + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); - clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep)); - a->api_context = ps_index; - a->app_index = pm->active_open_app_index; + if (PREDICT_FALSE (sc->state < PROXY_SC_S_ESTABLISHED)) + { + proxy_main_t *pm = &proxy_main; - if (proxy_transport_needs_crypto (a->sep.transport_proto)) + if (sc->state == PROXY_SC_S_CREATED) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = pm->ckpair_index; + proxy_session_start_connect (sc, s); + sc->state = PROXY_SC_S_CONNECTING; + return 0; } - proxy_call_main_thread (a); + clib_spinlock_lock_if_init 
(&pm->sessions_lock); + + ps = proxy_session_get (sc->ps_index); + sc->pair = ps->ao; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + if (sc->pair.session_handle == SESSION_INVALID_HANDLE) + return 0; + + sc->state = PROXY_SC_S_ESTABLISHED; } + ao_tx_fifo = s->rx_fifo; + + /* + * Send event for active open tx fifo + */ + if (svm_fifo_set_event (ao_tx_fifo)) + session_program_tx_io_evt (sc->pair.session_handle, SESSION_IO_EVT_TX); + + if (svm_fifo_max_enqueue (ao_tx_fifo) <= TCP_MSS) + svm_fifo_add_want_deq_ntf (ao_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + return 0; } @@ -418,20 +650,20 @@ static void proxy_force_ack (void *handlep) { transport_connection_t *tc; - session_t *ao_s; + session_t *s; - ao_s = session_get_from_handle (pointer_to_uword (handlep)); - if (session_get_transport_proto (ao_s) != TRANSPORT_PROTO_TCP) + s = session_get_from_handle (pointer_to_uword (handlep)); + if (session_get_transport_proto (s) != TRANSPORT_PROTO_TCP) return; - tc = session_get_transport (ao_s); + tc = session_get_transport (s); tcp_send_ack ((tcp_connection_t *) tc); } static int proxy_tx_callback (session_t * proxy_s) { - proxy_main_t *pm = &proxy_main; - proxy_session_t *ps; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; u32 min_free; min_free = clib_min (svm_fifo_size (proxy_s->tx_fifo) >> 3, 128 << 10); @@ -441,21 +673,17 @@ proxy_tx_callback (session_t * proxy_s) return 0; } - clib_spinlock_lock_if_init (&pm->sessions_lock); - - ps = proxy_session_get (proxy_s->opaque); - - if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE) - goto unlock; + wrk = proxy_worker_get (proxy_s->thread_index); + sc = proxy_session_side_ctx_get (wrk, proxy_s->opaque); + if (sc->state < PROXY_SC_S_ESTABLISHED) + return 0; /* Force ack on active open side to update rcv wnd. 
Make sure it's done on * the right thread */ - void *arg = uword_to_pointer (ps->vpp_active_open_handle, void *); - session_send_rpc_evt_to_thread (ps->server_rx_fifo->master_thread_index, - proxy_force_ack, arg); - -unlock: - clib_spinlock_unlock_if_init (&pm->sessions_lock); + void *arg = uword_to_pointer (sc->pair.session_handle, void *); + session_send_rpc_evt_to_thread ( + session_thread_from_handle (sc->pair.session_handle), proxy_force_ack, + arg); return 0; } @@ -464,7 +692,10 @@ static void proxy_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf) { if (ntf == SESSION_CLEANUP_TRANSPORT) - return; + { + proxy_try_side_ctx_cleanup (s); + return; + } proxy_try_delete_session (s, 0 /* is_active_open */ ); } @@ -490,10 +721,17 @@ active_open_alloc_session_fifos (session_t *s) clib_spinlock_lock_if_init (&pm->sessions_lock); + /* Active open opaque is pointing at proxy session */ ps = proxy_session_get (s->opaque); - txf = ps->server_rx_fifo; - rxf = ps->server_tx_fifo; + if (ps->po_disconnected) + { + clib_spinlock_unlock_if_init (&pm->sessions_lock); + return SESSION_E_ALLOC; + } + + txf = ps->po.rx_fifo; + rxf = ps->po.tx_fifo; /* * Reset the active-open tx-fifo master indices so the active-open session @@ -524,31 +762,43 @@ active_open_connected_callback (u32 app_index, u32 opaque, { proxy_main_t *pm = &proxy_main; proxy_session_t *ps; - u8 thread_index = vlib_get_thread_index (); - - /* - * Setup proxy session handle. 
- */ - clib_spinlock_lock_if_init (&pm->sessions_lock); - - ps = proxy_session_get (opaque); + proxy_worker_t *wrk; + proxy_session_side_ctx_t *sc; + session_t *po_s; + transport_proto_t tp; /* Connection failed */ if (err) { - vnet_disconnect_args_t _a, *a = &_a; + clib_spinlock_lock_if_init (&pm->sessions_lock); - a->handle = ps->vpp_server_handle; - a->app_index = pm->server_app_index; - vnet_disconnect_session (a); - ps->po_disconnected = 1; - } - else - { - ps->vpp_active_open_handle = session_handle (s); - ps->active_open_establishing = 0; + ps = proxy_session_get (opaque); + po_s = session_get_from_handle (ps->po.session_handle); + tp = session_get_transport_proto (po_s); + if (tp == TRANSPORT_PROTO_HTTP) + { + proxy_send_http_resp (po_s, HTTP_STATUS_BAD_GATEWAY, 0); + } + ps->ao_disconnected = 1; + proxy_session_close_po (ps); + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + return 0; } + wrk = proxy_worker_get (s->thread_index); + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (opaque); + + ps->ao.rx_fifo = s->rx_fifo; + ps->ao.tx_fifo = s->tx_fifo; + ps->ao.session_handle = session_handle (s); + + ps->active_open_establishing = 0; + /* Passive open session was already closed! */ if (ps->po_disconnected) { @@ -558,21 +808,136 @@ active_open_connected_callback (u32 app_index, u32 opaque, return -1; } - s->opaque = opaque; + po_s = session_get_from_handle (ps->po.session_handle); + tp = session_get_transport_proto (po_s); + + sc = proxy_session_side_ctx_alloc (wrk); + sc->pair = ps->po; + sc->ps_index = ps->ps_index; clib_spinlock_unlock_if_init (&pm->sessions_lock); - /* - * Send event for active open tx fifo - */ - ASSERT (s->thread_index == thread_index); - if (svm_fifo_set_event (s->tx_fifo)) - session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); + sc->state = PROXY_SC_S_ESTABLISHED; + s->opaque = sc->sc_index; + sc->is_http = tp == TRANSPORT_PROTO_HTTP ? 
1 : 0; + + if (tp == TRANSPORT_PROTO_HTTP) + { + proxy_send_http_resp (po_s, HTTP_STATUS_OK, 0); + } + else + { + /* + * Send event for active open tx fifo + */ + ASSERT (s->thread_index == vlib_get_thread_index ()); + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (session_handle (s), SESSION_IO_EVT_TX); + } return 0; } static void +active_open_migrate_po_fixup_rpc (void *arg) +{ + u32 ps_index = pointer_to_uword (arg); + proxy_session_side_ctx_t *po_sc; + proxy_main_t *pm = &proxy_main; + session_handle_t po_sh; + proxy_worker_t *wrk; + proxy_session_t *ps; + session_t *po_s; + + wrk = proxy_worker_get (vlib_get_thread_index ()); + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (ps_index); + + po_s = session_get_from_handle (ps->po.session_handle); + po_s->rx_fifo = ps->po.rx_fifo; + po_s->tx_fifo = ps->po.tx_fifo; + + po_sc = proxy_session_side_ctx_get (wrk, po_s->opaque); + po_sc->pair = ps->ao; + po_sh = ps->po.session_handle; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + session_program_tx_io_evt (po_sh, SESSION_IO_EVT_TX); +} + +static void +active_open_migrate_rpc (void *arg) +{ + u32 ps_index = pointer_to_uword (arg); + proxy_main_t *pm = &proxy_main; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; + proxy_session_t *ps; + session_t *s; + + wrk = proxy_worker_get (vlib_get_thread_index ()); + sc = proxy_session_side_ctx_alloc (wrk); + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (ps_index); + sc->ps_index = ps->ps_index; + + s = session_get_from_handle (ps->ao.session_handle); + s->opaque = sc->sc_index; + s->flags &= ~SESSION_F_IS_MIGRATING; + + /* Fixup passive open session because of migration and zc */ + ps->ao.rx_fifo = ps->po.tx_fifo = s->rx_fifo; + ps->ao.tx_fifo = ps->po.rx_fifo = s->tx_fifo; + + ps->po.tx_fifo->shr->master_session_index = + session_index_from_handle (ps->po.session_handle); + ps->po.tx_fifo->master_thread_index = + 
session_thread_from_handle (ps->po.session_handle); + + sc->pair = ps->po; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + session_send_rpc_evt_to_thread ( + session_thread_from_handle (sc->pair.session_handle), + active_open_migrate_po_fixup_rpc, uword_to_pointer (sc->ps_index, void *)); +} + +static void +active_open_migrate_callback (session_t *s, session_handle_t new_sh) +{ + proxy_main_t *pm = &proxy_main; + proxy_session_side_ctx_t *sc; + proxy_session_t *ps; + proxy_worker_t *wrk; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + + /* NOTE: this is just an example. ZC makes this migration rather + * tedious. Probably better approaches could be found */ + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (sc->ps_index); + ps->ao.session_handle = new_sh; + ps->ao.rx_fifo = 0; + ps->ao.tx_fifo = 0; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + session_send_rpc_evt_to_thread (session_thread_from_handle (new_sh), + active_open_migrate_rpc, + uword_to_pointer (sc->ps_index, void *)); + + proxy_session_side_ctx_free (wrk, sc); +} + +static void active_open_reset_callback (session_t * s) { proxy_try_close_session (s, 1 /* is_active_open */ ); @@ -618,10 +983,8 @@ active_open_rx_callback (session_t * s) static int active_open_tx_callback (session_t * ao_s) { - proxy_main_t *pm = &proxy_main; - transport_connection_t *tc; - proxy_session_t *ps; - session_t *proxy_s; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; u32 min_free; min_free = clib_min (svm_fifo_size (ao_s->tx_fifo) >> 3, 128 << 10); @@ -631,23 +994,27 @@ active_open_tx_callback (session_t * ao_s) return 0; } - clib_spinlock_lock_if_init (&pm->sessions_lock); - - ps = proxy_session_get_if_valid (ao_s->opaque); - if (!ps) - goto unlock; - - if (ps->vpp_server_handle == ~0) - goto unlock; + wrk = proxy_worker_get (ao_s->thread_index); + sc = proxy_session_side_ctx_get (wrk, ao_s->opaque); - proxy_s = 
session_get_from_handle (ps->vpp_server_handle); - - /* Force ack on proxy side to update rcv wnd */ - tc = session_get_transport (proxy_s); - tcp_send_ack ((tcp_connection_t *) tc); + if (sc->state < PROXY_SC_S_ESTABLISHED) + return 0; -unlock: - clib_spinlock_unlock_if_init (&pm->sessions_lock); + if (sc->is_http) + { + /* notify HTTP transport */ + session_t *po = session_get_from_handle (sc->pair.session_handle); + session_send_io_evt_to_thread_custom ( + &po->session_index, po->thread_index, SESSION_IO_EVT_RX); + } + else + { + /* Force ack on proxy side to update rcv wnd */ + void *arg = uword_to_pointer (sc->pair.session_handle, void *); + session_send_rpc_evt_to_thread ( + session_thread_from_handle (sc->pair.session_handle), proxy_force_ack, + arg); + } return 0; } @@ -664,6 +1031,7 @@ active_open_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf) static session_cb_vft_t active_open_clients = { .session_reset_callback = active_open_reset_callback, .session_connected_callback = active_open_connected_callback, + .session_migrate_callback = active_open_migrate_callback, .session_accept_callback = active_open_create_callback, .session_disconnect_callback = active_open_disconnect_callback, .session_cleanup_callback = active_open_cleanup_callback, @@ -756,22 +1124,26 @@ proxy_server_listen () { proxy_main_t *pm = &proxy_main; vnet_listen_args_t _a, *a = &_a; - int rv; + int rv, need_crypto; clib_memset (a, 0, sizeof (*a)); a->app_index = pm->server_app_index; clib_memcpy (&a->sep_ext, &pm->server_sep, sizeof (pm->server_sep)); - if (proxy_transport_needs_crypto (a->sep.transport_proto)) + /* Make sure listener is marked connected for transports like udp */ + a->sep_ext.transport_flags = TRANSPORT_CFG_F_CONNECTED; + need_crypto = proxy_transport_needs_crypto (a->sep.transport_proto); + if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = pm->ckpair_index; + 
transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = pm->ckpair_index; } rv = vnet_listen (a); - if (a->sep_ext.ext_cfg) - clib_mem_free (a->sep_ext.ext_cfg); + if (need_crypto) + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } @@ -797,15 +1169,25 @@ proxy_server_create (vlib_main_t * vm) { vlib_thread_main_t *vtm = vlib_get_thread_main (); proxy_main_t *pm = &proxy_main; + proxy_worker_t *wrk; u32 num_threads; int i; + if (vlib_num_workers ()) + clib_spinlock_init (&pm->sessions_lock); + num_threads = 1 /* main thread */ + vtm->n_threads; vec_validate (pm->rx_buf, num_threads - 1); for (i = 0; i < num_threads; i++) vec_validate (pm->rx_buf[i], pm->rcv_buffer_size); + vec_validate (pm->workers, vlib_num_workers ()); + vec_foreach (wrk, pm->workers) + { + clib_spinlock_init (&wrk->pending_connects_lock); + } + proxy_server_add_ckpair (); if (proxy_server_attach ()) @@ -813,11 +1195,6 @@ proxy_server_create (vlib_main_t * vm) clib_warning ("failed to attach server app"); return -1; } - if (proxy_server_listen ()) - { - clib_warning ("failed to start listening"); - return -1; - } if (active_open_attach ()) { clib_warning ("failed to attach active open app"); @@ -849,9 +1226,6 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, pm->private_segment_count = 0; pm->segment_size = 512 << 20; - if (vlib_num_workers ()) - clib_spinlock_init (&pm->sessions_lock); - if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -897,35 +1271,45 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, default_server_uri); server_uri = format (0, "%s%c", default_server_uri, 0); } - if (!client_uri) - { - clib_warning ("No client-uri provided, Using default: %s", - default_client_uri); - client_uri = format (0, "%s%c", default_client_uri, 0); - } - if (parse_uri ((char *) 
server_uri, &pm->server_sep)) { error = clib_error_return (0, "Invalid server uri %v", server_uri); goto done; } - if (parse_uri ((char *) client_uri, &pm->client_sep)) + + /* http proxy get target within request */ + if (pm->server_sep.transport_proto != TRANSPORT_PROTO_HTTP) { - error = clib_error_return (0, "Invalid client uri %v", client_uri); - goto done; + if (!client_uri) + { + clib_warning ("No client-uri provided, Using default: %s", + default_client_uri); + client_uri = format (0, "%s%c", default_client_uri, 0); + } + if (parse_uri ((char *) client_uri, &pm->client_sep)) + { + error = clib_error_return (0, "Invalid client uri %v", client_uri); + goto done; + } } - vnet_session_enable_disable (vm, 1 /* turn on session and transport */ ); - - rv = proxy_server_create (vm); - switch (rv) + if (pm->server_app_index == APP_INVALID_INDEX) { - case 0: - break; - default: - error = clib_error_return (0, "server_create returned %d", rv); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); + rv = proxy_server_create (vm); + if (rv) + { + error = clib_error_return (0, "server_create returned %d", rv); + goto done; + } } + if (proxy_server_listen ()) + error = clib_error_return (0, "failed to start listening"); + done: unformat_free (line_input); vec_free (client_uri); @@ -933,14 +1317,13 @@ done: return error; } -VLIB_CLI_COMMAND (proxy_create_command, static) = -{ +VLIB_CLI_COMMAND (proxy_create_command, static) = { .path = "test proxy server", - .short_help = "test proxy server [server-uri <tcp://ip/port>]" - "[client-uri <tcp://ip/port>][fifo-size <nn>[k|m]]" - "[max-fifo-size <nn>[k|m]][high-watermark <nn>]" - "[low-watermark <nn>][rcv-buf-size <nn>][prealloc-fifos <nn>]" - "[private-segment-size <mem>][private-segment-count <nn>]", + .short_help = "test proxy server [server-uri <proto://ip/port>]" + "[client-uri <tcp://ip/port>][fifo-size <nn>[k|m]]" + 
"[max-fifo-size <nn>[k|m]][high-watermark <nn>]" + "[low-watermark <nn>][rcv-buf-size <nn>][prealloc-fifos <nn>]" + "[private-segment-size <mem>][private-segment-count <nn>]", .function = proxy_server_create_command_fn, }; @@ -950,6 +1333,7 @@ proxy_main_init (vlib_main_t * vm) proxy_main_t *pm = &proxy_main; pm->server_client_index = ~0; pm->active_open_client_index = ~0; + pm->server_app_index = APP_INVALID_INDEX; return 0; } diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h index 26f4de2f729..789e5613520 100644 --- a/src/plugins/hs_apps/proxy.h +++ b/src/plugins/hs_apps/proxy.h @@ -26,23 +26,57 @@ #include <vnet/session/session.h> #include <vnet/session/application_interface.h> +#define foreach_proxy_session_side_state \ + _ (CREATED, "created") \ + _ (CONNECTING, "connecting") \ + _ (ESTABLISHED, "establiehed") \ + _ (CLOSED, "closed") + +typedef enum proxy_session_side_state_ +{ +#define _(sym, str) PROXY_SC_S_##sym, + foreach_proxy_session_side_state +#undef _ +} proxy_session_side_state_t; +typedef struct proxy_session_side_ +{ + session_handle_t session_handle; + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; +} proxy_session_side_t; + +typedef struct proxy_session_side_ctx_ +{ + proxy_session_side_t pair; + proxy_session_side_state_t state; + u32 sc_index; + u32 ps_index; + u8 is_http; +} proxy_session_side_ctx_t; + typedef struct { - svm_fifo_t *server_rx_fifo; - svm_fifo_t *server_tx_fifo; + proxy_session_side_t po; /**< passive open side */ + proxy_session_side_t ao; /**< active open side */ - session_handle_t vpp_server_handle; - session_handle_t vpp_active_open_handle; volatile int active_open_establishing; volatile int po_disconnected; volatile int ao_disconnected; u32 ps_index; - u32 po_thread_index; } proxy_session_t; +typedef struct proxy_worker_ +{ + proxy_session_side_ctx_t *ctx_pool; + clib_spinlock_t pending_connects_lock; + vnet_connect_args_t *pending_connects; + vnet_connect_args_t *burst_connects; +} proxy_worker_t; + 
typedef struct { + proxy_worker_t *workers; /**< per-thread data */ proxy_session_t *sessions; /**< session pool, shared */ clib_spinlock_t sessions_lock; /**< lock for session pool */ u8 **rx_buf; /**< intermediate rx buffers */ @@ -75,6 +109,13 @@ typedef struct extern proxy_main_t proxy_main; +static inline proxy_worker_t * +proxy_worker_get (u32 thread_index) +{ + proxy_main_t *pm = &proxy_main; + return vec_elt_at_index (pm->workers, thread_index); +} + #endif /* __included_proxy_h__ */ /* diff --git a/src/plugins/hs_apps/sapi/vpp_echo_common.c b/src/plugins/hs_apps/sapi/vpp_echo_common.c index 5ce04d1b75b..09ba583cf78 100644 --- a/src/plugins/hs_apps/sapi/vpp_echo_common.c +++ b/src/plugins/hs_apps/sapi/vpp_echo_common.c @@ -330,8 +330,8 @@ format_transport_proto (u8 * s, va_list * args) case TRANSPORT_PROTO_UDP: s = format (s, "UDP"); break; - case TRANSPORT_PROTO_NONE: - s = format (s, "NONE"); + case TRANSPORT_PROTO_CT: + s = format (s, "CT"); break; case TRANSPORT_PROTO_TLS: s = format (s, "TLS"); diff --git a/src/plugins/hs_apps/test_builtins.c b/src/plugins/hs_apps/test_builtins.c new file mode 100644 index 00000000000..c314e71b5df --- /dev/null +++ b/src/plugins/hs_apps/test_builtins.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. 
+ */ + +#include <http_static/http_static.h> +#include <vppinfra/tw_timer_2t_1w_2048sl.h> + +typedef struct +{ + u32 stop_timer_handle; + hss_session_handle_t sh; +} tw_timer_elt_t; + +typedef struct tb_main_ +{ + tw_timer_elt_t *delayed_resps; + tw_timer_wheel_2t_1w_2048sl_t tw; + hss_session_send_fn send_data; + u8 *test_data; +} tb_main_t; + +static tb_main_t tb_main; + +static uword +test_builtins_timer_process (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_frame_t *f) +{ + tb_main_t *tbm = &tb_main; + f64 now, timeout = 1.0; + uword *event_data = 0; + uword __clib_unused event_type; + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, timeout); + now = vlib_time_now (vm); + event_type = vlib_process_get_events (vm, (uword **) &event_data); + + /* expire timers */ + tw_timer_expire_timers_2t_1w_2048sl (&tbm->tw, now); + + vec_reset_length (event_data); + } + return 0; +} + +VLIB_REGISTER_NODE (test_builtins_timer_process_node) = { + .function = test_builtins_timer_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "test-builtins-timer-process", + .state = VLIB_NODE_STATE_DISABLED, +}; + +static void +send_data_to_hss (hss_session_handle_t sh, u8 *data, u8 free_vec_data) +{ + tb_main_t *tbm = &tb_main; + hss_url_handler_args_t args = {}; + + args.sh = sh; + args.data = data; + args.data_len = vec_len (data); + args.ct = HTTP_CONTENT_TEXT_PLAIN; + args.sc = HTTP_STATUS_OK; + args.free_vec_data = free_vec_data; + + tbm->send_data (&args); +} + +static hss_url_handler_rc_t +handle_get_test1 (hss_url_handler_args_t *args) +{ + u8 *data; + + clib_warning ("get request on test1"); + data = format (0, "hello"); + send_data_to_hss (args->sh, data, 1); + + return HSS_URL_HANDLER_ASYNC; +} + +static hss_url_handler_rc_t +handle_get_test2 (hss_url_handler_args_t *args) +{ + u8 *data; + + clib_warning ("get request on test2"); + data = format (0, "some data"); + send_data_to_hss (args->sh, data, 1); + + return HSS_URL_HANDLER_ASYNC; +} + +static void 
+delayed_resp_cb (u32 *expired_timers) +{ + tb_main_t *tbm = &tb_main; + int i; + u32 pool_index; + tw_timer_elt_t *e; + u8 *data; + + for (i = 0; i < vec_len (expired_timers); i++) + { + pool_index = expired_timers[i] & 0x7FFFFFFF; + e = pool_elt_at_index (tbm->delayed_resps, pool_index); + clib_warning ("sending delayed data"); + data = format (0, "delayed data"); + send_data_to_hss (e->sh, data, 1); + pool_put (tbm->delayed_resps, e); + } +} + +static hss_url_handler_rc_t +handle_get_test_delayed (hss_url_handler_args_t *args) +{ + tb_main_t *tbm = &tb_main; + tw_timer_elt_t *e; + + clib_warning ("get request on test_delayed"); + pool_get (tbm->delayed_resps, e); + e->sh = args->sh; + e->stop_timer_handle = + tw_timer_start_2t_1w_2048sl (&tbm->tw, e - tbm->delayed_resps, 0, 5); + + return HSS_URL_HANDLER_ASYNC; +} + +static hss_url_handler_rc_t +handle_post_test3 (hss_url_handler_args_t *args) +{ + send_data_to_hss (args->sh, 0, 0); + return HSS_URL_HANDLER_ASYNC; +} + +static hss_url_handler_rc_t +handle_get_64bytes (hss_url_handler_args_t *args) +{ + tb_main_t *tbm = &tb_main; + send_data_to_hss (args->sh, tbm->test_data, 0); + return HSS_URL_HANDLER_ASYNC; +} + +static void +test_builtins_init (vlib_main_t *vm) +{ + tb_main_t *tbm = &tb_main; + hss_register_url_fn fp; + vlib_node_t *n; + + fp = vlib_get_plugin_symbol ("http_static_plugin.so", + "hss_register_url_handler"); + + if (fp == 0) + { + clib_warning ("http_static_plugin.so not loaded..."); + return; + } + + tbm->test_data = format ( + 0, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + + (*fp) (handle_get_test1, "test1", HTTP_REQ_GET); + (*fp) (handle_get_test2, "test2", HTTP_REQ_GET); + (*fp) (handle_get_test_delayed, "test_delayed", HTTP_REQ_GET); + (*fp) (handle_post_test3, "test3", HTTP_REQ_POST); + (*fp) (handle_get_64bytes, "64B", HTTP_REQ_GET); + + tbm->send_data = + vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data"); + + 
tw_timer_wheel_init_2t_1w_2048sl (&tbm->tw, delayed_resp_cb, 1.0, ~0); + + vlib_node_set_state (vm, test_builtins_timer_process_node.index, + VLIB_NODE_STATE_POLLING); + n = vlib_get_node (vm, test_builtins_timer_process_node.index); + vlib_start_process (vm, n->runtime_index); +} + +static clib_error_t * +test_builtins_enable_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + test_builtins_init (vm); + return 0; +} + +VLIB_CLI_COMMAND (test_builtins_enable_command, static) = { + .path = "test-url-handler enable", + .short_help = "test-url-handler enable", + .function = test_builtins_enable_command_fn, +}; diff --git a/src/plugins/hs_apps/vcl/vcl_test.h b/src/plugins/hs_apps/vcl/vcl_test.h index 0ce27ef84e2..11667fb144a 100644 --- a/src/plugins/hs_apps/vcl/vcl_test.h +++ b/src/plugins/hs_apps/vcl/vcl_test.h @@ -124,7 +124,7 @@ typedef struct typedef struct { - const vcl_test_proto_vft_t *protos[VPPCOM_PROTO_SRTP + 1]; + const vcl_test_proto_vft_t *protos[VPPCOM_PROTO_HTTP + 1]; uint32_t ckpair_index; hs_test_cfg_t cfg; vcl_test_wrk_t *wrk; @@ -420,6 +420,39 @@ vcl_test_write (vcl_test_session_t *ts, void *buf, uint32_t nbytes) return (tx_bytes); } +static inline int +vcl_test_write_ds (vcl_test_session_t *ts) +{ + vcl_test_stats_t *stats = &ts->stats; + int tx_bytes; + + do + { + stats->tx_xacts++; + if (ts->ds[1].len) + tx_bytes = vppcom_session_write_segments (ts->fd, ts->ds, 2); + else + tx_bytes = vppcom_session_write_segments (ts->fd, ts->ds, 1); + + if (tx_bytes < 0) + errno = -tx_bytes; + if ((tx_bytes == 0) || + ((tx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK)))) + stats->rx_eagain++; + } + while ((tx_bytes == 0) || + ((tx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK)))); + + if (tx_bytes < 0) + { + vterr ("vppcom_session_write_segments()", -errno); + } + else + stats->tx_bytes += tx_bytes; + + return (tx_bytes); +} + static inline void dump_help (void) { diff --git 
a/src/plugins/hs_apps/vcl/vcl_test_client.c b/src/plugins/hs_apps/vcl/vcl_test_client.c index a4a10b562ff..8bac1f00b9d 100644 --- a/src/plugins/hs_apps/vcl/vcl_test_client.c +++ b/src/plugins/hs_apps/vcl/vcl_test_client.c @@ -419,13 +419,8 @@ vtc_worker_run_select (vcl_test_client_worker_t *wrk) if (vcm->incremental_stats) vtc_inc_stats_check (ts); } - if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes) || - (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes)) - { - clock_gettime (CLOCK_REALTIME, &ts->stats.stop); - ts->is_done = 1; - n_active_sessions--; - } + if (vtc_session_check_is_done (ts, check_rx)) + n_active_sessions -= 1; } } diff --git a/src/plugins/hs_apps/vcl/vcl_test_protos.c b/src/plugins/hs_apps/vcl/vcl_test_protos.c index cd1ac2b24f4..9c81c5f17a1 100644 --- a/src/plugins/hs_apps/vcl/vcl_test_protos.c +++ b/src/plugins/hs_apps/vcl/vcl_test_protos.c @@ -14,6 +14,23 @@ */ #include <hs_apps/vcl/vcl_test.h> +#include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> + +typedef enum vcl_test_http_state_ +{ + VCL_TEST_HTTP_IDLE = 0, + VCL_TEST_HTTP_IN_PROGRESS, + VCL_TEST_HTTP_COMPLETED, +} vcl_test_http_state_t; + +typedef struct vcl_test_http_ctx_t +{ + u8 is_server; + vcl_test_http_state_t test_state; + u64 rem_data; +} vcl_test_http_ctx_t; static int vt_tcp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt) @@ -978,6 +995,418 @@ static const vcl_test_proto_vft_t vcl_test_srtp = { VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_SRTP, vcl_test_srtp); +static void +vt_http_session_init (vcl_test_session_t *ts, u8 is_server) +{ + vcl_test_http_ctx_t *http_ctx; + + http_ctx = malloc (sizeof (vcl_test_http_ctx_t)); + memset (http_ctx, 0, sizeof (*http_ctx)); + http_ctx->is_server = is_server; + ts->opaque = http_ctx; +} + +static inline void +vt_http_send_reply_msg (vcl_test_session_t *ts, http_status_code_t status) +{ + http_msg_t msg; + int rv = 0; + + memset (&msg, 0, sizeof (http_msg_t)); + msg.type = 
HTTP_MSG_REPLY; + msg.code = status; + + vppcom_data_segment_t segs[1] = { { (u8 *) &msg, sizeof (msg) } }; + + do + { + rv = vppcom_session_write_segments (ts->fd, segs, 1); + + if (rv < 0) + { + errno = -rv; + if (errno == EAGAIN || errno == EWOULDBLOCK) + continue; + + vterr ("vppcom_session_write()", -errno); + break; + } + } + while (rv <= 0); +} + +static inline int +vt_process_http_server_read_msg (vcl_test_session_t *ts, void *buf, + uint32_t nbytes) +{ + http_msg_t msg; + u8 *target_path = 0; + vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque; + vcl_test_stats_t *stats = &ts->stats; + int rv = 0; + + do + { + stats->rx_xacts++; + rv = vppcom_session_read (ts->fd, buf, nbytes); + + if (rv <= 0) + { + errno = -rv; + if (errno == EAGAIN || errno == EWOULDBLOCK) + { + stats->rx_eagain++; + continue; + } + + vterr ("vppcom_session_read()", -errno); + return 0; + } + + if (PREDICT_TRUE (vcl_test_http_ctx->test_state == + VCL_TEST_HTTP_IN_PROGRESS)) + { + vcl_test_http_ctx->rem_data -= rv; + + if (vcl_test_http_ctx->rem_data == 0) + { + vcl_test_http_ctx->test_state = VCL_TEST_HTTP_COMPLETED; + vt_http_send_reply_msg (ts, HTTP_STATUS_OK); + } + } + else if (PREDICT_FALSE (vcl_test_http_ctx->test_state == + VCL_TEST_HTTP_IDLE)) + { + msg = *(http_msg_t *) buf; + + /* verify that we have received http post request from client */ + if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_POST) + { + vt_http_send_reply_msg (ts, HTTP_STATUS_METHOD_NOT_ALLOWED); + vterr ("error! only POST requests allowed from client", 0); + return 0; + } + + if (msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) + { + vt_http_send_reply_msg (ts, HTTP_STATUS_BAD_REQUEST); + vterr ("error! 
http target not in origin form", 0); + return 0; + } + + /* validate target path syntax */ + if (msg.data.target_path_len) + { + vec_validate (target_path, msg.data.target_path_len - 1); + memcpy (target_path, + buf + sizeof (msg) + msg.data.target_path_offset - 1, + msg.data.target_path_len + 1); + if (http_validate_abs_path_syntax (target_path, 0)) + { + vt_http_send_reply_msg (ts, HTTP_STATUS_BAD_REQUEST); + vterr ("error! target path is not absolute", 0); + vec_free (target_path); + return 0; + } + vec_free (target_path); + } + + /* read body */ + if (msg.data.body_len) + { + vcl_test_http_ctx->rem_data = msg.data.body_len; + /* | <http_msg_t> | <target> | <headers> | <body> | */ + vcl_test_http_ctx->rem_data -= + (rv - sizeof (msg) - msg.data.body_offset); + vcl_test_http_ctx->test_state = VCL_TEST_HTTP_IN_PROGRESS; + } + } + + if (rv < nbytes) + stats->rx_incomp++; + } + while (rv <= 0); + + stats->rx_bytes += rv; + return (rv); +} + +static inline int +vt_process_http_client_read_msg (vcl_test_session_t *ts, void *buf, + uint32_t nbytes) +{ + http_msg_t msg; + int rv = 0; + + do + { + rv = vppcom_session_read (ts->fd, buf, nbytes); + + if (rv < 0) + { + errno = -rv; + if (errno == EAGAIN || errno == EWOULDBLOCK) + continue; + + vterr ("vppcom_session_read()", -errno); + break; + } + } + while (!rv); + + msg = *(http_msg_t *) buf; + + if (msg.type == HTTP_MSG_REPLY && msg.code == HTTP_STATUS_OK) + vtinf ("received 200 OK from server"); + else + vterr ("received unexpected reply from server", 0); + + return (rv); +} + +static inline int +vt_process_http_client_write_msg (vcl_test_session_t *ts, void *buf, + uint32_t nbytes) +{ + http_msg_t msg; + http_header_t *req_headers = 0; + u8 *headers_buf = 0; + u8 *target; + vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque; + vcl_test_stats_t *stats = &ts->stats; + int rv = 0; + + if (PREDICT_TRUE (vcl_test_http_ctx->test_state == + VCL_TEST_HTTP_IN_PROGRESS)) + { + do + { + rv = 
vppcom_session_write ( + ts->fd, buf, clib_min (nbytes, vcl_test_http_ctx->rem_data)); + + if (rv <= 0) + { + errno = -rv; + if (errno == EAGAIN || errno == EWOULDBLOCK) + { + stats->tx_eagain++; + continue; + } + + vterr ("vppcom_session_write()", -errno); + return 0; + } + + vcl_test_http_ctx->rem_data -= rv; + + if (vcl_test_http_ctx->rem_data == 0) + { + vcl_test_http_ctx->test_state = VCL_TEST_HTTP_COMPLETED; + vtinf ("client finished sending %ld bytes of data", + ts->cfg.total_bytes); + } + + if (rv < nbytes) + stats->tx_incomp++; + } + while (rv <= 0); + } + + else if (PREDICT_FALSE (vcl_test_http_ctx->test_state == VCL_TEST_HTTP_IDLE)) + { + http_add_header ( + &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); + headers_buf = http_serialize_headers (req_headers); + vec_free (req_headers); + + memset (&msg, 0, sizeof (http_msg_t)); + msg.type = HTTP_MSG_REQUEST; + msg.method_type = HTTP_REQ_POST; + + /* target */ + msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; + target = (u8 *) "/vcl_test_http\0"; + msg.data.target_path_len = strlen ((char *) target); + + /* headers */ + msg.data.headers_offset = msg.data.target_path_len; + msg.data.headers_len = vec_len (headers_buf); + + /* body */ + msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len; + msg.data.body_len = ts->cfg.total_bytes; + + msg.data.len = + msg.data.target_path_len + msg.data.headers_len + msg.data.body_len; + msg.data.type = HTTP_MSG_DATA_INLINE; + + vppcom_data_segment_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, + { target, strlen ((char *) target) }, + { headers_buf, + vec_len (headers_buf) } }; + + do + { + rv = vppcom_session_write_segments (ts->fd, segs, 3); + + if (rv <= 0) + { + errno = -rv; + if (errno == EAGAIN || errno == EWOULDBLOCK) + { + stats->tx_eagain++; + continue; + } + + vterr ("vppcom_session_write_segments()", -errno); + vec_free (headers_buf); + return 0; + } + } + while (rv <= 0); 
+ + vcl_test_http_ctx->test_state = VCL_TEST_HTTP_IN_PROGRESS; + vcl_test_http_ctx->rem_data = ts->cfg.total_bytes; + vec_free (headers_buf); + } + + stats->tx_bytes += rv; + return (rv); +} + +static inline int +vt_process_http_server_write_msg (vcl_test_session_t *ts, void *buf, + uint32_t nbytes) +{ + return 0; +} + +static inline int +vt_http_read (vcl_test_session_t *ts, void *buf, uint32_t nbytes) +{ + vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque; + + if (vcl_test_http_ctx->is_server) + return vt_process_http_server_read_msg (ts, buf, nbytes); + else + return vt_process_http_client_read_msg (ts, buf, nbytes); +} + +static inline int +vt_http_write (vcl_test_session_t *ts, void *buf, uint32_t nbytes) +{ + vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque; + + if (vcl_test_http_ctx->is_server) + return vt_process_http_server_write_msg (ts, buf, nbytes); + else + return vt_process_http_client_write_msg (ts, buf, nbytes); +} + +static int +vt_http_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt) +{ + uint32_t flags, flen; + int rv; + + ts->fd = vppcom_session_create (VPPCOM_PROTO_HTTP, ts->noblk_connect); + if (ts->fd < 0) + { + vterr ("vppcom_session_create()", ts->fd); + return ts->fd; + } + + rv = vppcom_session_connect (ts->fd, endpt); + if (rv < 0 && rv != VPPCOM_EINPROGRESS) + { + vterr ("vppcom_session_connect()", rv); + return rv; + } + + ts->read = vt_http_read; + ts->write = vt_http_write; + + if (!ts->noblk_connect) + { + flags = O_NONBLOCK; + flen = sizeof (flags); + vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen); + vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd); + } + + vt_http_session_init (ts, 0 /* is_server */); + + return 0; +} + +static int +vt_http_listen (vcl_test_session_t *ts, vppcom_endpt_t *endpt) +{ + int rv; + + ts->fd = vppcom_session_create (VPPCOM_PROTO_HTTP, 1 /* is_nonblocking */); + if (ts->fd < 0) + { + vterr 
("vppcom_session_create()", ts->fd); + return ts->fd; + } + + rv = vppcom_session_bind (ts->fd, endpt); + if (rv < 0) + { + vterr ("vppcom_session_bind()", rv); + return rv; + } + + rv = vppcom_session_listen (ts->fd, 10); + if (rv < 0) + { + vterr ("vppcom_session_listen()", rv); + return rv; + } + + return 0; +} + +static int +vt_http_accept (int listen_fd, vcl_test_session_t *ts) +{ + int client_fd; + + client_fd = vppcom_session_accept (listen_fd, &ts->endpt, 0); + if (client_fd < 0) + { + vterr ("vppcom_session_accept()", client_fd); + return client_fd; + } + + ts->fd = client_fd; + ts->is_open = 1; + ts->read = vt_http_read; + ts->write = vt_http_write; + + vt_http_session_init (ts, 1 /* is_server */); + + return 0; +} + +static int +vt_http_close (vcl_test_session_t *ts) +{ + free (ts->opaque); + return 0; +} + +static const vcl_test_proto_vft_t vcl_test_http = { + .open = vt_http_connect, + .listen = vt_http_listen, + .accept = vt_http_accept, + .close = vt_http_close, +}; + +VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_HTTP, vcl_test_http); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/hs_apps/vcl/vcl_test_server.c b/src/plugins/hs_apps/vcl/vcl_test_server.c index d17a2089ba7..008539f2585 100644 --- a/src/plugins/hs_apps/vcl/vcl_test_server.c +++ b/src/plugins/hs_apps/vcl/vcl_test_server.c @@ -282,11 +282,7 @@ vts_server_process_rx (vcl_test_session_t *conn, int rx_bytes) if (conn->cfg.test == HS_TEST_TYPE_BI) { if (vsm->use_ds) - { - (void) vcl_test_write (conn, conn->ds[0].data, conn->ds[0].len); - if (conn->ds[1].len) - (void) vcl_test_write (conn, conn->ds[1].data, conn->ds[1].len); - } + (void) vcl_test_write_ds (conn); else (void) vcl_test_write (conn, conn->rxbuf, rx_bytes); } @@ -420,36 +416,41 @@ static void vcl_test_init_endpoint_addr (vcl_test_server_main_t * vsm) { struct sockaddr_storage *servaddr = &vsm->servaddr; - memset (servaddr, 0, sizeof (*servaddr)); if (vsm->server_cfg.address_ip6) { struct sockaddr_in6 
*server_addr = (struct sockaddr_in6 *) servaddr; - server_addr->sin6_family = AF_INET6; - server_addr->sin6_addr = in6addr_any; - server_addr->sin6_port = htons (vsm->server_cfg.port); + vsm->server_cfg.endpt.is_ip4 = 0; + vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin6_addr; + vsm->server_cfg.endpt.port = htons (vsm->server_cfg.port); } else { struct sockaddr_in *server_addr = (struct sockaddr_in *) servaddr; - server_addr->sin_family = AF_INET; - server_addr->sin_addr.s_addr = htonl (INADDR_ANY); - server_addr->sin_port = htons (vsm->server_cfg.port); + vsm->server_cfg.endpt.is_ip4 = 1; + vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin_addr; + vsm->server_cfg.endpt.port = htons (vsm->server_cfg.port); } +} + +static void +vcl_test_clear_endpoint_addr (vcl_test_server_main_t *vsm) +{ + struct sockaddr_storage *servaddr = &vsm->servaddr; + + memset (&vsm->servaddr, 0, sizeof (vsm->servaddr)); if (vsm->server_cfg.address_ip6) { struct sockaddr_in6 *server_addr = (struct sockaddr_in6 *) servaddr; - vsm->server_cfg.endpt.is_ip4 = 0; - vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin6_addr; - vsm->server_cfg.endpt.port = (uint16_t) server_addr->sin6_port; + server_addr->sin6_family = AF_INET6; + server_addr->sin6_addr = in6addr_any; } else { struct sockaddr_in *server_addr = (struct sockaddr_in *) servaddr; - vsm->server_cfg.endpt.is_ip4 = 1; - vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin_addr; - vsm->server_cfg.endpt.port = (uint16_t) server_addr->sin_port; + server_addr->sin_family = AF_INET; + server_addr->sin_addr.s_addr = htonl (INADDR_ANY); } } @@ -460,9 +461,10 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc, int v, c; vsm->server_cfg.proto = VPPCOM_PROTO_TCP; + vcl_test_clear_endpoint_addr (vsm); opterr = 0; - while ((c = getopt (argc, argv, "6DLsw:hp:S")) != -1) + while ((c = getopt (argc, argv, "6DLsw:hp:SB:")) != -1) switch (c) { case '6': @@ -473,7 +475,22 @@ vcl_test_server_process_opts 
(vcl_test_server_main_t * vsm, int argc, if (vppcom_unformat_proto (&vsm->server_cfg.proto, optarg)) vtwrn ("Invalid vppcom protocol %s, defaulting to TCP", optarg); break; - + case 'B': + if (vsm->server_cfg.address_ip6) + { + if (inet_pton ( + AF_INET6, optarg, + &((struct sockaddr_in6 *) &vsm->servaddr)->sin6_addr) != 1) + vtwrn ("couldn't parse ipv6 addr %s", optarg); + } + else + { + if (inet_pton ( + AF_INET, optarg, + &((struct sockaddr_in *) &vsm->servaddr)->sin_addr) != 1) + vtwrn ("couldn't parse ipv4 addr %s", optarg); + } + break; case 'D': vsm->server_cfg.proto = VPPCOM_PROTO_UDP; break; diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt index d9cd84a3955..075b8d6817b 100644 --- a/src/plugins/http/CMakeLists.txt +++ b/src/plugins/http/CMakeLists.txt @@ -17,3 +17,8 @@ add_vpp_plugin(http http_buffer.c http_timer.c ) + +add_vpp_plugin(http_unittest + SOURCES + test/http_test.c +) diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index 0fa113c8155..6659de9689f 100644 --- a/src/plugins/http/http.c +++ b/src/plugins/http/http.c @@ -16,11 +16,11 @@ #include <http/http.h> #include <vnet/session/session.h> #include <http/http_timer.h> +#include <http/http_status_codes.h> static http_main_t http_main; #define HTTP_FIFO_THRESH (16 << 10) -#define CONTENT_LEN_STR "Content-Length: " /* HTTP state machine result */ typedef enum http_sm_result_t_ @@ -30,24 +30,12 @@ typedef enum http_sm_result_t_ HTTP_SM_ERROR = -1, } http_sm_result_t; -const char *http_status_code_str[] = { -#define _(c, s, str) str, - foreach_http_status_code -#undef _ -}; - -const char *http_content_type_str[] = { -#define _(s, ext, str) str, - foreach_http_content_type -#undef _ -}; - const http_buffer_type_t msg_to_buf_type[] = { [HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO, [HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR, }; -u8 * +static u8 * format_http_state (u8 *s, va_list *va) { http_state_t state = va_arg (*va, http_state_t); @@ -83,6 +71,24 @@ 
format_http_state (u8 *s, va_list *va) } \ while (0) +static inline int +http_state_is_tx_valid (http_conn_t *hc) +{ + http_state_t state = hc->http_state; + return (state == HTTP_STATE_APP_IO_MORE_DATA || + state == HTTP_STATE_WAIT_APP_REPLY || + state == HTTP_STATE_WAIT_APP_METHOD); +} + +static inline int +http_state_is_rx_valid (http_conn_t *hc) +{ + http_state_t state = hc->http_state; + return (state == HTTP_STATE_WAIT_SERVER_REPLY || + state == HTTP_STATE_CLIENT_IO_MORE_DATA || + state == HTTP_STATE_WAIT_CLIENT_METHOD); +} + static inline http_worker_t * http_worker_get (u32 thread_index) { @@ -111,6 +117,15 @@ http_conn_get_w_thread (u32 hc_index, u32 thread_index) return pool_elt_at_index (wrk->conn_pool, hc_index); } +static inline http_conn_t * +http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index) +{ + http_worker_t *wrk = http_worker_get (thread_index); + if (pool_is_free_index (wrk->conn_pool, hc_index)) + return 0; + return pool_elt_at_index (wrk->conn_pool, hc_index); +} + void http_conn_free (http_conn_t *hc) { @@ -118,6 +133,35 @@ http_conn_free (http_conn_t *hc) pool_put (wrk->conn_pool, hc); } +static inline http_conn_t * +http_ho_conn_get (u32 ho_hc_index) +{ + http_main_t *hm = &http_main; + return pool_elt_at_index (hm->ho_conn_pool, ho_hc_index); +} + +void +http_ho_conn_free (http_conn_t *ho_hc) +{ + http_main_t *hm = &http_main; + pool_put (hm->ho_conn_pool, ho_hc); +} + +static inline u32 +http_ho_conn_alloc (void) +{ + http_main_t *hm = &http_main; + http_conn_t *hc; + + pool_get_aligned_safe (hm->ho_conn_pool, hc, CLIB_CACHE_LINE_BYTES); + clib_memset (hc, 0, sizeof (*hc)); + hc->h_hc_index = hc - hm->ho_conn_pool; + hc->h_pa_session_handle = SESSION_INVALID_HANDLE; + hc->h_tc_session_handle = SESSION_INVALID_HANDLE; + hc->timeout = HTTP_CONN_TIMEOUT; + return hc->h_hc_index; +} + static u32 http_listener_alloc (void) { @@ -126,6 +170,7 @@ http_listener_alloc (void) pool_get_zero (hm->listener_pool, lhc); lhc->c_c_index = lhc 
- hm->listener_pool; + lhc->timeout = HTTP_CONN_TIMEOUT; return lhc->c_c_index; } @@ -140,6 +185,7 @@ http_listener_free (http_conn_t *lhc) { http_main_t *hm = &http_main; + vec_free (lhc->app_name); if (CLIB_DEBUG) memset (lhc, 0xfc, sizeof (*lhc)); pool_put (hm->listener_pool, lhc); @@ -160,20 +206,47 @@ http_disconnect_transport (http_conn_t *hc) } static void +http_conn_invalidate_timer_cb (u32 hs_handle) +{ + http_conn_t *hc; + + hc = + http_conn_get_w_thread_if_valid (hs_handle & 0x00FFFFFF, hs_handle >> 24); + + HTTP_DBG (1, "hc [%u]%x", hs_handle >> 24, hs_handle & 0x00FFFFFF); + if (!hc) + { + HTTP_DBG (1, "already deleted"); + return; + } + + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; + hc->pending_timer = 1; +} + +static void http_conn_timeout_cb (void *hc_handlep) { http_conn_t *hc; uword hs_handle; hs_handle = pointer_to_uword (hc_handlep); - hc = http_conn_get_w_thread (hs_handle & 0x00FFFFFF, hs_handle >> 24); + hc = + http_conn_get_w_thread_if_valid (hs_handle & 0x00FFFFFF, hs_handle >> 24); - HTTP_DBG (1, "terminate thread %d index %d hs %llx", hs_handle >> 24, - hs_handle & 0x00FFFFFF, hc); + HTTP_DBG (1, "hc [%u]%x", hs_handle >> 24, hs_handle & 0x00FFFFFF); if (!hc) - return; + { + HTTP_DBG (1, "already deleted"); + return; + } + + if (!hc->pending_timer) + { + HTTP_DBG (1, "timer not pending"); + return; + } - hc->timer_handle = ~0; session_transport_closing_notify (&hc->connection); http_disconnect_transport (hc); } @@ -193,6 +266,7 @@ http_ts_accept_callback (session_t *ts) hc_index = http_conn_alloc_w_thread (ts->thread_index); hc = http_conn_get_w_thread (hc_index, ts->thread_index); clib_memcpy_fast (hc, lhc, sizeof (*lhc)); + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; hc->h_hc_index = hc_index; @@ -225,6 +299,7 @@ http_ts_accept_callback (session_t *ts) if ((rv = app_worker_init_accepted (as))) { HTTP_DBG (1, "failed to allocate fifos"); + hc->h_pa_session_handle = SESSION_INVALID_HANDLE; 
session_free (as); return rv; } @@ -266,20 +341,25 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, app_worker_t *app_wrk; int rv; + ho_hc = http_ho_conn_get (ho_hc_index); + ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); + if (err) { - clib_warning ("ERROR: %d", err); + clib_warning ("half-open hc index %d, error: %U", ho_hc_index, + format_session_error, err); + app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index); + if (app_wrk) + app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx); return 0; } new_hc_index = http_conn_alloc_w_thread (ts->thread_index); hc = http_conn_get_w_thread (new_hc_index, ts->thread_index); - ho_hc = http_conn_get_w_thread (ho_hc_index, 0); - - ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); clib_memcpy_fast (hc, ho_hc, sizeof (*hc)); + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; hc->h_tc_session_handle = session_handle (ts); hc->c_c_index = new_hc_index; @@ -301,8 +381,8 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, as->session_type = session_type_from_proto_and_ip ( TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type)); - HTTP_DBG (1, "half-open hc index %d, hc index %d", ho_hc_index, - new_hc_index); + HTTP_DBG (1, "half-open hc index %x, hc [%u]%x", ho_hc_index, + ts->thread_index, new_hc_index); app_wrk = app_worker_get (hc->h_pa_wrk_index); if (!app_wrk) @@ -359,47 +439,54 @@ http_ts_reset_callback (session_t *ts) */ static const char *http_error_template = "HTTP/1.1 %s\r\n" "Date: %U GMT\r\n" - "Content-Type: text/html\r\n" "Connection: close\r\n" - "Pragma: no-cache\r\n" "Content-Length: 0\r\n\r\n"; -static const char *http_redirect_template = "HTTP/1.1 %s\r\n"; - /** * http response boilerplate */ static const char *http_response_template = "HTTP/1.1 %s\r\n" "Date: %U GMT\r\n" - "Expires: %U GMT\r\n" - "Server: VPP Static\r\n" - "Content-Type: %s\r\n" - "Content-Length: 
%lu\r\n\r\n"; + "Server: %v\r\n"; + +static const char *content_len_template = "Content-Length: %llu\r\n"; + +/** + * http request boilerplate + */ +static const char *http_get_request_template = "GET %s HTTP/1.1\r\n" + "Host: %v\r\n" + "User-Agent: %v\r\n" + "%s"; -static const char *http_request_template = "GET %s HTTP/1.1\r\n" - "User-Agent: VPP HTTP client\r\n" - "Accept: */*\r\n"; +static const char *http_post_request_template = "POST %s HTTP/1.1\r\n" + "Host: %v\r\n" + "User-Agent: %v\r\n" + "Content-Length: %llu\r\n" + "%s"; static u32 -http_send_data (http_conn_t *hc, u8 *data, u32 length, u32 offset) +http_send_data (http_conn_t *hc, u8 *data, u32 length) { const u32 max_burst = 64 << 10; session_t *ts; u32 to_send; - int sent; + int rv; ts = session_get_from_handle (hc->h_tc_session_handle); - to_send = clib_min (length - offset, max_burst); - sent = svm_fifo_enqueue (ts->tx_fifo, to_send, data + offset); - - if (sent <= 0) - return offset; + to_send = clib_min (length, max_burst); + rv = svm_fifo_enqueue (ts->tx_fifo, to_send, data); + if (rv <= 0) + { + clib_warning ("svm_fifo_enqueue failed, rv %d", rv); + return 0; + } if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - return (offset + sent); + return rv; } static void @@ -415,37 +502,70 @@ http_send_error (http_conn_t *hc, http_status_code_t ec) now = clib_timebase_now (&hm->timebase); data = format (0, http_error_template, http_status_code_str[ec], format_clib_timebase_time, now); - http_send_data (hc, data, vec_len (data), 0); + HTTP_DBG (3, "%v", data); + http_send_data (hc, data, vec_len (data)); vec_free (data); } static int http_read_message (http_conn_t *hc) { - u32 max_deq, cursize; + u32 max_deq; session_t *ts; int n_read; ts = session_get_from_handle (hc->h_tc_session_handle); - cursize = vec_len (hc->rx_buf); max_deq = svm_fifo_max_dequeue (ts->rx_fifo); if (PREDICT_FALSE 
(max_deq == 0)) return -1; - vec_validate (hc->rx_buf, cursize + max_deq - 1); - n_read = svm_fifo_dequeue (ts->rx_fifo, max_deq, hc->rx_buf + cursize); + vec_validate (hc->rx_buf, max_deq - 1); + n_read = svm_fifo_peek (ts->rx_fifo, 0, max_deq, hc->rx_buf); ASSERT (n_read == max_deq); + HTTP_DBG (1, "read %u bytes from rx_fifo", n_read); + + return 0; +} + +static void +http_read_message_drop (http_conn_t *hc, u32 len) +{ + session_t *ts; + + ts = session_get_from_handle (hc->h_tc_session_handle); + svm_fifo_dequeue_drop (ts->rx_fifo, len); + vec_reset_length (hc->rx_buf); if (svm_fifo_is_empty (ts->rx_fifo)) svm_fifo_unset_event (ts->rx_fifo); +} - vec_set_len (hc->rx_buf, cursize + n_read); - return 0; +static void +http_read_message_drop_all (http_conn_t *hc) +{ + session_t *ts; + + ts = session_get_from_handle (hc->h_tc_session_handle); + svm_fifo_dequeue_drop_all (ts->rx_fifo); + vec_reset_length (hc->rx_buf); + + if (svm_fifo_is_empty (ts->rx_fifo)) + svm_fifo_unset_event (ts->rx_fifo); } -static int -v_find_index (u8 *vec, u32 offset, char *str) +/** + * @brief Find the first occurrence of the string in the vector. + * + * @param vec The vector to be scanned. + * @param offset Search offset in the vector. + * @param num Maximum number of characters to be searched if non-zero. + * @param str The string to be searched. + * + * @return @c -1 if the string is not found within the vector; index otherwise. 
+ */ +static inline int +v_find_index (u8 *vec, u32 offset, u32 num, char *str) { int start_index = offset; u32 slen = (u32) strnlen_s_inline (str, 16); @@ -456,7 +576,15 @@ v_find_index (u8 *vec, u32 offset, char *str) if (vlen <= slen) return -1; - for (; start_index < (vlen - slen); start_index++) + int end_index = vlen - slen; + if (num) + { + if (num < slen) + return -1; + end_index = clib_min (end_index, offset + num - slen); + } + + for (; start_index <= end_index; start_index++) { if (!memcmp (vec + start_index, str, slen)) return start_index; @@ -465,50 +593,445 @@ v_find_index (u8 *vec, u32 offset, char *str) return -1; } +static void +http_identify_optional_query (http_conn_t *hc) +{ + int i; + for (i = hc->target_path_offset; + i < (hc->target_path_offset + hc->target_path_len); i++) + { + if (hc->rx_buf[i] == '?') + { + hc->target_query_offset = i + 1; + hc->target_query_len = hc->target_path_offset + hc->target_path_len - + hc->target_query_offset; + hc->target_path_len = hc->target_path_len - hc->target_query_len - 1; + break; + } + } +} + +static int +http_get_target_form (http_conn_t *hc) +{ + int i; + + /* "*" */ + if ((hc->rx_buf[hc->target_path_offset] == '*') && + (hc->target_path_len == 1)) + { + hc->target_form = HTTP_TARGET_ASTERISK_FORM; + return 0; + } + + /* 1*( "/" segment ) [ "?" query ] */ + if (hc->rx_buf[hc->target_path_offset] == '/') + { + /* drop leading slash */ + hc->target_path_len--; + hc->target_path_offset++; + hc->target_form = HTTP_TARGET_ORIGIN_FORM; + http_identify_optional_query (hc); + return 0; + } + + /* scheme "://" host [ ":" port ] *( "/" segment ) [ "?" 
query ] */ + i = v_find_index (hc->rx_buf, hc->target_path_offset, hc->target_path_len, + "://"); + if (i > 0) + { + hc->target_form = HTTP_TARGET_ABSOLUTE_FORM; + http_identify_optional_query (hc); + return 0; + } + + /* host ":" port */ + for (i = hc->target_path_offset; + i < (hc->target_path_offset + hc->target_path_len); i++) + { + if ((hc->rx_buf[i] == ':') && (isdigit (hc->rx_buf[i + 1]))) + { + hc->target_form = HTTP_TARGET_AUTHORITY_FORM; + return 0; + } + } + + return -1; +} + static int -http_parse_header (http_conn_t *hc, int *content_length) +http_parse_request_line (http_conn_t *hc, http_status_code_t *ec) { - unformat_input_t input; - int i, len; - u8 *line; + int i, target_len; + u32 next_line_offset, method_offset; + + /* request-line = method SP request-target SP HTTP-version CRLF */ + i = v_find_index (hc->rx_buf, 8, 0, "\r\n"); + if (i < 0) + { + clib_warning ("request line incomplete"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + HTTP_DBG (2, "request line length: %d", i); + hc->control_data_len = i + 2; + next_line_offset = hc->control_data_len; + + /* there should be at least one more CRLF */ + if (vec_len (hc->rx_buf) < (next_line_offset + 2)) + { + clib_warning ("malformed message, too short"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } - i = v_find_index (hc->rx_buf, hc->rx_buf_offset, CONTENT_LEN_STR); + /* + * RFC9112 2.2: + * In the interest of robustness, a server that is expecting to receive and + * parse a request-line SHOULD ignore at least one empty line (CRLF) + * received prior to the request-line. + */ + method_offset = hc->rx_buf[0] == '\r' && hc->rx_buf[1] == '\n' ? 
2 : 0; + /* parse method */ + if (!memcmp (hc->rx_buf + method_offset, "GET ", 4)) + { + HTTP_DBG (0, "GET method"); + hc->method = HTTP_REQ_GET; + hc->target_path_offset = method_offset + 4; + } + else if (!memcmp (hc->rx_buf + method_offset, "POST ", 5)) + { + HTTP_DBG (0, "POST method"); + hc->method = HTTP_REQ_POST; + hc->target_path_offset = method_offset + 5; + } + else if (!memcmp (hc->rx_buf + method_offset, "CONNECT ", 8)) + { + HTTP_DBG (0, "CONNECT method"); + hc->method = HTTP_REQ_CONNECT; + hc->target_path_offset = method_offset + 8; + hc->is_tunnel = 1; + } + else + { + if (hc->rx_buf[method_offset] - 'A' <= 'Z' - 'A') + { + clib_warning ("method not implemented: %8v", hc->rx_buf); + *ec = HTTP_STATUS_NOT_IMPLEMENTED; + return -1; + } + else + { + clib_warning ("not method name: %8v", hc->rx_buf); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + } + + /* find version */ + i = v_find_index (hc->rx_buf, next_line_offset - 11, 11, " HTTP/"); if (i < 0) { - clib_warning ("cannot find '%s' in the header!", CONTENT_LEN_STR); + clib_warning ("HTTP version not present"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + /* verify major version */ + if (isdigit (hc->rx_buf[i + 6])) + { + if (hc->rx_buf[i + 6] != '1') + { + clib_warning ("HTTP major version '%c' not supported", + hc->rx_buf[i + 6]); + *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED; + return -1; + } + } + else + { + clib_warning ("HTTP major version '%c' is not digit", hc->rx_buf[i + 6]); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + /* parse request-target */ + HTTP_DBG (2, "http at %d", i); + target_len = i - hc->target_path_offset; + HTTP_DBG (2, "target_len %d", target_len); + if (target_len < 1) + { + clib_warning ("request-target not present"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + hc->target_path_len = target_len; + hc->target_query_offset = 0; + hc->target_query_len = 0; + if (http_get_target_form (hc)) + { + clib_warning ("invalid target"); + *ec = 
HTTP_STATUS_BAD_REQUEST; return -1; } + HTTP_DBG (2, "request-target path length: %u", hc->target_path_len); + HTTP_DBG (2, "request-target path offset: %u", hc->target_path_offset); + HTTP_DBG (2, "request-target query length: %u", hc->target_query_len); + HTTP_DBG (2, "request-target query offset: %u", hc->target_query_offset); + + /* set buffer offset to nex line start */ + hc->rx_buf_offset = next_line_offset; + + return 0; +} + +#define expect_char(c) \ + if (*p++ != c) \ + { \ + clib_warning ("unexpected character"); \ + return -1; \ + } - hc->rx_buf_offset = i; +#define parse_int(val, mul) \ + do \ + { \ + if (!isdigit (*p)) \ + { \ + clib_warning ("expected digit"); \ + return -1; \ + } \ + val += mul * (*p++ - '0'); \ + } \ + while (0) - i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "\n"); +static int +http_parse_status_line (http_conn_t *hc) +{ + int i; + u32 next_line_offset; + u8 *p, *end; + u16 status_code = 0; + + i = v_find_index (hc->rx_buf, 0, 0, "\r\n"); + /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */ if (i < 0) { - clib_warning ("end of line missing; incomplete data"); + clib_warning ("status line incomplete"); + return -1; + } + HTTP_DBG (2, "status line length: %d", i); + if (i < 12) + { + clib_warning ("status line too short (%d)", i); + return -1; + } + hc->control_data_len = i + 2; + next_line_offset = hc->control_data_len; + p = hc->rx_buf; + end = hc->rx_buf + i; + + /* there should be at least one more CRLF */ + if (vec_len (hc->rx_buf) < (next_line_offset + 2)) + { + clib_warning ("malformed message, too short"); + return -1; + } + + /* parse version */ + expect_char ('H'); + expect_char ('T'); + expect_char ('T'); + expect_char ('P'); + expect_char ('/'); + expect_char ('1'); + expect_char ('.'); + if (!isdigit (*p++)) + { + clib_warning ("invalid HTTP minor version"); + return -1; + } + + /* skip space(s) */ + if (*p != ' ') + { + clib_warning ("no space after HTTP version"); + return -1; + } + do + { + 
p++; + if (p == end) + { + clib_warning ("no status code"); + return -1; + } + } + while (*p == ' '); + + /* parse status code */ + if ((end - p) < 3) + { + clib_warning ("not enough characters for status code"); return -1; } + parse_int (status_code, 100); + parse_int (status_code, 10); + parse_int (status_code, 1); + if (status_code < 100 || status_code > 599) + { + clib_warning ("invalid status code %d", status_code); + return -1; + } + hc->status_code = status_code; + HTTP_DBG (0, "status code: %d", hc->status_code); - len = i - hc->rx_buf_offset; - line = vec_new (u8, len); - clib_memcpy (line, hc->rx_buf + hc->rx_buf_offset, len); + /* set buffer offset to nex line start */ + hc->rx_buf_offset = next_line_offset; + + return 0; +} + +static int +http_identify_headers (http_conn_t *hc, http_status_code_t *ec) +{ + int i; - unformat_init_vector (&input, line); - if (!unformat (&input, CONTENT_LEN_STR "%d", content_length)) + /* check if we have any header */ + if ((hc->rx_buf[hc->rx_buf_offset] == '\r') && + (hc->rx_buf[hc->rx_buf_offset + 1] == '\n')) { - clib_warning ("failed to unformat content length!"); + /* just another CRLF -> no headers */ + HTTP_DBG (2, "no headers"); + hc->headers_len = 0; + hc->control_data_len += 2; + return 0; + } + + /* find empty line indicating end of header section */ + i = v_find_index (hc->rx_buf, hc->rx_buf_offset, 0, "\r\n\r\n"); + if (i < 0) + { + clib_warning ("cannot find header section end"); + *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - unformat_free (&input); + hc->headers_offset = hc->rx_buf_offset; + hc->headers_len = i - hc->rx_buf_offset + 2; + hc->control_data_len += (hc->headers_len + 2); + HTTP_DBG (2, "headers length: %u", hc->headers_len); + HTTP_DBG (2, "headers offset: %u", hc->headers_offset); + + return 0; +} + +static int +http_identify_message_body (http_conn_t *hc, http_status_code_t *ec) +{ + int i, value_len; + u8 *end, *p, *value_start; + u64 body_len = 0, digit; + + hc->body_len = 0; + + if 
(hc->headers_len == 0) + { + HTTP_DBG (2, "no header, no message-body"); + return 0; + } + if (hc->is_tunnel) + { + HTTP_DBG (2, "tunnel, no message-body"); + return 0; + } + + /* TODO check for chunked transfer coding */ - /* skip rest of the header */ - hc->rx_buf_offset += len; - i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "<html>"); + /* try to find Content-Length header */ + i = v_find_index (hc->rx_buf, hc->headers_offset, hc->headers_len, + "Content-Length:"); if (i < 0) { - clib_warning ("<html> tag not found"); + HTTP_DBG (2, "Content-Length header not present, no message-body"); + return 0; + } + hc->rx_buf_offset = i + 15; + + i = v_find_index (hc->rx_buf, hc->rx_buf_offset, hc->headers_len, "\r\n"); + if (i < 0) + { + clib_warning ("end of line missing"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + value_len = i - hc->rx_buf_offset; + if (value_len < 1) + { + clib_warning ("invalid header, content length value missing"); + *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - hc->rx_buf_offset = i; + + end = hc->rx_buf + hc->rx_buf_offset + value_len; + p = hc->rx_buf + hc->rx_buf_offset; + /* skip leading whitespace */ + while (1) + { + if (p == end) + { + clib_warning ("value not found"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + else if (*p != ' ' && *p != '\t') + { + break; + } + p++; + value_len--; + } + value_start = p; + /* skip trailing whitespace */ + p = value_start + value_len - 1; + while (*p == ' ' || *p == '\t') + { + p--; + value_len--; + } + + if (value_len < 1) + { + clib_warning ("value not found"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + p = value_start; + for (i = 0; i < value_len; i++) + { + /* check for digit */ + if (!isdigit (*p)) + { + clib_warning ("expected digit"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + digit = *p - '0'; + u64 new_body_len = body_len * 10 + digit; + /* check for overflow */ + if (new_body_len < body_len) + { + clib_warning ("too big number, overflow"); + *ec = 
HTTP_STATUS_BAD_REQUEST; + return -1; + } + body_len = new_body_len; + p++; + } + + hc->body_len = body_len; + + hc->body_offset = hc->headers_offset + hc->headers_len + 2; + HTTP_DBG (2, "body length: %llu", hc->body_len); + HTTP_DBG (2, "body offset: %u", hc->body_offset); return 0; } @@ -516,92 +1039,95 @@ http_parse_header (http_conn_t *hc, int *content_length) static http_sm_result_t http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) { - int i, rv, content_length; + int rv; http_msg_t msg = {}; app_worker_t *app_wrk; session_t *as; + u32 len, max_enq, body_sent; http_status_code_t ec; + http_main_t *hm = &http_main; rv = http_read_message (hc); /* Nothing yet, wait for data or timer expire */ if (rv) - return HTTP_SM_STOP; + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } + + HTTP_DBG (3, "%v", hc->rx_buf); if (vec_len (hc->rx_buf) < 8) { - ec = HTTP_STATUS_BAD_REQUEST; + clib_warning ("response buffer too short"); goto error; } - if ((i = v_find_index (hc->rx_buf, 0, "200 OK")) >= 0) - { - msg.type = HTTP_MSG_REPLY; - msg.content_type = HTTP_CONTENT_TEXT_HTML; - msg.code = HTTP_STATUS_OK; - msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = 0; + rv = http_parse_status_line (hc); + if (rv) + goto error; - rv = http_parse_header (hc, &content_length); - if (rv) - { - clib_warning ("failed to parse http reply"); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - return -1; - } - msg.data.len = content_length; - u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset; - as = session_get_from_handle (hc->h_pa_session_handle); - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, - { &hc->rx_buf[hc->rx_buf_offset], dlen } }; - - rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, - 0 /* allow partial */); - if (rv < 0) - { - clib_warning ("error enqueue"); - return HTTP_SM_ERROR; - } + rv = http_identify_headers (hc, &ec); + if (rv) + goto error; - hc->rx_buf_offset += dlen; - 
hc->to_recv = content_length - dlen; + rv = http_identify_message_body (hc, &ec); + if (rv) + goto error; - if (hc->rx_buf_offset == vec_len (hc->rx_buf)) - { - vec_reset_length (hc->rx_buf); - hc->rx_buf_offset = 0; - } + /* send at least "control data" which is necessary minimum, + * if there is some space send also portion of body */ + as = session_get_from_handle (hc->h_pa_session_handle); + max_enq = svm_fifo_max_enqueue (as->rx_fifo); + max_enq -= sizeof (msg); + if (max_enq < hc->control_data_len) + { + clib_warning ("not enough room for control data in app's rx fifo"); + goto error; + } + len = clib_min (max_enq, vec_len (hc->rx_buf)); + + msg.type = HTTP_MSG_REPLY; + msg.code = hm->sc_by_u16[hc->status_code]; + msg.data.headers_offset = hc->headers_offset; + msg.data.headers_len = hc->headers_len; + msg.data.body_offset = hc->body_offset; + msg.data.body_len = hc->body_len; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = len; - if (hc->to_recv == 0) - { - hc->rx_buf_offset = 0; - vec_reset_length (hc->rx_buf); - http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); - } - else - { - http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA); - } + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, + { hc->rx_buf, len } }; - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - return HTTP_SM_STOP; + rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */); + ASSERT (rv == (sizeof (msg) + len)); + + http_read_message_drop (hc, len); + + body_sent = len - hc->control_data_len; + hc->to_recv = hc->body_len - body_sent; + if (hc->to_recv == 0) + { + /* all sent, we are done */ + http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); } else { - HTTP_DBG (0, "Unknown http method %v", hc->rx_buf); - ec = HTTP_STATUS_METHOD_NOT_ALLOWED; - goto error; + /* stream rest of the response body */ + http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA); } -error: + app_wrk = 
app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); + return HTTP_SM_STOP; - http_send_error (hc, ec); +error: + http_read_message_drop_all (hc); session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); http_disconnect_transport (hc); - return HTTP_SM_ERROR; } @@ -612,9 +1138,9 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp) app_worker_t *app_wrk; http_msg_t msg; session_t *as; - int i, rv; - u32 len; - u8 *buf; + int rv; + u32 len, max_enq, body_sent; + u64 max_deq; rv = http_read_message (hc); @@ -622,64 +1148,76 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp) if (rv) return HTTP_SM_STOP; + HTTP_DBG (3, "%v", hc->rx_buf); + if (vec_len (hc->rx_buf) < 8) { ec = HTTP_STATUS_BAD_REQUEST; goto error; } - if ((i = v_find_index (hc->rx_buf, 0, "GET ")) >= 0) - { - hc->method = HTTP_REQ_GET; - hc->rx_buf_offset = i + 5; + rv = http_parse_request_line (hc, &ec); + if (rv) + goto error; - i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "HTTP"); - if (i < 0) - { - ec = HTTP_STATUS_BAD_REQUEST; - goto error; - } + rv = http_identify_headers (hc, &ec); + if (rv) + goto error; - HTTP_DBG (0, "GET method %v", hc->rx_buf); - len = i - hc->rx_buf_offset - 1; - } - else if ((i = v_find_index (hc->rx_buf, 0, "POST ")) >= 0) - { - hc->method = HTTP_REQ_POST; - hc->rx_buf_offset = i + 6; - len = vec_len (hc->rx_buf) - hc->rx_buf_offset - 1; - HTTP_DBG (0, "POST method %v", hc->rx_buf); - } - else + rv = http_identify_message_body (hc, &ec); + if (rv) + goto error; + + /* send at least "control data" which is necessary minimum, + * if there is some space send also portion of body */ + as = session_get_from_handle (hc->h_pa_session_handle); + max_enq = svm_fifo_max_enqueue (as->rx_fifo); + if (max_enq < hc->control_data_len) { - HTTP_DBG (0, "Unknown http method %v", hc->rx_buf); - ec = HTTP_STATUS_METHOD_NOT_ALLOWED; + 
clib_warning ("not enough room for control data in app's rx fifo"); + ec = HTTP_STATUS_INTERNAL_ERROR; goto error; } - - buf = &hc->rx_buf[hc->rx_buf_offset]; + /* do not dequeue more than one HTTP request, we do not support pipelining */ + max_deq = + clib_min (hc->control_data_len + hc->body_len, vec_len (hc->rx_buf)); + len = clib_min (max_enq, max_deq); msg.type = HTTP_MSG_REQUEST; msg.method_type = hc->method; - msg.content_type = HTTP_CONTENT_TEXT_HTML; msg.data.type = HTTP_MSG_DATA_INLINE; msg.data.len = len; + msg.data.target_form = hc->target_form; + msg.data.target_path_offset = hc->target_path_offset; + msg.data.target_path_len = hc->target_path_len; + msg.data.target_query_offset = hc->target_query_offset; + msg.data.target_query_len = hc->target_query_len; + msg.data.headers_offset = hc->headers_offset; + msg.data.headers_len = hc->headers_len; + msg.data.body_offset = hc->body_offset; + msg.data.body_len = hc->body_len; + + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, + { hc->rx_buf, len } }; - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { buf, len } }; - - as = session_get_from_handle (hc->h_pa_session_handle); rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */); - if (rv < 0 || rv != sizeof (msg) + len) + ASSERT (rv == (sizeof (msg) + len)); + + body_sent = len - hc->control_data_len; + hc->to_recv = hc->body_len - body_sent; + if (hc->to_recv == 0) { - clib_warning ("failed app enqueue"); - /* This should not happen as we only handle 1 request per session, - * and fifo is allocated, but going forward we should consider - * rescheduling */ - return HTTP_SM_ERROR; + /* drop everything, we do not support pipelining */ + http_read_message_drop_all (hc); + /* all sent, we are done */ + http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY); + } + else + { + http_read_message_drop (hc, len); + /* stream rest of the response body */ + http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA); } - - vec_free 
(hc->rx_buf); - http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY); app_wrk = app_worker_get_if_valid (as->app_wrk_index); if (app_wrk) @@ -688,7 +1226,7 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp) return HTTP_SM_STOP; error: - + http_read_message_drop_all (hc); http_send_error (hc, ec); session_transport_closing_notify (&hc->connection); http_disconnect_transport (hc); @@ -700,13 +1238,14 @@ static http_sm_result_t http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) { http_main_t *hm = &http_main; - u8 *header; - u32 offset; + u8 *response; + u32 sent; f64 now; session_t *as; http_status_code_t sc; http_msg_t msg; int rv; + http_sm_result_t sm_result = HTTP_SM_ERROR; as = session_get_from_handle (hc->h_pa_session_handle); @@ -727,60 +1266,97 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) goto error; } - http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo, - msg.data.len); + if (msg.code >= HTTP_N_STATUS) + { + clib_warning ("unsupported status code: %d", msg.code); + return HTTP_SM_ERROR; + } /* - * Add headers. For now: + * Add "protocol layer" headers: * - current time - * - expiration time - * - content type + * - server name * - data length */ now = clib_timebase_now (&hm->timebase); + response = format (0, http_response_template, http_status_code_str[msg.code], + /* Date */ + format_clib_timebase_time, now, + /* Server */ + hc->app_name); + + /* RFC9110 9.3.6: A server MUST NOT send Content-Length header field in a + * 2xx (Successful) response to CONNECT. 
*/ + if (hc->is_tunnel && http_status_code_str[msg.code][0] == '2') + { + ASSERT (msg.data.body_len == 0); + hc->state = HTTP_CONN_STATE_TUNNEL; + /* cleanup some stuff we don't need anymore in tunnel mode */ + http_conn_timer_stop (hc); + vec_free (hc->rx_buf); + http_buffer_free (&hc->tx_buf); + } + else + response = format (response, content_len_template, msg.data.body_len); - switch (msg.code) - { - case HTTP_STATUS_OK: - header = - format (0, http_response_template, http_status_code_str[msg.code], - /* Date */ - format_clib_timebase_time, now, - /* Expires */ - format_clib_timebase_time, now + 600.0, - /* Content type */ - http_content_type_str[msg.content_type], - /* Length */ - msg.data.len); - break; - case HTTP_STATUS_MOVED: - header = - format (0, http_redirect_template, http_status_code_str[msg.code]); - /* Location: http(s)://new-place already queued up as data */ - break; - default: - return HTTP_SM_ERROR; + /* Add headers from app (if any) */ + if (msg.data.headers_len) + { + HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); + if (msg.data.type == HTTP_MSG_DATA_PTR) + { + uword app_headers_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), + (u8 *) &app_headers_ptr); + ASSERT (rv == sizeof (app_headers_ptr)); + vec_append (response, uword_to_pointer (app_headers_ptr, u8 *)); + } + else + { + u32 orig_len = vec_len (response); + vec_resize (response, msg.data.headers_len); + u8 *p = response + orig_len; + rv = svm_fifo_dequeue (as->tx_fifo, msg.data.headers_len, p); + ASSERT (rv == msg.data.headers_len); + } } + else + { + /* No headers from app */ + response = format (response, "\r\n"); + } + HTTP_DBG (3, "%v", response); - offset = http_send_data (hc, header, vec_len (header), 0); - if (offset != vec_len (header)) + sent = http_send_data (hc, response, vec_len (response)); + if (sent != vec_len (response)) { - clib_warning ("couldn't send response header!"); + clib_warning ("sending status-line and headers 
failed!"); sc = HTTP_STATUS_INTERNAL_ERROR; - vec_free (header); + vec_free (response); goto error; } - vec_free (header); + vec_free (response); - /* Start sending the actual data */ - http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA); + if (msg.data.body_len) + { + /* Start sending the actual data */ + http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], + as->tx_fifo, msg.data.body_len); + http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA); + sm_result = HTTP_SM_CONTINUE; + } + else + { + /* No response body, we are done */ + http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); + sm_result = HTTP_SM_STOP; + } - ASSERT (sp->max_burst_size >= offset); - sp->max_burst_size -= offset; - return HTTP_SM_CONTINUE; + ASSERT (sp->max_burst_size >= sent); + sp->max_burst_size -= sent; + return sm_result; error: - clib_warning ("unexpected msg type from app %u", msg.type); http_send_error (hc, sc); http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); session_transport_closing_notify (&hc->connection); @@ -793,9 +1369,11 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) { http_msg_t msg; session_t *as; - u8 *buf = 0, *request; - u32 offset; + u8 *target_buff = 0, *request = 0, *target; + u32 sent; int rv; + http_sm_result_t sm_result = HTTP_SM_ERROR; + http_state_t next_state; as = session_get_from_handle (hc->h_pa_session_handle); @@ -814,29 +1392,131 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) goto error; } - vec_validate (buf, msg.data.len - 1); - rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf); - ASSERT (rv == msg.data.len); + /* read request target */ + if (msg.data.type == HTTP_MSG_DATA_PTR) + { + uword target_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr), + (u8 *) &target_ptr); + ASSERT (rv == sizeof (target_ptr)); + target = uword_to_pointer (target_ptr, u8 *); + } + else + { + vec_validate (target_buff, msg.data.target_path_len - 1); + rv = + svm_fifo_dequeue 
(as->tx_fifo, msg.data.target_path_len, target_buff); + ASSERT (rv == msg.data.target_path_len); + target = target_buff; + } - request = format (0, http_request_template, buf); - offset = http_send_data (hc, request, vec_len (request), 0); - if (offset != vec_len (request)) + /* currently we support only GET and POST method */ + if (msg.method_type == HTTP_REQ_GET) + { + if (msg.data.body_len) + { + clib_warning ("GET request shouldn't include data"); + goto error; + } + /* + * Add "protocol layer" headers: + * - host + * - user agent + */ + request = format (0, http_get_request_template, + /* target */ + target, + /* Host */ + hc->host, + /* User-Agent */ + hc->app_name, + /* Any headers from app? */ + msg.data.headers_len ? "" : "\r\n"); + + next_state = HTTP_STATE_WAIT_SERVER_REPLY; + sm_result = HTTP_SM_STOP; + } + else if (msg.method_type == HTTP_REQ_POST) { - clib_warning ("sending request failed!"); + if (!msg.data.body_len) + { + clib_warning ("POST request should include data"); + goto error; + } + /* + * Add "protocol layer" headers: + * - host + * - user agent + * - content length + */ + request = format (0, http_post_request_template, + /* target */ + target, + /* Host */ + hc->host, + /* User-Agent */ + hc->app_name, + /* Content-Length */ + msg.data.body_len, + /* Any headers from app? */ + msg.data.headers_len ? 
"" : "\r\n"); + + http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], + as->tx_fifo, msg.data.body_len); + + next_state = HTTP_STATE_APP_IO_MORE_DATA; + sm_result = HTTP_SM_CONTINUE; + } + else + { + clib_warning ("unsupported method %d", msg.method_type); goto error; } - http_state_change (hc, HTTP_STATE_WAIT_SERVER_REPLY); + /* Add headers from app (if any) */ + if (msg.data.headers_len) + { + HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); + if (msg.data.type == HTTP_MSG_DATA_PTR) + { + uword app_headers_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), + (u8 *) &app_headers_ptr); + ASSERT (rv == sizeof (app_headers_ptr)); + vec_append (request, uword_to_pointer (app_headers_ptr, u8 *)); + } + else + { + u32 orig_len = vec_len (request); + vec_resize (request, msg.data.headers_len); + u8 *p = request + orig_len; + rv = svm_fifo_dequeue (as->tx_fifo, msg.data.headers_len, p); + ASSERT (rv == msg.data.headers_len); + } + } + HTTP_DBG (3, "%v", request); - vec_free (buf); - vec_free (request); + sent = http_send_data (hc, request, vec_len (request)); + if (sent != vec_len (request)) + { + clib_warning ("sending request-line and headers failed!"); + sm_result = HTTP_SM_ERROR; + goto error; + } - return HTTP_SM_STOP; + http_state_change (hc, next_state); + goto done; error: + svm_fifo_dequeue_drop_all (as->tx_fifo); session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); http_disconnect_transport (hc); - return HTTP_SM_ERROR; + +done: + vec_free (target_buff); + vec_free (request); + return sm_result; } static http_sm_result_t @@ -891,7 +1571,14 @@ http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp) return HTTP_SM_ERROR; } hc->to_recv -= rv; - HTTP_DBG (1, "drained %d from ts; remains %d", rv, hc->to_recv); + HTTP_DBG (1, "drained %d from ts; remains %lu", rv, hc->to_recv); + + /* Finished transaction: + * server back to 
HTTP_STATE_WAIT_APP_REPLY + * client to HTTP_STATE_WAIT_APP_METHOD */ + if (hc->to_recv == 0) + http_state_change (hc, hc->is_server ? HTTP_STATE_WAIT_APP_REPLY : + HTTP_STATE_WAIT_APP_METHOD); app_wrk = app_worker_get_if_valid (as->app_wrk_index); if (app_wrk) @@ -929,7 +1616,7 @@ http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) if (!http_buffer_is_drained (hb)) { if (sent && svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) { @@ -943,10 +1630,13 @@ http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) else { if (sent && svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX_FLUSH); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH); - /* Finished transaction, back to HTTP_STATE_WAIT_METHOD */ - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); + /* Finished transaction: + * server back to HTTP_STATE_WAIT_METHOD + * client to HTTP_STATE_WAIT_SERVER_REPLY */ + http_state_change (hc, hc->is_server ? 
HTTP_STATE_WAIT_CLIENT_METHOD : + HTTP_STATE_WAIT_SERVER_REPLY); http_buffer_free (&hc->tx_buf); } @@ -987,23 +1677,75 @@ http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp) } static int +http_tunnel_rx (session_t *ts, http_conn_t *hc) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written = 0; + session_t *as; + app_worker_t *app_wrk; + + HTTP_DBG (1, "tunnel received data from client"); + + as = session_get_from_handle (hc->h_pa_session_handle); + + max_deq = svm_fifo_max_dequeue (ts->rx_fifo); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + return 0; + } + max_enq = svm_fifo_max_enqueue (as->rx_fifo); + if (max_enq == 0) + { + HTTP_DBG (1, "app's rx fifo full"); + svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + return 0; + } + max_read = clib_min (max_enq, max_deq); + svm_fifo_segments (ts->rx_fifo, 0, segs, &n_segs, max_read); + n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + HTTP_DBG (1, "transfered %u bytes", n_written); + svm_fifo_dequeue_drop (ts->rx_fifo, n_written); + app_wrk = app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); + if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) + session_program_rx_io_evt (session_handle (ts)); + + return 0; +} + +static int http_ts_rx_callback (session_t *ts) { http_conn_t *hc; + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, ts->opaque); + hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - if (!hc) + + if (hc->state == HTTP_CONN_STATE_CLOSED) { - clib_warning ("http connection not found (ts %d)", ts->opaque); - return -1; + HTTP_DBG (1, "conn closed"); + svm_fifo_dequeue_drop_all (ts->tx_fifo); + return 0; } - if (hc->state == HTTP_CONN_STATE_CLOSED) + if (hc->state == HTTP_CONN_STATE_TUNNEL) + return http_tunnel_rx (ts, hc); + + if (!http_state_is_rx_valid (hc)) { + if (hc->state != HTTP_CONN_STATE_CLOSED) + 
clib_warning ("app data req state '%U' session state %u", + format_http_state, hc->http_state, hc->state); svm_fifo_dequeue_drop_all (ts->tx_fifo); return 0; } + HTTP_DBG (1, "run state machine"); http_req_run_state_machine (hc, 0); if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED) @@ -1020,6 +1762,7 @@ http_ts_builtin_tx_callback (session_t *ts) http_conn_t *hc; hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + HTTP_DBG (1, "transport connection reschedule"); transport_connection_reschedule (&hc->connection); return 0; @@ -1034,21 +1777,36 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) return; hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - if (!hc) - { - clib_warning ("no http connection for %u", ts->session_index); - return; - } + + HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, ts->opaque); vec_free (hc->rx_buf); http_buffer_free (&hc->tx_buf); - http_conn_timer_stop (hc); + + if (hc->pending_timer == 0) + http_conn_timer_stop (hc); session_transport_delete_notify (&hc->connection); + + if (!hc->is_server) + { + vec_free (hc->app_name); + vec_free (hc->host); + } http_conn_free (hc); } +static void +http_ts_ho_cleanup_callback (session_t *ts) +{ + http_conn_t *ho_hc; + HTTP_DBG (1, "half open: %x", ts->opaque); + ho_hc = http_ho_conn_get (ts->opaque); + session_half_open_delete_notify (&ho_hc->connection); + http_ho_conn_free (ho_hc); +} + int http_add_segment_callback (u32 client_index, u64 segment_handle) { @@ -1068,6 +1826,7 @@ static session_cb_vft_t http_app_cb_vft = { .session_connected_callback = http_ts_connected_callback, .session_reset_callback = http_ts_reset_callback, .session_cleanup_callback = http_ts_cleanup_callback, + .half_open_cleanup_callback = http_ts_ho_cleanup_callback, .add_segment_callback = http_add_segment_callback, .del_segment_callback = http_del_segment_callback, .builtin_app_rx_callback = http_ts_rx_callback, @@ -1090,8 +1849,6 @@ http_transport_enable (vlib_main_t *vm, 
u8 is_en) return 0; } - vec_validate (hm->wrk, vlib_num_workers ()); - clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -1113,10 +1870,16 @@ http_transport_enable (vlib_main_t *vm, u8 is_en) hm->app_index = a->app_index; vec_free (a->name); + if (hm->is_init) + return 0; + + vec_validate (hm->wrk, vlib_num_workers ()); + clib_timebase_init (&hm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE, &vm->clib_time /* share the system clock */); - http_timers_init (vm, http_conn_timeout_cb); + http_timers_init (vm, http_conn_timeout_cb, http_conn_invalidate_timer_cb); + hm->is_init = 1; return 0; } @@ -1131,6 +1894,8 @@ http_transport_connect (transport_endpoint_cfg_t *tep) http_conn_t *hc; int error; u32 hc_index; + session_t *ho; + transport_endpt_ext_cfg_t *ext_cfg; app_worker_t *app_wrk = app_worker_get (sep->app_wrk_index); clib_memset (cargs, 0, sizeof (*cargs)); @@ -1140,18 +1905,48 @@ http_transport_connect (transport_endpoint_cfg_t *tep) app = application_get (app_wrk->app_index); cargs->sep_ext.ns_index = app->ns_index; - hc_index = http_conn_alloc_w_thread (0 /* ts->thread_index */); - hc = http_conn_get_w_thread (hc_index, 0); + hc_index = http_ho_conn_alloc (); + hc = http_ho_conn_get (hc_index); hc->h_pa_wrk_index = sep->app_wrk_index; hc->h_pa_app_api_ctx = sep->opaque; hc->state = HTTP_CONN_STATE_CONNECTING; cargs->api_context = hc_index; + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); + if (ext_cfg) + { + HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque); + hc->timeout = ext_cfg->opaque; + } + + hc->is_server = 0; + + if (vec_len (app->name)) + hc->app_name = vec_dup (app->name); + else + hc->app_name = format (0, "VPP HTTP client"); + + if (sep->is_ip4) + hc->host = format (0, "%U:%d", format_ip4_address, &sep->ip.ip4, + clib_net_to_host_u16 (sep->port)); + else + hc->host = format (0, "%U:%d", format_ip6_address, &sep->ip.ip6, + clib_net_to_host_u16 (sep->port)); + HTTP_DBG (1, "hc 
ho_index %x", hc_index); if ((error = vnet_connect (cargs))) return error; + ho = session_alloc_for_half_open (&hc->connection); + ho->app_wrk_index = app_wrk->wrk_index; + ho->ho_index = app_worker_add_half_open (app_wrk, session_handle (ho)); + ho->opaque = sep->opaque; + ho->session_type = + session_type_from_proto_and_ip (TRANSPORT_PROTO_HTTP, sep->is_ip4); + hc->h_tc_session_handle = cargs->sh; + hc->c_s_index = ho->session_index; + return 0; } @@ -1163,11 +1958,12 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) http_main_t *hm = &http_main; session_endpoint_cfg_t *sep; app_worker_t *app_wrk; - transport_proto_t tp; + transport_proto_t tp = TRANSPORT_PROTO_TCP; app_listener_t *al; application_t *app; http_conn_t *lhc; u32 lhc_index; + transport_endpt_ext_cfg_t *ext_cfg; sep = (session_endpoint_cfg_t *) tep; @@ -1177,7 +1973,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) args->app_index = hm->app_index; args->sep_ext = *sep; args->sep_ext.ns_index = app->ns_index; - tp = sep->ext_cfg ? 
TRANSPORT_PROTO_TLS : TRANSPORT_PROTO_TCP; + + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (ext_cfg) + { + HTTP_DBG (1, "app set tls"); + tp = TRANSPORT_PROTO_TLS; + } args->sep_ext.transport_proto = tp; if (vnet_listen (args)) @@ -1186,6 +1988,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lhc_index = http_listener_alloc (); lhc = http_listener_get (lhc_index); + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); + if (ext_cfg && ext_cfg->opaque) + { + HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque); + lhc->timeout = ext_cfg->opaque; + } + /* Grab transport connection listener and link to http listener */ lhc->h_tc_session_handle = args->handle; al = app_listener_get_w_handle (lhc->h_tc_session_handle); @@ -1199,6 +2008,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lhc->c_s_index = app_listener_index; lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; + lhc->is_server = 1; + + if (vec_len (app->name)) + lhc->app_name = vec_dup (app->name); + else + lhc->app_name = format (0, "VPP server app"); + return lhc_index; } @@ -1230,7 +2046,7 @@ http_transport_close (u32 hc_index, u32 thread_index) session_t *as; http_conn_t *hc; - HTTP_DBG (1, "App disconnecting %x", hc_index); + HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index); hc = http_conn_get_w_thread (hc_index, thread_index); if (hc->state == HTTP_CONN_STATE_CONNECTING) @@ -1239,7 +2055,11 @@ http_transport_close (u32 hc_index, u32 thread_index) http_disconnect_transport (hc); return; } - + else if (hc->state == HTTP_CONN_STATE_CLOSED) + { + HTTP_DBG (1, "nothing to do, already closed"); + return; + } as = session_get_from_handle (hc->h_pa_session_handle); /* Nothing more to send, confirm close */ @@ -1270,27 +2090,84 @@ http_transport_get_listener (u32 listener_index) } static int +http_tunnel_tx (http_conn_t *hc, session_t *as, transport_send_params_t *sp) +{ + u32 
max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + session_t *ts; + int n_written = 0; + + HTTP_DBG (1, "tunnel received data from target"); + + ts = session_get_from_handle (hc->h_tc_session_handle); + + max_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + goto check_fifo; + } + max_enq = svm_fifo_max_enqueue_prod (ts->tx_fifo); + if (max_enq == 0) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + max_read = clib_min (max_enq, max_deq); + max_read = clib_min (max_read, sp->max_burst_size); + svm_fifo_segments (as->tx_fifo, 0, segs, &n_segs, max_read); + n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + HTTP_DBG (1, "transfered %u bytes", n_written); + sp->bytes_dequeued += n_written; + sp->max_burst_size -= n_written; + svm_fifo_dequeue_drop (as->tx_fifo, n_written); + if (svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + +check_fifo: + /* Deschedule and wait for deq notification if ts fifo is almost full */ + if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) + { + svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + transport_connection_deschedule (&hc->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + } + + return n_written > 0 ? 
clib_max (n_written / TRANSPORT_PACER_MIN_MSS, 1) : 0; +} + +static int http_app_tx_callback (void *session, transport_send_params_t *sp) { session_t *as = (session_t *) session; u32 max_burst_sz, sent; http_conn_t *hc; - HTTP_DBG (1, "app session conn index %x", as->connection_index); + HTTP_DBG (1, "hc [%u]%x", as->thread_index, as->connection_index); hc = http_conn_get_w_thread (as->connection_index, as->thread_index); + + max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; + sp->max_burst_size = max_burst_sz; + + if (hc->state == HTTP_CONN_STATE_TUNNEL) + return http_tunnel_tx (hc, as, sp); + if (!http_state_is_tx_valid (hc)) { if (hc->state != HTTP_CONN_STATE_CLOSED) - clib_warning ("app data req state '%U' session state %u", - format_http_state, hc->http_state, hc->state); + { + clib_warning ("hc [%u]%x invalid tx state http state " + "'%U', session state %u", + as->thread_index, as->connection_index, + format_http_state, hc->http_state, hc->state); + } svm_fifo_dequeue_drop_all (as->tx_fifo); return 0; } - max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; - sp->max_burst_size = max_burst_sz; - + HTTP_DBG (1, "run state machine"); http_req_run_state_machine (hc, sp); if (hc->state == HTTP_CONN_STATE_APP_CLOSED) @@ -1304,6 +2181,19 @@ http_app_tx_callback (void *session, transport_send_params_t *sp) return sent > 0 ? 
clib_max (sent / TRANSPORT_PACER_MIN_MSS, 1) : 0; } +static int +http_app_rx_evt_cb (transport_connection_t *tc) +{ + http_conn_t *hc = (http_conn_t *) tc; + HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), hc->h_hc_index); + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + + if (hc->state == HTTP_CONN_STATE_TUNNEL) + return http_tunnel_rx (ts, hc); + + return 0; +} + static void http_transport_get_endpoint (u32 hc_index, u32 thread_index, transport_endpoint_t *tep, u8 is_lcl) @@ -1361,6 +2251,9 @@ format_http_conn_state (u8 *s, va_list *args) case HTTP_CONN_STATE_ESTABLISHED: s = format (s, "ESTABLISHED"); break; + case HTTP_CONN_STATE_TUNNEL: + s = format (s, "TUNNEL"); + break; case HTTP_CONN_STATE_TRANSPORT_CLOSED: s = format (s, "TRANSPORT_CLOSED"); break; @@ -1412,18 +2305,61 @@ format_http_transport_listener (u8 *s, va_list *args) return s; } +static u8 * +format_http_transport_half_open (u8 *s, va_list *args) +{ + u32 ho_index = va_arg (*args, u32); + u32 __clib_unused thread_index = va_arg (*args, u32); + u32 __clib_unused verbose = va_arg (*args, u32); + http_conn_t *ho_hc; + session_t *tcp_ho; + + ho_hc = http_ho_conn_get (ho_index); + tcp_ho = session_get_from_handle (ho_hc->h_tc_session_handle); + + s = format (s, "[%d:%d][H] half-open app_wrk %u ts %d:%d", + ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->h_pa_wrk_index, + tcp_ho->thread_index, tcp_ho->session_index); + return s; +} + +static transport_connection_t * +http_transport_get_ho (u32 ho_hc_index) +{ + http_conn_t *ho_hc; + + HTTP_DBG (1, "half open: %x", ho_hc_index); + ho_hc = http_ho_conn_get (ho_hc_index); + return &ho_hc->connection; +} + +static void +http_transport_cleanup_ho (u32 ho_hc_index) +{ + http_conn_t *ho_hc; + + HTTP_DBG (1, "half open: %x", ho_hc_index); + ho_hc = http_ho_conn_get (ho_hc_index); + session_cleanup_half_open (ho_hc->h_tc_session_handle); + http_ho_conn_free (ho_hc); +} + static const transport_proto_vft_t http_proto = { .enable = 
http_transport_enable, .connect = http_transport_connect, .start_listen = http_start_listen, .stop_listen = http_stop_listen, .close = http_transport_close, + .cleanup_ho = http_transport_cleanup_ho, .custom_tx = http_app_tx_callback, + .app_rx_evt = http_app_rx_evt_cb, .get_connection = http_transport_get_connection, .get_listener = http_transport_get_listener, + .get_half_open = http_transport_get_ho, .get_transport_endpoint = http_transport_get_endpoint, .format_connection = format_http_transport_connection, .format_listener = format_http_transport_listener, + .format_half_open = format_http_transport_half_open, .transport_options = { .name = "http", .short_name = "H", @@ -1436,6 +2372,7 @@ static clib_error_t * http_transport_init (vlib_main_t *vm) { http_main_t *hm = &http_main; + int i; transport_register_protocol (TRANSPORT_PROTO_HTTP, &http_proto, FIB_PROTOCOL_IP4, ~0); @@ -1447,7 +2384,26 @@ http_transport_init (vlib_main_t *vm) hm->first_seg_size = 32 << 20; hm->fifo_size = 512 << 10; - return 0; + /* Setup u16 to http_status_code_t map */ + /* Unrecognized status code is equivalent to the x00 status */ + vec_validate (hm->sc_by_u16, 599); + for (i = 100; i < 200; i++) + hm->sc_by_u16[i] = HTTP_STATUS_CONTINUE; + for (i = 200; i < 300; i++) + hm->sc_by_u16[i] = HTTP_STATUS_OK; + for (i = 300; i < 400; i++) + hm->sc_by_u16[i] = HTTP_STATUS_MULTIPLE_CHOICES; + for (i = 400; i < 500; i++) + hm->sc_by_u16[i] = HTTP_STATUS_BAD_REQUEST; + for (i = 500; i < 600; i++) + hm->sc_by_u16[i] = HTTP_STATUS_INTERNAL_ERROR; + + /* Registered status codes */ +#define _(c, s, str) hm->sc_by_u16[c] = HTTP_STATUS_##s; + foreach_http_status_code +#undef _ + + return 0; } VLIB_INIT_FUNCTION (http_transport_init); diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h index dbae5ac4611..a117f374efa 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -16,6 +16,8 @@ #ifndef SRC_PLUGINS_HTTP_HTTP_H_ #define SRC_PLUGINS_HTTP_HTTP_H_ +#include <ctype.h> + 
#include <vnet/plugin/plugin.h> #include <vpp/app/version.h> @@ -49,11 +51,20 @@ typedef struct http_conn_id_ STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN, "ctx id must be less than TRANSPORT_CONN_ID_LEN"); +typedef struct +{ + char *base; + uword len; +} http_token_t; + +#define http_token_lit(s) (s), sizeof (s) - 1 + typedef enum http_conn_state_ { HTTP_CONN_STATE_LISTEN, HTTP_CONN_STATE_CONNECTING, HTTP_CONN_STATE_ESTABLISHED, + HTTP_CONN_STATE_TUNNEL, HTTP_CONN_STATE_TRANSPORT_CLOSED, HTTP_CONN_STATE_APP_CLOSED, HTTP_CONN_STATE_CLOSED @@ -75,6 +86,7 @@ typedef enum http_req_method_ { HTTP_REQ_GET = 0, HTTP_REQ_POST, + HTTP_REQ_CONNECT, } http_req_method_t; typedef enum http_msg_type_ @@ -83,86 +95,96 @@ typedef enum http_msg_type_ HTTP_MSG_REPLY } http_msg_type_t; +typedef enum http_target_form_ +{ + HTTP_TARGET_ORIGIN_FORM, + HTTP_TARGET_ABSOLUTE_FORM, + HTTP_TARGET_AUTHORITY_FORM, + HTTP_TARGET_ASTERISK_FORM +} http_target_form_t; + #define foreach_http_content_type \ - _ (APP_7Z, ".7z", "application / x - 7z - compressed") \ - _ (APP_DOC, ".doc", "application / msword") \ + _ (APP_7Z, ".7z", "application/x-7z-compressed") \ + _ (APP_DOC, ".doc", "application/msword") \ _ (APP_DOCX, ".docx", \ - "application / vnd.openxmlformats - " \ + "application/vnd.openxmlformats-" \ "officedocument.wordprocessingml.document") \ - _ (APP_EPUB, ".epub", "application / epub + zip") \ - _ (APP_FONT, ".eot", "application / vnd.ms - fontobject") \ - _ (APP_JAR, ".jar", "application / java - archive") \ - _ (APP_JSON, ".json", "application / json") \ - _ (APP_JSON_LD, ".jsonld", "application / ld + json") \ - _ (APP_MPKG, ".mpkg", "application / vnd.apple.installer + xml") \ - _ (APP_ODP, ".odp", "application / vnd.oasis.opendocument.presentation") \ - _ (APP_ODS, ".ods", "application / vnd.oasis.opendocument.spreadsheet") \ - _ (APP_ODT, ".odt", "application / vnd.oasis.opendocument.text") \ - _ (APP_OGX, ".ogx", "application / ogg") \ - _ (APP_PDF, ".pdf", 
"application / pdf") \ - _ (APP_PHP, ".php", "application / x - httpd - php") \ - _ (APP_PPT, ".ppt", "application / vnd.ms - powerpoint") \ - _ (APP_PPTX, ".pptx", "application / vnd.ms - powerpoint") \ - _ (APP_RAR, ".rar", "application / vnd.rar") \ - _ (APP_RTF, ".rtf", "application / rtf") \ - _ (APP_SH, ".sh", "application / x - sh") \ - _ (APP_TAR, ".tar", "application / x - tar") \ - _ (APP_VSD, ".vsd", "application / vnd.visio") \ - _ (APP_XHTML, ".xhtml", "application / xhtml + xml") \ - _ (APP_XLS, ".xls", "application / vnd.ms - excel") \ - _ (APP_XML, ".xml", "application / xml") \ + _ (APP_EPUB, ".epub", "application/epub+zip") \ + _ (APP_FONT, ".eot", "application/vnd.ms-fontobject") \ + _ (APP_JAR, ".jar", "application/java-archive") \ + _ (APP_JSON, ".json", "application/json") \ + _ (APP_JSON_LD, ".jsonld", "application/ld+json") \ + _ (APP_MPKG, ".mpkg", "application/vnd.apple.installer+xml") \ + _ (APP_ODP, ".odp", "application/vnd.oasis.opendocument.presentation") \ + _ (APP_ODS, ".ods", "application/vnd.oasis.opendocument.spreadsheet") \ + _ (APP_ODT, ".odt", "application/vnd.oasis.opendocument.text") \ + _ (APP_OGX, ".ogx", "application/ogg") \ + _ (APP_PDF, ".pdf", "application/pdf") \ + _ (APP_PHP, ".php", "application/x-httpd-php") \ + _ (APP_PPT, ".ppt", "application/vnd.ms-powerpoint") \ + _ (APP_PPTX, ".pptx", "application/vnd.ms-powerpoint") \ + _ (APP_RAR, ".rar", "application/vnd.rar") \ + _ (APP_RTF, ".rtf", "application/rtf") \ + _ (APP_SH, ".sh", "application/x-sh") \ + _ (APP_TAR, ".tar", "application/x-tar") \ + _ (APP_VSD, ".vsd", "application/vnd.visio") \ + _ (APP_XHTML, ".xhtml", "application/xhtml+xml") \ + _ (APP_XLS, ".xls", "application/vnd.ms-excel") \ + _ (APP_XML, ".xml", "application/xml") \ _ (APP_XSLX, ".xlsx", \ - "application / vnd.openxmlformats - officedocument.spreadsheetml.sheet") \ - _ (APP_XUL, ".xul", "application / vnd.mozilla.xul + xml") \ - _ (APP_ZIP, ".zip", "application / zip") \ - _ (AUDIO_AAC, 
".aac", "audio / aac") \ - _ (AUDIO_CD, ".cda", "application / x - cdf") \ - _ (AUDIO_WAV, ".wav", "audio / wav") \ - _ (AUDIO_WEBA, ".weba", "audio / webm") \ - _ (AUDO_MIDI, ".midi", "audio / midi") \ - _ (AUDO_MID, ".mid", "audo / midi") \ - _ (AUDO_MP3, ".mp3", "audio / mpeg") \ - _ (AUDO_OGA, ".oga", "audio / ogg") \ - _ (AUDO_OPUS, ".opus", "audio / opus") \ - _ (APP_OCTET_STREAM, ".bin", "application / octet - stream") \ - _ (BZIP2, ".bz2", "application / x - bzip2") \ - _ (BZIP, ".bz", "application / x - bzip") \ - _ (FONT_OTF, ".otf", "font / otf") \ - _ (FONT_TTF, ".ttf", "font / ttf") \ - _ (FONT_WOFF2, ".woff2", "font / woff2") \ - _ (FONT_WOFF, ".woff", "font / woff") \ - _ (GZIP, ".gz", "application / gzip") \ - _ (IMAGE_AVIF, ".avif", "image / avif") \ - _ (IMAGE_BMP, ".bmp", "image / bmp") \ - _ (IMAGE_GIF, ".gif", "image / gif") \ - _ (IMAGE_ICON, ".ico", "image / vnd.microsoft.icon") \ - _ (IMAGE_JPEG, ".jpeg", "image / jpeg") \ - _ (IMAGE_JPG, ".jpg", "image / jpeg") \ - _ (IMAGE_PNG, ".png", "image / png") \ - _ (IMAGE_SVG, ".svg", "image / svg + xml") \ - _ (IMAGE_TIFF, ".tiff", "image / tiff") \ - _ (IMAGE_TIF, ".tif", "image / tiff") \ - _ (IMAGE_WEBP, ".webp", "image / webp") \ - _ (SCRIPT_CSH, ".csh", "application / x - csh") \ - _ (TEXT_ABIWORD, ".abw", "application / x - abiword") \ - _ (TEXT_ARCHIVE, ".arc", "application / x - freearc") \ - _ (TEXT_AZW, ".azw", "application / vnd.amazon.ebook") \ - _ (TEXT_CALENDAR, ".ics", "text / calendar") \ - _ (TEXT_CSS, ".css", "text / css") \ - _ (TEXT_CSV, ".csv", "text / csv") \ - _ (TEXT_HTM, ".htm", "text / html") \ - _ (TEXT_HTML, ".html", "text / html") \ - _ (TEXT_JS, ".js", "text / javascript") \ - _ (TEXT_MJS, ".mjs", "text / javascript") \ - _ (TEXT_PLAIN, ".txt", "text / plain") \ - _ (VIDEO_3GP2, ".3g2", "video / 3gpp2") \ - _ (VIDEO_3GP, ".3gp", "video / 3gpp") \ - _ (VIDEO_AVI, ".avi", "video / x - msvideo") \ - _ (VIDEO_MP4, ".mp4", "video / mp4") \ - _ (VIDEO_MPEG, ".mpeg", "video 
/ mpeg") \ - _ (VIDEO_OGG, ".ogv", "video / ogg") \ - _ (VIDEO_TS, ".ts", "video / mp2t") \ - _ (VIDEO_WEBM, ".webm", "video / webm") + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") \ + _ (APP_XUL, ".xul", "application/vnd.mozilla.xul+xml") \ + _ (APP_X_WWW_FORM_URLENCODED, ".invalid", \ + "application/x-www-form-urlencoded") \ + _ (APP_ZIP, ".zip", "application/zip") \ + _ (AUDIO_AAC, ".aac", "audio/aac") \ + _ (AUDIO_CD, ".cda", "application/x-cdf") \ + _ (AUDIO_WAV, ".wav", "audio/wav") \ + _ (AUDIO_WEBA, ".weba", "audio/webm") \ + _ (AUDO_MIDI, ".midi", "audio/midi") \ + _ (AUDO_MID, ".mid", "audo/midi") \ + _ (AUDO_MP3, ".mp3", "audio/mpeg") \ + _ (AUDO_OGA, ".oga", "audio/ogg") \ + _ (AUDO_OPUS, ".opus", "audio/opus") \ + _ (APP_OCTET_STREAM, ".bin", "application/octet-stream") \ + _ (BZIP2, ".bz2", "application/x-bzip2") \ + _ (BZIP, ".bz", "application/x-bzip") \ + _ (FONT_OTF, ".otf", "font/otf") \ + _ (FONT_TTF, ".ttf", "font/ttf") \ + _ (FONT_WOFF2, ".woff2", "font/woff2") \ + _ (FONT_WOFF, ".woff", "font/woff") \ + _ (GZIP, ".gz", "application/gzip") \ + _ (IMAGE_AVIF, ".avif", "image/avif") \ + _ (IMAGE_BMP, ".bmp", "image/bmp") \ + _ (IMAGE_GIF, ".gif", "image/gif") \ + _ (IMAGE_ICON, ".ico", "image/vnd.microsoft.icon") \ + _ (IMAGE_JPEG, ".jpeg", "image/jpeg") \ + _ (IMAGE_JPG, ".jpg", "image/jpeg") \ + _ (IMAGE_PNG, ".png", "image/png") \ + _ (IMAGE_SVG, ".svg", "image/svg+xml") \ + _ (IMAGE_TIFF, ".tiff", "image/tiff") \ + _ (IMAGE_TIF, ".tif", "image/tiff") \ + _ (IMAGE_WEBP, ".webp", "image/webp") \ + _ (SCRIPT_CSH, ".csh", "application/x-csh") \ + _ (TEXT_ABIWORD, ".abw", "application/x-abiword") \ + _ (TEXT_ARCHIVE, ".arc", "application/x-freearc") \ + _ (TEXT_AZW, ".azw", "application/vnd.amazon.ebook") \ + _ (TEXT_CALENDAR, ".ics", "text/calendar") \ + _ (TEXT_CSS, ".css", "text/css") \ + _ (TEXT_CSV, ".csv", "text/csv") \ + _ (TEXT_HTM, ".htm", "text/html") \ + _ (TEXT_HTML, ".html", "text/html") \ + _ (TEXT_JS, ".js", 
"text/javascript") \ + _ (TEXT_MJS, ".mjs", "text/javascript") \ + _ (TEXT_PLAIN, ".txt", "text/plain") \ + _ (VIDEO_3GP2, ".3g2", "video/3gpp2") \ + _ (VIDEO_3GP, ".3gp", "video/3gpp") \ + _ (VIDEO_AVI, ".avi", "video/x-msvideo") \ + _ (VIDEO_MP4, ".mp4", "video/mp4") \ + _ (VIDEO_MPEG, ".mpeg", "video/mpeg") \ + _ (VIDEO_OGG, ".ogv", "video/ogg") \ + _ (VIDEO_TS, ".ts", "video/mp2t") \ + _ (VIDEO_WEBM, ".webm", "video/webm") typedef enum http_content_type_ { @@ -172,12 +194,50 @@ typedef enum http_content_type_ } http_content_type_t; #define foreach_http_status_code \ + _ (100, CONTINUE, "100 Continue") \ + _ (101, SWITCHING_PROTOCOLS, "101 Switching Protocols") \ _ (200, OK, "200 OK") \ + _ (201, CREATED, "201 Created") \ + _ (202, ACCEPTED, "202 Accepted") \ + _ (203, NON_UTHORITATIVE_INFORMATION, "203 Non-Authoritative Information") \ + _ (204, NO_CONTENT, "204 No Content") \ + _ (205, RESET_CONTENT, "205 Reset Content") \ + _ (206, PARTIAL_CONTENT, "206 Partial Content") \ + _ (300, MULTIPLE_CHOICES, "300 Multiple Choices") \ _ (301, MOVED, "301 Moved Permanently") \ + _ (302, FOUND, "302 Found") \ + _ (303, SEE_OTHER, "303 See Other") \ + _ (304, NOT_MODIFIED, "304 Not Modified") \ + _ (305, USE_PROXY, "305 Use Proxy") \ + _ (307, TEMPORARY_REDIRECT, "307 Temporary Redirect") \ + _ (308, PERMANENT_REDIRECT, "308 Permanent Redirect") \ _ (400, BAD_REQUEST, "400 Bad Request") \ + _ (401, UNAUTHORIZED, "401 Unauthorized") \ + _ (402, PAYMENT_REQUIRED, "402 Payment Required") \ + _ (403, FORBIDDEN, "403 Forbidden") \ _ (404, NOT_FOUND, "404 Not Found") \ _ (405, METHOD_NOT_ALLOWED, "405 Method Not Allowed") \ - _ (500, INTERNAL_ERROR, "500 Internal Server Error") + _ (406, NOT_ACCEPTABLE, "406 Not Acceptable") \ + _ (407, PROXY_AUTHENTICATION_REQUIRED, "407 Proxy Authentication Required") \ + _ (408, REQUEST_TIMEOUT, "408 Request Timeout") \ + _ (409, CONFLICT, "409 Conflict") \ + _ (410, GONE, "410 Gone") \ + _ (411, LENGTH_REQUIRED, "411 Length Required") \ + 
_ (412, PRECONDITION_FAILED, "412 Precondition Failed") \ + _ (413, CONTENT_TOO_LARGE, "413 Content Too Large") \ + _ (414, URI_TOO_LONG, "414 URI Too Long") \ + _ (415, UNSUPPORTED_MEDIA_TYPE, "415 Unsupported Media Type") \ + _ (416, RANGE_NOT_SATISFIABLE, "416 Range Not Satisfiable") \ + _ (417, EXPECTATION_FAILED, "417 Expectation Failed") \ + _ (421, MISDIRECTED_REQUEST, "421 Misdirected Request") \ + _ (422, UNPROCESSABLE_CONTENT, "422 Unprocessable_Content") \ + _ (426, UPGRADE_REQUIRED, "426 Upgrade Required") \ + _ (500, INTERNAL_ERROR, "500 Internal Server Error") \ + _ (501, NOT_IMPLEMENTED, "501 Not Implemented") \ + _ (502, BAD_GATEWAY, "502 Bad Gateway") \ + _ (503, SERVICE_UNAVAILABLE, "503 Service Unavailable") \ + _ (504, GATEWAY_TIMEOUT, "504 Gateway Timeout") \ + _ (505, HTTP_VERSION_NOT_SUPPORTED, "505 HTTP Version Not Supported") typedef enum http_status_code_ { @@ -187,6 +247,101 @@ typedef enum http_status_code_ HTTP_N_STATUS } http_status_code_t; +#define foreach_http_header_name \ + _ (ACCEPT, "Accept") \ + _ (ACCEPT_CHARSET, "Accept-Charset") \ + _ (ACCEPT_ENCODING, "Accept-Encoding") \ + _ (ACCEPT_LANGUAGE, "Accept-Language") \ + _ (ACCEPT_RANGES, "Accept-Ranges") \ + _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials") \ + _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers") \ + _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods") \ + _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin") \ + _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers") \ + _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age") \ + _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers") \ + _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method") \ + _ (AGE, "Age") \ + _ (ALLOW, "Allow") \ + _ (ALPN, "ALPN") \ + _ (ALT_SVC, "Alt-Svc") \ + _ (ALT_USED, "Alt-Used") \ + _ (ALTERNATES, "Alternates") \ + _ (AUTHENTICATION_CONTROL, "Authentication-Control") \ + _ 
(AUTHENTICATION_INFO, "Authentication-Info") \ + _ (AUTHORIZATION, "Authorization") \ + _ (CACHE_CONTROL, "Cache-Control") \ + _ (CACHE_STATUS, "Cache-Status") \ + _ (CAPSULE_PROTOCOL, "Capsule-Protocol") \ + _ (CDN_CACHE_CONTROL, "CDN-Cache-Control") \ + _ (CDN_LOOP, "CDN-Loop") \ + _ (CLIENT_CERT, "Client-Cert") \ + _ (CLIENT_CERT_CHAIN, "Client-Cert-Chain") \ + _ (CLOSE, "Close") \ + _ (CONNECTION, "Connection") \ + _ (CONTENT_DIGEST, "Content-Digest") \ + _ (CONTENT_DISPOSITION, "Content-Disposition") \ + _ (CONTENT_ENCODING, "Content-Encoding") \ + _ (CONTENT_LANGUAGE, "Content-Language") \ + _ (CONTENT_LENGTH, "Content-Length") \ + _ (CONTENT_LOCATION, "Content-Location") \ + _ (CONTENT_RANGE, "Content-Range") \ + _ (CONTENT_TYPE, "Content-Type") \ + _ (COOKIE, "Cookie") \ + _ (DATE, "Date") \ + _ (DIGEST, "Digest") \ + _ (DPOP, "DPoP") \ + _ (DPOP_NONCE, "DPoP-Nonce") \ + _ (EARLY_DATA, "Early-Data") \ + _ (ETAG, "ETag") \ + _ (EXPECT, "Expect") \ + _ (EXPIRES, "Expires") \ + _ (FORWARDED, "Forwarded") \ + _ (FROM, "From") \ + _ (HOST, "Host") \ + _ (IF_MATCH, "If-Match") \ + _ (IF_MODIFIED_SINCE, "If-Modified-Since") \ + _ (IF_NONE_MATCH, "If-None-Match") \ + _ (IF_RANGE, "If-Range") \ + _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since") \ + _ (KEEP_ALIVE, "Keep-Alive") \ + _ (LAST_MODIFIED, "Last-Modified") \ + _ (LINK, "Link") \ + _ (LOCATION, "Location") \ + _ (MAX_FORWARDS, "Max-Forwards") \ + _ (ORIGIN, "Origin") \ + _ (PRIORITY, "Priority") \ + _ (PROXY_AUTHENTICATE, "Proxy-Authenticate") \ + _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info") \ + _ (PROXY_AUTHORIZATION, "Proxy-Authorization") \ + _ (PROXY_STATUS, "Proxy-Status") \ + _ (RANGE, "Range") \ + _ (REFERER, "Referer") \ + _ (REPR_DIGEST, "Repr-Digest") \ + _ (SET_COOKIE, "Set-Cookie") \ + _ (SIGNATURE, "Signature") \ + _ (SIGNATURE_INPUT, "Signature-Input") \ + _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security") \ + _ (RETRY_AFTER, "Retry-After") \ + _ (SERVER, "Server") \ + _ 
(TE, "TE") \ + _ (TRAILER, "Trailer") \ + _ (TRANSFER_ENCODING, "Transfer-Encoding") \ + _ (UPGRADE, "Upgrade") \ + _ (USER_AGENT, "User-Agent") \ + _ (VARY, "Vary") \ + _ (VIA, "Via") \ + _ (WANT_CONTENT_DIGEST, "Want-Content-Digest") \ + _ (WANT_REPR_DIGEST, "Want-Repr-Digest") \ + _ (WWW_AUTHENTICATE, "WWW-Authenticate") + +typedef enum http_header_name_ +{ +#define _(sym, str) HTTP_HEADER_##sym, + foreach_http_header_name +#undef _ +} http_header_name_t; + typedef enum http_msg_data_type_ { HTTP_MSG_DATA_INLINE, @@ -197,6 +352,15 @@ typedef struct http_msg_data_ { http_msg_data_type_t type; u64 len; + http_target_form_t target_form; + u32 target_path_offset; + u32 target_path_len; + u32 target_query_offset; + u32 target_query_len; + u32 headers_offset; + u32 headers_len; + u32 body_offset; + u64 body_len; u8 data[0]; } http_msg_data_t; @@ -208,7 +372,6 @@ typedef struct http_msg_ http_req_method_t method_type; http_status_code_t code; }; - http_content_type_t content_type; http_msg_data_t data; } http_msg_t; @@ -227,6 +390,11 @@ typedef struct http_tc_ http_conn_state_t state; u32 timer_handle; + u32 timeout; + u8 pending_timer; + u8 *app_name; + u8 *host; + u8 is_server; /* * Current request @@ -236,8 +404,20 @@ typedef struct http_tc_ u8 *rx_buf; u32 rx_buf_offset; http_buffer_t tx_buf; - u32 to_recv; + u64 to_recv; u32 bytes_dequeued; + u32 control_data_len; /* start line + headers + empty line */ + http_target_form_t target_form; + u32 target_path_offset; + u32 target_path_len; + u32 target_query_offset; + u32 target_query_len; + u32 headers_offset; + u32 headers_len; + u32 body_offset; + u64 body_len; + u16 status_code; + u8 is_tunnel; } http_conn_t; typedef struct http_worker_ @@ -249,14 +429,17 @@ typedef struct http_main_ { http_worker_t *wrk; http_conn_t *listener_pool; + http_conn_t *ho_conn_pool; u32 app_index; clib_timebase_t timebase; + u16 *sc_by_u16; /* * Runtime config */ u8 debug_level; + u8 is_init; /* * Config @@ -266,14 +449,764 @@ typedef 
struct http_main_ u32 fifo_size; } http_main_t; -static inline int -http_state_is_tx_valid (http_conn_t *hc) +always_inline int +_validate_target_syntax (u8 *target, u32 len, int is_query, int *is_encoded) +{ + int encoded = 0; + u32 i; + + static uword valid_chars[4] = { + /* !$&'()*+,-./0123456789:;= */ + 0x2fffffd200000000, + /* @ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ */ + 0x47fffffe87ffffff, + 0x0000000000000000, + 0x0000000000000000, + }; + + for (i = 0; i < len; i++) + { + if (clib_bitmap_get_no_check (valid_chars, target[i])) + continue; + /* target was already split after first question mark, + * for query it is valid character */ + if (is_query && target[i] == '?') + continue; + /* pct-encoded = "%" HEXDIG HEXDIG */ + if (target[i] == '%') + { + if ((i + 2) >= len) + return -1; + if (!isxdigit (target[i + 1]) || !isxdigit (target[i + 2])) + return -1; + i += 2; + encoded = 1; + continue; + } + clib_warning ("invalid character %d", target[i]); + return -1; + } + if (is_encoded) + *is_encoded = encoded; + return 0; +} + +/** + * An "absolute-path" rule validation (RFC9110 section 4.1). + * + * @param path Vector of target path to validate. + * @param is_encoded Return flag that indicates if percent-encoded (optional). + * + * @return @c 0 on success. + */ +always_inline int +http_validate_abs_path_syntax (u8 *path, int *is_encoded) +{ + return _validate_target_syntax (path, vec_len (path), 0, is_encoded); +} + +/** + * A "query" rule validation (RFC3986 section 2.1). + * + * @param query Vector of target query to validate. + * @param is_encoded Return flag that indicates if percent-encoded (optional). + * + * @return @c 0 on success. + */ +always_inline int +http_validate_query_syntax (u8 *query, int *is_encoded) +{ + return _validate_target_syntax (query, vec_len (query), 1, is_encoded); +} + +#define htoi(x) (isdigit (x) ? (x - '0') : (tolower (x) - 'a' + 10)) + +/** + * Decode percent-encoded data. + * + * @param src Data to decode. 
+ * @param len Length of data to decode. + * + * @return New vector with decoded data. + * + * The caller is always responsible to free the returned vector. + */ +always_inline u8 * +http_percent_decode (u8 *src, u32 len) { - http_state_t state = hc->http_state; - return (state == HTTP_STATE_APP_IO_MORE_DATA || - state == HTTP_STATE_CLIENT_IO_MORE_DATA || - state == HTTP_STATE_WAIT_APP_REPLY || - state == HTTP_STATE_WAIT_APP_METHOD); + u32 i; + u8 *decoded_uri = 0; + + for (i = 0; i < len; i++) + { + if (src[i] == '%') + { + u8 c = (htoi (src[i + 1]) << 4) | htoi (src[i + 2]); + vec_add1 (decoded_uri, c); + i += 2; + } + else + vec_add1 (decoded_uri, src[i]); + } + return decoded_uri; +} + +/** + * Remove dot segments from path (RFC3986 section 5.2.4) + * + * @param path Path to sanitize. + * + * @return New vector with sanitized path. + * + * The caller is always responsible to free the returned vector. + */ +always_inline u8 * +http_path_remove_dot_segments (u8 *path) +{ + u32 *segments = 0, *segments_len = 0, segment_len; + u8 *new_path = 0; + int i, ii; + + if (!path) + return vec_new (u8, 0); + + segments = vec_new (u32, 1); + /* first segment */ + segments[0] = 0; + /* find all segments */ + for (i = 1; i < (vec_len (path) - 1); i++) + { + if (path[i] == '/') + vec_add1 (segments, i + 1); + } + /* dummy tail */ + vec_add1 (segments, vec_len (path)); + + /* scan all segments for "." and ".." */ + segments_len = vec_new (u32, vec_len (segments) - 1); + for (i = 0; i < vec_len (segments_len); i++) + { + segment_len = segments[i + 1] - segments[i]; + if (segment_len == 2 && path[segments[i]] == '.') + segment_len = 0; + else if (segment_len == 3 && path[segments[i]] == '.' 
&& + path[segments[i] + 1] == '.') + { + segment_len = 0; + /* remove parent (if any) */ + for (ii = i - 1; ii >= 0; ii--) + { + if (segments_len[ii]) + { + segments_len[ii] = 0; + break; + } + } + } + segments_len[i] = segment_len; + } + + /* we might end with empty path, so return at least empty vector */ + new_path = vec_new (u8, 0); + /* append all valid segments */ + for (i = 0; i < vec_len (segments_len); i++) + { + if (segments_len[i]) + vec_add (new_path, path + segments[i], segments_len[i]); + } + vec_free (segments); + vec_free (segments_len); + return new_path; +} + +always_inline int +_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start, + u32 *field_name_len) +{ + u32 name_len = 0; + u8 *p; + + static uword tchar[4] = { + /* !#$%'*+-.0123456789 */ + 0x03ff6cba00000000, + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */ + 0x57ffffffc7fffffe, + 0x0000000000000000, + 0x0000000000000000, + }; + + p = *pos; + + *field_name_start = p; + while (p != end) + { + if (clib_bitmap_get_no_check (tchar, *p)) + { + name_len++; + p++; + } + else if (*p == ':') + { + if (name_len == 0) + { + clib_warning ("empty field name"); + return -1; + } + *field_name_len = name_len; + p++; + *pos = p; + return 0; + } + else + { + clib_warning ("invalid character %d", *p); + return -1; + } + } + clib_warning ("field name end not found"); + return -1; +} + +always_inline int +_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start, + u32 *field_value_len) +{ + u32 value_len = 0; + u8 *p; + + p = *pos; + + /* skip leading whitespace */ + while (1) + { + if (p == end) + { + clib_warning ("field value not found"); + return -1; + } + else if (*p != ' ' && *p != '\t') + { + break; + } + p++; + } + + *field_value_start = p; + while (p != end) + { + if (*p == '\r') + { + if ((end - p) < 1) + { + clib_warning ("incorrect field line end"); + return -1; + } + p++; + if (*p == '\n') + { + if (value_len == 0) + { + clib_warning ("empty field value"); + return -1; + 
} + p++; + *pos = p; + /* skip trailing whitespace */ + p = *field_value_start + value_len - 1; + while (*p == ' ' || *p == '\t') + { + p--; + value_len--; + } + *field_value_len = value_len; + return 0; + } + clib_warning ("CR without LF"); + return -1; + } + if (*p < ' ' && *p != '\t') + { + clib_warning ("invalid character %d", *p); + return -1; + } + p++; + value_len++; + } + + clib_warning ("field value end not found"); + return -1; +} + +typedef struct +{ + u8 *name; + u8 *value; +} http_header_ht_t; + +typedef struct +{ + http_token_t name; + http_token_t value; +} http_header_t; + +typedef struct +{ + http_header_ht_t *headers; + uword *value_by_name; +} http_header_table_t; + +/** + * Free header table's memory. + * + * @param ht Header table to free. + */ +always_inline void +http_free_header_table (http_header_table_t *ht) +{ + http_header_ht_t *header; + vec_foreach (header, ht->headers) + { + vec_free (header->name); + vec_free (header->value); + } + vec_free (ht->headers); + hash_free (ht->value_by_name); + clib_mem_free (ht); +} + +/** + * Parse headers in given vector. + * + * @param headers Vector to parse. + * @param [out] header_table Parsed headers in case of success. + * + * @return @c 0 on success. + * + * The caller is responsible to free the returned @c header_table + * using @c http_free_header_table . 
+ */ +always_inline int +http_parse_headers (u8 *headers, http_header_table_t **header_table) +{ + u8 *pos, *end, *name_start, *value_start, *name; + u32 name_len, value_len; + int rv; + http_header_ht_t *header; + http_header_table_t *ht; + uword *p; + + end = headers + vec_len (headers); + pos = headers; + + ht = clib_mem_alloc (sizeof (*ht)); + ht->value_by_name = hash_create_string (0, sizeof (uword)); + ht->headers = 0; + do + { + rv = _parse_field_name (&pos, end, &name_start, &name_len); + if (rv != 0) + { + http_free_header_table (ht); + return rv; + } + rv = _parse_field_value (&pos, end, &value_start, &value_len); + if (rv != 0) + { + http_free_header_table (ht); + return rv; + } + name = vec_new (u8, name_len); + clib_memcpy (name, name_start, name_len); + vec_terminate_c_string (name); + /* check if header is repeated */ + p = hash_get_mem (ht->value_by_name, name); + if (p) + { + /* if yes combine values */ + header = vec_elt_at_index (ht->headers, p[0]); + vec_pop (header->value); /* drop null byte */ + header->value = format (header->value, ", %U%c", format_ascii_bytes, + value_start, value_len, 0); + vec_free (name); + continue; + } + /* or create new record */ + vec_add2 (ht->headers, header, sizeof (*header)); + header->name = name; + header->value = vec_new (u8, value_len); + clib_memcpy (header->value, value_start, value_len); + vec_terminate_c_string (header->value); + hash_set_mem (ht->value_by_name, header->name, header - ht->headers); + } + while (pos != end); + + *header_table = ht; + + return 0; +} + +/** + * Try to find given header name in header table. + * + * @param header_table Header table to search. + * @param name Header name to match. + * + * @return Header's value in case of success, @c 0 otherwise. 
+ */ +always_inline const char * +http_get_header (http_header_table_t *header_table, const char *name) +{ + uword *p; + http_header_ht_t *header; + + p = hash_get_mem (header_table->value_by_name, name); + if (p) + { + header = vec_elt_at_index (header_table->headers, p[0]); + return (const char *) header->value; + } + + return 0; +} + +/** + * Add header to the list. + * + * @param headers Header list. + * @param name Pointer to header's name buffer. + * @param name_len Length of the name. + * @param value Pointer to header's value buffer. + * @param value_len Length of the value. + * + * @note Headers added at protocol layer: Date, Server, Content-Length + */ +always_inline void +http_add_header (http_header_t **headers, const char *name, uword name_len, + const char *value, uword value_len) +{ + http_header_t *header; + vec_add2 (*headers, header, 1); + header->name.base = (char *) name; + header->name.len = name_len; + header->value.base = (char *) value; + header->value.len = value_len; +} + +/** + * Serialize the header list. + * + * @param headers Header list to serialize. + * + * @return New vector with serialized headers. + * + * The caller is always responsible to free the returned vector. 
+ */ +always_inline u8 * +http_serialize_headers (http_header_t *headers) +{ + u8 *headers_buf = 0, *dst; + u32 headers_buf_len = 2; + http_header_t *header; + + vec_foreach (header, headers) + headers_buf_len += header->name.len + header->value.len + 4; + + vec_validate (headers_buf, headers_buf_len - 1); + dst = headers_buf; + + vec_foreach (header, headers) + { + clib_memcpy (dst, header->name.base, header->name.len); + dst += header->name.len; + *dst++ = ':'; + *dst++ = ' '; + clib_memcpy (dst, header->value.base, header->value.len); + dst += header->value.len; + *dst++ = '\r'; + *dst++ = '\n'; + } + *dst++ = '\r'; + *dst = '\n'; + return headers_buf; +} + +typedef struct +{ + ip46_address_t ip; + u16 port; + u8 is_ip4; +} http_uri_t; + +always_inline int +http_parse_authority_form_target (u8 *target, http_uri_t *authority) +{ + unformat_input_t input; + u32 port; + int rv = 0; + + unformat_init_vector (&input, vec_dup (target)); + if (unformat (&input, "[%U]:%d", unformat_ip6_address, &authority->ip.ip6, + &port)) + { + authority->port = clib_host_to_net_u16 (port); + authority->is_ip4 = 0; + } + else if (unformat (&input, "%U:%d", unformat_ip4_address, &authority->ip.ip4, + &port)) + { + authority->port = clib_host_to_net_u16 (port); + authority->is_ip4 = 1; + } + /* TODO reg-name resolution */ + else + { + clib_warning ("unsupported format '%v'", target); + rv = -1; + } + unformat_free (&input); + return rv; +} + +always_inline u8 * +http_serialize_authority_form_target (http_uri_t *authority) +{ + u8 *s; + + if (authority->is_ip4) + s = format (0, "%U:%d", format_ip4_address, &authority->ip.ip4, + clib_net_to_host_u16 (authority->port)); + else + s = format (0, "[%U]:%d", format_ip6_address, &authority->ip.ip6, + clib_net_to_host_u16 (authority->port)); + + return s; +} + +typedef enum http_url_scheme_ +{ + HTTP_URL_SCHEME_HTTP, + HTTP_URL_SCHEME_HTTPS, +} http_url_scheme_t; + +typedef struct +{ + http_url_scheme_t scheme; + u16 port; + u32 host_offset; + 
u32 host_len; + u32 path_offset; + u32 path_len; + u8 host_is_ip6; +} http_url_t; + +always_inline int +_parse_port (u8 **pos, u8 *end, u16 *port) +{ + u32 value = 0; + u8 *p = *pos; + + if (!isdigit (*p)) + return -1; + value = *p - '0'; + p++; + + while (p != end) + { + if (!isdigit (*p)) + break; + value = value * 10 + *p - '0'; + if (value > CLIB_U16_MAX) + return -1; + p++; + } + *pos = p; + *port = clib_host_to_net_u16 ((u16) value); + return 0; +} + +/** + * An "absolute-form" URL parsing. + * + * @param url Vector of target URL to validate. + * @param parsed Parsed URL metadata in case of success. + * + * @return @c 0 on success. + */ +always_inline int +http_parse_absolute_form (u8 *url, http_url_t *parsed) +{ + u8 *token_start, *token_end, *end; + int is_encoded = 0; + + static uword valid_chars[4] = { + /* -.0123456789 */ + 0x03ff600000000000, + /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz */ + 0x07fffffe07fffffe, + 0x0000000000000000, + 0x0000000000000000, + }; + + if (vec_len (url) < 9) + { + clib_warning ("uri too short"); + return -1; + } + + clib_memset (parsed, 0, sizeof (*parsed)); + + end = url + vec_len (url); + + /* parse scheme */ + if (!memcmp (url, "http:// ", 7)) + { + parsed->scheme = HTTP_URL_SCHEME_HTTP; + parsed->port = clib_host_to_net_u16 (80); + parsed->host_offset = 7; + } + else if (!memcmp (url, "https:// ", 8)) + { + parsed->scheme = HTTP_URL_SCHEME_HTTPS; + parsed->port = clib_host_to_net_u16 (443); + parsed->host_offset = 8; + } + else + { + clib_warning ("invalid scheme"); + return -1; + } + token_start = url + parsed->host_offset; + + /* parse host */ + if (*token_start == '[') + /* IPv6 address */ + { + parsed->host_is_ip6 = 1; + parsed->host_offset++; + token_end = ++token_start; + while (1) + { + if (token_end == end) + { + clib_warning ("invalid host, IPv6 addr not terminated with ']'"); + return -1; + } + else if (*token_end == ']') + { + parsed->host_len = token_end - token_start; + token_start = token_end + 
1; + break; + } + else if (*token_end != ':' && *token_end != '.' && + !isxdigit (*token_end)) + { + clib_warning ("invalid character '%u'", *token_end); + return -1; + } + token_end++; + } + } + else + { + token_end = token_start; + while (token_end != end && *token_end != ':' && *token_end != '/') + { + if (!clib_bitmap_get_no_check (valid_chars, *token_end)) + { + clib_warning ("invalid character '%u'", *token_end); + return -1; + } + token_end++; + } + parsed->host_len = token_end - token_start; + token_start = token_end; + } + + if (!parsed->host_len) + { + clib_warning ("zero length host"); + return -1; + } + + /* parse port, if any */ + if (token_start != end && *token_start == ':') + { + token_end = ++token_start; + if (_parse_port (&token_end, end, &parsed->port)) + { + clib_warning ("invalid port"); + return -1; + } + token_start = token_end; + } + + if (token_start == end) + return 0; + + token_start++; /* drop leading slash */ + parsed->path_offset = token_start - url; + parsed->path_len = end - token_start; + + if (parsed->path_len) + return _validate_target_syntax (token_start, parsed->path_len, 0, + &is_encoded); + + return 0; +} + +/** + * Parse target host and port of UDP tunnel over HTTP. + * + * @param path Path in format "{target_host}/{target_port}/". + * @param path_len Length of given path. + * @param parsed Parsed target in case of success.. + * + * @return @c 0 on success. + * + * @note Only IPv4 literals and IPv6 literals supported. 
+ */ +always_inline int +http_parse_masque_host_port (u8 *path, u32 path_len, http_uri_t *parsed) +{ + u8 *p, *end, *decoded_host; + u32 host_len; + unformat_input_t input; + + p = path; + end = path + path_len; + clib_memset (parsed, 0, sizeof (*parsed)); + + while (p != end && *p != '/') + p++; + + host_len = p - path; + if (!host_len || (host_len == path_len) || (host_len + 1 == path_len)) + return -1; + decoded_host = http_percent_decode (path, host_len); + unformat_init_vector (&input, decoded_host); + if (unformat (&input, "%U", unformat_ip4_address, &parsed->ip.ip4)) + parsed->is_ip4 = 1; + else if (unformat (&input, "%U", unformat_ip6_address, &parsed->ip.ip6)) + parsed->is_ip4 = 0; + else + { + unformat_free (&input); + clib_warning ("unsupported target_host format"); + return -1; + } + unformat_free (&input); + + p++; + if (_parse_port (&p, end, &parsed->port)) + { + clib_warning ("invalid port"); + return -1; + } + + if (p == end || *p != '/') + return -1; + + return 0; } #endif /* SRC_PLUGINS_HTTP_HTTP_H_ */ diff --git a/src/plugins/http/http_buffer.c b/src/plugins/http/http_buffer.c index f3dc308dbf8..bc1b8c08630 100644 --- a/src/plugins/http/http_buffer.c +++ b/src/plugins/http/http_buffer.c @@ -173,7 +173,7 @@ buf_ptr_drain (http_buffer_t *hb, u32 len) bf->segs[1].data += len; bf->segs[0].len -= len; - HTTP_DBG (1, "drained %u left %u", len, bf->segs[1].len); + HTTP_DBG (1, "drained %u left %u", len, bf->segs[0].len); if (!bf->segs[0].len) { diff --git a/src/plugins/http/http_content_types.h b/src/plugins/http/http_content_types.h new file mode 100644 index 00000000000..ddc02566db7 --- /dev/null +++ b/src/plugins/http/http_content_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. 
+ */ + +#ifndef SRC_PLUGINS_HTTP_HTTP_CONTENT_TYPES_H_ +#define SRC_PLUGINS_HTTP_HTTP_CONTENT_TYPES_H_ + +#include <http/http.h> + +static http_token_t http_content_types[] = { +#define _(s, ext, str) { http_token_lit (str) }, + foreach_http_content_type +#undef _ +}; + +#define http_content_type_token(e) \ + http_content_types[e].base, http_content_types[e].len + +#endif /* SRC_PLUGINS_HTTP_HTTP_CONTENT_TYPES_H_ */ diff --git a/src/plugins/http/http_header_names.h b/src/plugins/http/http_header_names.h new file mode 100644 index 00000000000..99acac786db --- /dev/null +++ b/src/plugins/http/http_header_names.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. + */ + +#ifndef SRC_PLUGINS_HTTP_HTTP_HEADER_NAMES_H_ +#define SRC_PLUGINS_HTTP_HTTP_HEADER_NAMES_H_ + +#include <http/http.h> + +static http_token_t http_header_names[] = { +#define _(sym, str) { http_token_lit (str) }, + foreach_http_header_name +#undef _ +}; + +#define http_header_name_token(e) \ + http_header_names[e].base, http_header_names[e].len + +#define http_header_name_str(e) http_header_names[e].base + +#endif /* SRC_PLUGINS_HTTP_HTTP_HEADER_NAMES_H_ */ diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst new file mode 100644 index 00000000000..f86c796bd83 --- /dev/null +++ b/src/plugins/http/http_plugin.rst @@ -0,0 +1,537 @@ +.. _http_plugin: + +.. toctree:: + +HTTP Plugin +=========== + +Overview +-------- + +This plugin adds the HTTP protocol to VPP's Host Stack. +As a result parsing and serializing of HTTP/1 requests or responses are available for internal VPP applications. 
+ +Usage +----- + +The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``, +``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_parse_headers``, ``http_get_header``, +``http_free_header_table``, ``http_add_header``, ``http_serialize_headers``, ``http_parse_authority_form_target``, +``http_serialize_authority_form_target``, ``http_parse_absolute_form``, ``http_parse_masque_host_port``. + +It relies on the hoststack constructs and uses ``http_msg_data_t`` data structure for passing metadata to/from applications. + +Server application +^^^^^^^^^^^^^^^^^^ + +Server application sets ``TRANSPORT_PROTO_HTTP`` as ``transport_proto`` in session endpoint configuration when registering to listen. + +Receiving data +"""""""""""""" + +HTTP plugin sends message header with metadata for parsing, in form of offset and length, followed by all data bytes as received from transport. + +Application will get pre-parsed following items: + +* HTTP method +* target form +* target path offset and length +* target query offset and length +* header section offset and length +* body offset and length + +The example below reads HTTP message header in ``builtin_app_rx_callback``, which is first step application should do: + +.. code-block:: C + + #include <http/http.h> + http_msg_t msg; + rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + +As next step application might validate message and method type, for example application only expects to receive GET requests: + +.. code-block:: C + + if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET) + { + /* your error handling */ + } + +Now application can start reading HTTP data. First let's read the target path: + +.. 
code-block:: C + + u8 *target_path; + vec_validate (target_path, msg.data.target_path_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, msg.data.target_path_len, target_path); + ASSERT (rv == msg.data.target_path_len); + +Application might also want to know target form which is stored in ``msg.data.target_form``, you can read more about target forms in RFC9112 section 3.2. +In case of origin form HTTP plugin always sets ``target_path_offset`` after leading slash character. + +Example bellow validates "absolute-path" rule, as described in RFC9110 section 4.1, in case of target in origin form, additionally application can get information if percent encoding is used and decode path: + +.. code-block:: C + + int is_encoded = 0; + if (msg.data.target_form == HTTP_TARGET_ORIGIN_FORM) + { + if (http_validate_abs_path_syntax (target_path, &is_encoded)) + { + /* your error handling */ + } + if (is_encoded) + { + u8 *decoded = http_percent_decode (target_path, vec_len (target_path)); + vec_free (target_path); + target_path = decoded; + } + } + +More on topic when to decode in RFC3986 section 2.4. + +When application serves static files, it is highly recommended to sanitize target path by removing dot segments (you don't want to risk path traversal attack): + +.. code-block:: C + + u8 *sanitized_path; + sanitized_path = http_path_remove_dot_segments (target_path); + +Let's move to target query which is optional. Percent encoding might be used too, but we skip it for brevity: + +.. code-block:: C + + u8 *target_query = 0; + if (msg.data.target_query_len) + { + vec_validate (target_query, msg.data.target_query_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset, + msg.data.target_query_len, target_query); + ASSERT (rv == msg.data.target_query_len); + if (http_validate_query_syntax (target_query, 0)) + { + /* your error handling */ + } + } + +And now for something completely different, headers. 
+Headers are parsed using a generic algorithm, independent of the individual header names. +When header is repeated, its combined value consists of all values separated by comma, concatenated in order as received. +Following example shows how to parse headers: + +.. code-block:: C + + #include <http/http_header_names.h> + if (msg.data.headers_len) + { + u8 *headers = 0; + http_header_table_t *ht; + vec_validate (headers, msg.data.headers_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset, + msg.data.headers_len, headers); + ASSERT (rv == msg.data.headers_len); + if (http_parse_headers (headers, &ht)) + { + /* your error handling */ + } + /* get Accept header */ + const char *accept_value = http_get_header (ht, http_header_name_str (HTTP_HEADER_ACCEPT)); + if (accept_value) + { + /* do something interesting */ + } + http_free_header_table (ht); + vec_free (headers); + } + +Finally application reads body (if any), which might be received in multiple pieces (depends on size), so we might need some state machine in ``builtin_app_rx_callback``. +We will add following members to our session context structure: + +.. code-block:: C + + typedef struct + { + /* ... */ + u64 to_recv; + u8 *resp_body; + } session_ctx_t; + +First we prepare vector for response body, do it only once when you are reading metadata: + +.. code-block:: C + + /* drop everything up to body */ + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset); + ctx->to_recv = msg.data.body_len; + /* prepare vector for response body */ + vec_validate (ctx->resp_body, msg.data.body_len - 1); + vec_reset_length (ctx->resp_body); + +Now we can start reading body content, following block of code could be executed multiple times: + +.. 
code-block:: C + + /* dequeue */ + u32 n_deq = svm_fifo_max_dequeue (ts->rx_fifo); + /* current offset */ + u64 curr = vec_len (ctx->resp_body); + rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, ctx->resp_body + curr); + ASSERT (rv == n_deq); + /* update length of the vector */ + vec_set_len (ctx->resp_body, curr + n_deq); + /* update number of remaining bytes to receive */ + ctx->to_recv -= rv; + /* check if all data received */ + if (ctx->to_recv == 0) + { + /* we are done */ + /* send 200 OK response */ + } + +Sending data +"""""""""""""" + +When server application sends response back to HTTP layer it starts with message metadata, followed by optional serialized headers and finally body (if any). + +Application should set following items: + +* Status code +* target form +* header section offset and length +* body offset and length + +Application could pass headers back to HTTP layer. Header list is created dynamically as vector of ``http_header_t``, +where we store only pointers to buffers (zero copy). +Well known header names are predefined. +The list is serialized just before you send buffer to HTTP layer. + +.. note:: + Following headers are added at protocol layer and **MUST NOT** be set by application: Date, Server, Content-Length + +Following example shows how to create headers section: + +.. 
code-block:: C + + #include <http/http.h> + #include <http/http_header_names.h> + #include <http/http_content_types.h> + http_header_t *resp_headers = 0; + u8 *headers_buf = 0; + http_add_header (resp_headers, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_TEXT_HTML)); + http_add_header (resp_headers, + http_header_name_token (HTTP_HEADER_CACHE_CONTROL), + http_token_lit ("max-age=600")); + http_add_header (resp_headers, + http_header_name_token (HTTP_HEADER_LOCATION), + (const char *) redirect, vec_len (redirect)); + headers_buf = http_serialize_headers (resp_headers); + +The example below show how to create and send response HTTP message metadata: + +.. code-block:: C + + http_msg_t msg; + msg.type = HTTP_MSG_REPLY; + msg.code = HTTP_STATUS_MOVED + msg.data.headers_offset = 0; + msg.data.headers_len = vec_len (headers_buf); + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.body_len = vec_len (tx_buf); + msg.data.body_offset = msg.data.headers_len; + msg.data.len = msg.data.body_len + msg.data.headers_len; + ts = session_get (hs->vpp_session_index, hs->thread_index); + rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + +Next you will send your serialized headers: + +.. code-block:: C + + rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf); + ASSERT (rv == msg.data.headers_len); + vec_free (headers_buf); + +Finally application sends response body: + +.. code-block:: C + + rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (tx_buf), tx_buf); + if (rv != vec_len (hs->tx_buf)) + { + hs->tx_offset = rv; + svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + } + else + { + vec_free (tx_buf); + } + if (svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + +Examples above shows how to send body and headers by copy, alternatively you could pass them as pointer: + +.. 
code-block:: C + + msg.data.type = HTTP_MSG_DATA_PTR; + /* code omitted for brevity */ + if (msg.data.headers_len) + { + uword headers = pointer_to_uword (headers_buf); + rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (headers), (u8 *) &headers); + ASSERT (rv == sizeof (headers)); + } + uword data = pointer_to_uword (tx_buf); + rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data); + ASSERT (rv == sizeof (data)); + +In this case you need to free data when you receive next request or when session is closed. + + +Client application +^^^^^^^^^^^^^^^^^^ + +Client application opens connection with vnet URI where transport protocol is set to ``http``. + +Sending data +"""""""""""""" + +HTTP request is sent when connection is successfully established in ``session_connected_callback``. + +When client application sends message to HTTP layer it starts with message metadata, followed by request target, optional headers and body (if any) buffers. + +Application should set following items: + +* HTTP method +* target form, offset and length +* header section offset and length +* body offset and length + +Application could pass headers to HTTP layer. Header list is created dynamically as vector of ``http_header_t``, +where we store only pointers to buffers (zero copy). +Well known header names are predefined. +The list is serialized just before you send buffer to HTTP layer. + +.. note:: + Following headers are added at protocol layer and **MUST NOT** be set by application: Host, User-Agent + + +The example below shows how to create headers section: + +.. 
code-block:: C + + #include <http/http.h> + #include <http/http_header_names.h> + #include <http/http_content_types.h> + http_header_t *req_headers = 0; + u8 *headers_buf = 0; + http_add_header (req_headers, + http_header_name_token (HTTP_HEADER_ACCEPT), + http_content_type_token (HTTP_CONTENT_TEXT_HTML)); + headers_buf = http_serialize_headers (req_headers); + vec_free (hs->req_headers); + +Following example shows how to set message metadata: + +.. code-block:: C + + http_msg_t msg; + msg.type = HTTP_MSG_REQUEST; + msg.method_type = HTTP_REQ_GET; + msg.data.headers_offset = 0; + /* request target */ + msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; + msg.data.target_path_offset = 0; + msg.data.target_path_len = vec_len (target); + /* custom headers */ + msg.data.headers_offset = msg.data.target_path_len; + msg.data.headers_len = vec_len (headers_buf); + /* no request body because we are doing GET request */ + msg.data.body_len = 0; + /* data type and total length */ + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = msg.data.target_path_len + msg.data.headers_len + msg.data.body_len; + +Finally application sends everything to HTTP layer: + +.. code-block:: C + + svm_fifo_seg_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, /* message metadata */ + { target, vec_len (target) }, /* request target */ + { headers_buf, vec_len (headers_buf) } }; /* serialized headers */ + rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 3, 0 /* allow partial */); + vec_free (headers_buf); + if (rv < 0 || rv != sizeof (msg) + msg.data.len) + { + clib_warning ("failed app enqueue"); + return -1; + } + if (svm_fifo_set_event (as->tx_fifo)) + session_program_tx_io_evt (as->handle, SESSION_IO_EVT_TX); + +Examples above shows how to send buffers by copy, alternatively you could pass them as pointer: + +.. 
code-block:: C + + msg.data.type = HTTP_MSG_DATA_PTR; + msg.method_type = HTTP_REQ_POST; + msg.data.body_len = vec_len (data); + /* code omitted for brevity */ + uword target = pointer_to_uword (target); + uword headers = pointer_to_uword (headers_buf); + uword body = pointer_to_uword (data); + svm_fifo_seg_t segs[4] = { + { (u8 *) &msg, sizeof (msg) }, + { (u8 *) &target, sizeof (target) }, + { (u8 *) &headers, sizeof (headers) }, + { (u8 *) &body, sizeof (body) }, + }; + rv = svm_fifo_enqueue_segments (s->tx_fifo, segs, 4, 0 /* allow partial */); + ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers) + sizeof (body))); + +In this case you need to free data when you receive response or when session is closed. + +Receiving data +"""""""""""""" + +HTTP plugin sends message header with metadata for parsing, in form of offset and length, followed by all data bytes as received from transport. + +Application will get pre-parsed following items: + +* status code +* header section offset and length +* body offset and length + +The example below reads HTTP message header in ``builtin_app_rx_callback``, which is first step application should do: + +.. code-block:: C + + #include <http/http.h> + http_msg_t msg; + rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + +As next step application might validate message type and status code: + +.. code-block:: C + + if (msg.type != HTTP_MSG_REPLY) + { + /* your error handling */ + } + if (msg.code != HTTP_STATUS_OK) + { + /* your error handling */ + /* of course you can continue with steps bellow */ + /* you might be interested in some headers or body content (if any) */ + } + +Headers are parsed using a generic algorithm, independent of the individual header names. +When header is repeated, its combined value consists of all values separated by comma, concatenated in order as received. +Following example shows how to parse headers: + +.. 
code-block:: C + + #include <http/http_header_names.h> + if (msg.data.headers_len) + { + u8 *headers = 0; + http_header_table_t *ht; + vec_validate (headers, msg.data.headers_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset, + msg.data.headers_len, headers); + ASSERT (rv == msg.data.headers_len); + if (http_parse_headers (headers, &ht)) + { + /* your error handling */ + } + /* get Content-Type header */ + const char *content_type = http_get_header (ht, http_header_name_str (HTTP_HEADER_CONTENT_TYPE)); + if (content_type) + { + /* do something interesting */ + } + http_free_header_table (ht); + vec_free (headers); + } + +Finally application reads body, which might be received in multiple pieces (depends on size), so we might need some state machine in ``builtin_app_rx_callback``. +We will add following members to our session context structure: + +.. code-block:: C + + typedef struct + { + /* ... */ + u64 to_recv; + u8 *resp_body; + } session_ctx_t; + +First we prepare vector for response body, do it only once when you are reading metadata: + +.. code-block:: C + + /* drop everything up to body */ + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset); + ctx->to_recv = msg.data.body_len; + /* prepare vector for response body */ + vec_validate (ctx->resp_body, msg.data.body_len - 1); + vec_reset_length (ctx->resp_body); + +Now we can start reading body content, following block of code could be executed multiple times: + +.. 
code-block:: C + + /* dequeue */ + u32 max_deq = svm_fifo_max_dequeue (ts->rx_fifo); + u32 n_deq = clib_min (to_recv, max_deq); + /* current offset */ + u64 curr = vec_len (ctx->resp_body); + rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, ctx->resp_body + curr); + if (rv < 0 || rv != n_deq) + { + /* your error handling */ + } + /* update length of the vector */ + vec_set_len (ctx->resp_body, curr + n_deq); + /* update number of remaining bytes to receive */ + ASSERT (to_recv >= rv); + ctx->to_recv -= rv; + /* check if all data received */ + if (ctx->to_recv == 0) + { + /* we are done */ + /* close the session if you don't want to send another request */ + /* and update state machine... */ + } + +HTTP timeout +^^^^^^^^^^^^ + +HTTP plugin sets session inactivity timeout by default to 60 seconds. +Client and server applications can pass custom timeout value (in seconds) using extended configuration when doing connect or start listening respectively. +You just need to add extended configuration to session endpoint configuration which is part of ``vnet_connect_args_t`` and ``vnet_listen_args_t``. +HTTP plugin use ``opaque`` member of ``transport_endpt_ext_cfg_t``, unsigned 32bit integer seems to be sufficient (allowing the timeout to be set up to 136 years). + +The example below sets HTTP session timeout to 30 seconds (server application): + +.. code-block:: C + + vnet_listen_args_t _a, *a = &_a; + session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; + transport_endpt_ext_cfg_t *ext_cfg; + int rv; + clib_memset (a, 0, sizeof (*a)); + clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); + /* add new extended config entry */ + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + /* your custom timeout value in seconds */ + ext_cfg->opaque = 30; + /* rest of the settings omitted for brevity */ + rv = vnet_listen (a); + /* don't forget to free extended config */ + session_endpoint_free_ext_cfgs (&a->sep_ext); + /* ... 
*/ diff --git a/src/plugins/http/http_status_codes.h b/src/plugins/http/http_status_codes.h new file mode 100644 index 00000000000..100095c8f42 --- /dev/null +++ b/src/plugins/http/http_status_codes.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. + */ + +#ifndef SRC_PLUGINS_HTTP_HTTP_STATUS_CODES_H_ +#define SRC_PLUGINS_HTTP_HTTP_STATUS_CODES_H_ + +#include <http/http.h> + +static const char *http_status_code_str[] = { +#define _(c, s, str) str, + foreach_http_status_code +#undef _ +}; + +static inline u8 * +format_http_status_code (u8 *s, va_list *va) +{ + http_status_code_t status_code = va_arg (*va, http_status_code_t); + if (status_code < HTTP_N_STATUS) + s = format (s, "%s", http_status_code_str[status_code]); + else + s = format (s, "invalid status code %d", status_code); + return s; +} + +#endif /* SRC_PLUGINS_HTTP_HTTP_STATUS_CODES_H_ */ diff --git a/src/plugins/http/http_timer.c b/src/plugins/http/http_timer.c index 42fe69076fe..580f31657a9 100644 --- a/src/plugins/http/http_timer.c +++ b/src/plugins/http/http_timer.c @@ -29,7 +29,15 @@ http_timer_process_expired_cb (u32 *expired_timers) { /* Get session handle. The first bit is the timer id */ hs_handle = expired_timers[i] & 0x7FFFFFFF; - session_send_rpc_evt_to_thread (hs_handle >> 24, twc->cb_fn, + twc->invalidate_cb (hs_handle); + } + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session handle. 
The first bit is the timer id */ + hs_handle = expired_timers[i] & 0x7FFFFFFF; + HTTP_DBG (1, "rpc to hc [%u]%x", hs_handle >> 24, + hs_handle & 0x00FFFFFF); + session_send_rpc_evt_to_thread (hs_handle >> 24, twc->rpc_cb, uword_to_pointer (hs_handle, void *)); } } @@ -66,15 +74,19 @@ VLIB_REGISTER_NODE (http_timer_process_node) = { }; void -http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn) +http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *rpc_cb, + http_conn_invalidate_timer_fn *invalidate_cb) { http_tw_ctx_t *twc = &http_tw_ctx; vlib_node_t *n; + ASSERT (twc->tw.timers == 0); + tw_timer_wheel_init_2t_1w_2048sl (&twc->tw, http_timer_process_expired_cb, 1.0 /* timer interval */, ~0); clib_spinlock_init (&twc->tw_lock); - twc->cb_fn = cb_fn; + twc->rpc_cb = rpc_cb; + twc->invalidate_cb = invalidate_cb; vlib_node_set_state (vm, http_timer_process_node.index, VLIB_NODE_STATE_POLLING); diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h index eec5a4595fe..43d20d004d8 100644 --- a/src/plugins/http/http_timer.h +++ b/src/plugins/http/http_timer.h @@ -19,34 +19,37 @@ #include <http/http.h> #include <vppinfra/tw_timer_2t_1w_2048sl.h> -#define HTTP_CONN_TIMEOUT 60 +#define HTTP_CONN_TIMEOUT 60 +#define HTTP_TIMER_HANDLE_INVALID ((u32) ~0) typedef void (http_conn_timeout_fn) (void *); +typedef void (http_conn_invalidate_timer_fn) (u32 hs_handle); typedef struct http_tw_ctx_ { tw_timer_wheel_2t_1w_2048sl_t tw; clib_spinlock_t tw_lock; - http_conn_timeout_fn *cb_fn; + http_conn_timeout_fn *rpc_cb; + http_conn_invalidate_timer_fn *invalidate_cb; } http_tw_ctx_t; extern http_tw_ctx_t http_tw_ctx; -void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn); +void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *rpc_cb, + http_conn_invalidate_timer_fn *invalidate_cb); static inline void http_conn_timer_start (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; u32 hs_handle; - u64 timeout; - timeout = 
HTTP_CONN_TIMEOUT; + ASSERT (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID); hs_handle = hc->c_thread_index << 24 | hc->c_c_index; clib_spinlock_lock (&twc->tw_lock); hc->timer_handle = - tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, timeout); + tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout); clib_spinlock_unlock (&twc->tw_lock); } @@ -55,12 +58,13 @@ http_conn_timer_stop (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; - if (hc->timer_handle == ~0) + hc->pending_timer = 0; + if (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID) return; clib_spinlock_lock (&twc->tw_lock); tw_timer_stop_2t_1w_2048sl (&twc->tw, hc->timer_handle); - hc->timer_handle = ~0; + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; clib_spinlock_unlock (&twc->tw_lock); } @@ -68,15 +72,17 @@ static inline void http_conn_timer_update (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; - u64 timeout; - - if (hc->timer_handle == ~0) - return; - - timeout = HTTP_CONN_TIMEOUT; + u32 hs_handle; clib_spinlock_lock (&twc->tw_lock); - tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, timeout); + if (hc->timer_handle != HTTP_TIMER_HANDLE_INVALID) + tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, hc->timeout); + else + { + hs_handle = hc->c_thread_index << 24 | hc->c_c_index; + hc->timer_handle = + tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout); + } clib_spinlock_unlock (&twc->tw_lock); } diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c new file mode 100644 index 00000000000..d4ac8f46f29 --- /dev/null +++ b/src/plugins/http/test/http_test.c @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. + */ + +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +#include <http/http.h> + +#define HTTP_TEST_I(_cond, _comment, _args...) 
\ + ({ \ + int _evald = (_cond); \ + if (!(_evald)) \ + { \ + vlib_cli_output (vm, "FAIL:%d: " _comment "\n", __LINE__, ##_args); \ + } \ + else \ + { \ + vlib_cli_output (vm, "PASS:%d: " _comment "\n", __LINE__, ##_args); \ + } \ + _evald; \ + }) + +#define HTTP_TEST(_cond, _comment, _args...) \ + { \ + if (!HTTP_TEST_I (_cond, _comment, ##_args)) \ + { \ + return 1; \ + } \ + } + +static int +http_test_authority_form (vlib_main_t *vm) +{ + u8 *target = 0, *formated_target = 0; + http_uri_t authority; + int rv; + + target = format (0, "10.10.2.45:20"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv == 0), "'%v' should be valid", target); + formated_target = http_serialize_authority_form_target (&authority); + rv = vec_cmp (target, formated_target); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target); + vec_free (target); + vec_free (formated_target); + + target = format (0, "[dead:beef::1234]:443"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv == 0), "'%v' should be valid", target); + formated_target = http_serialize_authority_form_target (&authority); + rv = vec_cmp (target, formated_target); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target); + vec_free (target); + vec_free (formated_target); + + target = format (0, "example.com:80"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' reg-name not supported", target); + vec_free (target); + + target = format (0, "10.10.2.45"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' should be invalid", target); + vec_free (target); + + target = format (0, "1000.10.2.45:20"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' should be invalid", target); + vec_free (target); + + target = format (0, "[xyz0::1234]:443"); + rv = http_parse_authority_form_target (target, &authority); + 
HTTP_TEST ((rv != 0), "'%v' should be invalid", target); + vec_free (target); + + return 0; +} + +static int +http_test_absolute_form (vlib_main_t *vm) +{ + u8 *url = 0; + http_url_t parsed_url; + int rv; + + url = format (0, "https://example.org/.well-known/masque/udp/1.2.3.4/123/"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS), + "scheme should be https"); + HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("https://")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("https://")); + HTTP_TEST ((parsed_url.host_len == strlen ("example.org")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("example.org")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443), + "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_offset == strlen ("https://example.org/")), + "path_offset=%u should be %u", parsed_url.path_offset, + strlen ("https://example.org/")); + HTTP_TEST ( + (parsed_url.path_len == strlen (".well-known/masque/udp/1.2.3.4/123/")), + "path_len=%u should be %u", parsed_url.path_len, + strlen (".well-known/masque/udp/1.2.3.4/123/")); + vec_free (url); + + url = format (0, "http://vpp-example.org"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), + "scheme should be http"); + HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("http://")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("http://")); + HTTP_TEST ((parsed_url.host_len == strlen ("vpp-example.org")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("vpp-example.org")); 
+ HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0", + parsed_url.path_len); + vec_free (url); + + url = format (0, "http://1.2.3.4:8080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), + "scheme should be http"); + HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("http://")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("http://")); + HTTP_TEST ((parsed_url.host_len == strlen ("1.2.3.4")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("1.2.3.4")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080), + "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_offset == strlen ("http://1.2.3.4:8080/")), + "path_offset=%u should be %u", parsed_url.path_offset, + strlen ("http://1.2.3.4:8080/")); + HTTP_TEST ((parsed_url.path_len == strlen ("abcd")), + "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd")); + vec_free (url); + + url = format (0, "https://[dead:beef::1234]/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS), + "scheme should be https"); + HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("https://[")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("https://[")); + HTTP_TEST ((parsed_url.host_len == strlen ("dead:beef::1234")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("dead:beef::1234")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 
443), + "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_offset == strlen ("https://[dead:beef::1234]/")), + "path_offset=%u should be %u", parsed_url.path_offset, + strlen ("https://[dead:beef::1234]/")); + HTTP_TEST ((parsed_url.path_len == strlen ("abcd")), + "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd")); + vec_free (url); + + url = format (0, "http://[::ffff:192.0.2.128]:8080/"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), + "scheme should be http"); + HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("http://[")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("http://[")); + HTTP_TEST ((parsed_url.host_len == strlen ("::ffff:192.0.2.128")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("::ffff:192.0.2.128")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080), + "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0", + parsed_url.path_len); + vec_free (url); + + url = format (0, "http://[dead:beef::1234/abc"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://[dead|beef::1234]/abc"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http:example.org:8080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "htt://example.org:8080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free 
(url); + + url = format (0, "http://"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http:///abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org:808080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org/a%%3Xbcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org/a%%3"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org/a[b]cd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://exa[m]ple.org/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + return 0; +} + +static int +http_test_parse_masque_host_port (vlib_main_t *vm) +{ + u8 *path = 0; + http_uri_t target; + int rv; + + path = format (0, "10.10.2.45/443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv == 0), "'%v' should be valid", path); + HTTP_TEST ((target.is_ip4 == 1), "is_ip4=%d should be 1", target.is_ip4); + HTTP_TEST ((clib_net_to_host_u16 (target.port) == 443), + "port=%u should be 443", clib_net_to_host_u16 (target.port)); + HTTP_TEST ((target.ip.ip4.data[0] == 10 && target.ip.ip4.data[1] == 10 && + target.ip.ip4.data[2] == 2 && target.ip.ip4.data[3] == 45), + "target.ip=%U should be 10.10.2.45", format_ip4_address, + &target.ip.ip4); + vec_free (path); + + path = format (0, 
"dead%%3Abeef%%3A%%3A1234/80/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv == 0), "'%v' should be valid", path); + HTTP_TEST ((target.is_ip4 == 0), "is_ip4=%d should be 0", target.is_ip4); + HTTP_TEST ((clib_net_to_host_u16 (target.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 (target.port)); + HTTP_TEST ((clib_net_to_host_u16 (target.ip.ip6.as_u16[0]) == 0xdead && + clib_net_to_host_u16 (target.ip.ip6.as_u16[1]) == 0xbeef && + target.ip.ip6.as_u16[2] == 0 && target.ip.ip6.as_u16[3] == 0 && + target.ip.ip6.as_u16[4] == 0 && target.ip.ip6.as_u16[5] == 0 && + target.ip.ip6.as_u16[6] == 0 && + clib_net_to_host_u16 (target.ip.ip6.as_u16[7]) == 0x1234), + "target.ip=%U should be dead:beef::1234", format_ip6_address, + &target.ip.ip6); + vec_free (path); + + path = format (0, "example.com/443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' reg-name not supported", path); + vec_free (path); + + path = format (0, "10.10.2.45/443443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "/443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "10.10.2.45/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "10.10.2.45"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "10.10.2.45/443"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + return 0; +} + +static clib_error_t * +test_http_command_fn (vlib_main_t 
*vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + int res = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "authority-form")) + res = http_test_authority_form (vm); + else if (unformat (input, "absolute-form")) + res = http_test_absolute_form (vm); + else if (unformat (input, "parse-masque-host-port")) + res = http_test_parse_masque_host_port (vm); + else if (unformat (input, "all")) + { + if ((res = http_test_authority_form (vm))) + goto done; + if ((res = http_test_absolute_form (vm))) + goto done; + if ((res = http_test_parse_masque_host_port (vm))) + goto done; + } + else + break; + } + +done: + if (res) + return clib_error_return (0, "HTTP unit test failed"); + return 0; +} + +VLIB_CLI_COMMAND (test_http_command) = { + .path = "test http", + .short_help = "http unit tests", + .function = test_http_command_fn, +}; + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "HTTP - Unit Test", + .default_disabled = 1, +}; diff --git a/src/plugins/http_static/builtinurl/json_urls.c b/src/plugins/http_static/builtinurl/json_urls.c index 808893aac79..19c5245e4b2 100644 --- a/src/plugins/http_static/builtinurl/json_urls.c +++ b/src/plugins/http_static/builtinurl/json_urls.c @@ -20,77 +20,68 @@ hss_url_handler_rc_t handle_get_version (hss_url_handler_args_t *args) { u8 *s = 0; + unformat_input_t input; + int verbose = 0; + + if (args->query) + { + unformat_init_vector (&input, args->query); + if (unformat (&input, "verbose=")) + { + if (unformat (&input, "true")) + verbose = 1; + } + } s = format (s, "{\"vpp_details\": {"); s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER); + if (verbose) + { + s = format (s, " \"build_by\": \"%s\",", VPP_BUILD_USER); + s = format (s, " \"build_host\": \"%s\",", VPP_BUILD_HOST); + s = format (s, " \"build_dir\": \"%s\",", VPP_BUILD_TOPDIR); + } s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE); args->data = s; args->data_len = vec_len (s); + 
args->ct = HTTP_CONTENT_APP_JSON; args->free_vec_data = 1; return HSS_URL_HANDLER_OK; } -void -trim_path_from_request (u8 *s, char *path) -{ - u8 *cp; - int trim_length = strlen (path) + 1 /* remove '?' */; - - /* Get rid of the path and question-mark */ - vec_delete (s, trim_length, 0); - - /* Tail trim irrelevant browser info */ - cp = s; - while ((cp - s) < vec_len (s)) - { - if (*cp == ' ') - { - /* - * Makes request a vector which happens to look - * like a c-string. - */ - *cp = 0; - vec_set_len (s, cp - s); - break; - } - cp++; - } -} - hss_url_handler_rc_t handle_get_interface_stats (hss_url_handler_args_t *args) { u8 *s = 0, *stats = 0; - uword *p; - u32 *sw_if_indices = 0; + u32 sw_if_index, *sw_if_indices = 0; vnet_hw_interface_t *hi; vnet_sw_interface_t *si; char *q = "\""; int i; int need_comma = 0; + unformat_input_t input; u8 *format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im, vnet_sw_interface_t * si, int json); vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; /* Get stats for a single interface via http POST */ - if (args->reqtype == HTTP_REQ_POST) + if (args->req_type == HTTP_REQ_POST) { - trim_path_from_request (args->request, "interface_stats.json"); - + unformat_init_vector (&input, args->req_data); /* Find the sw_if_index */ - p = hash_get (im->hw_interface_by_name, args->request); - if (!p) + if (!unformat (&input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) { s = format (s, "{\"interface_stats\": {[\n"); - s = format (s, " \"name\": \"%s\",", args->request); + s = format (s, " \"name\": \"%s\",", args->req_data); s = format (s, " \"error\": \"%s\"", "UnknownInterface"); s = format (s, "]}\n"); goto out; } - vec_add1 (sw_if_indices, p[0]); + vec_add1 (sw_if_indices, sw_if_index); } else /* default, HTTP_BUILTIN_METHOD_GET */ { @@ -127,6 +118,7 @@ handle_get_interface_stats (hss_url_handler_args_t *args) out: args->data = s; args->data_len = vec_len (s); + args->ct = 
HTTP_CONTENT_APP_JSON; args->free_vec_data = 1; vec_free (sw_if_indices); vec_free (stats); @@ -167,6 +159,7 @@ handle_get_interface_list (hss_url_handler_args_t *args) args->data = s; args->data_len = vec_len (s); + args->ct = HTTP_CONTENT_APP_JSON; args->free_vec_data = 1; return HSS_URL_HANDLER_OK; } diff --git a/src/plugins/http_static/http_cache.c b/src/plugins/http_static/http_cache.c index 8b9751b7f78..2e63e335d47 100644 --- a/src/plugins/http_static/http_cache.c +++ b/src/plugins/http_static/http_cache.c @@ -17,6 +17,8 @@ #include <vppinfra/bihash_template.c> #include <vppinfra/unix.h> #include <vlib/vlib.h> +#include <sys/stat.h> +#include <vppinfra/time_range.h> static void hss_cache_lock (hss_cache_t *hc) @@ -153,7 +155,7 @@ lru_update (hss_cache_t *hc, hss_cache_entry_t *ep, f64 now) static void hss_cache_attach_entry (hss_cache_t *hc, u32 ce_index, u8 **data, - u64 *data_len) + u64 *data_len, u8 **last_modified) { hss_cache_entry_t *ce; @@ -162,6 +164,7 @@ hss_cache_attach_entry (hss_cache_t *hc, u32 ce_index, u8 **data, ce->inuse++; *data = ce->data; *data_len = vec_len (ce->data); + *last_modified = ce->last_modified; /* Update the cache entry, mark it in-use */ lru_update (hc, ce, vlib_time_now (vlib_get_main ())); @@ -209,16 +212,15 @@ hss_cache_lookup (hss_cache_t *hc, u8 *path) u32 hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data, - u64 *data_len) + u64 *data_len, u8 **last_modified) { u32 ce_index; - /* Make sure nobody removes the entry while we look it up */ hss_cache_lock (hc); ce_index = hss_cache_lookup (hc, path); if (ce_index != ~0) - hss_cache_attach_entry (hc, ce_index, data, data_len); + hss_cache_attach_entry (hc, ce_index, data, data_len, last_modified); hss_cache_unlock (hc); @@ -260,6 +262,7 @@ hss_cache_do_evictions (hss_cache_t *hc) hc->cache_evictions++; vec_free (ce->filename); vec_free (ce->data); + vec_free (ce->last_modified); if (hc->debug_level > 1) clib_warning ("pool put index %d", ce - hc->cache_pool); 
@@ -271,13 +274,15 @@ hss_cache_do_evictions (hss_cache_t *hc) } u32 -hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len) +hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len, + u8 **last_modified) { BVT (clib_bihash_kv) kv; hss_cache_entry_t *ce; clib_error_t *error; u8 *file_data; u32 ce_index; + struct stat dm; hss_cache_lock (hc); @@ -298,11 +303,17 @@ hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len) pool_get_zero (hc->cache_pool, ce); ce->filename = vec_dup (path); ce->data = file_data; + if (stat ((char *) path, &dm) == 0) + { + ce->last_modified = + format (0, "%U GMT", format_clib_timebase_time, (f64) dm.st_mtime); + } /* Attach cache entry without additional lock */ ce->inuse++; *data = file_data; *data_len = vec_len (file_data); + *last_modified = ce->last_modified; lru_add (hc, ce, vlib_time_now (vlib_get_main ())); hc->cache_size += vec_len (ce->data); @@ -364,6 +375,7 @@ hss_cache_clear (hss_cache_t *hc) hc->cache_evictions++; vec_free (ce->filename); vec_free (ce->data); + vec_free (ce->last_modified); if (hc->debug_level > 1) clib_warning ("pool put index %d", ce - hc->cache_pool); pool_put (hc->cache_pool, ce); @@ -421,19 +433,19 @@ format_hss_cache (u8 *s, va_list *args) { s = format (s, "cache size %lld bytes, limit %lld bytes, evictions %lld", hc->cache_size, hc->cache_limit, hc->cache_evictions); - return 0; + return s; } vm = vlib_get_main (); now = vlib_time_now (vm); - s = format (s, "%U", format_hss_cache_entry, 0 /* header */, now); + s = format (s, "%U\n", format_hss_cache_entry, 0 /* header */, now); for (index = hc->first_index; index != ~0;) { ce = pool_elt_at_index (hc->cache_pool, index); index = ce->next_index; - s = format (s, "%U", format_hss_cache_entry, ce, now); + s = format (s, "%U\n", format_hss_cache_entry, ce, now); } s = format (s, "%40s%12lld", "Total Size", hc->cache_size); diff --git a/src/plugins/http_static/http_cache.h 
b/src/plugins/http_static/http_cache.h index a89ed5e7e94..21f71a924d5 100644 --- a/src/plugins/http_static/http_cache.h +++ b/src/plugins/http_static/http_cache.h @@ -22,6 +22,9 @@ typedef struct hss_cache_entry_ { /** Name of the file */ u8 *filename; + /** Last modified date, format: + * <day-name>, <day> <month> <year> <hour>:<minute>:<second> GMT */ + u8 *last_modified; /** Contents of the file, as a u8 * vector */ u8 *data; /** Last time the cache entry was used */ @@ -58,9 +61,9 @@ typedef struct hss_cache_ } hss_cache_t; u32 hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data, - u64 *data_len); + u64 *data_len, u8 **last_modified); u32 hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, - u64 *data_len); + u64 *data_len, u8 **last_modified); void hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index); u32 hss_cache_clear (hss_cache_t *hc); void hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level); diff --git a/src/plugins/http_static/http_static.api b/src/plugins/http_static/http_static.api index 4d6d8bfe9b5..dd4f513a420 100644 --- a/src/plugins/http_static/http_static.api +++ b/src/plugins/http_static/http_static.api @@ -2,7 +2,8 @@ /** \file This file defines static http server control-plane API messages */ -option version = "2.1.0"; + +option version = "2.2.0"; /** \brief Configure and enable the static http server @param client_index - opaque cookie to identify the sender @@ -16,6 +17,39 @@ option version = "2.1.0"; */ autoreply define http_static_enable { + option deprecated; + + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + /* Typical options */ + u32 fifo_size; + u32 cache_size_limit; + /* Unusual options */ + u32 prealloc_fifos; + u32 private_segment_size; + + /* Root of the html path */ + string www_root[256]; + /* The bind URI */ + string uri[256]; +}; + +/** \brief Configure and enable the 
static http server + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param fifo_size - size (in bytes) of the session FIFOs + @param cache_size_limit - size (in bytes) of the in-memory file data cache + @param max_age - how long a response is considered fresh (in seconds) + @param prealloc_fifos - number of preallocated fifos (usually 0) + @param private_segment_size - fifo segment size (usually 0) + @param www_root - html root path + @param uri - bind URI, defaults to "tcp://0.0.0.0/80" +*/ + +autoreply define http_static_enable_v2 { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -24,6 +58,7 @@ autoreply define http_static_enable { /* Typical options */ u32 fifo_size; u32 cache_size_limit; + u32 max_age [default=600]; /* Unusual options */ u32 prealloc_fifos; u32 private_segment_size; diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c index 8f8fe37b7c1..967b8474af8 100644 --- a/src/plugins/http_static/http_static.c +++ b/src/plugins/http_static/http_static.c @@ -66,7 +66,7 @@ hss_register_url_handler (hss_url_handler_fn fp, const char *url, */ static int hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, - u32 private_segment_size, u8 *www_root, u8 *uri) + u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age) { hss_main_t *hsm = &hss_main; int rv; @@ -77,6 +77,7 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, hsm->private_segment_size = private_segment_size; hsm->www_root = format (0, "%s%c", www_root, 0); hsm->uri = format (0, "%s%c", uri, 0); + hsm->max_age = max_age; if (vec_len (hsm->www_root) < 2) return VNET_API_ERROR_INVALID_VALUE; @@ -84,7 +85,10 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, if (hsm->app_index != ~0) return VNET_API_ERROR_APP_ALREADY_ATTACHED; - vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. 
*/); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (hsm->vlib_main, &args); rv = hss_create (hsm->vlib_main); switch (rv) @@ -110,14 +114,33 @@ static void vl_api_http_static_enable_t_handler mp->uri[ARRAY_LEN (mp->uri) - 1] = 0; mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0; - rv = - hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), - ntohl (mp->prealloc_fifos), - ntohl (mp->private_segment_size), mp->www_root, mp->uri); + rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), + ntohl (mp->prealloc_fifos), + ntohl (mp->private_segment_size), mp->www_root, mp->uri, + HSS_DEFAULT_MAX_AGE); REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_REPLY); } +/* API message handler */ +static void +vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp) +{ + vl_api_http_static_enable_v2_reply_t *rmp; + hss_main_t *hsm = &hss_main; + int rv; + + mp->uri[ARRAY_LEN (mp->uri) - 1] = 0; + mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0; + + rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), + ntohl (mp->prealloc_fifos), + ntohl (mp->private_segment_size), mp->www_root, mp->uri, + ntohl (mp->max_age)); + + REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY); +} + #include <http_static/http_static.api.c> static clib_error_t * hss_api_init (vlib_main_t *vm) diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h index 2850d356b74..fac24db4ec9 100644 --- a/src/plugins/http_static/http_static.h +++ b/src/plugins/http_static/http_static.h @@ -23,6 +23,8 @@ #include <vppinfra/error.h> #include <http_static/http_cache.h> +#define HSS_DEFAULT_MAX_AGE 600 + /** @file http_static.h * Static http server definitions */ @@ -45,13 +47,15 @@ typedef struct /** Data length */ u64 data_len; /** Current data send offset */ - u32 data_offset; + u64 data_offset; /** Need to free data in detach_cache_entry */ int 
free_data; /** File cache pool index */ u32 cache_pool_index; - /** Content type, e.g. text, text/javascript, etc. */ - http_content_type_t content_type; + /** Response header list */ + http_header_t *resp_headers; + /** Serialized headers to send */ + u8 *headers_buf; } hss_session_t; typedef struct hss_session_handle_ @@ -79,8 +83,9 @@ typedef struct hss_url_handler_args_ /* Request args */ struct { - u8 *request; - http_req_method_t reqtype; + u8 *query; + u8 *req_data; + http_req_method_t req_type; }; /* Reply args */ @@ -90,6 +95,7 @@ typedef struct hss_url_handler_args_ uword data_len; u8 free_vec_data; http_status_code_t sc; + http_content_type_t ct; }; }; } hss_url_handler_args_t; @@ -152,6 +158,12 @@ typedef struct u8 enable_url_handlers; /** Max cache size before LRU occurs */ u64 cache_size; + /** How long a response is considered fresh (in seconds) */ + u32 max_age; + /** Formatted max_age: "max-age=xyz" */ + u8 *max_age_formatted; + /** Timeout during which client connection will stay open */ + u32 keepalive_timeout; /** hash table of file extensions to mime types string indices */ uword *mime_type_indices_by_file_extensions; diff --git a/src/plugins/http_static/http_static_test.c b/src/plugins/http_static/http_static_test.c index 3503a1b0812..f701c8b9ee7 100644 --- a/src/plugins/http_static/http_static_test.c +++ b/src/plugins/http_static/http_static_test.c @@ -18,6 +18,7 @@ #include <vlibapi/api.h> #include <vlibmemory/api.h> #include <vppinfra/error.h> +#include <http_static/http_static.h> uword unformat_sw_if_index (unformat_input_t * input, va_list * args); @@ -126,6 +127,96 @@ api_http_static_enable (vat_main_t * vam) return ret; } +static int +api_http_static_enable_v2 (vat_main_t *vam) +{ + unformat_input_t *line_input = vam->input; + vl_api_http_static_enable_v2_t *mp; + u64 tmp; + u8 *www_root = 0; + u8 *uri = 0; + u32 prealloc_fifos = 0; + u32 private_segment_size = 0; + u32 fifo_size = 8 << 10; + u32 cache_size_limit = 1 << 20; + u32 
max_age = HSS_DEFAULT_MAX_AGE; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "www-root %s", &www_root)) + ; + else if (unformat (line_input, "prealloc-fifos %d", &prealloc_fifos)) + ; + else if (unformat (line_input, "private-segment-size %U", + unformat_memory_size, &tmp)) + { + if (tmp >= 0x100000000ULL) + { + errmsg ("private segment size %llu, too large", tmp); + return -99; + } + private_segment_size = (u32) tmp; + } + else if (unformat (line_input, "fifo-size %U", unformat_memory_size, + &tmp)) + { + if (tmp >= 0x100000000ULL) + { + errmsg ("fifo-size %llu, too large", tmp); + return -99; + } + fifo_size = (u32) tmp; + } + else if (unformat (line_input, "cache-size %U", unformat_memory_size, + &tmp)) + { + if (tmp < (128ULL << 10)) + { + errmsg ("cache-size must be at least 128kb"); + return -99; + } + cache_size_limit = (u32) tmp; + } + else if (unformat (line_input, "max-age %d", &max_age)) + ; + else if (unformat (line_input, "uri %s", &uri)) + ; + else + { + errmsg ("unknown input `%U'", format_unformat_error, line_input); + return -99; + } + } + + if (www_root == 0) + { + errmsg ("Must specify www-root"); + return -99; + } + + if (uri == 0) + uri = format (0, "tcp://0.0.0.0/80%c", 0); + + /* Construct the API message */ + M (HTTP_STATIC_ENABLE_V2, mp); + strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); + strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); + mp->fifo_size = ntohl (fifo_size); + mp->cache_size_limit = ntohl (cache_size_limit); + mp->prealloc_fifos = ntohl (prealloc_fifos); + mp->private_segment_size = ntohl (private_segment_size); + mp->max_age = ntohl (max_age); + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + #include <http_static/http_static.api_test.c> /* diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 040cdca9d7a..d1ece75ce37 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -19,6 +19,9 @@ #include <sys/stat.h> #include <unistd.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> + /** @file static_server.c * Static http server, sufficient to serve .html / .css / .js content. */ @@ -55,8 +58,6 @@ hss_session_free (hss_session_t *hs) { hss_main_t *hsm = &hss_main; - pool_put (hsm->sessions[hs->thread_index], hs); - if (CLIB_DEBUG) { u32 save_thread_index; @@ -65,6 +66,8 @@ hss_session_free (hss_session_t *hs) memset (hs, 0xfa, sizeof (*hs)); hs->thread_index = save_thread_index; } + + pool_put (hsm->sessions[hs->thread_index], hs); } /** \brief Disconnect a session @@ -83,48 +86,84 @@ start_send_data (hss_session_t *hs, http_status_code_t status) { http_msg_t msg; session_t *ts; + u8 *headers_buf = 0; + u32 n_enq; + u64 to_send; int rv; ts = session_get (hs->vpp_session_index, hs->thread_index); + if (vec_len (hs->resp_headers)) + { + headers_buf = http_serialize_headers (hs->resp_headers); + vec_free (hs->resp_headers); + msg.data.headers_offset = 0; + msg.data.headers_len = vec_len (headers_buf); + } + else + { + msg.data.headers_offset = 0; + msg.data.headers_len = 0; + } + msg.type = HTTP_MSG_REPLY; msg.code = status; - msg.content_type = hs->content_type; - msg.data.len = hs->data_len; + msg.data.body_len = hs->data_len; + msg.data.len = msg.data.body_len + msg.data.headers_len; - if (hs->data_len > hss_main.use_ptr_thresh) + if (msg.data.len > hss_main.use_ptr_thresh) { msg.data.type = HTTP_MSG_DATA_PTR; rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); ASSERT (rv == sizeof (msg)); + if (msg.data.headers_len) + { + hs->headers_buf = headers_buf; + uword headers = pointer_to_uword 
(hs->headers_buf); + rv = + svm_fifo_enqueue (ts->tx_fifo, sizeof (headers), (u8 *) &headers); + ASSERT (rv == sizeof (headers)); + } + uword data = pointer_to_uword (hs->data); rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data); - ASSERT (rv == sizeof (sizeof (data))); + ASSERT (rv == sizeof (data)); goto done; } msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.body_offset = msg.data.headers_len; rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); ASSERT (rv == sizeof (msg)); - if (!msg.data.len) + if (msg.data.headers_len) + { + rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf); + ASSERT (rv == msg.data.headers_len); + vec_free (headers_buf); + } + + if (!msg.data.body_len) goto done; - rv = svm_fifo_enqueue (ts->tx_fifo, hs->data_len, hs->data); + to_send = hs->data_len; + n_enq = clib_min (svm_fifo_size (ts->tx_fifo), to_send); + + rv = svm_fifo_enqueue (ts->tx_fifo, n_enq, hs->data); - if (rv != hs->data_len) + if (rv < to_send) { - hs->data_offset = rv; + hs->data_offset = (rv > 0) ? 
rv : 0; svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); } done: if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); } __clib_export void @@ -142,6 +181,15 @@ hss_session_send_data (hss_url_handler_args_t *args) hs->data = args->data; hs->data_len = args->data_len; hs->free_data = args->free_vec_data; + + /* Set content type only if we have some response data */ + if (hs->data_len) + { + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (args->ct)); + } + start_send_data (hs, args->sc); } @@ -212,30 +260,27 @@ content_type_from_request (u8 *request) static int try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, - u8 *request) + u8 *target_path, u8 *target_query, u8 *data) { http_status_code_t sc = HTTP_STATUS_OK; hss_url_handler_args_t args = {}; uword *p, *url_table; - http_content_type_t type; int rv; - if (!hsm->enable_url_handlers || !request) + if (!hsm->enable_url_handlers || !target_path) return -1; /* zero-length? try "index.html" */ - if (vec_len (request) == 0) + if (vec_len (target_path) == 0) { - request = format (request, "index.html"); + target_path = format (target_path, "index.html"); } - type = content_type_from_request (request); - /* Look for built-in GET / POST handlers */ url_table = (rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers; - p = hash_get_mem (url_table, request); + p = hash_get_mem (url_table, target_path); if (!p) return -1; @@ -244,10 +289,12 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, hs->cache_pool_index = ~0; if (hsm->debug_level > 0) - clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", request); + clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? 
"GET" : "POST", + target_path); - args.reqtype = rt; - args.request = request; + args.req_type = rt; + args.query = target_query; + args.req_data = data; args.sh.thread_index = hs->thread_index; args.sh.session_index = hs->session_index; @@ -260,18 +307,25 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, if (rv == HSS_URL_HANDLER_ERROR) { clib_warning ("builtin handler %llx hit on %s '%s' but failed!", p[0], - (rt == HTTP_REQ_GET) ? "GET" : "POST", request); - sc = HTTP_STATUS_NOT_FOUND; + (rt == HTTP_REQ_GET) ? "GET" : "POST", target_path); + sc = HTTP_STATUS_BAD_GATEWAY; } hs->data = args.data; hs->data_len = args.data_len; hs->free_data = args.free_vec_data; - hs->content_type = type; + + /* Set content type only if we have some response data */ + if (hs->data_len) + { + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (args.ct)); + } start_send_data (hs, sc); - if (!hs->data) + if (!hs->data_len) hss_session_disconnect_transport (hs); return 0; @@ -335,18 +389,20 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) } redirect = - format (0, - "Location: http%s://%U%s%s\r\n\r\n", - proto == TRANSPORT_PROTO_TLS ? "s" : "", format_ip46_address, - &endpt.ip, endpt.is_ip4, print_port ? port_str : (u8 *) "", path); + format (0, "http%s://%U%s%s", proto == TRANSPORT_PROTO_TLS ? "s" : "", + format_ip46_address, &endpt.ip, endpt.is_ip4, + print_port ? 
port_str : (u8 *) "", path); if (hsm->debug_level > 0) clib_warning ("redirect: %s", redirect); vec_free (port_str); - hs->data = redirect; - hs->data_len = vec_len (redirect); + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_LOCATION), + (const char *) redirect, vec_len (redirect)); + hs->data = redirect; /* TODO: find better way */ + hs->data_len = 0; hs->free_data = 1; return HTTP_STATUS_MOVED; @@ -354,29 +410,28 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) static int try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, - u8 *request) + u8 *target) { http_status_code_t sc = HTTP_STATUS_OK; - u8 *path; + u8 *path, *sanitized_path; u32 ce_index; http_content_type_t type; + u8 *last_modified; /* Feature not enabled */ if (!hsm->www_root) return -1; - type = content_type_from_request (request); + /* Remove dot segments to prevent path traversal */ + sanitized_path = http_path_remove_dot_segments (target); /* * Construct the file to open - * Browsers are capable of sporadically including a leading '/' */ - if (!request) + if (!target) path = format (0, "%s%c", hsm->www_root, 0); - else if (request[0] == '/') - path = format (0, "%s%s%c", hsm->www_root, request, 0); else - path = format (0, "%s/%s%c", hsm->www_root, request, 0); + path = format (0, "%s/%s%c", hsm->www_root, sanitized_path, 0); if (hsm->debug_level > 0) clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? 
"GET" : "POST", path); @@ -386,8 +441,8 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, hs->data_offset = 0; - ce_index = - hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data, &hs->data_len); + ce_index = hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data, + &hs->data_len, &last_modified); if (ce_index == ~0) { if (!file_path_is_valid (path)) @@ -406,8 +461,8 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, sc = try_index_file (hsm, hs, path); goto done; } - ce_index = - hss_cache_add_and_attach (&hsm->cache, path, &hs->data, &hs->data_len); + ce_index = hss_cache_add_and_attach (&hsm->cache, path, &hs->data, + &hs->data_len, &last_modified); if (ce_index == ~0) { sc = HTTP_STATUS_INTERNAL_ERROR; @@ -418,43 +473,61 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, hs->path = path; hs->cache_pool_index = ce_index; -done: + /* Set following headers only for happy path: + * Content-Type + * Cache-Control max-age + */ + type = content_type_from_request (target); + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (type)); + http_add_header ( + &hs->resp_headers, http_header_name_token (HTTP_HEADER_CACHE_CONTROL), + (const char *) hsm->max_age_formatted, vec_len (hsm->max_age_formatted)); + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_LAST_MODIFIED), + (const char *) last_modified, vec_len (last_modified)); - hs->content_type = type; +done: + vec_free (sanitized_path); start_send_data (hs, sc); - if (!hs->data) + if (!hs->data_len) hss_session_disconnect_transport (hs); return 0; } -static int -handle_request (hss_session_t *hs, http_req_method_t rt, u8 *request) +static void +handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path, + u8 *target_query, u8 *data) { hss_main_t *hsm = &hss_main; - if (!try_url_handler (hsm, hs, rt, request)) - return 0; + if 
(!try_url_handler (hsm, hs, rt, target_path, target_query, data)) + return; - if (!try_file_handler (hsm, hs, rt, request)) - return 0; + if (!try_file_handler (hsm, hs, rt, target_path)) + return; /* Handler did not find anything return 404 */ start_send_data (hs, HTTP_STATUS_NOT_FOUND); hss_session_disconnect_transport (hs); - - return 0; } static int hss_ts_rx_callback (session_t *ts) { hss_session_t *hs; - u8 *request = 0; + u8 *target_path = 0, *target_query = 0, *data = 0; http_msg_t msg; int rv; hs = hss_session_get (ts->thread_index, ts->opaque); + if (hs->free_data) + vec_free (hs->data); + hs->data = 0; + hs->resp_headers = 0; + vec_free (hs->headers_buf); /* Read the http message header */ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); @@ -463,26 +536,66 @@ hss_ts_rx_callback (session_t *ts) if (msg.type != HTTP_MSG_REQUEST || (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST)) { - hs->data = 0; + http_add_header (&hs->resp_headers, + http_header_name_token (HTTP_HEADER_ALLOW), + http_token_lit ("GET, POST")); start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); - return 0; + goto done; } - /* Read request */ - if (msg.data.len) + if (msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) { - vec_validate (request, msg.data.len - 1); - rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request); - ASSERT (rv == msg.data.len); - /* request must be a proper C-string in addition to a vector */ - vec_add1 (request, 0); + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + goto done; } - /* Find and send data */ - handle_request (hs, msg.method_type, request); + /* Read target path */ + if (msg.data.target_path_len) + { + vec_validate (target_path, msg.data.target_path_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, + msg.data.target_path_len, target_path); + ASSERT (rv == msg.data.target_path_len); + if (http_validate_abs_path_syntax (target_path, 0)) + { + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + 
goto done; + } + /* Target path must be a proper C-string in addition to a vector */ + vec_add1 (target_path, 0); + } - vec_free (request); + /* Read target query */ + if (msg.data.target_query_len) + { + vec_validate (target_query, msg.data.target_query_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset, + msg.data.target_query_len, target_query); + ASSERT (rv == msg.data.target_query_len); + if (http_validate_query_syntax (target_query, 0)) + { + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + goto done; + } + } + /* Read body */ + if (msg.data.body_len) + { + vec_validate (data, msg.data.body_len - 1); + rv = svm_fifo_peek (ts->rx_fifo, msg.data.body_offset, msg.data.body_len, + data); + ASSERT (rv == msg.data.body_len); + } + + /* Find and send data */ + handle_request (hs, msg.method_type, target_path, target_query, data); + +done: + vec_free (target_path); + vec_free (target_query); + vec_free (data); + svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len); return 0; } @@ -490,7 +603,8 @@ static int hss_ts_tx_callback (session_t *ts) { hss_session_t *hs; - u32 to_send; + u32 n_enq; + u64 to_send; int rv; hs = hss_session_get (ts->thread_index, ts->opaque); @@ -498,7 +612,9 @@ hss_ts_tx_callback (session_t *ts) return 0; to_send = hs->data_len - hs->data_offset; - rv = svm_fifo_enqueue (ts->tx_fifo, to_send, hs->data + hs->data_offset); + n_enq = clib_min (svm_fifo_size (ts->tx_fifo), to_send); + + rv = svm_fifo_enqueue (ts->tx_fifo, n_enq, hs->data + hs->data_offset); if (rv <= 0) { @@ -513,7 +629,7 @@ hss_ts_tx_callback (session_t *ts) } if (svm_fifo_set_event (ts->tx_fifo)) - session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); return 0; } @@ -607,6 +723,7 @@ hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf) hs->data = 0; hs->data_offset = 0; hs->free_data = 0; + vec_free (hs->headers_buf); vec_free (hs->path); hss_session_free (hs); @@ -630,7 +747,7 @@ 
hss_attach () hss_main_t *hsm = &hss_main; u64 options[APP_OPTIONS_N_OPTIONS]; vnet_app_attach_args_t _a, *a = &_a; - u32 segment_size = 128 << 20; + u64 segment_size = 128 << 20; clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -687,6 +804,7 @@ hss_listen (void) vnet_listen_args_t _a, *a = &_a; char *uri = "tcp://0.0.0.0/80"; u8 need_crypto; + transport_endpt_ext_cfg_t *ext_cfg; int rv; clib_memset (a, 0, sizeof (*a)); @@ -703,17 +821,21 @@ hss_listen (void) sep.transport_proto = TRANSPORT_PROTO_HTTP; clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + ext_cfg->opaque = hsm->keepalive_timeout; + if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = hsm->ckpair_index; + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hsm->ckpair_index; } rv = vnet_listen (a); - if (need_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } @@ -757,6 +879,8 @@ hss_create (vlib_main_t *vm) if (hsm->enable_url_handlers) hss_url_handlers_init (hsm); + hsm->max_age_formatted = format (0, "max-age=%d", hsm->max_age); + return 0; } @@ -777,6 +901,8 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, hsm->private_segment_size = 0; hsm->fifo_size = 0; hsm->cache_size = 10 << 20; + hsm->max_age = HSS_DEFAULT_MAX_AGE; + hsm->keepalive_timeout = 60; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -801,6 +927,9 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else if (unformat (line_input, "debug %d", &hsm->debug_level)) ; + else if (unformat (line_input, "keepalive-timeout %d", + &hsm->keepalive_timeout)) + ; else if (unformat (line_input, "debug")) hsm->debug_level = 1; else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size, @@ -808,6 +937,8 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else if (unformat (line_input, "url-handlers")) hsm->enable_url_handlers = 1; + else if (unformat (line_input, "max-age %d", &hsm->max_age)) + ; else { error = clib_error_return (0, "unknown input `%U'", @@ -836,7 +967,10 @@ no_input: goto done; } - vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ ); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); if ((rv = hss_create (vm))) { @@ -859,14 +993,16 @@ done: * http static server www-root /tmp/www uri tcp://0.0.0.0/80 cache-size 2m * @cliend * @cliexcmd{http static server www-root <path> [prealloc-fios <nn>] - * [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]} + * [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>] + * [keepalive-timeout <nn>]} ?*/ VLIB_CLI_COMMAND (hss_create_command, static) = { .path = "http static server", .short_help = "http static server www-root <path> [prealloc-fifos <nn>]\n" - "[private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]\n" - "[ptr-thresh <nn>] [url-handlers] [debug [nn]]\n", + "[private-segment-size <nnMG>] [fifo-size <nbytes>] [max-age <nseconds>]\n" + "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n" + "[keepalive-timeout <nn>]\n", .function = hss_create_command_fn, }; @@ -876,7 +1012,7 @@ format_hss_session (u8 *s, va_list *args) hss_session_t *hs = va_arg (*args, hss_session_t *); int __clib_unused verbose = 
va_arg (*args, int); - s = format (s, "\n path %s, data length %u, data_offset %u", + s = format (s, "\n path %s, data length %llu, data_offset %llu", hs->path ? hs->path : (u8 *) "[none]", hs->data_len, hs->data_offset); return s; diff --git a/src/plugins/ikev2/CMakeLists.txt b/src/plugins/ikev2/CMakeLists.txt index 568271ed7d9..dd2b49d6651 100644 --- a/src/plugins/ikev2/CMakeLists.txt +++ b/src/plugins/ikev2/CMakeLists.txt @@ -27,6 +27,7 @@ add_vpp_plugin(ikev2 ikev2_crypto.c ikev2_format.c ikev2_payload.c + ikev2_handoff.c API_FILES ikev2_types.api diff --git a/src/plugins/ikev2/ikev2.api b/src/plugins/ikev2/ikev2.api index de276e7f3ea..e2ff8fb8268 100644 --- a/src/plugins/ikev2/ikev2.api +++ b/src/plugins/ikev2/ikev2.api @@ -658,6 +658,12 @@ counters ikev2 { units "packets"; description "IKE AUTH SA requests received"; }; + handoff { + severity info; + type counter64; + units "packets"; + description "IKE packets handoff"; + }; }; paths { "/err/ikev2-ip4" "ike"; diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c index 9bea2c96d12..f66469a24d1 100644 --- a/src/plugins/ikev2/ikev2.c +++ b/src/plugins/ikev2/ikev2.c @@ -97,6 +97,7 @@ format_ikev2_gen_sa_error (u8 * s, va_list * args) typedef enum { IKEV2_NEXT_IP4_LOOKUP, + IKEV2_NEXT_IP4_HANDOFF, IKEV2_NEXT_IP4_ERROR_DROP, IKEV2_IP4_N_NEXT, } ikev2_ip4_next_t; @@ -104,6 +105,7 @@ typedef enum typedef enum { IKEV2_NEXT_IP6_LOOKUP, + IKEV2_NEXT_IP6_HANDOFF, IKEV2_NEXT_IP6_ERROR_DROP, IKEV2_IP6_N_NEXT, } ikev2_ip6_next_t; @@ -3187,6 +3189,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; ikev2_main_per_thread_data_t *ptd = ikev2_get_per_thread_data (); + u32 thread_index = vm->thread_index; ikev2_stats_t _stats, *stats = &_stats; int res; @@ -3213,6 +3216,14 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node, int ip_hdr_sz = 0; int is_req = 0; + if (PREDICT_TRUE (thread_index != 
km->handoff_thread)) + { + vlib_node_increment_counter (vm, node->node_index, + IKEV2_ERROR_HANDOFF, 1); + + next[0] = is_ip4 ? IKEV2_NEXT_IP4_HANDOFF : IKEV2_NEXT_IP6_HANDOFF; + goto out; + } if (natt) { u8 *ptr = vlib_buffer_get_current (b0); @@ -3723,6 +3734,8 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node, ikev2_delete_sa (ptd, sa0); } + + out: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { @@ -3775,6 +3788,7 @@ VLIB_REGISTER_NODE (ikev2_node_ip4,static) = { .n_next_nodes = IKEV2_IP4_N_NEXT, .next_nodes = { [IKEV2_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IKEV2_NEXT_IP4_HANDOFF] = "ikev2-ip4-handoff", [IKEV2_NEXT_IP4_ERROR_DROP] = "error-drop", }, }; @@ -3792,6 +3806,7 @@ VLIB_REGISTER_NODE (ikev2_node_ip4_natt,static) = { .n_next_nodes = IKEV2_IP4_N_NEXT, .next_nodes = { [IKEV2_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IKEV2_NEXT_IP4_HANDOFF] = "ikev2-ip4-natt-handoff", [IKEV2_NEXT_IP4_ERROR_DROP] = "error-drop", }, }; @@ -3809,6 +3824,7 @@ VLIB_REGISTER_NODE (ikev2_node_ip6,static) = { .n_next_nodes = IKEV2_IP6_N_NEXT, .next_nodes = { [IKEV2_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IKEV2_NEXT_IP4_HANDOFF] = "ikev2-ip6-handoff", [IKEV2_NEXT_IP6_ERROR_DROP] = "error-drop", }, }; @@ -5126,6 +5142,8 @@ ikev2_init (vlib_main_t * vm) km->liveness_period = IKEV2_LIVENESS_PERIOD_CHECK; km->liveness_max_retries = IKEV2_LIVENESS_RETRIES; + km->handoff_thread = vlib_num_workers () ? 
1 : 0; + return 0; } @@ -5133,6 +5151,31 @@ VLIB_INIT_FUNCTION (ikev2_init) = { .runs_after = VLIB_INITS ("ipsec_init", "ipsec_punt_init"), }; +static clib_error_t * +ikev2_config (vlib_main_t *vm, unformat_input_t *input) +{ + ikev2_main_t *km = &ikev2_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "handoff-thread %d", &km->handoff_thread)) + { + if (km->handoff_thread > vlib_num_workers ()) + { + return clib_error_return (0, "wrong handoff-thread %d", + km->handoff_thread); + } + } + else + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + } + + return 0; +} + +VLIB_CONFIG_FUNCTION (ikev2_config, "ikev2"); + static u8 ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa, u8 del_old_ids) @@ -5447,6 +5490,7 @@ ikev2_send_informational_request (ikev2_sa_t * sa) } dp = sa->dst_port ? sa->dst_port : ikev2_get_port (sa); + ikev2_send_ike (km->vlib_main, src, dst, bi0, len, ikev2_get_port (sa), dp, sa->sw_if_index); } @@ -5625,6 +5669,15 @@ ikev2_lazy_init (ikev2_main_t *km) if (!km->dns_resolve_name_ptr) ikev2_log_error ("cannot load symbols from dns plugin"); + km->handoff_ip4_fq_index = + vlib_frame_queue_main_init (ikev2_node_ip4.index, 0); + + km->handoff_ip4_natt_fq_index = + vlib_frame_queue_main_init (ikev2_node_ip4_natt.index, 0); + + km->handoff_ip6_fq_index = + vlib_frame_queue_main_init (ikev2_node_ip6.index, 0); + /* wake up ikev2 process */ vlib_process_signal_event (vlib_get_first_main (), ikev2_mngr_process_node.index, 0, 0); diff --git a/src/plugins/ikev2/ikev2_api.c b/src/plugins/ikev2/ikev2_api.c index a3e71668126..e09bde3cbe2 100644 --- a/src/plugins/ikev2/ikev2_api.c +++ b/src/plugins/ikev2/ikev2_api.c @@ -173,7 +173,7 @@ send_profile (ikev2_profile_t * profile, vl_api_registration_t * reg, rmp->profile.lifetime_jitter = profile->lifetime_jitter; rmp->profile.handover = profile->handover; - vl_api_ikev2_profile_t_endian (&rmp->profile); + 
vl_api_ikev2_profile_t_endian (&rmp->profile, 1 /* to network */); vl_api_send_msg (reg, (u8 *) rmp); } @@ -291,7 +291,7 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index) ikev2_copy_stats (&rsa->stats, &sa->stats); - vl_api_ikev2_sa_t_endian(rsa); + vl_api_ikev2_sa_t_endian (rsa, 1 /* to network */); }); } @@ -382,7 +382,7 @@ send_sa_v2 (ikev2_sa_t *sa, vl_api_ikev2_sa_v2_dump_t *mp, u32 api_sa_index) ikev2_copy_stats (&rsa->stats, &sa->stats); - vl_api_ikev2_sa_v2_t_endian (rsa); + vl_api_ikev2_sa_v2_t_endian (rsa, 1 /* to network */); }); } @@ -476,7 +476,7 @@ send_sa_v3 (ikev2_sa_t *sa, vl_api_ikev2_sa_v3_dump_t *mp, u32 api_sa_index) ikev2_copy_stats (&rsa->stats, &sa->stats); - vl_api_ikev2_sa_v3_t_endian (rsa); + vl_api_ikev2_sa_v3_t_endian (rsa, 1 /* to network */); }); } @@ -549,7 +549,7 @@ send_child_sa (ikev2_child_sa_t * child, k->sk_ar_len); } - vl_api_ikev2_child_sa_t_endian (&rmp->child_sa); + vl_api_ikev2_child_sa_t_endian (&rmp->child_sa, 1 /* to network */); }); } @@ -577,6 +577,7 @@ vl_api_ikev2_child_sa_dump_t_handler (vl_api_ikev2_child_sa_dump_t * mp) vec_foreach (child, sa->childs) { u32 child_sa_index = child - sa->childs; + sai = ikev2_encode_sa_index (sai, tkm - im->per_thread_data); send_child_sa (child, mp, child_sa_index, sai); } } @@ -628,7 +629,7 @@ send_child_sa_v2 (ikev2_child_sa_t *child, vl_api_ikev2_child_sa_v2_dump_t *mp, clib_memcpy (&k->sk_ar, child->sk_ar, k->sk_ar_len); } - vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa); + vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa, 1 /* to network */); }); } @@ -700,7 +701,7 @@ static void rmp->ts.sa_index = api_sa_index; rmp->ts.child_sa_index = child_sa_index; cp_ts (&rmp->ts, ts, mp->is_initiator); - vl_api_ikev2_ts_t_endian (&rmp->ts); + vl_api_ikev2_ts_t_endian (&rmp->ts, 1 /* to network */); }); } } diff --git a/src/plugins/ikev2/ikev2_crypto.c b/src/plugins/ikev2/ikev2_crypto.c index 3d4ad0a28ed..58167e2322e 100644 --- 
a/src/plugins/ikev2/ikev2_crypto.c +++ b/src/plugins/ikev2/ikev2_crypto.c @@ -481,15 +481,14 @@ ikev2_encrypt_data (ikev2_main_per_thread_data_t * ptd, ikev2_sa_t * sa, int BN_bn2binpad (const BIGNUM * a, unsigned char *to, int tolen) { - int r = BN_bn2bin (a, to); + int r = BN_num_bytes (a); ASSERT (tolen >= r); int pad = tolen - r; if (pad) { - vec_insert (to, pad, 0); clib_memset (to, 0, pad); - vec_dec_len (to, pad); } + BN_bn2bin (a, to + pad); return tolen; } #endif diff --git a/src/plugins/ikev2/ikev2_handoff.c b/src/plugins/ikev2/ikev2_handoff.c new file mode 100644 index 00000000000..8f55985bce8 --- /dev/null +++ b/src/plugins/ikev2/ikev2_handoff.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <ikev2/ikev2_priv.h> + +extern ikev2_main_t ikev2_main; + +#define foreach_ikev2_handoff_error _ (CONGESTION_DROP, "congestion drop") + +typedef enum +{ +#define _(sym, str) IKEV2_HANDOFF_ERROR_##sym, + foreach_ikev2_handoff_error +#undef _ + IKEV2_HANDOFF_N_ERROR, +} ikev2_handoff_error_t; + +static char *ikev2_handoff_error_strings[] = { +#define _(sym, string) string, + foreach_ikev2_handoff_error +#undef _ +}; + +typedef struct ikev2_handoff_trace_t_ +{ + u32 current_worker_index; + u32 next_worker_index; +} ikev2_handoff_trace_t; + +u8 * +format_ikev2_handoff_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + ikev2_handoff_trace_t *t = va_arg (*args, ikev2_handoff_trace_t *); + s = format (s, "ikev2 handoff %d to %d", t->current_worker_index, + t->next_worker_index); + return s; +} + +static_always_inline uword +ikev2_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, u32 fq_index) +{ + ikev2_main_t *km = &ikev2_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u16 thread_indices[VLIB_FRAME_SIZE], *ti; + u32 n_enq, n_left_from, *from; + u32 this_thread; + + this_thread = vm->thread_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left_from); + + b = bufs; + ti = thread_indices; + + while (n_left_from > 0) + { + ti[0] = km->handoff_thread; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + ikev2_handoff_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->current_worker_index = this_thread; + t->next_worker_index = ti[0]; + } + n_left_from--; + ti++; + b++; + } + + n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from, + thread_indices, frame->n_vectors, 1); + + if (n_enq < frame->n_vectors) + 
vlib_node_increment_counter (vm, node->node_index, + IKEV2_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); + return n_enq; +} + +/* Do worker handoff based on the ikev2's thread_index */ +VLIB_NODE_FN (ikev2_ip4_handoff) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + ikev2_main_t *km = &ikev2_main; + + return ikev2_handoff_inline (vm, node, from_frame, km->handoff_ip4_fq_index); +} + +VLIB_NODE_FN (ikev2_ip4_natt_handoff) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + ikev2_main_t *km = &ikev2_main; + + return ikev2_handoff_inline (vm, node, from_frame, + km->handoff_ip4_natt_fq_index); +} + +VLIB_NODE_FN (ikev2_ip6_handoff) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + ikev2_main_t *km = &ikev2_main; + + return ikev2_handoff_inline (vm, node, from_frame, km->handoff_ip6_fq_index); +} + +VLIB_REGISTER_NODE (ikev2_ip4_handoff) = { + .name = "ikev2-ip4-handoff", + .vector_size = sizeof (u32), + .format_trace = format_ikev2_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(ikev2_handoff_error_strings), + .error_strings = ikev2_handoff_error_strings, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ikev2_ip4_natt_handoff) = { + .name = "ikev2-ip4-natt-handoff", + .vector_size = sizeof (u32), + .format_trace = format_ikev2_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(ikev2_handoff_error_strings), + .error_strings = ikev2_handoff_error_strings, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ikev2_ip6_handoff) = { + .name = "ikev2-ip6-handoff", + .vector_size = sizeof (u32), + .format_trace = format_ikev2_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(ikev2_handoff_error_strings), + .error_strings = ikev2_handoff_error_strings, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; diff 
--git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h index 0639809e9b1..96313182552 100644 --- a/src/plugins/ikev2/ikev2_priv.h +++ b/src/plugins/ikev2/ikev2_priv.h @@ -571,6 +571,12 @@ typedef struct /* punt handle for IPsec NATT IPSEC_PUNT_IP4_SPI_UDP_0 reason */ vlib_punt_hdl_t punt_hdl; + /** Worker handoff */ + u32 handoff_thread; + u32 handoff_ip4_fq_index; + u32 handoff_ip4_natt_fq_index; + u32 handoff_ip6_fq_index; + } ikev2_main_t; extern ikev2_main_t ikev2_main; diff --git a/src/plugins/ikev2/ikev2_test.c b/src/plugins/ikev2/ikev2_test.c index 5682d7058f6..93683a5b5dc 100644 --- a/src/plugins/ikev2/ikev2_test.c +++ b/src/plugins/ikev2/ikev2_test.c @@ -391,7 +391,7 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp) ip_address_t iaddr; ip_address_t raddr; vl_api_ikev2_keys_t *k = &sa->keys; - vl_api_ikev2_sa_t_endian (sa); + vl_api_ikev2_sa_t_endian (sa, 0 /* from network */); ip_address_decode2 (&sa->iaddr, &iaddr); ip_address_decode2 (&sa->raddr, &raddr); @@ -461,7 +461,7 @@ vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp) ip_address_t iaddr; ip_address_t raddr; vl_api_ikev2_keys_t *k = &sa->keys; - vl_api_ikev2_sa_v2_t_endian (sa); + vl_api_ikev2_sa_v2_t_endian (sa, 0 /* from network */); ip_address_decode2 (&sa->iaddr, &iaddr); ip_address_decode2 (&sa->raddr, &raddr); @@ -533,7 +533,7 @@ vl_api_ikev2_sa_v3_details_t_handler (vl_api_ikev2_sa_v3_details_t *mp) ip_address_t iaddr; ip_address_t raddr; vl_api_ikev2_keys_t *k = &sa->keys; - vl_api_ikev2_sa_v3_t_endian (sa); + vl_api_ikev2_sa_v3_t_endian (sa, 0 /* from network */); ip_address_decode2 (&sa->iaddr, &iaddr); ip_address_decode2 (&sa->raddr, &raddr); @@ -619,7 +619,7 @@ vl_api_ikev2_child_sa_details_t_handler (vl_api_ikev2_child_sa_details_t * mp) vat_main_t *vam = ikev2_test_main.vat_main; vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa; vl_api_ikev2_keys_t *k = &child_sa->keys; - vl_api_ikev2_child_sa_t_endian (child_sa); + 
vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */); fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index); @@ -696,7 +696,7 @@ vl_api_ikev2_child_sa_v2_details_t_handler ( vat_main_t *vam = ikev2_test_main.vat_main; vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa; vl_api_ikev2_keys_t *k = &child_sa->keys; - vl_api_ikev2_child_sa_t_endian (child_sa); + vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */); fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index); @@ -784,7 +784,7 @@ static void vat_main_t *vam = ikev2_test_main.vat_main; vl_api_ikev2_ts_t *ts = &mp->ts; ip_address_t start_addr, end_addr; - vl_api_ikev2_ts_t_endian (ts); + vl_api_ikev2_ts_t_endian (ts, 0 /* from network */); ip_address_decode2 (&ts->start_addr, &start_addr); ip_address_decode2 (&ts->end_addr, &end_addr); diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c index e1f4a6a1d69..61665ad4146 100644 --- a/src/plugins/linux-cp/lcp_interface.c +++ b/src/plugins/linux-cp/lcp_interface.c @@ -258,7 +258,11 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name, vec_validate_init_empty (lip_db_by_host, host_sw_if_index, INDEX_INVALID); lip_db_by_phy[phy_sw_if_index] = lipi; lip_db_by_host[host_sw_if_index] = lipi; - hash_set (lip_db_by_vif, host_index, lipi); + + if (clib_strcmp ((char *) ns, (char *) lcp_get_default_ns ()) == 0) + { + hash_set (lip_db_by_vif, host_index, lipi); + } lip->lip_host_sw_if_index = host_sw_if_index; lip->lip_phy_sw_if_index = phy_sw_if_index; @@ -997,7 +1001,8 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name, clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)), .num_tx_queues = clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)), - .id = hw->hw_if_index, + .id = ~0, + .auto_id_offset = 4096, .sw_if_index = ~0, .rx_ring_sz = 256, .tx_ring_sz = 256, @@ -1094,7 +1099,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name, * This controls 
whether the host can RX/TX. */ sw = vnet_get_sw_interface (vnm, phy_sw_if_index); - lip = lcp_itf_pair_get (lcp_itf_pair_find_by_vif (vif_index)); + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index)); LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u", format_lcp_itf_pair, lip, sw->flags, hw->flags); vnet_sw_interface_admin_up (vnm, host_sw_if_index); diff --git a/src/plugins/mactime/builtins.c b/src/plugins/mactime/builtins.c index c487d0375bf..f726d3c03ed 100644 --- a/src/plugins/mactime/builtins.c +++ b/src/plugins/mactime/builtins.c @@ -147,6 +147,7 @@ handle_get_mactime (hss_url_handler_args_t *args) args->data = s; args->data_len = vec_len (s); + args->ct = HTTP_CONTENT_APP_JSON; args->free_vec_data = 1; return HSS_URL_HANDLER_OK; } diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 51853d619e6..f8d894a013a 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -151,9 +151,8 @@ ip6_map_t_icmp (vlib_main_t * vm, vnet_buffer (p0)->map_t.map_domain_index); ctx0.d = d0; ctx0.sender_port = 0; - if (!ip6_get_port - (vm, p0, ip60, p0->current_length, NULL, &ctx0.sender_port, - NULL, NULL, NULL, NULL)) + if (!ip6_get_port (vm, p0, ip60, p0->current_length, NULL, + &ctx0.sender_port, NULL, NULL, NULL, NULL, NULL)) { // In case of 1:1 mapping, we don't care about the port if (!(d0->ea_bits_len == 0 && d0->rules)) diff --git a/src/plugins/marvell/CMakeLists.txt b/src/plugins/marvell/CMakeLists.txt deleted file mode 100644 index b48ac72aa08..00000000000 --- a/src/plugins/marvell/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") - return() -endif() - -find_path(MUSDK_INCLUDE_DIR NAMES mv_std.h) -find_library(MUSDK_LIB NAMES libmusdk.a) - -if(MUSDK_INCLUDE_DIR AND MUSDK_LIB) - get_filename_component(MUSDK_LIB_DIR ${MUSDK_LIB} DIRECTORY) - set(MUSDK_LINK_FLAGS "-Wl,--whole-archive,${MUSDK_LIB_DIR}/libmusdk.a,--no-whole-archive") - add_vpp_plugin(marvell - SOURCES - plugin.c - pp2/cli.c - pp2/format.c - pp2/input.c - pp2/output.c - pp2/pp2.c - pp2/pp2_api.c - - API_FILES - pp2/pp2.api - - API_TEST_SOURCES - pp2/pp2_test.c - - LINK_FLAGS - ${MUSDK_LINK_FLAGS} - ) - include_directories(${MUSDK_INCLUDE_DIR}) - message(STATUS "Found Marvell MUSDK in ${MUSDK_INCLUDE_DIR}") -else() - message(WARNING "Marvell MUSDK not found - marvell_plugin disabled") -endif() diff --git a/src/plugins/marvell/README.rst b/src/plugins/marvell/README.rst deleted file mode 100644 index 19cf1c49d0e..00000000000 --- a/src/plugins/marvell/README.rst +++ /dev/null @@ -1,85 +0,0 @@ -Marvell device plugin -===================== - -Overview --------- - -This plugins provides native device support for Marvell PP2 network -device, by use of Marvell Usermode SDK -(`MUSDK <https://github.com/MarvellEmbeddedProcessors/musdk-marvell>`__). -Code is developed and tested on -`MACCHIATObin <http://macchiatobin.net>`__ board. 
- -Prerequisites -------------- - -Plugins depends on installed MUSDK and Marvell provided linux -`kernel <https://github.com/MarvellEmbeddedProcessors/linux-marvell>`__ -with MUSDK provided kernel patches (see ``patches/linux`` in musdk repo -and relevant documentation. Kernel version used: **4.14.22 -armada-18.09.3** MUSDK version used: **armada-18.09.3** Following kernel -modules from MUSDK must be loaded for plugin to work: \* -``musdk_cma.ko`` \* ``mv_pp_uio.ko`` - -Musdk 18.09.3 compilation steps -------------------------------- - -:: - - ./bootstrap - ./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no - sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c - sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c - make - sudo make install - -Usage ------ - -Interface Creation -~~~~~~~~~~~~~~~~~~ - -Interfaces are dynamically created with following CLI: - -:: - - create interface marvell pp2 name eth0 - set interface state mv-ppio-0/0 up - -Where ``eth0`` is linux interface name and ``mv-ppio-X/Y`` is VPP -interface name where X is PP2 device ID and Y is PPIO ID Interface needs -to be assigned to MUSDK in FDT configuration and linux interface state -must be up. - -Interface Deletion -~~~~~~~~~~~~~~~~~~ - -Interface can be deleted with following CLI: - -:: - - delete interface marvell pp2 <interface name> - -Interface Statistics -~~~~~~~~~~~~~~~~~~~~ - -Interface statistics can be displayed with -``sh hardware-interface mv-ppio0/0`` command. - -Interaction with DPDK plugin -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This plugin doesn’t have any dependency on DPDK or DPDK plugin but it -can work with DPDK plugin enabled or disabled. It is observed that -performance is better around 30% when DPDK plugin is disabled, as DPDK -plugin registers own buffer manager, which needs to deal with additional -metadata in each packet. 
- -DPKD plugin can be disabled by adding following config to the -startup.conf. - -:: - - plugins { - dpdk_plugin.so { disable } - } diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c deleted file mode 100644 index f4ecb1873c9..00000000000 --- a/src/plugins/marvell/pp2/cli.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <inttypes.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> - -#include <marvell/pp2/pp2.h> - -static clib_error_t * -mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - mrvl_pp2_create_if_args_t args = { 0 }; - uint val; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &args.name)) - ; - else if (unformat (line_input, "rx-queue-size %u", &val)) - args.rx_q_sz = val; - else if (unformat (line_input, "tx-queue-size %u", &val)) - args.tx_q_sz = val; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - unformat_free (line_input); - - - mrvl_pp2_create_if (&args); - - vec_free (args.name); - - return args.error; -} - -VLIB_CLI_COMMAND (mrvl_pp2_create_command, static) = { - .path = "create interface marvell pp2", - .short_help = "create interface marvell pp2 [name <ifname>] [rx-queue-size slots] [tx-queue-size slots]", - .function = mrvl_pp2_create_command_fn, -}; - -static clib_error_t * -mrvl_pp2_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 sw_if_index = ~0; - vnet_hw_interface_t *hw; - mrvl_pp2_main_t *mm = &mrvl_pp2_main; - mrvl_pp2_if_t *dif; - vnet_main_t *vnm = vnet_get_main (); - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "sw_if_index %d", &sw_if_index)) - ; - else if (unformat (line_input, "%U", unformat_vnet_sw_interface, - vnm, &sw_if_index)) - ; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - unformat_free (line_input); - - if (sw_if_index == ~0) - return clib_error_return (0, - "please specify interface name or sw_if_index"); - - hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index); - if (hw == NULL || mrvl_pp2_device_class.index != hw->dev_class_index) - return clib_error_return (0, "not a Marvell PP2 interface"); - - dif = pool_elt_at_index (mm->interfaces, hw->dev_instance); - - mrvl_pp2_delete_if (dif); - - return 0; -} - -VLIB_CLI_COMMAND (mrvl_pp2_delete_command, static) = { - .path = "delete interface marvell pp2", - .short_help = "delete interface marvell pp2 " - "{<interface> | sw_if_index <sw_idx>}", - .function = mrvl_pp2_delete_command_fn, -}; - -clib_error_t * -mrvl_pp2_cli_init (vlib_main_t * vm) -{ - /* initialize binary API */ - mrvl_pp2_plugin_api_hookup (vm); - - return 0; -} - -VLIB_INIT_FUNCTION (mrvl_pp2_cli_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/format.c b/src/plugins/marvell/pp2/format.c deleted file mode 100644 index 877010ea561..00000000000 --- a/src/plugins/marvell/pp2/format.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/ioctl.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/plugin/plugin.h> -#include <marvell/pp2/pp2.h> - -static inline u32 -mrvl_get_u32_bits (void *start, int offset, int first, int last) -{ - u32 value = *(u32 *) (((u8 *) start) + offset); - if ((last == 0) && (first == 31)) - return value; - value >>= last; - value &= (1 << (first - last + 1)) - 1; - return value; -} - -u8 * -format_mrvl_pp2_interface_name (u8 * s, va_list * args) -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - u32 dev_instance = va_arg (*args, u32); - mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, dev_instance); - return format (s, "mv-ppio-%d/%d", ppif->ppio->pp2_id, ppif->ppio->port_id); -} - -#define foreach_ppio_statistics_entry \ - _(rx_packets) \ - _(rx_fullq_dropped) \ - _(rx_bm_dropped) \ - _(rx_early_dropped) \ - _(rx_fifo_dropped) \ - _(rx_cls_dropped) \ - _(tx_packets) - -#define foreach_ppio_inq_statistics_entry \ - _(enq_desc) \ - _(drop_early) \ - _(drop_fullq) \ - _(drop_bm) - -#define foreach_ppio_outq_statistics_entry \ - _(enq_desc) \ - _(enq_dec_to_ddr) \ - _(enq_buf_to_ddr) \ - _(deq_desc) - -u8 * -format_mrvl_pp2_interface (u8 * s, va_list * args) -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - u32 dev_instance = va_arg (*args, u32); - u32 indent = format_get_indent (s); - mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, dev_instance); - struct 
pp2_ppio_statistics stat; - int i; - u8 *s2 = 0; - - pp2_ppio_get_statistics (ppif->ppio, &stat, 0); - -#define _(c) if (stat.c) \ - s2 = format (s2, "\n%U%-25U%16Ld", \ - format_white_space, indent + 2, \ - format_c_identifier, #c, stat.c); - foreach_ppio_statistics_entry; - - if (vec_len (s2)) - s = format (s, "Interface statistics:%v", s2); - vec_reset_length (s2); - - vec_foreach_index (i, ppif->inqs) - { - struct pp2_ppio_inq_statistics stat = { 0 }; - pp2_ppio_inq_get_statistics (ppif->ppio, 0, i, &stat, 0); - - foreach_ppio_inq_statistics_entry; - - if (vec_len (s2)) - s = format (s, "\n%UInput queue %u statistics:%v", - format_white_space, indent, i, s2); - vec_reset_length (s2); - } - vec_foreach_index (i, ppif->outqs) - { - struct pp2_ppio_outq_statistics stat = { 0 }; - - pp2_ppio_outq_get_statistics (ppif->ppio, i, &stat, 0); - - foreach_ppio_outq_statistics_entry; - - if (vec_len (s2)) - s = format (s, "\n%UOutput queue %u statistics:%v", - format_white_space, indent, i, s2); - vec_reset_length (s2); - } -#undef _ - vec_free (s2); - return s; -} - -#define foreach_pp2_rx_desc_field \ - _(0x00, 6, 0, l3_offset) \ - _(0x00, 12, 8, ip_hdlen) \ - _(0x00, 14, 13, ec) \ - _(0x00, 15, 15, es) \ - _(0x00, 19, 16, pool_id) \ - _(0x00, 21, 21, hwf_sync) \ - _(0x00, 22, 22, l4_chk_ok) \ - _(0x00, 23, 23, ip_frg) \ - _(0x00, 24, 24, ipv4_hdr_err) \ - _(0x00, 27, 25, l4_info) \ - _(0x00, 30, 28, l3_info) \ - _(0x00, 31, 31, buf_header) \ - _(0x04, 5, 0, lookup_id) \ - _(0x04, 8, 6, cpu_code) \ - _(0x04, 9, 9, pppoe) \ - _(0x04, 11, 10, l3_cast_info) \ - _(0x04, 13, 12, l2_cast_info) \ - _(0x04, 15, 14, vlan_info) \ - _(0x04, 31, 16, byte_count) \ - _(0x08, 11, 0, gem_port_id) \ - _(0x08, 13, 12, color) \ - _(0x08, 14, 14, gop_sop_u) \ - _(0x08, 15, 15, key_hash_enable) \ - _(0x08, 31, 16, l4chk) \ - _(0x0c, 31, 0, timestamp) \ - _(0x10, 31, 0, buf_phys_ptr_lo) \ - _(0x14, 7, 0, buf_phys_ptr_hi) \ - _(0x14, 31, 8, key_hash) \ - _(0x18, 31, 0, buf_virt_ptr_lo) \ - 
_(0x1c, 7, 0, buf_virt_ptr_hi) \ - _(0x1c, 14, 8, buf_qset_no) \ - _(0x1c, 15, 15, buf_type) \ - _(0x1c, 21, 16, mod_dscp) \ - _(0x1c, 24, 22, mod_pri) \ - _(0x1c, 25, 25, mdscp) \ - _(0x1c, 26, 26, mpri) \ - _(0x1c, 27, 27, mgpid) \ - _(0x1c, 31, 29, port_num) - -u8 * -format_mrvl_pp2_input_trace (u8 * s, va_list * args) -{ - vlib_main_t *vm = va_arg (*args, vlib_main_t *); - vlib_node_t *node = va_arg (*args, vlib_node_t *); - mrvl_pp2_input_trace_t *t = va_arg (*args, mrvl_pp2_input_trace_t *); - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index); - u32 indent = format_get_indent (s); - struct pp2_ppio_desc *d = &t->desc; - u32 r32; - - s = format (s, "pp2: %v (%d) next-node %U", - hi->name, t->hw_if_index, format_vlib_next_node_name, vm, - node->index, t->next_index); - s = format (s, "\n%U", format_white_space, indent + 2); - -#define _(a, b, c, n) \ - r32 = mrvl_get_u32_bits (d, a, b, c); \ - if (r32 > 9) \ - s = format (s, "%s %u (0x%x)", #n, r32, r32); \ - else \ - s = format (s, "%s %u", #n,r32); \ - if (format_get_indent (s) > 72) \ - s = format (s, "\n%U", format_white_space, indent + 2); \ - else s = format (s, " "); - - foreach_pp2_rx_desc_field; -#undef _ - return s; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/input.c b/src/plugins/marvell/pp2/input.c deleted file mode 100644 index 2545f91becb..00000000000 --- a/src/plugins/marvell/pp2/input.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#define _GNU_SOURCE -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/uio.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/devices.h> -#include <vnet/interface/rx_queue_funcs.h> - -#include <marvell/pp2/pp2.h> - -#define foreach_mrvl_pp2_input_error \ - _(PPIO_RECV, "pp2_ppio_recv error") \ - _(BPOOL_GET_NUM_BUFFS, "pp2_bpool_get_num_buffs error") \ - _(BPOOL_PUT_BUFFS, "pp2_bpool_put_buffs error") \ - _(BUFFER_ALLOC, "buffer alloc error") \ - _(MAC_CE, "MAC error (CRC error)") \ - _(MAC_OR, "overrun error") \ - _(MAC_RSVD, "unknown MAC error") \ - _(MAC_RE, "resource error") \ - _(IP_HDR, "ip4 header error") - -typedef enum -{ -#define _(f,s) MRVL_PP2_INPUT_ERROR_##f, - foreach_mrvl_pp2_input_error -#undef _ - MRVL_PP2_INPUT_N_ERROR, -} mrvl_pp2_input_error_t; - -static __clib_unused char *mrvl_pp2_input_error_strings[] = { -#define _(n,s) s, - foreach_mrvl_pp2_input_error -#undef _ -}; - -static_always_inline void -mrvl_pp2_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, u32 next0, - vlib_buffer_t * b0, uword * n_trace, - mrvl_pp2_if_t * ppif, struct pp2_ppio_desc *d) -{ - if (PREDICT_TRUE ( - vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0))) - { - mrvl_pp2_input_trace_t *tr; - vlib_set_trace_count (vm, node, --(*n_trace)); - tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->next_index = next0; - tr->hw_if_index = ppif->hw_if_index; - 
clib_memcpy_fast (&tr->desc, d, sizeof (struct pp2_ppio_desc)); - } -} - -static_always_inline u16 -mrvl_pp2_set_buf_data_len_flags (vlib_buffer_t * b, struct pp2_ppio_desc *d, - u32 add_flags) -{ - u16 len; - len = pp2_ppio_inq_desc_get_pkt_len (d); - b->total_length_not_including_first_buffer = 0; - b->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | add_flags; - - if (add_flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID) - vnet_buffer (b)->l2_hdr_offset = 2; - - if (add_flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) - { - u16 offset = DM_RXD_GET_L3_OFF (d); - vnet_buffer (b)->l3_hdr_offset = offset; - b->current_data = offset; - b->current_length = len - offset + 2; - } - else - { - b->current_data = 2; - b->current_length = len; - } - - if (add_flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) - vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset + - DM_RXD_GET_IPHDR_LEN (d) * 4; - - return len; -} - -static_always_inline u16 -mrvl_pp2_next_from_desc (vlib_node_runtime_t * node, struct pp2_ppio_desc * d, - vlib_buffer_t * b, u32 * next) -{ - u8 l3_info; - /* ES bit set means MAC error - drop and count */ - if (PREDICT_FALSE (DM_RXD_GET_ES (d))) - { - *next = VNET_DEVICE_INPUT_NEXT_DROP; - u8 ec = DM_RXD_GET_EC (d); - if (ec == 0) - b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_CE]; - else if (ec == 1) - b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_OR]; - else if (ec == 2) - b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_RSVD]; - else if (ec == 3) - b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_RE]; - return mrvl_pp2_set_buf_data_len_flags (b, d, 0); - } - l3_info = DM_RXD_GET_L3_PRS_INFO (d); - - /* ipv4 packet can be value 1, 2 or 3 */ - if (PREDICT_TRUE ((l3_info - 1) < 3)) - { - if (PREDICT_FALSE (DM_RXD_GET_L3_IP4_HDR_ERR (d) != 0)) - { - *next = VNET_DEVICE_INPUT_NEXT_DROP; - b->error = node->errors[MRVL_PP2_INPUT_ERROR_IP_HDR]; - return mrvl_pp2_set_buf_data_len_flags (b, d, 0); - } - *next = VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - return 
mrvl_pp2_set_buf_data_len_flags - (b, d, - VNET_BUFFER_F_L2_HDR_OFFSET_VALID | - VNET_BUFFER_F_L3_HDR_OFFSET_VALID | - VNET_BUFFER_F_L4_HDR_OFFSET_VALID | VNET_BUFFER_F_IS_IP4); - } - - /* ipv4 packet can be value 4 or 5 */ - if (PREDICT_TRUE ((l3_info - 4) < 2)) - { - *next = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - return mrvl_pp2_set_buf_data_len_flags - (b, d, - VNET_BUFFER_F_L2_HDR_OFFSET_VALID | - VNET_BUFFER_F_L3_HDR_OFFSET_VALID | - VNET_BUFFER_F_L4_HDR_OFFSET_VALID | VNET_BUFFER_F_IS_IP6); - } - - *next = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - return mrvl_pp2_set_buf_data_len_flags (b, d, - VNET_BUFFER_F_L2_HDR_OFFSET_VALID); -} - -static_always_inline uword -mrvl_pp2_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, mrvl_pp2_if_t * ppif, - u16 qid) -{ - vnet_main_t *vnm = vnet_get_main (); - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - u32 thread_index = vm->thread_index; - mrvl_pp2_inq_t *inq = vec_elt_at_index (ppif->inqs, qid); - uword n_trace = vlib_get_trace_count (vm, node); - mrvl_pp2_per_thread_data_t *ptd = - vec_elt_at_index (ppm->per_thread_data, thread_index); - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - u32 sw_if_index[VLIB_N_RX_TX]; - u32 n_rx_packets = 0; - u32 n_rx_bytes = 0; - u32 *to_next = 0; - struct pp2_ppio_desc *d; - u16 n_desc = VLIB_FRAME_SIZE; - u32 n_bufs; - u32 *buffers; - int i; - - vec_validate_aligned (ptd->descs, n_desc, CLIB_CACHE_LINE_BYTES); - if (PREDICT_FALSE (pp2_ppio_recv (ppif->ppio, 0, qid, ptd->descs, &n_desc))) - { - vlib_error_count (vm, node->node_index, MRVL_PP2_INPUT_ERROR_PPIO_RECV, - 1); - n_desc = 0; - } - n_rx_packets = n_desc; - - for (i = 0; i < n_desc; i++) - ptd->buffers[i] = pp2_ppio_inq_desc_get_cookie (&ptd->descs[i]); - - d = ptd->descs; - buffers = ptd->buffers; - sw_if_index[VLIB_RX] = ppif->sw_if_index; - sw_if_index[VLIB_TX] = (u32) ~ 0; - while (n_desc) - { - u32 n_left_to_next; - vlib_buffer_t *b0, *b1; - u32 bi0, bi1; - u32 next0, next1; - 
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_desc >= 4 && n_left_to_next >= 2) - { - /* prefetch */ - bi0 = buffers[0]; - bi1 = buffers[1]; - to_next[0] = bi0; - to_next[1] = bi1; - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - if (PREDICT_TRUE (ppif->per_interface_next_index == ~0)) - { - n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0); - n_rx_bytes += mrvl_pp2_next_from_desc (node, d + 1, b1, &next1); - vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0); - vnet_feature_start_device_input (ppif->sw_if_index, &next1, b1); - } - else - { - n_rx_bytes += mrvl_pp2_set_buf_data_len_flags (b0, d, 0); - n_rx_bytes += mrvl_pp2_set_buf_data_len_flags (b1, d + 1, 0); - next0 = next1 = ppif->per_interface_next_index; - } - - clib_memcpy_fast (vnet_buffer (b0)->sw_if_index, sw_if_index, - sizeof (sw_if_index)); - clib_memcpy_fast (vnet_buffer (b1)->sw_if_index, sw_if_index, - sizeof (sw_if_index)); - - if (PREDICT_FALSE (n_trace > 0)) - { - mrvl_pp2_input_trace (vm, node, next0, b0, &n_trace, ppif, d); - if (n_trace > 0) - mrvl_pp2_input_trace (vm, node, next1, b1, &n_trace, ppif, - d + 1); - } - - to_next += 2; - n_left_to_next -= 2; - d += 2; - buffers += 2; - n_desc -= 2; - - /* enqueue */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, next0, - next1); - - } - while (n_desc && n_left_to_next) - { - u32 bi0 = buffers[0]; - to_next[0] = bi0; - b0 = vlib_get_buffer (vm, bi0); - - if (PREDICT_TRUE (ppif->per_interface_next_index == ~0)) - { - n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0); - vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0); - } - else - { - n_rx_bytes += mrvl_pp2_set_buf_data_len_flags (b0, d, 0); - next0 = ppif->per_interface_next_index; - } - - clib_memcpy_fast (vnet_buffer (b0)->sw_if_index, sw_if_index, - sizeof (sw_if_index)); - - if (PREDICT_FALSE (n_trace > 0)) - mrvl_pp2_input_trace (vm, node, 
next0, b0, &n_trace, ppif, d); - - to_next += 1; - n_left_to_next--; - d++; - buffers++; - n_desc--; - - /* enqueue */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_increment_combined_counter (vnm-> - interface_main.combined_sw_if_counters + - VNET_INTERFACE_COUNTER_RX, thread_index, - ppif->hw_if_index, n_rx_packets, - n_rx_bytes); - - if (PREDICT_FALSE (pp2_bpool_get_num_buffs (inq->bpool, &n_bufs))) - { - vlib_error_count (vm, node->node_index, - MRVL_PP2_INPUT_ERROR_BPOOL_GET_NUM_BUFFS, 1); - goto done; - } - - n_bufs = inq->size - n_bufs; - while (n_bufs >= MRVL_PP2_BUFF_BATCH_SZ) - { - u16 n_alloc, i; - struct buff_release_entry *e = ptd->bre; - u32 *buffers = ptd->buffers; - - n_alloc = vlib_buffer_alloc (vm, ptd->buffers, MRVL_PP2_BUFF_BATCH_SZ); - i = n_alloc; - - if (PREDICT_FALSE (n_alloc == 0)) - { - vlib_error_count (vm, node->node_index, - MRVL_PP2_INPUT_ERROR_BUFFER_ALLOC, 1); - goto done; - } - - while (i--) - { - u32 bi = buffers[0]; - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - e->buff.addr = vlib_buffer_get_pa (vm, b) - 64; - e->buff.cookie = bi; - e->bpool = inq->bpool; - e++; - buffers++; - } - - i = n_alloc; - if (PREDICT_FALSE (pp2_bpool_put_buffs (ptd->hif, ptd->bre, &i))) - { - vlib_error_count (vm, node->node_index, - MRVL_PP2_INPUT_ERROR_BPOOL_PUT_BUFFS, 1); - vlib_buffer_free (vm, ptd->buffers, n_alloc); - goto done; - } - - if (PREDICT_FALSE (i != n_alloc)) - vlib_buffer_free (vm, ptd->buffers + i, n_alloc - i); - - n_bufs -= i; - } - -done: - return n_rx_packets; -} - -uword -mrvl_pp2_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_rx = 0; - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - vnet_hw_if_rxq_poll_vector_t *pv; - - pv = vnet_hw_if_get_rxq_poll_vector (vm, node); - - for (int i = 0; i < vec_len (pv); i++) - { - mrvl_pp2_if_t *ppif; - ppif = vec_elt_at_index 
(ppm->interfaces, pv[i].dev_instance); - if (ppif->flags & MRVL_PP2_IF_F_ADMIN_UP) - n_rx += - mrvl_pp2_device_input_inline (vm, node, frame, ppif, pv[i].queue_id); - } - return n_rx; -} - -VLIB_REGISTER_NODE (mrvl_pp2_input_node) = { - .function = mrvl_pp2_input_fn, - .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, - .name = "mrvl-pp2-input", - .sibling_of = "device-input", - .format_trace = format_mrvl_pp2_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_POLLING, - .n_errors = MRVL_PP2_INPUT_N_ERROR, - .error_strings = mrvl_pp2_input_error_strings, -}; - - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/output.c b/src/plugins/marvell/pp2/output.c deleted file mode 100644 index 911b2f55a17..00000000000 --- a/src/plugins/marvell/pp2/output.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/uio.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/devices.h> - -#include <marvell/pp2/pp2.h> - -uword -mrvl_pp2_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, rd->dev_instance); - u32 thread_index = vm->thread_index; - mrvl_pp2_per_thread_data_t *ptd = - vec_elt_at_index (ppm->per_thread_data, thread_index); - u8 qid = thread_index; - mrvl_pp2_outq_t *outq = vec_elt_at_index (ppif->outqs, qid); - u32 *buffers = vlib_frame_vector_args (frame); - u16 n_desc = frame->n_vectors, n_left = n_desc, n_sent = n_desc, n_done; - struct pp2_ppio_desc *d; - u16 mask = outq->size - 1; - - if (PREDICT_FALSE (pp2_ppio_get_num_outq_done (ppif->ppio, ptd->hif, qid, - &n_done))) - { - n_done = 0; - vlib_error_count (vm, node->node_index, - MRVL_PP2_TX_ERROR_PPIO_GET_NUM_OUTQ_DONE, 1); - } - - if (n_done) - { - u16 n_free = clib_min (n_done, outq->size - (outq->tail & mask)); - vlib_buffer_free (vm, outq->buffers + (outq->tail & mask), n_free); - if (PREDICT_FALSE (n_free < n_done)) - vlib_buffer_free (vm, outq->buffers, n_done - n_free); - outq->tail += n_done; - } - - vec_validate_aligned (ptd->descs, n_left, CLIB_CACHE_LINE_BYTES); - d = ptd->descs; - while (n_left) - { - u32 bi0 = buffers[0]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - u64 paddr = vlib_buffer_get_pa (vm, b0); - - pp2_ppio_outq_desc_reset (d); - pp2_ppio_outq_desc_set_phys_addr (d, paddr + b0->current_data); - pp2_ppio_outq_desc_set_pkt_offset (d, 0); - pp2_ppio_outq_desc_set_pkt_len (d, b0->current_length); - d++; - buffers++; - n_left--; - } - - if (pp2_ppio_send 
(ppif->ppio, ptd->hif, qid, ptd->descs, &n_sent)) - { - n_sent = 0; - vlib_error_count (vm, node->node_index, MRVL_PP2_TX_ERROR_PPIO_SEND, 1); - } - - /* free unsent buffers */ - if (PREDICT_FALSE (n_sent != n_desc)) - { - vlib_buffer_free (vm, vlib_frame_vector_args (frame) + n_sent, - frame->n_vectors - n_sent); - vlib_error_count (vm, node->node_index, MRVL_PP2_TX_ERROR_NO_FREE_SLOTS, - frame->n_vectors - n_sent); - } - - /* store buffer index for each enqueued packet into the ring - so we can know what to free after packet is sent */ - if (n_sent) - { - u16 slot = outq->head & mask; - buffers = vlib_frame_vector_args (frame); - u16 n_copy = clib_min (outq->size - slot, n_sent); - - vlib_buffer_copy_indices (outq->buffers + slot, buffers, n_copy); - if (PREDICT_FALSE (n_copy < n_sent)) - clib_memcpy_fast (outq->buffers, buffers + n_copy, - (n_sent - n_copy) * sizeof (u32)); - - outq->head += n_sent; - } - - return n_sent; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/pp2.c b/src/plugins/marvell/pp2/pp2.c deleted file mode 100644 index 030ab9b4496..00000000000 --- a/src/plugins/marvell/pp2/pp2.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/ioctl.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/plugin/plugin.h> -#include <marvell/pp2/pp2.h> -#include <vnet/interface/rx_queue_funcs.h> - -/* size of DMA memory used by musdk (not used for buffers) */ -#define MV_SYS_DMA_MEM_SZ (2 << 20) -/* number of HIFs reserved (first X) */ -#define NUM_HIFS_RSVD 4 -/* number of buffer pools reserved (first X) */ -#define NUM_BPOOLS_RSVD 7 - -mrvl_pp2_main_t mrvl_pp2_main; -extern vnet_device_class_t ppa2_device_class; - -static void -mrvl_pp2_main_deinit () -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - int i; - vec_foreach_index (i, ppm->per_thread_data) - { - mrvl_pp2_per_thread_data_t *ptd = vec_elt_at_index (ppm->per_thread_data, - i); - if (ptd->hif) - pp2_hif_deinit (ptd->hif); - vec_free (ptd->descs); - } - vec_free (ppm->per_thread_data); - pp2_deinit (); - mv_sys_dma_mem_destroy (); -} - -static clib_error_t * -mrvl_pp2_main_init () -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - clib_error_t *err = 0; - struct pp2_init_params init_params = { 0 }; - int i, rv; - u8 *s = 0; - - rv = mv_sys_dma_mem_init (MV_SYS_DMA_MEM_SZ); - if (rv) - return clib_error_return (0, "mv_sys_dma_mem_init failed, rv = %u", rv); - - init_params.hif_reserved_map = ((1 << NUM_HIFS_RSVD) - 1); - init_params.bm_pool_reserved_map = ((1 << NUM_BPOOLS_RSVD) - 1); - rv = pp2_init (&init_params); - if (rv) - { - err = clib_error_return (0, "mrvl_pp2_init failed, rv = %u", rv); - goto done; - } - - vec_validate_aligned (ppm->per_thread_data, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - vec_foreach_index (i, ppm->per_thread_data) - { - mrvl_pp2_per_thread_data_t *ptd = vec_elt_at_index (ppm->per_thread_data, - i); - struct pp2_hif_params hif_params = { 0 }; - vec_reset_length (s); - s = format (s, 
"hif-%d%c", NUM_HIFS_RSVD + i, 0); - hif_params.match = (char *) s; - hif_params.out_size = 2048; /* FIXME */ - if (pp2_hif_init (&hif_params, &ptd->hif)) - { - err = clib_error_return (0, "hif '%s' init failed", s); - goto done; - } - } - -done: - if (err) - mrvl_pp2_main_deinit (); - vec_free (s); - return err; -} - -static u32 -mrvl_pp2_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, - u32 flags) -{ - /* nothing for now */ - return 0; -} - -void -mrvl_pp2_delete_if (mrvl_pp2_if_t * ppif) -{ - vlib_main_t *vm = vlib_get_main (); - vnet_main_t *vnm = vnet_get_main (); - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - mrvl_pp2_outq_t *outq; - mrvl_pp2_inq_t *inq; - - if (ppif->hw_if_index != ~0) - ethernet_delete_interface (vnm, ppif->hw_if_index); - - if (ppif->ppio) - { - pp2_ppio_disable (ppif->ppio); - pp2_ppio_deinit (ppif->ppio); - } - - /* free buffers hanging in the tx ring */ - vec_foreach (outq, ppif->outqs) - { - while (outq->tail < outq->head) - { - u16 slot = outq->tail & (outq->size - 1); - vlib_buffer_free (vm, outq->buffers + slot, 1); - outq->tail++; - } - vec_free (outq->buffers); - } - vec_free (ppif->outqs); - - /* free buffers hangin in the rx buffer pool */ - vec_foreach (inq, ppif->inqs) - if (inq->bpool) - { - u32 n_bufs = 0; - pp2_bpool_get_num_buffs (inq->bpool, &n_bufs); - while (n_bufs--) - { - struct pp2_buff_inf binf; - if (pp2_bpool_get_buff (ppm->per_thread_data[0].hif, inq->bpool, - &binf) == 0) - { - u32 bi = binf.cookie; - vlib_buffer_free (vm, &bi, 1); - } - } - pp2_bpool_deinit (inq->bpool); - } - vec_free (ppif->inqs); - - - pool_put (ppm->interfaces, ppif); - - if (pool_elts (ppm->interfaces) == 0) - mrvl_pp2_main_deinit (); -} - -void -mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args) -{ - vlib_main_t *vm = vlib_get_main (); - vnet_main_t *vnm = vnet_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); - vnet_eth_interface_registration_t eir = {}; - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - struct 
pp2_bpool_params bpool_params = { 0 }; - struct pp2_ppio_params ppio_params = { 0 }; - struct pp2_ppio_inq_params inq_params = { 0 }; - vnet_sw_interface_t *sw; - mrvl_pp2_if_t *ppif = 0; - u8 pp2_id, port_id, *s = 0; - eth_addr_t mac_addr; - u8 n_outqs, n_inqs = 1; - int i; - - if (tm->n_vlib_mains > PP2_PPIO_MAX_NUM_OUTQS) - { - args->rv = VNET_API_ERROR_INIT_FAILED; - args->error = clib_error_return (0, "number of threads (main + workers)" - " is bigger than number of output " - "queues (%u)", PP2_PPIO_MAX_NUM_OUTQS); - return; - } - n_outqs = tm->n_vlib_mains; - - /* defaults */ - args->tx_q_sz = args->tx_q_sz ? args->tx_q_sz : 2 * VLIB_FRAME_SIZE; - args->rx_q_sz = args->rx_q_sz ? args->rx_q_sz : 2 * VLIB_FRAME_SIZE; - - if (vec_len (ppm->per_thread_data) == 0) - { - if ((args->error = mrvl_pp2_main_init ()) != 0) - { - args->rv = VNET_API_ERROR_INIT_FAILED; - return; - } - } - - pool_get_zero (ppm->interfaces, ppif); - ppif->dev_instance = ppif - ppm->interfaces; - ppif->hw_if_index = ~0; - vec_validate_aligned (ppif->inqs, n_inqs - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (ppif->outqs, n_outqs - 1, CLIB_CACHE_LINE_BYTES); - - for (i = 0; i < n_inqs; i++) - { - mrvl_pp2_inq_t *inq = vec_elt_at_index (ppif->inqs, i); - inq->size = args->rx_q_sz; - } - for (i = 0; i < n_outqs; i++) - { - mrvl_pp2_outq_t *outq = vec_elt_at_index (ppif->outqs, i); - outq->size = args->tx_q_sz; - vec_validate_aligned (outq->buffers, outq->size, CLIB_CACHE_LINE_BYTES); - } - - if (pp2_netdev_get_ppio_info ((char *) args->name, &pp2_id, &port_id)) - { - args->rv = VNET_API_ERROR_INVALID_INTERFACE; - args->error = clib_error_return (0, "Invalid interface '%s'", - args->name); - goto error; - } - - /* FIXME bpool bit select per pp */ - s = format (s, "pool-%d:%d%c", pp2_id, pp2_id + 8, 0); - bpool_params.match = (char *) s; - bpool_params.buff_len = vlib_buffer_get_default_data_size (vm); - /* FIXME +64 ? 
*/ - if (pp2_bpool_init (&bpool_params, &ppif->inqs[0].bpool)) - { - args->rv = VNET_API_ERROR_INIT_FAILED; - args->error = clib_error_return (0, "bpool '%s' init failed", s); - goto error; - } - vec_reset_length (s); - - s = format (s, "ppio-%d:%d%c", pp2_id, port_id, 0); - ppio_params.match = (char *) s; - ppio_params.type = PP2_PPIO_T_NIC; - inq_params.size = args->rx_q_sz; - ppio_params.inqs_params.num_tcs = 1; - ppio_params.inqs_params.tcs_params[0].pkt_offset = 0; - ppio_params.inqs_params.tcs_params[0].num_in_qs = n_inqs; - ppio_params.inqs_params.tcs_params[0].inqs_params = &inq_params; - ppio_params.inqs_params.tcs_params[0].pools[0][0] = ppif->inqs[0].bpool; - ppio_params.outqs_params.num_outqs = n_outqs; - for (i = 0; i < n_outqs; i++) - { - ppio_params.outqs_params.outqs_params[i].weight = 1; - ppio_params.outqs_params.outqs_params[i].size = args->tx_q_sz; - } - if (pp2_ppio_init (&ppio_params, &ppif->ppio)) - { - args->rv = VNET_API_ERROR_INIT_FAILED; - args->error = clib_error_return (0, "ppio '%s' init failed", s); - goto error; - } - vec_reset_length (s); - - if (pp2_ppio_get_mac_addr (ppif->ppio, mac_addr)) - { - args->rv = VNET_API_ERROR_INIT_FAILED; - args->error = - clib_error_return (0, "%s: pp2_ppio_get_mac_addr failed", s); - goto error; - } - - eir.dev_class_index = mrvl_pp2_device_class.index; - eir.dev_instance = ppif->dev_instance; - eir.address = mac_addr; - eir.cb.flag_change = mrvl_pp2_eth_flag_change; - ppif->hw_if_index = vnet_eth_register_interface (vnm, &eir); - - sw = vnet_get_hw_sw_interface (vnm, ppif->hw_if_index); - ppif->sw_if_index = sw->sw_if_index; - ppif->per_interface_next_index = ~0; - args->sw_if_index = sw->sw_if_index; - vnet_hw_if_set_input_node (vnm, ppif->hw_if_index, - mrvl_pp2_input_node.index); - /* FIXME: only one RX queue ? 
*/ - ppif->inqs[0].queue_index = vnet_hw_if_register_rx_queue ( - vnm, ppif->hw_if_index, 0, VNET_HW_IF_RXQ_THREAD_ANY); - - vnet_hw_if_set_rx_queue_mode (vnm, ppif->inqs[0].queue_index, - VNET_HW_IF_RX_MODE_POLLING); - vnet_hw_if_update_runtime_data (vnm, ppif->hw_if_index); - vnet_hw_interface_set_flags (vnm, ppif->hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - goto done; - -error: - mrvl_pp2_delete_if (ppif); -done: - vec_free (s); -} - -static clib_error_t * -mrvl_pp2_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, - u32 flags) -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, hw->dev_instance); - static clib_error_t *error = 0; - int is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - int rv; - - if (is_up) - rv = pp2_ppio_enable (ppif->ppio); - else - rv = pp2_ppio_disable (ppif->ppio); - - if (rv) - return clib_error_return (0, "failed to %s interface", - is_up ? 
"enable" : "disable"); - - if (is_up) - ppif->flags |= MRVL_PP2_IF_F_ADMIN_UP; - else - ppif->flags &= ~MRVL_PP2_IF_F_ADMIN_UP; - - return error; -} - -static void -mrvl_pp2_clear_interface_counters (u32 instance) -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, instance); - struct pp2_ppio_statistics stats; - - pp2_ppio_get_statistics (ppif->ppio, &stats, 1); -} - -static void -mrvl_pp2_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - mrvl_pp2_main_t *ppm = &mrvl_pp2_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - ppif->per_interface_next_index = node_index; - return; - } - - ppif->per_interface_next_index = - vlib_node_add_next (vlib_get_main (), mrvl_pp2_input_node.index, - node_index); -} - -static char *mrvl_pp2_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_mrvl_pp2_tx_func_error -#undef _ -}; - -VNET_DEVICE_CLASS (mrvl_pp2_device_class,) = -{ - .name = "Marvell PPv2 interface", - .format_device_name = format_mrvl_pp2_interface_name, - .format_device = format_mrvl_pp2_interface, - .tx_function = mrvl_pp2_interface_tx, - .tx_function_n_errors = MRVL_PP2_TX_N_ERROR, - .tx_function_error_strings = mrvl_pp2_tx_func_error_strings, - .admin_up_down_function = mrvl_pp2_interface_admin_up_down, - .clear_counters = mrvl_pp2_clear_interface_counters, - .rx_redirect_to_node = mrvl_pp2_set_interface_next_node, -}; - -static clib_error_t * -mrvl_pp2_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (mrvl_pp2_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/pp2.h b/src/plugins/marvell/pp2/pp2.h deleted file mode 100644 index abb8e573a37..00000000000 --- 
a/src/plugins/marvell/pp2/pp2.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#define MVCONF_DBG_LEVEL 0 -#define MVCONF_PP2_BPOOL_COOKIE_SIZE 32 -#define MVCONF_PP2_BPOOL_DMA_ADDR_SIZE 64 -#define MVCONF_DMA_PHYS_ADDR_T_SIZE 64 -#define MVCONF_SYS_DMA_UIO -#define MVCONF_TYPES_PUBLIC -#define MVCONF_DMA_PHYS_ADDR_T_PUBLIC - -#include <vlib/vlib.h> - -#include "mv_std.h" -#include "env/mv_sys_dma.h" -#include "drivers/mv_pp2.h" -#include <drivers/mv_pp2_bpool.h> -#include <drivers/mv_pp2_ppio.h> - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u16 size; - u32 queue_index; - struct pp2_bpool *bpool; -} mrvl_pp2_inq_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u16 size; - u32 *buffers; - u16 head; - u16 tail; -} mrvl_pp2_outq_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 flags; -#define MRVL_PP2_IF_F_ADMIN_UP (1 << 0) - struct pp2_ppio *ppio; - u32 per_interface_next_index; - - mrvl_pp2_inq_t *inqs; - mrvl_pp2_outq_t *outqs; - - u32 dev_instance; - u32 sw_if_index; - u32 hw_if_index; -} mrvl_pp2_if_t; - -#define MRVL_PP2_BUFF_BATCH_SZ VLIB_FRAME_SIZE - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - struct pp2_hif *hif; - struct pp2_ppio_desc 
*descs; - struct buff_release_entry bre[MRVL_PP2_BUFF_BATCH_SZ]; - u32 buffers[VLIB_FRAME_SIZE]; -} mrvl_pp2_per_thread_data_t; - -typedef struct -{ - mrvl_pp2_if_t *interfaces; - mrvl_pp2_per_thread_data_t *per_thread_data; - - /* API message ID base */ - u16 msg_id_base; -} mrvl_pp2_main_t; - -extern vnet_device_class_t mrvl_pp2_device_class; -extern mrvl_pp2_main_t mrvl_pp2_main; - -typedef struct -{ - u8 *name; - u16 rx_q_sz; - u16 tx_q_sz; - - /* return */ - i32 rv; - u32 sw_if_index; - clib_error_t *error; -} mrvl_pp2_create_if_args_t; - -void mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args); -void mrvl_pp2_delete_if (mrvl_pp2_if_t * dfif); -clib_error_t *mrvl_pp2_plugin_api_hookup (vlib_main_t * vm); - -/* output.c */ - -#define foreach_mrvl_pp2_tx_func_error \ - _(NO_FREE_SLOTS, "no free tx slots") \ - _(PPIO_SEND, "pp2_ppio_send errors") \ - _(PPIO_GET_NUM_OUTQ_DONE, "pp2_ppio_get_num_outq_done errors") - -typedef enum -{ -#define _(f,s) MRVL_PP2_TX_ERROR_##f, - foreach_mrvl_pp2_tx_func_error -#undef _ - MRVL_PP2_TX_N_ERROR, -} mrvl_pp2_tx_func_error_t; - -uword mrvl_pp2_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame); - -/* input.c */ - -typedef struct -{ - u32 next_index; - u32 hw_if_index; - struct pp2_ppio_desc desc; -} mrvl_pp2_input_trace_t; - -extern vlib_node_registration_t mrvl_pp2_input_node; - -/* format.c */ -format_function_t format_mrvl_pp2_input_trace; -format_function_t format_mrvl_pp2_interface; -format_function_t format_mrvl_pp2_interface_name; - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/pp2_api.c b/src/plugins/marvell/pp2/pp2_api.c deleted file mode 100644 index c1f3a9e1d1d..00000000000 --- a/src/plugins/marvell/pp2/pp2_api.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2019 Arm Limited. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> - -#include <marvell/pp2/pp2.h> - -#include <vlibapi/api.h> -#include <vlibmemory/api.h> - -/* define message IDs */ -#include <marvell/pp2/pp2.api_enum.h> -#include <marvell/pp2/pp2.api_types.h> - -#define REPLY_MSG_ID_BASE (pp2->msg_id_base) -#include <vlibapi/api_helper_macros.h> - -static void -vl_api_mrvl_pp2_create_t_handler (vl_api_mrvl_pp2_create_t * mp) -{ - mrvl_pp2_main_t *pp2 = &mrvl_pp2_main; - mrvl_pp2_create_if_args_t args = { 0 }; - vl_api_mrvl_pp2_create_reply_t *rmp; - int rv; - - args.name = format (0, "%s", mp->if_name); - args.rx_q_sz = ntohs (mp->rx_q_sz); - args.tx_q_sz = ntohs (mp->tx_q_sz); - mrvl_pp2_create_if (&args); - rv = args.rv; - vec_free (args.name); - if (args.error) - { - clib_error_free (args.error); - } - REPLY_MACRO2 (VL_API_MRVL_PP2_CREATE_REPLY, - ({ rmp->sw_if_index = ntohl (args.sw_if_index); })); -} - -static void -vl_api_mrvl_pp2_delete_t_handler (vl_api_mrvl_pp2_delete_t * mp) -{ - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hw; - mrvl_pp2_main_t *pp2 = &mrvl_pp2_main; - vl_api_mrvl_pp2_delete_reply_t *rmp; - mrvl_pp2_if_t *dif; - int rv = 0; - mp->sw_if_index = ntohl (mp->sw_if_index); - hw = vnet_get_sup_hw_interface (vnm, mp->sw_if_index); - if (hw == NULL || 
mrvl_pp2_device_class.index != hw->dev_class_index) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto reply; - } - - dif = pool_elt_at_index (pp2->interfaces, hw->dev_instance); - - mrvl_pp2_delete_if (dif); - -reply: - REPLY_MACRO (VL_API_MRVL_PP2_DELETE_REPLY); -} - -#include <marvell/pp2/pp2.api.c> -/* set up the API message handling tables */ -clib_error_t * -mrvl_pp2_plugin_api_hookup (vlib_main_t * vm) -{ - mrvl_pp2_main_t *pp2 = &mrvl_pp2_main; - - /* ask for a correctly-sized block of API message decode slots */ - pp2->msg_id_base = setup_message_id_table (); - - return 0; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/marvell/pp2/pp2_test.c b/src/plugins/marvell/pp2/pp2_test.c deleted file mode 100644 index 26a9e9a6e34..00000000000 --- a/src/plugins/marvell/pp2/pp2_test.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2019 Arm Limited. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> - -#include <vat/vat.h> -#include <vlibapi/api.h> -#include <vlibmemory/api.h> - -#include <vppinfra/error.h> -#include <marvell/pp2/pp2.h> - -#define __plugin_msg_base pp2_test_main.msg_id_base -#include <vlibapi/vat_helper_macros.h> - -/* declare message IDs */ -#include <marvell/pp2/pp2.api_enum.h> -#include <marvell/pp2/pp2.api_types.h> - -typedef struct -{ - /* API message ID base */ - u16 msg_id_base; - vat_main_t *vat_main; -} pp2_test_main_t; - -pp2_test_main_t pp2_test_main; - -/* mrvl_pp2 create API */ -static int -api_mrvl_pp2_create (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_mrvl_pp2_create_t *mp; - mrvl_pp2_create_if_args_t args; - int ret; - u16 size; - - clib_memset (&args, 0, sizeof (mrvl_pp2_create_if_args_t)); - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "name %s", &args.name)) - ; - else if (unformat (i, "rx-queue-size %u", &size)) - args.rx_q_sz = size; - else if (unformat (i, "tx-queue-size %u", &size)) - args.tx_q_sz = size; - else - { - clib_warning ("unknown input '%U'", format_unformat_error, i); - return -99; - } - } - - M (MRVL_PP2_CREATE, mp); - - strncpy_s ((char *) mp->if_name, ARRAY_LEN (mp->if_name), - (char *) (args.name), strlen ((char *) args.name)); - mp->rx_q_sz = clib_host_to_net_u16 (args.rx_q_sz); - mp->tx_q_sz = clib_host_to_net_u16 (args.tx_q_sz); - - S (mp); - W (ret); - - vec_free (args.name); - - return ret; -} - -/* mrvl_pp2 create reply handler */ -static void -vl_api_mrvl_pp2_create_reply_t_handler (vl_api_mrvl_pp2_create_reply_t * mp) -{ - vat_main_t *vam = pp2_test_main.vat_main; - i32 retval = ntohl (mp->retval); - - if (retval == 0) - { - fformat (vam->ofp, "created mrvl_pp2 with sw_if_index %d\n", - ntohl (mp->sw_if_index)); - } - - vam->retval = retval; - vam->result_ready = 1; - 
vam->regenerate_interface_table = 1; -} - - -/* mrvl_pp2 delete API */ -static int -api_mrvl_pp2_delete (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - //vnet_main_t *vnm = vnet_get_main (); - vl_api_mrvl_pp2_delete_t *mp; - u32 sw_if_index = 0; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "sw_if_index %d", &sw_if_index)) - ; - else - { - clib_warning ("unknown input '%U'", format_unformat_error, i); - return -99; - } - } - - M (MRVL_PP2_DELETE, mp); - - mp->sw_if_index = clib_host_to_net_u32 (sw_if_index); - - S (mp); - W (ret); - - return ret; -} - -#include <marvell/pp2/pp2.api_test.c> - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c index 1f01410afce..b6c9d51d777 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_api.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c @@ -442,7 +442,8 @@ send_nat44_ed_output_interface_details (u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_nat44_ed_output_interface_details_t_endian (rmp); + vl_api_nat44_ed_output_interface_details_t_endian (rmp, + 1 /* to network */); rmp->_vl_msg_id = htons (rmp->_vl_msg_id); rmp->context = htonl (rmp->context); })); diff --git a/src/plugins/nat/nat44-ei/nat44_ei_api.c b/src/plugins/nat/nat44-ei/nat44_ei_api.c index 8671a556929..454a5032c6a 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_api.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_api.c @@ -751,7 +751,8 @@ send_nat44_ei_output_interface_details (u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_nat44_ei_output_interface_details_t_endian (rmp); + vl_api_nat44_ei_output_interface_details_t_endian (rmp, + 1 /* to network */); rmp->_vl_msg_id = htons (rmp->_vl_msg_id); rmp->context = htonl 
(rmp->context); })); diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index 01b333a5234..3b981d69986 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -859,7 +859,7 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0, nat44_ei_main_t *nm = &nat44_ei_main; vlib_main_t *vm = vlib_get_main (); ip4_address_t addr; - u16 port; + u16 port = 0; u32 fib_index; nat_protocol_t proto; icmp_echo_header_t *echo0, *inner_echo0 = 0; diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c index 02e61219d1e..a4e7ff192bf 100644 --- a/src/plugins/nat/pnat/pnat_api.c +++ b/src/plugins/nat/pnat/pnat_api.c @@ -116,7 +116,8 @@ static void send_bindings_details(u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_pnat_bindings_details_t_endian(rmp); + vl_api_pnat_bindings_details_t_endian( + rmp, 1 /* to network */); rmp->_vl_msg_id = htons(rmp->_vl_msg_id); rmp->context = htonl(rmp->context); })); @@ -158,7 +159,7 @@ static void send_interfaces_details(u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_pnat_interfaces_details_t_endian(rmp); + vl_api_pnat_interfaces_details_t_endian(rmp, 1 /* to network */); rmp->_vl_msg_id = htons(rmp->_vl_msg_id); rmp->context = htonl(rmp->context); })); diff --git a/src/plugins/netmap/CMakeLists.txt b/src/plugins/netmap/CMakeLists.txt new file mode 100644 index 00000000000..d53a9e0911a --- /dev/null +++ b/src/plugins/netmap/CMakeLists.txt @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2024 Tom Jones <thj@freebsd.org> +# +# This software was developed by Tom Jones <thj@freebsd.org> under sponsorship +# from the FreeBSD Foundation. 
+# + +if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + message(WARNING "Netmap is only currently support on FreeBSD - netmap plugin disabled") + return() +endif() + +add_vpp_plugin(netmap + SOURCES + plugin.c + netmap.c + node.c + device.c + cli.c + netmap_api.c + + MULTIARCH_SOURCES + node.c + device.c + + INSTALL_HEADERS + netmap.h + net_netmap.h + + API_FILES + netmap.api +) diff --git a/src/plugins/netmap/FEATURE.yaml b/src/plugins/netmap/FEATURE.yaml new file mode 100644 index 00000000000..a9dfb2163e4 --- /dev/null +++ b/src/plugins/netmap/FEATURE.yaml @@ -0,0 +1,12 @@ +--- +name: Netmap Device +maintainer: Tom Jones <thj@freebsd.org> +features: + - L4 checksum offload +description: "Create a netmap interface, which is a high speed user-space + interface that allows VPP to patch to a physical or virtual NIC + without the use of DPDK" +missing: + - API dump +state: production +properties: [API, CLI, STATS, MULTITHREAD] diff --git a/src/plugins/netmap/cli.c b/src/plugins/netmap/cli.c new file mode 100644 index 00000000000..b54d397ecbe --- /dev/null +++ b/src/plugins/netmap/cli.c @@ -0,0 +1,236 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +static clib_error_t * +netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + u8 hwaddr[6]; + u8 *hw_addr_ptr = 0; + int r; + u8 is_pipe = 0; + u8 is_master = 0; + u32 sw_if_index = ~0; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + if (unformat + (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) + hw_addr_ptr = hwaddr; + else if (unformat (line_input, "pipe")) + is_pipe = 1; + else if (unformat (line_input, "master")) + is_master = 1; + else if (unformat (line_input, "slave")) + is_master = 0; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + r = + netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, + &sw_if_index); + + if (r == VNET_API_ERROR_SYSCALL_ERROR_1) + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } + + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + 
sw_if_index); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * '<em>netmap</em>' is a framework for very fast packet I/O from userspace. + * '<em>VALE</em>' is an equally fast in-kernel software switch using the + * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared + * memory packet transport channel. Together, they provide a high speed + * user-space interface that allows VPP to patch into a linux namespace, a + * linux container, or a physical NIC without the use of DPDK. Netmap/VALE + * generates the '<em>netmap.ko</em>' kernel module that needs to be loaded + * before netmap interfaces can be created. + * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. + * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, + * which is a snapshot of the Netmap/VALE repo with minor changes to work + * with containers and modified kernel drivers to work with NICs. + * + * Create a netmap interface that will attach to a linux interface. + * The interface must already exist. Once created, a new netmap interface + * will exist in VPP with the name '<em>netmap-<ifname></em>', where + * '<em><ifname></em>' takes one of two forms: + * - <b>ifname</b> - Linux interface to bind too. + * - <b>valeXXX:YYY</b> - + * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE + * interface that must start with '<em>vale</em>' and is less + * than 16 characters. + * - Where '<em>YYY</em>' is an existing linux namespace. + * + * This command has the following optional parameters: + * + * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * + * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>' + * instance should be created. + * + * - <b>master | slave</b> - Optional flag to indicate whether VPP should + * be the master or slave of the '<em>netmap pipe</em>'. Only considered + * if '<em>pipe</em>' is entered. 
Defaults to '<em>slave</em>' if not entered. + * + * @cliexpar + * Example of how to create a netmap interface tied to the linux + * namespace '<em>vpp1</em>': + * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} + * netmap-vale00:vpp1 + * @cliexend + * Once the netmap interface is created, enable the interface using: + * @cliexcmd{set interface state netmap-vale00:vpp1 up} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_create_command, static) = { + .path = "create netmap", + .short_help = "create netmap name <ifname>|valeXXX:YYY " + "[hw-addr <mac-addr>] [pipe] [master|slave]", + .function = netmap_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + netmap_delete_if (vm, host_if_name); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * Delete a netmap interface. Use the '<em><ifname></em>' to identify + * the netmap interface to be deleted. In VPP, netmap interfaces are + * named as '<em>netmap-<ifname></em>', where '<em><ifname></em>' + * takes one of two forms: + * - <b>ifname</b> - Linux interface to bind too. + * - <b>valeXXX:YYY</b> - + * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE + * interface that must start with '<em>vale</em>' and is less + * than 16 characters. 
+ * - Where '<em>YYY</em>' is an existing linux namespace. + * + * @cliexpar + * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>': + * @cliexcmd{delete netmap name vale00:vpp1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_delete_command, static) = { + .path = "delete netmap", + .short_help = "delete netmap name <ifname>|valeXXX:YYY", + .function = netmap_delete_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +netmap_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (netmap_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/device.c b/src/plugins/netmap/device.c new file mode 100644 index 00000000000..505deb988c4 --- /dev/null +++ b/src/plugins/netmap/device.c @@ -0,0 +1,252 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +#define foreach_netmap_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(PENDING_MSGS, "pending msgs in tx ring") + +typedef enum +{ +#define _(f,s) NETMAP_TX_ERROR_##f, + foreach_netmap_tx_func_error +#undef _ + NETMAP_TX_N_ERROR, +} netmap_tx_func_error_t; + +static char *netmap_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_netmap_tx_func_error +#undef _ +}; + + +static u8 * +format_netmap_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + netmap_main_t *apm = &netmap_main; + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); + + s = format (s, "netmap-%s", nif->host_if_name); + return s; +} + +static u8 * +format_netmap_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); + u32 indent = format_get_indent (s); + + s = format (s, "NETMAP interface"); + if (verbose) + { + s = format (s, "\n%U version %d flags 0x%x" + "\n%U region %u memsize 0x%x offset 0x%x" + "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", + format_white_space, indent + 2, + nif->req->nr_version, + nif->req->nr_flags, + format_white_space, indent + 2, + nif->mem_region, + nif->req->nr_memsize, + nif->req->nr_offset, + format_white_space, indent + 2, + nif->req->nr_tx_slots, + nif->req->nr_rx_slots, + nif->req->nr_tx_rings, nif->req->nr_rx_rings); + } + return s; +} + +static u8 * +format_netmap_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm, + vlib_node_runtime_t * node, + 
vlib_frame_t * frame) +{ + netmap_main_t *nm = &netmap_main; + u32 *buffers = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + f64 const time_constant = 1e3; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); + int cur_ring; + + clib_spinlock_lock_if_init (&nif->lockp); + + cur_ring = nif->first_tx_ring; + + while (n_left && cur_ring <= nif->last_tx_ring) + { + struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); + int n_free_slots = nm_ring_space (ring); + uint cur = ring->cur; + + if (nm_tx_pending (ring)) + { + if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) + clib_unix_warning ("NIOCTXSYNC"); + clib_cpu_time_wait (time_constant); + + if (nm_tx_pending (ring) && !n_free_slots) + { + cur_ring++; + continue; + } + } + + while (n_left && n_free_slots) + { + vlib_buffer_t *b0 = 0; + u32 bi = buffers[0]; + u32 len; + u32 offset = 0; + buffers++; + + struct netmap_slot *slot = &ring->slot[cur]; + + do + { + b0 = vlib_get_buffer (vm, bi); + len = b0->current_length; + /* memcpy */ + clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, vlib_buffer_get_current (b0), len); + offset += len; + } + while ((bi = b0->next_buffer)); + + slot->len = offset; + cur = (cur + 1) % ring->num_slots; + n_free_slots--; + n_left--; + } + CLIB_MEMORY_BARRIER (); + ring->head = ring->cur = cur; + } + + if (n_left < frame->n_vectors) + ioctl (nif->fd, NIOCTXSYNC, NULL); + + clib_spinlock_unlock_if_init (&nif->lockp); + + if (n_left) + vlib_error_count (vm, node->node_index, + (n_left == + frame->n_vectors ? 
NETMAP_TX_ERROR_PENDING_MSGS : + NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); + + vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static void +netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + nif->per_interface_next_index = node_index; + return; + } + + nif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), netmap_input_node.index, + node_index); +} + +static void +netmap_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + u32 hw_flags; + + nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (nif->is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + else + hw_flags = 0; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return 0; +} + +static clib_error_t * +netmap_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (netmap_device_class) = { + .name = "netmap", + .format_device_name = format_netmap_device_name, + .format_device = format_netmap_device, + .format_tx_trace = format_netmap_tx_trace, + .tx_function_n_errors = NETMAP_TX_N_ERROR, + .tx_function_error_strings = netmap_tx_func_error_strings, + .rx_redirect_to_node = netmap_set_interface_next_node, + .clear_counters = netmap_clear_hw_interface_counters, + 
.admin_up_down_function = netmap_interface_admin_up_down, + .subif_add_del_function = netmap_subif_add_del_function, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/net_netmap.h b/src/plugins/netmap/net_netmap.h new file mode 100644 index 00000000000..ecccedd4484 --- /dev/null +++ b/src/plugins/netmap/net_netmap.h @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + * + * This API is also used to communicate with the VALE software switch + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +#define NETMAP_API 14 /* current API version */ + +#define NETMAP_MIN_API 14 /* min and max versions accepted */ +#define NETMAP_MAX_API 15 +/* + * Some fields should be cache-aligned to reduce contention. + * The alignment is architecture and OS dependent, but rather than + * digging into OS headers to find the exact value we use an estimate + * that should cover most architectures. + */ +#define NM_CACHE_ALIGN 128 + +/* + * --- Netmap data structures --- + * + * The userspace data structures used by netmap are shown below. + * They are allocated by the kernel and mmap()ed by userspace threads. + * Pointers are implemented as memory offsets or indexes, + * so that they can be easily dereferenced in kernel and userspace. 
+ + KERNEL (opaque, obviously) + + ==================================================================== + | + USERSPACE | struct netmap_ring + +---->+---------------+ + / | head,cur,tail | + struct netmap_if (nifp, 1 per fd) / | buf_ofs | + +---------------+ / | other fields | + | ni_tx_rings | / +===============+ + | ni_rx_rings | / | buf_idx, len | slot[0] + | | / | flags, ptr | + | | / +---------------+ + +===============+ / | buf_idx, len | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | + | txring_ofs[1] | +---------------+ + (tx+1 entries) (num_slots entries) + | txring_ofs[t] | | buf_idx, len | slot[n-1] + +---------------+ | flags, ptr | + | rxring_ofs[0] | +---------------+ + | rxring_ofs[1] | + (rx+1 entries) + | rxring_ofs[r] | + +---------------+ + + * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to + * a file descriptor, the mmap()ed region contains a (logically readonly) + * struct netmap_if pointing to struct netmap_ring's. + * + * There is one netmap_ring per physical NIC ring, plus one tx/rx ring + * pair attached to the host stack (this pair is unused for non-NIC ports). + * + * All physical/host stack ports share the same memory region, + * so that zero-copy can be implemented between them. + * VALE switch ports instead have separate memory regions. + * + * The netmap_ring is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer (MTU-sized and residing in the + * mmapped region), its length and some flags. An extra 64-bit pointer + * is provided for user-supplied buffers in the tx path. + * + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE + * + * Added in NETMAP_API 11: + * + * + NIOCREGIF can request the allocation of extra spare buffers from + * the same memory pool. The desired number of buffers must be in + * nr_arg3. The ioctl may return fewer buffers, depending on memory + * availability. 
nr_arg3 will return the actual value, and, once + * mapped, nifp->ni_bufs_head will be the index of the first buffer. + * + * The buffers are linked to each other using the first uint32_t + * as the index. On close, ni_bufs_head must point to the list of + * buffers to be released. + * + * + NIOCREGIF can request space for extra rings (and buffers) + * allocated in the same memory space. The number of extra rings + * is in nr_arg1, and is advisory. This is a no-op on NICs where + * the size of the memory space is fixed. + * + * + NIOCREGIF can attach to PIPE rings sharing the same memory + * space with a parent device. The ifname indicates the parent device, + * which must already exist. Flags in nr_flags indicate if we want to + * bind the master or slave side, the index (from nr_ringid) + * is just a cookie and does not need to be sequential. + * + * + NIOCREGIF can also attach to 'monitor' rings that replicate + * the content of specific rings, also from the same memory space. + * + * Extra flags in nr_flags support the above functions. + * Application libraries may use the following naming scheme: + * netmap:foo all NIC ring pairs + * netmap:foo^ only host ring pair + * netmap:foo+ all NIC ring + host ring pairs + * netmap:foo-k the k-th NIC ring pair + * netmap:foo{k PIPE ring pair k, master side + * netmap:foo}k PIPE ring pair k, slave side + */ + +/* + * struct netmap_slot is a buffer descriptor + */ +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* length for this slot */ + uint16_t flags; /* buf changed, etc. */ + uint64_t ptr; /* pointer for indirect buffers */ +}; + +/* + * The following flags control how the slot is used + */ + +#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ + /* + * must be set whenever buf_idx is changed (as it might be + * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). 
Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. + */ + +#define NS_REPORT 0x0002 /* ask the hardware to report results */ + /* + * Request notification when slot is used by the hardware. + * Normally transmit completions are handled lazily and + * may be unreported. This flag lets us know when a slot + * has been sent (e.g. to terminate the sender). + */ + +#define NS_FORWARD 0x0004 /* pass packet 'forward' */ + /* + * (Only for physical ports, rx rings with NR_FORWARD set). + * Slot released to the kernel (i.e. before ring->head) with + * this flag set are passed to the peer ring (host/NIC), + * thus restoring the host-NIC connection for these slots. + * This supports efficient traffic monitoring or firewalling. + */ + +#define NS_NO_LEARN 0x0008 /* disable bridge learning */ + /* + * On a VALE switch, do not 'learn' the source port for + * this buffer. + */ + +#define NS_INDIRECT 0x0010 /* userspace buffer */ + /* + * (VALE tx rings only) data is in a userspace buffer, + * whose address is in the 'ptr' field in the slot. + */ + +#define NS_MOREFRAG 0x0020 /* packet has more fragments */ + /* + * (VALE ports only) + * Set on all but the last slot of a multi-segment packet. + * The 'len' field refers to the individual fragment. + */ + +#define NS_PORT_SHIFT 8 +#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) + /* + * The high 8 bits of the flag, if not zero, indicate the + * destination port for the VALE switch, overriding + * the lookup table. + */ + +#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) + /* + * (VALE rx rings only) the high 8 bits + * are the number of fragments. + */ + + +/* + * struct netmap_ring + * + * Netmap representation of a TX or RX ring (also known as "queue"). + * This is a queue implemented as a fixed-size circular array. + * At the software level the important fields are: head, cur, tail. + * + * In TX rings: + * + * head first slot available for transmission. 
+ * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] can be used for new packets to send; + * 'head' and 'cur' must be incremented as slots are filled + * with new packets to be sent; + * 'cur' can be moved further ahead if we need more space + * for new transmissions. XXX todo (2014-03-12) + * + * In RX rings: + * + * head first valid received packet + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] contain received packets; + * 'head' and 'cur' must be incremented as slots are consumed + * and can be returned to the kernel; + * 'cur' can be moved further ahead if we want to wait for + * new packets without returning the previous ones. + * + * DATA OWNERSHIP/LOCKING: + * The netmap_ring, and all slots and buffers in the range + * [head .. tail-1] are owned by the user program; + * the kernel only accesses them during a netmap system call + * and in the user thread context. + * + * Other slots and buffers are reserved for use by the kernel + */ +struct netmap_ring { + /* + * buf_ofs is meant to be used through macros. + * It contains the offset of the buffer region from this + * descriptor. + */ + const int64_t buf_ofs; + const uint32_t num_slots; /* number of slots in the ring. */ + const uint32_t nr_buf_size; + const uint16_t ringid; + const uint16_t dir; /* 0: tx, 1: rx */ + + uint32_t head; /* (u) first user slot */ + uint32_t cur; /* (u) wakeup point */ + uint32_t tail; /* (k) first kernel slot */ + + uint32_t flags; + + struct timeval ts; /* (k) time of last *sync() */ + + /* opaque room for a mutex or similar object */ +#if !defined(_WIN32) || defined(__CYGWIN__) + uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; +#else + uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; +#endif + + /* the slots follow. 
This struct has variable size */ + struct netmap_slot slot[0]; /* array of slots. */ +}; + + +/* + * RING FLAGS + */ +#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ + /* + * updates the 'ts' field on each netmap syscall. This saves + * saves a separate gettimeofday(), and is not much worse than + * software timestamps generated in the interrupt handler. + */ + +#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ + /* + * Enables the NS_FORWARD slot flag for the ring. + */ + + +/* + * Netmap representation of an interface and its queue(s). + * This is initialized by the kernel when binding a file + * descriptor to a port, and should be considered as readonly + * by user programs. The kernel never uses it. + * + * There is one netmap_if for each file descriptor on which we want + * to select/poll. + * select/poll operates on one or all pairs depending on the value of + * nmr_queueid passed on the ioctl. + */ +struct netmap_if { + char ni_name[IFNAMSIZ]; /* name of the interface. */ + const uint32_t ni_version; /* API version, currently unused */ + const uint32_t ni_flags; /* properties */ +#define NI_PRIV_MEM 0x1 /* private memory region */ + + /* + * The number of packet rings available in netmap mode. + * Physical NICs can have different numbers of tx and rx rings. + * Physical NICs also have a 'host' ring pair. + * Additionally, clients can request additional ring pairs to + * be used for internal communication. + */ + const uint32_t ni_tx_rings; /* number of HW tx rings */ + const uint32_t ni_rx_rings; /* number of HW rx rings */ + + uint32_t ni_bufs_head; /* head index for extra bufs */ + uint32_t ni_spare1[5]; + /* + * The following array contains the offset of each netmap ring + * from this structure, in the following order: + * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; + * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. 
+ * + * The area is filled up by the kernel on NIOCREGIF, + * and then only read by userspace code. + */ + const ssize_t ring_ofs[0]; +}; + + +#ifndef NIOCREGIF +/* + * ioctl names and related fields + * + * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, + * whose identity is set in NIOCREGIF through nr_ringid. + * These are non blocking and take no argument. + * + * NIOCGINFO takes a struct ifreq, the interface name is the input, + * the outputs are number of queues and number of descriptor + * for each queue (useful to set number of threads etc.). + * The info returned is only advisory and may change before + * the interface is bound to a file descriptor. + * + * NIOCREGIF takes an interface name within a struct nmre, + * and activates netmap mode on the interface (if possible). + * + * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we + * can pass it down to other NIC-related ioctls. + * + * The actual argument (struct nmreq) has a number of options to request + * different functions. + * The following are used in NIOCREGIF when nr_cmd == 0: + * + * nr_name (in) + * The name of the port (em0, valeXXX:YYY, etc.) + * limited to IFNAMSIZ for backward compatibility. + * + * nr_version (in/out) + * Must match NETMAP_API as used in the kernel, error otherwise. + * Always returns the desired value on output. + * + * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out) + * On input, non-zero values may be used to reconfigure the port + * according to the requested values, but this is not guaranteed. + * On output the actual values in use are reported. + * + * nr_ringid (in) + * Indicates how rings should be bound to the file descriptors. + * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) + * are used to indicate the ring number, and nr_flags specifies + * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. 
+ * + * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: + * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control + * the binding as follows: + * 0 (default) binds all physical rings + * NETMAP_HW_RING | ring number binds a single ring pair + * NETMAP_SW_RING binds only the host tx/rx rings + * + * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push + * packets on tx rings only if POLLOUT is set. + * The default is to push any pending packet. + * + * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release + * packets on rx rings also when POLLIN is NOT set. + * The default is to touch the rx ring only with POLLIN. + * Note that this is the opposite of TX because it + * reflects the common usage. + * + * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. + * NETMAP_PRIV_MEM is set on return for ports that do not use + * the global memory allocator. + * This information is not significant and applications + * should look at the region id in nr_arg2 + * + * nr_flags is the recommended mode to indicate which rings should + * be bound to a file descriptor. Values are NR_REG_* + * + * nr_arg1 (in) The number of extra rings to be reserved. + * Especially when allocating a VALE port the system only + * allocates the amount of memory needed for the port. + * If more shared memory rings are desired (e.g. for pipes), + * the first invocation for the same basename/allocator + * should specify a suitable number. Memory cannot be + * extended after the first allocation without closing + * all ports on the same region. + * + * nr_arg2 (in/out) The identity of the memory region used. + * On input, 0 means the system decides autonomously, + * other values may try to select a specific region. + * On return the actual value is reported. + * Region '1' is the global allocator, normally shared + * by all interfaces. Other values are private regions. + * If two ports the same region zero-copy is possible. 
+ * + * nr_arg3 (in/out) number of extra buffers to be allocated. + * + * + * + * nr_cmd (in) if non-zero indicates a special command: + * NETMAP_BDG_ATTACH and nr_name = vale*:ifname + * attaches the NIC to the switch; nr_ringid specifies + * which rings to use. Used by vale-ctl -a ... + * nr_arg1 = NETMAP_BDG_HOST also attaches the host port + * as in vale-ctl -h ... + * + * NETMAP_BDG_DETACH and nr_name = vale*:ifname + * disconnects a previously attached NIC. + * Used by vale-ctl -d ... + * + * NETMAP_BDG_LIST + * list the configuration of VALE switches. + * + * NETMAP_BDG_VNET_HDR + * Set the virtio-net header length used by the client + * of a VALE switch port. + * + * NETMAP_BDG_NEWIF + * create a persistent VALE port with name nr_name. + * Used by vale-ctl -n ... + * + * NETMAP_BDG_DELIF + * delete a persistent VALE port. Used by vale-ctl -d ... + * + * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific + * + * + * + */ + + +/* + * struct nmreq overlays a struct ifreq (just the name) + */ +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_version; /* API version */ + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_tx_slots; /* slots in tx rings */ + uint32_t nr_rx_slots; /* slots in rx rings */ + uint16_t nr_tx_rings; /* number of tx rings */ + uint16_t nr_rx_rings; /* number of rx rings */ + + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ +#define NETMAP_SW_RING 0x2000 /* only host ring pair */ + +#define NETMAP_RING_MASK 0x0fff /* the ring number */ + +#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ + +#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ + + uint16_t nr_cmd; +#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ +#define NETMAP_BDG_DETACH 2 /* detach the NIC */ +#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ +#define NETMAP_BDG_LIST 4 /* get bridge's info 
*/ +#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ +#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ +#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ +#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ +#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ +#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ +#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ + uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ +#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ + + uint16_t nr_arg2; + uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ + uint32_t nr_flags; + /* various modes, extends nr_ringid */ + uint32_t spare2[1]; +}; + +#define NR_REG_MASK 0xf /* values for nr_flags */ +enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ + NR_REG_ALL_NIC = 1, + NR_REG_SW = 2, + NR_REG_NIC_SW = 3, + NR_REG_ONE_NIC = 4, + NR_REG_PIPE_MASTER = 5, + NR_REG_PIPE_SLAVE = 6, +}; +/* monitor uses the NR_REG to select the rings to monitor */ +#define NR_MONITOR_TX 0x100 +#define NR_MONITOR_RX 0x200 +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 +/* request ptnetmap host support */ +#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ +#define NR_PTNETMAP_HOST 0x1000 +#define NR_RX_RINGS_ONLY 0x2000 +#define NR_TX_RINGS_ONLY 0x4000 +/* Applications set this flag if they are able to deal with virtio-net headers, + * that is send/receive frames that start with a virtio-net header. + * If not set, NIOCREGIF will fail with netmap ports that require applications + * to use those headers. If the flag is set, the application can use the + * NETMAP_VNET_HDR_GET command to figure out the header length. 
*/ +#define NR_ACCEPT_VNET_HDR 0x8000 + + +/* + * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined + * in ws2def.h but not sure if they are in the form we need. + * XXX so we redefine them + * in a convenient way to use for DeviceIoControl signatures + */ +#ifdef _WIN32 +#undef _IO // ws2def.h +#define _WIN_NM_IOCTL_TYPE 40000 +#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_BUFFERED, FILE_ANY_ACCESS ) +#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) + +#define _IOWR(_c, _n, _s) _IO(_c, _n) + +/* We havesome internal sysctl in addition to the externally visible ones */ +#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT +#define NETMAP_POLL _IO('i', 162) + +/* and also two setsockopt for sysctl emulation */ +#define NETMAP_SETSOCKOPT _IO('i', 140) +#define NETMAP_GETSOCKOPT _IO('i', 141) + + +//These linknames are for the Netmap Core Driver +#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" +#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" + +//Definition of a structure used to pass a virtual address within an IOCTL +typedef struct _MEMORY_ENTRY { + PVOID pUsermodeVirtualAddress; +} MEMORY_ENTRY, *PMEMORY_ENTRY; + +typedef struct _POLL_REQUEST_DATA { + int events; + int timeout; + int revents; +} POLL_REQUEST_DATA; + +#endif /* _WIN32 */ + +/* + * FreeBSD uses the size value embedded in the _IOWR to determine + * how much to copy in/out. So we need it to match the actual + * data structure we pass. We put some spares in the structure + * to ease compatibility with other versions + */ +#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ +#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ +#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ +#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ +#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. 
modules */ +#endif /* !NIOCREGIF */ + + +/* + * Helper functions for kernel and userspace + */ + +/* + * check if space is available in the ring. + */ +static inline int +nm_ring_empty(struct netmap_ring *ring) +{ + return (ring->cur == ring->tail); +} + +/* + * Opaque structure that is passed to an external kernel + * module via ioctl(fd, NIOCCONFIG, req) for a user-owned + * bridge port (at this point ephemeral VALE interface). + */ +#define NM_IFRDATA_LEN 256 +struct nm_ifreq { + char nifr_name[IFNAMSIZ]; + char data[NM_IFRDATA_LEN]; +}; + +/* + * netmap kernel thread configuration + */ +/* bhyve/vmm.ko MSIX parameters for IOCTL */ +struct ptn_vmm_ioctl_msix { + uint64_t msg; + uint64_t addr; +}; + +/* IOCTL parameters */ +struct nm_kth_ioctl { + u_long com; + /* TODO: use union */ + union { + struct ptn_vmm_ioctl_msix msix; + } data; +}; + +/* Configuration of a ptnetmap ring */ +struct ptnet_ring_cfg { + uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ + uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ + struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ +}; +#endif /* _NET_NETMAP_H_ */ diff --git a/src/plugins/netmap/netmap.api b/src/plugins/netmap/netmap.api new file mode 100644 index 00000000000..a14753cad9c --- /dev/null +++ b/src/plugins/netmap/netmap.api @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +/** \brief Create netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name + @param hw_addr - interface MAC + @param use_random_hw_addr - use random generated MAC + @param is_pipe - is pipe + @param is_master - 0=slave, 1=master +*/ +autoreply define netmap_create +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; + u8 hw_addr[6]; + u8 use_random_hw_addr; + u8 is_pipe; + u8 is_master; +}; + +/** \brief Delete netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name +*/ +autoreply define netmap_delete +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c new file mode 100644 index 00000000000..ebef215eb3b --- /dev/null +++ b/src/plugins/netmap/netmap.c @@ -0,0 +1,334 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <fcntl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> +#include <netmap/netmap.api_enum.h> +#include <netmap/netmap.api_types.h> + +netmap_main_t netmap_main; + +static clib_error_t * +netmap_fd_read_ready (clib_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + netmap_main_t *nm = &netmap_main; + u32 idx = uf->private_data; + + nm->pending_input_bitmap = + clib_bitmap_set (nm->pending_input_bitmap, idx, 1); + + /* Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, netmap_input_node.index); + + return 0; +} + +static void +close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) +{ + if (nif->clib_file_index != ~0) + { + clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); + nif->clib_file_index = ~0; + } + else if (nif->fd > -1) + close (nif->fd); + + if (nif->mem_region) + { + netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region]; + if (--reg->refcnt == 0) + { + munmap (reg->mem, reg->region_size); + reg->region_size = 0; + } + } + + + mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name, + &nif->if_index); + vec_free (nif->host_if_name); + vec_free (nif->req); + + clib_memset (nif, 0, sizeof (*nif)); + pool_put (nm->interfaces, nif); +} + +int +netmap_worker_thread_enable () +{ + /* if worker threads are enabled, switch to polling mode */ + foreach_vlib_main () + { + vlib_node_set_state (this_vlib_main, netmap_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + return 0; +} + +int +netmap_worker_thread_disable () +{ + foreach_vlib_main () + { + vlib_node_set_state (this_vlib_main, netmap_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + } + + return 0; +} + +int +netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * 
hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index) +{ + netmap_main_t *nm = &netmap_main; + int ret = 0; + uint32_t nr_reg; + netmap_if_t *nif = 0; + u8 hw_addr[6]; + vnet_sw_interface_t *sw; + vnet_main_t *vnm = vnet_get_main (); + uword *p; + struct nmreq *req = 0; + netmap_mem_region_t *reg; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int fd; + + p = mhash_get (&nm->if_index_by_host_if_name, if_name); + if (p) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + fd = open ("/dev/netmap", O_RDWR); + if (fd < 0) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + pool_get (nm->interfaces, nif); + nif->if_index = nif - nm->interfaces; + nif->fd = fd; + nif->clib_file_index = ~0; + + vec_validate (req, 0); + nif->req = req; + req->nr_version = NETMAP_API; + req->nr_flags = NR_REG_ALL_NIC; + + if (is_pipe) + req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE; + else + req->nr_flags = NR_REG_ALL_NIC; + + req->nr_flags |= NR_ACCEPT_VNET_HDR; + snprintf (req->nr_name, IFNAMSIZ, "%s", if_name); + req->nr_name[IFNAMSIZ - 1] = 0; + + if (ioctl (nif->fd, NIOCREGIF, req)) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + + nif->mem_region = req->nr_arg2; + vec_validate (nm->mem_regions, nif->mem_region); + reg = &nm->mem_regions[nif->mem_region]; + if (reg->region_size == 0) + { + reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + clib_warning ("mem %p", reg->mem); + if (reg->mem == MAP_FAILED) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + reg->region_size = req->nr_memsize; + } + reg->refcnt++; + + nif->nifp = NETMAP_IF (reg->mem, req->nr_offset); + nr_reg = nif->req->nr_flags & NR_REG_MASK; + + if (nr_reg == NR_REG_SW) + { /* host stack */ + nif->first_tx_ring = nif->last_tx_ring = nif->req->nr_tx_rings; + nif->first_rx_ring = nif->last_rx_ring = nif->req->nr_rx_rings; + } + else if (nr_reg == NR_REG_ALL_NIC) + { /* only nic */ + nif->first_tx_ring = 0; + nif->first_rx_ring = 0; 
+ nif->last_tx_ring = nif->req->nr_tx_rings - 1; + nif->last_rx_ring = nif->req->nr_rx_rings - 1; + } + else if (nr_reg == NR_REG_NIC_SW) + { + nif->first_tx_ring = 0; + nif->first_rx_ring = 0; + nif->last_tx_ring = nif->req->nr_tx_rings; + nif->last_rx_ring = nif->req->nr_rx_rings; + } + else if (nr_reg == NR_REG_ONE_NIC) + { + /* XXX check validity */ + nif->first_tx_ring = nif->last_tx_ring = nif->first_rx_ring = + nif->last_rx_ring = nif->req->nr_ringid & NETMAP_RING_MASK; + } + else + { /* pipes */ + nif->first_tx_ring = nif->last_tx_ring = 0; + nif->first_rx_ring = nif->last_rx_ring = 0; + } + + nif->host_if_name = if_name; + nif->per_interface_next_index = ~0; + + if (tm->n_vlib_mains > 1) + clib_spinlock_init (&nif->lockp); + + { + clib_file_t template = { 0 }; + template.read_function = netmap_fd_read_ready; + template.file_descriptor = nif->fd; + template.private_data = nif->if_index; + template.description = format (0, "netmap socket"); + nif->clib_file_index = clib_file_add (&file_main, &template); + } + + /*use configured or generate random MAC address */ + if (hw_addr_set) + memcpy (hw_addr, hw_addr_set, 6); + else + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hw_addr + 2, &rnd, sizeof (rnd)); + hw_addr[0] = 2; + hw_addr[1] = 0xfe; + } + + vnet_eth_interface_registration_t eir = {}; + + eir.dev_class_index = netmap_device_class.index; + eir.dev_instance = nif->if_index; + eir.address = hw_addr; + eir.cb.set_max_frame_size = NULL; + + nif->hw_if_index = vnet_eth_register_interface (vnm, &eir); + + sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index); + nif->sw_if_index = sw->sw_if_index; + + mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0); + + if (sw_if_index) + *sw_if_index = nif->sw_if_index; + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1) + netmap_worker_thread_enable (); + + return 0; + +error: + close_netmap_if (nm, nif); + return ret; +} 
+ +int +netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) +{ + vnet_main_t *vnm = vnet_get_main (); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif; + uword *p; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); + if (p == NULL) + { + clib_warning ("Host interface %s does not exist", host_if_name); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + nif = pool_elt_at_index (nm->interfaces, p[0]); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); + + ethernet_delete_interface (vnm, nif->hw_if_index); + + close_netmap_if (nm, nif); + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) + netmap_worker_thread_disable (); + + return 0; +} + +static clib_error_t * +netmap_init (vlib_main_t * vm) +{ + netmap_main_t *nm = &netmap_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + clib_memset (nm, 0, sizeof (netmap_main_t)); + + nm->input_cpu_first_index = 0; + nm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + nm->input_cpu_first_index = tr->first_index; + nm->input_cpu_count = tr->count; + } + + mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); + + vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (netmap_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.h b/src/plugins/netmap/netmap.h new file mode 100644 index 00000000000..29f855fda8e --- /dev/null +++ b/src/plugins/netmap/netmap.h @@ -0,0 +1,166 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/* + * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <vppinfra/lock.h> + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u8 *host_if_name; + uword if_index; + u32 hw_if_index; + u32 sw_if_index; + u32 clib_file_index; + + u32 per_interface_next_index; + u8 is_admin_up; + + /* netmap */ + struct nmreq *req; + u16 mem_region; + int fd; + struct netmap_if *nifp; + u16 first_tx_ring; + u16 last_tx_ring; + u16 first_rx_ring; + u16 last_rx_ring; + +} netmap_if_t; + +typedef struct +{ + char *mem; + u32 region_size; + int refcnt; +} netmap_mem_region_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + netmap_if_t *interfaces; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of host interface names */ + mhash_t if_index_by_host_if_name; + + /* vector of memory regions */ + netmap_mem_region_t *mem_regions; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 
input_cpu_count; +} netmap_main_t; + +extern netmap_main_t netmap_main; +extern vnet_device_class_t netmap_device_class; +extern vlib_node_registration_t netmap_input_node; + +int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index); +int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); + + +/* Macros and helper functions from sys/net/netmap_user.h */ + +#ifdef _NET_NETMAP_H_ + +#define _NETMAP_OFFSET(type, ptr, offset) \ + ((type)(void *)((char *)(ptr) + (offset))) + +#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) + +#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index] ) + +#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +static inline uint32_t +nm_ring_next (struct netmap_ring *ring, uint32_t i) +{ + return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); +} + + +/* + * Return 1 if we have pending transmissions in the tx ring. 
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size) + */ +static inline int +nm_tx_pending (struct netmap_ring *ring) +{ + return nm_ring_next (ring, ring->tail) != ring->head; +} + +static inline uint32_t +nm_ring_space (struct netmap_ring *ring) +{ + int ret = ring->tail - ring->cur; + if (ret < 0) + ret += ring->num_slots; + return ret; +} +#endif + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap_api.c b/src/plugins/netmap/netmap_api.c new file mode 100644 index 00000000000..51f572a23e6 --- /dev/null +++ b/src/plugins/netmap/netmap_api.c @@ -0,0 +1,95 @@ +/* + *------------------------------------------------------------------ + * netmap_api.c - netmap api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <netmap/netmap.h> + +#include <vnet/format_fns.h> +#include <netmap/netmap.api_enum.h> +#include <netmap/netmap.api_types.h> + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(NETMAP_CREATE, netmap_create) \ +_(NETMAP_DELETE, netmap_delete) \ + +static void +vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_create_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = + netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr, + mp->is_pipe, mp->is_master, 0); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY); +} + +static void +vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_delete_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = netmap_delete_if (vm, if_name); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY); +} + +#include <netmap/netmap.api.c> +static clib_error_t * +netmap_api_hookup (vlib_main_t * vm) +{ + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (); + + return 0; +} + +VLIB_API_INIT_FUNCTION (netmap_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c new file mode 100644 index 00000000000..6169847fa79 --- /dev/null +++ b/src/plugins/netmap/node.c @@ -0,0 +1,295 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +#define foreach_netmap_input_error + +typedef enum +{ +#define _(f,s) NETMAP_INPUT_ERROR_##f, + foreach_netmap_input_error +#undef _ + NETMAP_INPUT_N_ERROR, +} netmap_input_error_t; + +static char *netmap_input_error_strings[] = { +#define _(n,s) s, + foreach_netmap_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + struct netmap_slot slot; +} netmap_input_trace_t; + +static u8 * +format_netmap_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "netmap: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", + format_white_space, indent + 2, + t->slot.flags, t->slot.len, t->slot.buf_idx); + return s; +} + +always_inline void +buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) +{ + 
vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +always_inline uword +netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, netmap_if_t * nif) +{ + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + uword n_trace = vlib_get_trace_count (vm, node); + netmap_main_t *nm = &netmap_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + struct netmap_ring *ring; + int cur_ring; + u32 thread_index = vm->thread_index; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); + + if (nif->per_interface_next_index != ~0) + next_index = nif->per_interface_next_index; + + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (nm->rx_buffers[thread_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], + VLIB_FRAME_SIZE); + vec_set_len (nm->rx_buffers[thread_index], n_free_bufs); + } + + cur_ring = nif->first_rx_ring; + while (cur_ring <= nif->last_rx_ring && n_free_bufs) + { + int r = 0; + u32 cur_slot_index; + ring = NETMAP_RXRING (nif->nifp, cur_ring); + r = nm_ring_space (ring); + + if (!r) + { + cur_ring++; + continue; + } + + if (r > n_free_bufs) + r = n_free_bufs; + + cur_slot_index = ring->cur; + while (r) + { + u32 n_left_to_next; + u32 next0 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (r && n_left_to_next) + { + vlib_buffer_t *first_b0 = 0; + u32 offset = 0; + u32 bi0 = 0, first_bi0 = 0, prev_bi0; + u32 
next_slot_index = (cur_slot_index + 1) % ring->num_slots; + u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; + struct netmap_slot *slot = &ring->slot[cur_slot_index]; + u32 data_len = slot->len; + + /* prefetch 2 slots in advance */ + CLIB_PREFETCH (&ring->slot[next2_slot_index], + CLIB_CACHE_LINE_BYTES, LOAD); + /* prefetch start of next packet */ + CLIB_PREFETCH (NETMAP_BUF + (ring, ring->slot[next_slot_index].buf_idx), + CLIB_CACHE_LINE_BYTES, LOAD); + + while (data_len && n_free_bufs) + { + vlib_buffer_t *b0; + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (nm->rx_buffers[thread_index]) - 1; + prev_bi0 = bi0; + bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + vec_set_len (nm->rx_buffers[thread_index], + last_empty_buffer); + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b0->current_data = 0; + clib_memcpy_fast (vlib_buffer_get_current (b0), + (u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, bytes_to_copy); + + /* fill buffer header */ + b0->current_length = bytes_to_copy; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + nif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + } + else + buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); + + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + + /* trace */ + if (PREDICT_FALSE (n_trace > 0)) + { + if (PREDICT_TRUE (first_b0 != 0) && + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0)) + { + netmap_input_trace_t *tr; + + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = nif->hw_if_index; + memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); + } + } + 
+ /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, + next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes += slot->len; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + cur_slot_index = next_slot_index; + + r--; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + ring->head = ring->cur = cur_slot_index; + cur_ring++; + } + + if (n_rx_packets) + ioctl (nif->fd, NIOCRXSYNC, NULL); + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + + vnet_device_increment_rx_packets (thread_index, n_rx_packets); + + return n_rx_packets; +} + +VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + int i; + u32 n_rx_packets = 0; + u32 thread_index = vm->thread_index; + netmap_main_t *nm = &netmap_main; + netmap_if_t *nmi; + + for (i = 0; i < vec_len (nm->interfaces); i++) + { + nmi = vec_elt_at_index (nm->interfaces, i); + if (nmi->is_admin_up && + (i % nm->input_cpu_count) == + (thread_index - nm->input_cpu_first_index)) + n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); + } + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (netmap_input_node) = { + .name = "netmap-input", + .sibling_of = "device-input", + .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, + .format_trace = format_netmap_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = NETMAP_INPUT_N_ERROR, + .error_strings = netmap_input_error_strings, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/plugin.c 
b/src/plugins/netmap/plugin.c new file mode 100644 index 00000000000..1673225b683 --- /dev/null +++ b/src/plugins/netmap/plugin.c @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Tom Jones <thj@freebsd.org> + * + * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship + * from the FreeBSD Foundation. + * + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "netmap", +}; diff --git a/src/plugins/npt66/npt66.api b/src/plugins/npt66/npt66.api index 63640ac2097..dab09cda31f 100644 --- a/src/plugins/npt66/npt66.api +++ b/src/plugins/npt66/npt66.api @@ -36,5 +36,16 @@ counters npt66 { units "packets"; description "packet translation failed"; }; - + icmp6_checksum { + severity error; + type counter64; + units "packets"; + description "ICMP6 checksum validation failed"; + }; + icmp6_truncated { + severity error; + type counter64; + units "packets"; + description "ICMP6 packet truncated"; + }; };
\ No newline at end of file diff --git a/src/plugins/npt66/npt66_node.c b/src/plugins/npt66/npt66_node.c index f74f9143998..0d0c475f2c3 100644 --- a/src/plugins/npt66/npt66_node.c +++ b/src/plugins/npt66/npt66_node.c @@ -127,10 +127,7 @@ npt66_translate (ip6_header_t *ip, npt66_binding_t *binding, int dir) if (!ip6_prefix_cmp (ip->src_address, binding->internal, binding->internal_plen)) { - clib_warning ( - "npt66_translate: src address is not internal (%U -> %U)", - format_ip6_address, &ip->src_address, format_ip6_address, - &ip->dst_address); + /* Packet is not for us */ goto done; } ip->src_address = ip6_prefix_copy (ip->src_address, binding->external, @@ -144,10 +141,7 @@ npt66_translate (ip6_header_t *ip, npt66_binding_t *binding, int dir) if (!ip6_prefix_cmp (ip->dst_address, binding->external, binding->external_plen)) { - clib_warning ( - "npt66_translate: dst address is not external (%U -> %U)", - format_ip6_address, &ip->src_address, format_ip6_address, - &ip->dst_address); + /* Packet is not for us */ goto done; } ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal, @@ -162,7 +156,7 @@ done: static int npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip, icmp46_header_t *icmp, npt66_binding_t *binding, - int dir) + int dir, u32 *error) { ip6_header_t *ip = (ip6_header_t *) (icmp + 2); int rv = 0; @@ -171,7 +165,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip, if (clib_net_to_host_u16 (outer_ip->payload_length) < sizeof (icmp46_header_t) + 4 + sizeof (ip6_header_t)) { - clib_warning ("ICMP6 payload too short"); + *error = NPT66_ERROR_ICMP6_TRUNCATED; return -1; } @@ -181,7 +175,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip, sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, b, outer_ip, &bogus_length); if (sum16 != 0 && sum16 != 0xffff) { - clib_warning ("ICMP6 checksum failed"); + *error = NPT66_ERROR_ICMP6_CHECKSUM; return -1; } if (dir == VLIB_RX) @@ -189,10 +183,7 @@ 
npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip, if (!ip6_prefix_cmp (ip->src_address, binding->external, binding->external_plen)) { - clib_warning ( - "npt66_icmp6_translate: src address is not internal (%U -> %U)", - format_ip6_address, &ip->src_address, format_ip6_address, - &ip->dst_address); + /* Not for us */ goto done; } ip->src_address = ip6_prefix_copy (ip->src_address, binding->internal, @@ -206,10 +197,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip, if (!ip6_prefix_cmp (ip->dst_address, binding->external, binding->external_plen)) { - clib_warning ( - "npt66_icmp6_translate: dst address is not external (%U -> %U)", - format_ip6_address, &ip->src_address, format_ip6_address, - &ip->dst_address); + /* Not for us */ goto done; } ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal, @@ -217,8 +205,8 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip, rv = npt66_adjust_checksum (binding->internal_plen, false, binding->delta, &ip->dst_address); } -done: +done: return rv; } @@ -243,10 +231,12 @@ npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, b, n_left_from); npt66_binding_t *binding; + u32 translated = 0; /* Stage 1: build vector of flow hash (based on lookup mask) */ while (n_left_from > 0) { + u32 error = NPT66_ERROR_TRANSLATION; u32 sw_if_index = vnet_buffer (b[0])->sw_if_index[dir]; u32 iph_offset = dir == VLIB_TX ? 
vnet_buffer (b[0])->ip.save_rewrite_length : 0; @@ -261,28 +251,26 @@ npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, icmp46_header_t *icmp = (icmp46_header_t *) (ip + 1); if (ip->protocol == IP_PROTOCOL_ICMP6 && icmp->type < 128) { - rv = npt66_icmp6_translate (b[0], ip, icmp, binding, dir); + rv = npt66_icmp6_translate (b[0], ip, icmp, binding, dir, &error); if (rv < 0) { - clib_warning ("ICMP6 npt66_translate failed"); *next = NPT66_NEXT_DROP; + b[0]->error = node->errors[error]; goto next; } } - rv = npt66_translate (ip, binding, dir); + rv = npt66_translate (ip, binding, dir); if (rv < 0) { - vlib_node_increment_counter (vm, node->node_index, - NPT66_ERROR_TRANSLATION, 1); + b[0]->error = node->errors[error]; *next = NPT66_NEXT_DROP; goto next; } - else if (dir == VLIB_TX) - vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_TX, 1); else - vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_RX, 1); - + { + translated++; + } next: next += 1; n_left_from -= 1; @@ -321,6 +309,9 @@ npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, break; } } + vlib_node_increment_counter ( + vm, node->node_index, dir == VLIB_TX ? 
NPT66_ERROR_TX : NPT66_ERROR_RX, + translated); vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; @@ -338,17 +329,17 @@ VLIB_NODE_FN (npt66_output_node) } VLIB_REGISTER_NODE(npt66_input_node) = { - .name = "npt66-input", - .vector_size = sizeof(u32), - .format_trace = format_npt66_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = NPT66_N_ERROR, - .error_counters = npt66_error_counters, - .n_next_nodes = NPT66_N_NEXT, - .next_nodes = - { - [NPT66_NEXT_DROP] = "error-drop", - }, + .name = "npt66-input", + .vector_size = sizeof(u32), + .format_trace = format_npt66_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = NPT66_N_ERROR, + .error_counters = npt66_error_counters, + .n_next_nodes = NPT66_N_NEXT, + .next_nodes = + { + [NPT66_NEXT_DROP] = "error-drop", + }, }; VLIB_REGISTER_NODE (npt66_output_node) = { diff --git a/src/plugins/builtinurl/CMakeLists.txt b/src/plugins/osi/CMakeLists.txt index ddbca5e50f1..8ab014770ea 100644 --- a/src/plugins/builtinurl/CMakeLists.txt +++ b/src/plugins/osi/CMakeLists.txt @@ -1,5 +1,4 @@ - -# Copyright (c) <current-year> <your-organization> +# Copyright (c) 2023 Cisco and/or its affiliates # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -12,15 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-add_vpp_plugin(builtinurl - SOURCES - builtins.c - builtinurl.c - builtinurl.h +add_vpp_plugin(osi - API_FILES - builtinurl.api + SOURCES + osi.c + node.c + pg.c + plugin.c - API_TEST_SOURCES - builtinurl_test.c + INSTALL_HEADERS + osi.h ) diff --git a/src/plugins/osi/FEATURE.yaml b/src/plugins/osi/FEATURE.yaml new file mode 100644 index 00000000000..337be1c7146 --- /dev/null +++ b/src/plugins/osi/FEATURE.yaml @@ -0,0 +1,11 @@ +--- +name: OSI plugin +maintainer: + - community <vpp-dev@lists.fd.io> +features: + - Adds support for OSI protocols (SAP types) + - Registered as input protocol for PPP, HDLC, and LLC +missing: + - No tests for this feature currently exist +description: "" +state: experimental diff --git a/src/plugins/osi/node.c b/src/plugins/osi/node.c new file mode 100644 index 00000000000..a36b1525e0e --- /dev/null +++ b/src/plugins/osi/node.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * osi_node.c: osi packet processing + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <osi/osi.h> +#include <vnet/ppp/ppp.h> +#include <vnet/hdlc/hdlc.h> +#include <vnet/llc/llc.h> + +#define foreach_osi_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") + +typedef enum +{ +#define _(s,n) OSI_INPUT_NEXT_##s, + foreach_osi_input_next +#undef _ + OSI_INPUT_N_NEXT, +} osi_input_next_t; + +typedef struct +{ + u8 packet_data[32]; +} osi_input_trace_t; + +static u8 * +format_osi_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + osi_input_trace_t *t = va_arg (*va, osi_input_trace_t *); + + s = format (s, "%U", format_osi_header, t->packet_data); + + return s; +} + +static uword +osi_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + osi_main_t *lm = &osi_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (osi_input_trace_t)); + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + osi_header_t *h0, *h1; + u8 next0, next1, enqueue_code; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *b2, *b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + + CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + next0 = lm->input_next_by_protocol[h0->protocol]; + next1 = lm->input_next_by_protocol[h1->protocol]; + + b0->error = + node->errors[next0 == + OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL : + OSI_ERROR_NONE]; + b1->error = + node->errors[next1 == + OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL : + OSI_ERROR_NONE]; + + enqueue_code = (next0 != next_index) + 2 * (next1 != next_index); + + if (PREDICT_FALSE (enqueue_code != 0)) + { + switch (enqueue_code) + { + case 1: + /* A B A */ + to_next[-2] = bi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + break; + + case 2: + /* A A B */ + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, bi1); + break; + + case 3: + /* A B B or A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, bi0); + vlib_set_next_frame_buffer (vm, node, next1, bi1); + if (next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + next_index = next1; + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + } + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + osi_header_t *h0; + u8 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = 
vlib_buffer_get_current (b0); + + next0 = lm->input_next_by_protocol[h0->protocol]; + + b0->error = + node->errors[next0 == + OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL : + OSI_ERROR_NONE]; + + /* Sent packet to wrong next? */ + if (PREDICT_FALSE (next0 != next_index)) + { + /* Return old frame; remove incorrectly enqueued packet. */ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); + + /* Send to correct next. */ + next_index = next0; + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static char *osi_error_strings[] = { +#define _(f,s) s, + foreach_osi_error +#undef _ +}; + +VLIB_REGISTER_NODE (osi_input_node) = { + .function = osi_input, + .name = "osi-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = OSI_N_ERROR, + .error_strings = osi_error_strings, + + .n_next_nodes = OSI_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [OSI_INPUT_NEXT_##s] = n, + foreach_osi_input_next +#undef _ + }, + + .format_buffer = format_osi_header_with_length, + .format_trace = format_osi_input_trace, + .unformat_buffer = unformat_osi_header, +}; + +static void +osi_setup_node (vlib_main_t *vm, u32 node_index) +{ + vlib_node_t *n = vlib_get_node (vm, node_index); + pg_node_t *pn = pg_get_node (node_index); + + n->format_buffer = format_osi_header_with_length; + n->unformat_buffer = unformat_osi_header; + pn->unformat_edit = unformat_pg_osi_header; +} + +static clib_error_t * +osi_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + osi_main_t *lm = &osi_main; + + if ((error = vlib_call_init_function (vm, osi_init))) + return error; + + osi_setup_node (vm, osi_input_node.index); + + { + int i; + for (i = 0; i < ARRAY_LEN (lm->input_next_by_protocol); i++) + lm->input_next_by_protocol[i] = OSI_INPUT_NEXT_DROP; + } + + 
ppp_register_input_protocol (vm, PPP_PROTOCOL_osi, osi_input_node.index); + hdlc_register_input_protocol (vm, HDLC_PROTOCOL_osi, osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer1, + osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer2, + osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer3, + osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer4, + osi_input_node.index); + llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer5, + osi_input_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (osi_input_init); + +void +osi_register_input_protocol (osi_protocol_t protocol, u32 node_index) +{ + osi_main_t *lm = &osi_main; + vlib_main_t *vm = lm->vlib_main; + osi_protocol_info_t *pi; + + { + clib_error_t *error = vlib_call_init_function (vm, osi_input_init); + if (error) + clib_error_report (error); + } + + pi = osi_get_protocol_info (lm, protocol); + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, osi_input_node.index, node_index); + + lm->input_next_by_protocol[protocol] = pi->next_index; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/osi/osi.c b/src/plugins/osi/osi.c new file mode 100644 index 00000000000..67c7053f388 --- /dev/null +++ b/src/plugins/osi/osi.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * osi.c: osi support + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <osi/osi.h> + +/* Global main structure. 
*/ +osi_main_t osi_main; + +u8 * +format_osi_protocol (u8 * s, va_list * args) +{ + osi_protocol_t p = va_arg (*args, u32); + osi_main_t *pm = &osi_main; + osi_protocol_info_t *pi = osi_get_protocol_info (pm, p); + + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "0x%02x", p); + + return s; +} + +u8 * +format_osi_header_with_length (u8 * s, va_list * args) +{ + osi_main_t *pm = &osi_main; + osi_header_t *h = va_arg (*args, osi_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + osi_protocol_t p = h->protocol; + u32 indent, header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "osi header truncated"); + + indent = format_get_indent (s); + + s = format (s, "OSI %U", format_osi_protocol, p); + + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + { + osi_protocol_info_t *pi = osi_get_protocol_info (pm, p); + vlib_node_t *node = vlib_get_node (pm->vlib_main, pi->node_index); + if (node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) (h + 1), + max_header_bytes - header_bytes); + } + + return s; +} + +u8 * +format_osi_header (u8 * s, va_list * args) +{ + osi_header_t *h = va_arg (*args, osi_header_t *); + return format (s, "%U", format_osi_header_with_length, h, 0); +} + +/* Returns osi protocol as an int in host byte order. */ +uword +unformat_osi_protocol (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + osi_main_t *pm = &osi_main; + int p, i; + + /* Numeric type. */ + if (unformat (input, "0x%x", &p) || unformat (input, "%d", &p)) + { + if (p >= (1 << 8)) + return 0; + *result = p; + return 1; + } + + /* Named type. 
*/ + if (unformat_user (input, unformat_vlib_number_by_name, + pm->protocol_info_by_name, &i)) + { + osi_protocol_info_t *pi = vec_elt_at_index (pm->protocol_infos, i); + *result = pi->protocol; + return 1; + } + + return 0; +} + +uword +unformat_osi_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + osi_header_t _h, *h = &_h; + u8 p; + + if (!unformat (input, "%U", unformat_osi_protocol, &p)) + return 0; + + h->protocol = p; + + /* Add header to result. */ + { + void *p; + u32 n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, n_bytes); + clib_memcpy (p, h, n_bytes); + } + + return 1; +} + +static void +add_protocol (osi_main_t * pm, osi_protocol_t protocol, char *protocol_name) +{ + osi_protocol_info_t *pi; + u32 i; + + vec_add2 (pm->protocol_infos, pi, 1); + i = pi - pm->protocol_infos; + + pi->name = protocol_name; + pi->protocol = protocol; + pi->next_index = pi->node_index = ~0; + + hash_set (pm->protocol_info_by_protocol, protocol, i); + hash_set_mem (pm->protocol_info_by_name, pi->name, i); +} + +static clib_error_t * +osi_init (vlib_main_t * vm) +{ + osi_main_t *pm = &osi_main; + + clib_memset (pm, 0, sizeof (pm[0])); + pm->vlib_main = vm; + + pm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); + pm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); + +#define _(f,n) add_protocol (pm, OSI_PROTOCOL_##f, #f); + foreach_osi_protocol; +#undef _ + + return vlib_call_init_function (vm, osi_input_init); +} + +/* init order dependency: llc_init -> osi_init -> snap_init*/ +/* Otherwise, osi_input_init will wipe out e.g. 
the snap init */ +VLIB_INIT_FUNCTION (osi_init) = { + .init_order = VLIB_INITS ("llc_init", "osi_init", "snap_init"), +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/osi/osi.h b/src/plugins/osi/osi.h new file mode 100644 index 00000000000..fb248ed9cc5 --- /dev/null +++ b/src/plugins/osi/osi.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * osi.h: OSI definitions + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_osi_h +#define included_osi_h + +#include <vnet/vnet.h> + +#define foreach_osi_protocol \ + _ (null, 0x0) \ + _ (x_29, 0x01) \ + _ (x_633, 0x03) \ + _ (q_931, 0x08) \ + _ (q_933, 0x08) \ + _ (q_2931, 0x09) \ + _ (q_2119, 0x0c) \ + _ (snap, 0x80) \ + _ (clnp, 0x81) \ + _ (esis, 0x82) \ + _ (isis, 0x83) \ + _ (idrp, 0x85) \ + _ (x25_esis, 0x8a) \ + _ (iso10030, 0x8c) \ + _ (iso11577, 0x8d) \ + _ (ip6, 0x8e) \ + _ (compressed, 0xb0) \ + _ (sndcf, 0xc1) \ + _ (ip4, 0xcc) \ + _ (ppp, 0xcf) + +typedef enum +{ +#define _(f,n) OSI_PROTOCOL_##f = n, + foreach_osi_protocol +#undef _ +} osi_protocol_t; + +typedef struct +{ + u8 protocol; + + u8 payload[0]; +} osi_header_t; + +typedef struct +{ + /* Name (a c string). */ + char *name; + + /* OSI protocol (SAP type). */ + osi_protocol_t protocol; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} osi_protocol_info_t; + +#define foreach_osi_error \ + _ (NONE, "no error") \ + _ (UNKNOWN_PROTOCOL, "unknown osi protocol") + +typedef enum +{ +#define _(f,s) OSI_ERROR_##f, + foreach_osi_error +#undef _ + OSI_N_ERROR, +} osi_error_t; + +typedef struct +{ + vlib_main_t *vlib_main; + + osi_protocol_info_t *protocol_infos; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword *protocol_info_by_name, *protocol_info_by_protocol; + + /* osi-input next index indexed by protocol. */ + u8 input_next_by_protocol[256]; +} osi_main_t; + +always_inline osi_protocol_info_t * +osi_get_protocol_info (osi_main_t * m, osi_protocol_t protocol) +{ + uword *p = hash_get (m->protocol_info_by_protocol, protocol); + return p ? 
vec_elt_at_index (m->protocol_infos, p[0]) : 0; +} + +extern osi_main_t osi_main; + +/* Register given node index to take input for given osi type. */ +void osi_register_input_protocol (osi_protocol_t protocol, u32 node_index); + +format_function_t format_osi_protocol; +format_function_t format_osi_header; +format_function_t format_osi_header_with_length; + +/* Parse osi protocol as 0xXXXX or protocol name. */ +unformat_function_t unformat_osi_protocol; + +/* Parse osi header. */ +unformat_function_t unformat_osi_header; +unformat_function_t unformat_pg_osi_header; + +void osi_register_input_protocol (osi_protocol_t protocol, u32 node_index); + +format_function_t format_osi_header; + +#endif /* included_osi_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/osi/pg.c b/src/plugins/osi/pg.c new file mode 100644 index 00000000000..3bac693c127 --- /dev/null +++ b/src/plugins/osi/pg.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * osi_pg.c: packet generator osi interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* Packet-generator unformat function for an OSI header edit group.
 * Parses the protocol (fixed value or edit spec) from INPUT into
 * stream S, then hands the rest of the input to the protocol's own
 * unformat_edit (when the protocol is fixed and has a registered
 * node) or to the generic payload parser.
 * Returns 1 on success; on failure the partially built edit group is
 * freed and 0 is returned. */
uword
unformat_pg_osi_header (unformat_input_t * input, va_list * args)
{
  pg_stream_t *s = va_arg (*args, pg_stream_t *);
  pg_osi_header_t *h;
  u32 group_index, error;

  /* Reserve an edit group sized for the one-byte OSI header. */
  h = pg_create_edit_group (s, sizeof (h[0]), sizeof (osi_header_t),
			    &group_index);
  pg_osi_header_init (h);

  error = 1;
  if (!unformat (input, "%U",
		 unformat_pg_edit, unformat_osi_protocol, &h->protocol))
    goto done;

  {
    osi_main_t *pm = &osi_main;
    osi_protocol_info_t *pi = 0;
    pg_node_t *pg_node = 0;

    /* Only a fixed (non-ranging) protocol value can select a
     * protocol-specific payload parser. */
    if (h->protocol.type == PG_EDIT_FIXED)
      {
	u8 t = *h->protocol.values[PG_EDIT_LO];
	pi = osi_get_protocol_info (pm, t);
	if (pi && pi->node_index != ~0)
	  pg_node = pg_get_node (pi->node_index);
      }

    /* Prefer the protocol node's own edit parser; otherwise fall back
     * to raw payload. */
    if (pg_node && pg_node->unformat_edit
	&& unformat_user (input, pg_node->unformat_edit, s))
      ;

    else if (!unformat_user (input, unformat_pg_payload, s))
      goto done;
  }

  error = 0;
done:
  if (error)
    pg_free_edit_group (s);
  return error == 0;
}
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "OSI plugin", +};
\ No newline at end of file diff --git a/src/plugins/prom/prom.c b/src/plugins/prom/prom.c index 934e8480d3c..475e98b1038 100644 --- a/src/plugins/prom/prom.c +++ b/src/plugins/prom/prom.c @@ -191,6 +191,7 @@ send_data_to_hss (hss_session_handle_t sh) args.sh = sh; args.data = vec_dup (pm->stats); args.data_len = vec_len (pm->stats); + args.ct = HTTP_CONTENT_TEXT_PLAIN; args.sc = HTTP_STATUS_OK; args.free_vec_data = 1; @@ -207,7 +208,7 @@ static uword prom_scraper_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f) { - uword *event_data = 0, event_type; + uword *event_data = 0, event_type, *sh_as_uword; prom_main_t *pm = &prom_main; hss_session_handle_t sh; f64 timeout = 10000.0; @@ -222,12 +223,15 @@ prom_scraper_process (vlib_main_t *vm, vlib_node_runtime_t *rt, /* timeout, do nothing */ break; case PROM_SCRAPER_EVT_RUN: - sh.as_u64 = event_data[0]; vec_reset_length (pm->stats); pm->stats = scrape_stats_segment (pm->stats, pm->stats_patterns, pm->used_only); - session_send_rpc_evt_to_thread_force (sh.thread_index, - send_data_to_hss_rpc, &sh); + vec_foreach (sh_as_uword, event_data) + { + sh.as_u64 = (u64) *sh_as_uword; + session_send_rpc_evt_to_thread_force ( + sh.thread_index, send_data_to_hss_rpc, sh_as_uword); + } pm->last_scrape = vlib_time_now (vm); break; default: diff --git a/src/plugins/pvti/CMakeLists.txt b/src/plugins/pvti/CMakeLists.txt new file mode 100644 index 00000000000..900b662d54a --- /dev/null +++ b/src/plugins/pvti/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(pvti + SOURCES + pvti_if.c + pvti.c + input.h + input.c + input-main.c + output.h + output.c + output-main.c + bypass.h + bypass.c + bypass-main.c + api.c + pvti.h + + MULTIARCH_SOURCES + input.c + output.c + bypass.c + + API_FILES + pvti.api + + # API_TEST_SOURCES + # pvti_test.c +) diff --git a/src/plugins/pvti/FEATURE.yaml b/src/plugins/pvti/FEATURE.yaml new file mode 100644 index 00000000000..52dbe5b7c1b --- /dev/null +++ b/src/plugins/pvti/FEATURE.yaml @@ -0,0 +1,8 @@ +--- +name: Packet Vector Tunnel +maintainer: Andrew Yourtchenko <ayourtch@gmail.com> +features: + - support inner MTU up to ~8K over standard 1280..1500 MTU substrate +description: "Large MTU Tunnels" +state: development +properties: [API, CLI] diff --git a/src/plugins/pvti/api.c b/src/plugins/pvti/api.c new file mode 100644 index 00000000000..cda39ad44e8 --- /dev/null +++ b/src/plugins/pvti/api.c @@ -0,0 +1,137 @@ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/format_fns.h> +#include <vnet/ip/ip_types_api.h> +#include <vlibapi/api.h> + +#include <pvti/pvti.api_enum.h> +#include <pvti/pvti.api_types.h> + +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +#define REPLY_MSG_ID_BASE pvm->msg_id_base +#include <vlibapi/api_helper_macros.h> + +typedef struct +{ + vl_api_registration_t *reg; + u32 context; +} pvti_if_details_ctx_t; + +typedef struct +{ + +} pvti_interface_dump_ctx_t; + +static walk_rc_t +pvti_if_send_details (index_t pvtii, void *data) +{ + vl_api_pvti_interface_details_t *rmp; + pvti_if_details_ctx_t *ctx = data; + const 
pvti_if_t *pvi; + + pvi = pvti_if_get (pvtii); + + rmp = vl_msg_api_alloc_zero (sizeof (*rmp)); + rmp->_vl_msg_id = + htons (VL_API_PVTI_INTERFACE_DETAILS + pvti_main.msg_id_base); + + rmp->interface.sw_if_index = htonl (pvi->sw_if_index); + rmp->interface.local_port = htons (pvi->local_port); + rmp->interface.remote_port = htons (pvi->remote_port); + rmp->interface.underlay_mtu = htons (pvi->underlay_mtu); + + ip_address_encode2 (&pvi->local_ip, &rmp->interface.local_ip); + ip_address_encode2 (&pvi->remote_ip, &rmp->interface.remote_ip); + + rmp->context = ctx->context; + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return (WALK_CONTINUE); +} + +static void +vl_api_pvti_interface_dump_t_handler (vl_api_pvti_interface_dump_t *mp) +{ + vl_api_registration_t *reg; + // pvti_main_t *pvm = &pvti_main; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (reg == 0) + return; + + pvti_if_details_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + u32 sw_if_index = ntohl (mp->sw_if_index); + if (sw_if_index == ~0) + pvti_if_walk (pvti_if_send_details, &ctx); + else + { + index_t pvtii = pvti_if_find_by_sw_if_index (sw_if_index); + if (pvtii != INDEX_INVALID) + pvti_if_send_details (pvtii, &ctx); + } +} + +static void +vl_api_pvti_interface_create_t_handler (vl_api_pvti_interface_create_t *mp) +{ + vl_api_pvti_interface_create_reply_t *rmp; + pvti_main_t *pvm = &pvti_main; + int rv = ~0; + u32 sw_if_index = ~0; + ip_address_t local_ip; + ip_address_t remote_ip; + + ip_address_decode2 (&mp->interface.local_ip, &local_ip); + ip_address_decode2 (&mp->interface.remote_ip, &remote_ip); + u16 lport = clib_host_to_net_u16 (mp->interface.local_port); + u16 rport = clib_host_to_net_u16 (mp->interface.remote_port); + u16 underlay_mtu = clib_host_to_net_u16 (mp->interface.underlay_mtu); + u32 underlay_fib_index = + clib_host_to_net_u32 (mp->interface.underlay_fib_index); + pvti_peer_address_method_t peer_address_method = + 
mp->interface.peer_address_from_payload ? PVTI_PEER_ADDRESS_FROM_PAYLOAD : + PVTI_PEER_ADDRESS_FIXED; + + if (underlay_mtu == 0) + { + underlay_mtu = 1500; + } + + rv = + pvti_if_create (&local_ip, lport, &remote_ip, rport, peer_address_method, + underlay_mtu, underlay_fib_index, &sw_if_index); + + REPLY_MACRO2 (VL_API_PVTI_INTERFACE_CREATE_REPLY, + { rmp->sw_if_index = htonl (sw_if_index); }); +} + +static void +vl_api_pvti_interface_delete_t_handler (vl_api_pvti_interface_delete_t *mp) +{ + vl_api_pvti_interface_delete_reply_t *rmp; + pvti_main_t *pvm = &pvti_main; + int rv = 0; + + rv = pvti_if_delete (ntohl (mp->sw_if_index)); + REPLY_MACRO (VL_API_PVTI_INTERFACE_DELETE_REPLY); +} + +/* API definitions */ +#include <pvti/pvti.api.c> + +void +pvti_api_init () +{ + pvti_main_t *pvm = &pvti_main; + /* Add our API messages to the global name_crc hash table */ + pvm->msg_id_base = setup_message_id_table (); +} diff --git a/src/plugins/pvti/bypass-main.c b/src/plugins/pvti/bypass-main.c new file mode 100644 index 00000000000..db79ccd2113 --- /dev/null +++ b/src/plugins/pvti/bypass-main.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/* Packet trace formatter for the ip4/ip6 pvti-bypass nodes.
 * va_args: (vlib_main_t *, vlib_node_t *, pvti_bypass_trace_t *). */
static u8 *
format_pvti_bypass_trace (u8 *s, va_list *args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  pvti_bypass_trace_t *t = va_arg (*args, pvti_bypass_trace_t *);

  s = format (s, "PVTI-BYPASS: sw_if_index %d, next index %d\n",
	      t->sw_if_index, t->next_index);
  /* NOTE(review): "sport"/"dport" print remote_port then local_port;
   * from the receiver's view the remote side is the source — confirm
   * the labels match the intended fields. */
  s = format (s, " src %U sport %d dport %d\n", format_ip_address,
	      &t->remote_ip, t->remote_port, t->local_port);
  s = format (s, " seq: %d", t->seq);
  return s;
}
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> +#include <pvti/bypass.h> + +always_inline u16 +pvti_bypass_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_ip6) +{ + u32 n_left_from, *from, *to_next; + pvti_bypass_next_t next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + + u32 pkts_processed = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + u32 sw_if_index0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + udp_header_t *udp0; + u32 bi0, ip_len0, udp_len0, flags0, next0; + u8 error0, good_udp0, proto0; + i32 len_diff0; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* setup the packet for the next feature */ + vnet_feature_next (&next0, b0); + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (b0); + } + else + { + ip40 = vlib_buffer_get_current (b0); + } + + if (is_ip6) + { + proto0 = ip60->protocol; + } + else + { + /* Treat IP frag packets as 
"experimental" protocol for now */ + proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol; + } + + /* Process packet 0 */ + if (proto0 != IP_PROTOCOL_UDP) + goto exit; /* not UDP packet */ + + if (is_ip6) + udp0 = ip6_next_header (ip60); + else + udp0 = ip4_next_header (ip40); + + /* look up the destination ip and port */ + u32 pvti_index0 = INDEX_INVALID; + if (is_ip6) + { + pvti_index0 = pvti_if_find_by_remote_ip6_and_port ( + &ip60->src_address, clib_net_to_host_u16 (udp0->src_port)); + } + else + { + pvti_index0 = pvti_if_find_by_remote_ip4_and_port ( + &ip40->src_address, clib_net_to_host_u16 (udp0->src_port)); + } + if (pvti_index0 == INDEX_INVALID) + goto exit; + + flags0 = b0->flags; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + /* Don't verify UDP checksum for packets with explicit zero checksum. + */ + good_udp0 |= udp0->checksum == 0; + + /* Verify UDP length */ + if (is_ip6) + ip_len0 = clib_net_to_host_u16 (ip60->payload_length); + else + ip_len0 = clib_net_to_host_u16 (ip40->length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + + /* Verify UDP checksum */ + if (PREDICT_FALSE (!good_udp0)) + { + if (is_ip6) + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); + else + flags0 = ip4_tcp_udp_validate_checksum (vm, b0); + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + + if (is_ip6) + { + error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH; + } + else + { + error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH; + } + + next0 = error0 ? PVTI_BYPASS_NEXT_DROP : PVTI_BYPASS_NEXT_PVTI_INPUT; + b0->error = error0 ? 
error_node->errors[error0] : 0; + + /* pvtiX-input node expect current at PVTI header */ + if (is_ip6) + vlib_buffer_advance (b0, sizeof (ip6_header_t) + + sizeof (udp_header_t)); + else + vlib_buffer_advance (b0, sizeof (ip4_header_t) + + sizeof (udp_header_t)); + exit: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_bypass_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->seq = 0; // clib_net_to_host_u32 (pvti0->seq); + if (is_ip6) + { + } + else + { + t->remote_ip.ip.ip4 = ip40->src_address; + t->remote_ip.version = AF_IP4; + } + // t->local_port = h0->udp.dst_port; + // t->remote_port = h0->udp.src_port; + } + + pkts_processed += 1; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + PVTI_BYPASS_ERROR_PROCESSED, pkts_processed); + return frame->n_vectors; +} + +VLIB_NODE_FN (pvti4_bypass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_bypass_node_common (vm, node, frame, 0); +} + +VLIB_NODE_FN (pvti6_bypass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_bypass_node_common (vm, node, frame, 1); +} diff --git a/src/plugins/pvti/bypass.h b/src/plugins/pvti/bypass.h new file mode 100644 index 00000000000..611d5770ad3 --- /dev/null +++ b/src/plugins/pvti/bypass.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_pvti_bypass_h__ +#define __included_pvti_bypass_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u32 seq; +} pvti_bypass_trace_t; + +#define foreach_pvti_bypass_error \ + _ (PROCESSED, "PVTI bypass tunnel packets processed") + +typedef enum +{ +#define _(sym, str) PVTI_BYPASS_ERROR_##sym, + foreach_pvti_bypass_error +#undef _ + PVTI_BYPASS_N_ERROR, +} pvti_bypass_error_t; + +typedef enum +{ + PVTI_BYPASS_NEXT_DROP, + PVTI_BYPASS_NEXT_PVTI_INPUT, + PVTI_BYPASS_N_NEXT, +} pvti_bypass_next_t; + +#endif // pvti_bypass_h diff --git a/src/plugins/pvti/input-main.c b/src/plugins/pvti/input-main.c new file mode 100644 index 00000000000..8ab8b18dd7c --- /dev/null +++ b/src/plugins/pvti/input-main.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <pvti/input.h> + +static char *pvti_input_error_strings[] = { +#define _(sym, string) string, + foreach_pvti_input_error +#undef _ +}; + +#define _(f, s) s, +static char *pvti_input_trace_type_names[] = { foreach_pvti_input_trace_type }; +#undef _ + +static char * +get_pvti_trace_type_name (u8 ptype) +{ + if (ptype < PVTI_INPUT_TRACE_N_TYPES) + { + return pvti_input_trace_type_names[ptype]; + } + else + { + return "unknown"; + } +} + +/* packet trace format function */ +static u8 * +format_pvti_input_trace (u8 *s, va_list *args) +{ + int i; + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + pvti_input_trace_t *t = va_arg (*args, pvti_input_trace_t *); + + u32 indent = format_get_indent (s); + + s = format (s, + "PVTI-IN: sw_if_index %d, next index %d, trace_type: %s(%d), " + "chunkcnt: %d\n", + t->sw_if_index, t->next_index, + get_pvti_trace_type_name (t->trace_type), t->trace_type, + t->chunk_count); + s = format (s, " src %U sport %d dport %d\n", format_ip_address, + &t->remote_ip, t->remote_port, t->local_port); + s = format (s, " seq: %d, chunk_count: %d\n", t->seq, t->chunk_count); + u16 max = t->chunk_count > MAX_CHUNKS ? 
MAX_CHUNKS : t->chunk_count; + for (i = 0; i < max; i++) + { + s = format (s, " %02d: sz %d\n", i, t->chunks[i].total_chunk_length); + } + s = format (s, "\n%U%U", format_white_space, indent, + format_ip_adjacency_packet_data, t->packet_data, + sizeof (t->packet_data)); + + return s; +} + +vlib_node_registration_t pvti4_input_node; +vlib_node_registration_t pvti6_input_node; + +VLIB_REGISTER_NODE (pvti4_input_node) = +{ + .name = "pvti4-input", + .vector_size = sizeof (u32), + .format_trace = format_pvti_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(pvti_input_error_strings), + .error_strings = pvti_input_error_strings, + + .n_next_nodes = PVTI_INPUT_N_NEXT, + + .next_nodes = { + [PVTI_INPUT_NEXT_DROP] = "error-drop", + [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input", + [PVTI_INPUT_NEXT_PUNT] = "error-punt", + }, + +}; +VLIB_REGISTER_NODE (pvti6_input_node) = +{ + .name = "pvti6-input", + .vector_size = sizeof (u32), + .format_trace = format_pvti_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(pvti_input_error_strings), + .error_strings = pvti_input_error_strings, + + .n_next_nodes = PVTI_INPUT_N_NEXT, + + .next_nodes = { + [PVTI_INPUT_NEXT_DROP] = "error-drop", + [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input", + [PVTI_INPUT_NEXT_PUNT] = "error-punt", + }, + +}; diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c new file mode 100644 index 00000000000..6a8806e2795 --- /dev/null +++ b/src/plugins/pvti/input.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/* Find the per-thread RX peer matching the sender of buffer b0
 * (source ip + UDP source port of the encap header), creating it from
 * the owning PVTI interface on first contact.
 * Returns 0 when no PVTI interface matches the sender.
 * Assumes b0's current data pointer sits just past the IP+UDP encap
 * header, so the header is reachable at (encap *) current - 1 —
 * TODO confirm against the callers in the input node. */
always_inline pvti_rx_peer_t *
pvti_try_find_or_create_rx_peer (pvti_per_thread_data_t *ptd,
				 vlib_buffer_t *b0, bool is_ip6)
{
  pvti_rx_peer_t *peer;

  ip_address_t remote_ip = { 0 };
  u16 remote_port;
  if (is_ip6)
    {
      pvti_ip6_encap_header_t *h0 =
	((pvti_ip6_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
      ip_address_set (&remote_ip, &h0->ip6.src_address, AF_IP6);
      remote_port = clib_net_to_host_u16 (h0->udp.src_port);
    }
  else
    {
      pvti_ip4_encap_header_t *h0 =
	((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
      ip_address_set (&remote_ip, &h0->ip4.src_address, AF_IP4);
      remote_port = clib_net_to_host_u16 (h0->udp.src_port);
    }

  /* NOTE(review): pool_put while inside pool_foreach is documented as
   * unsafe in vppinfra (iteration state may be invalidated) — confirm
   * this lazy reclamation of deleted peers is safe here or move the
   * pool_put outside the loop. */
  pool_foreach (peer, ptd->rx_peers)
    {
      if (peer->remote_port == remote_port &&
	  0 == ip_address_cmp (&remote_ip, &peer->remote_ip))
	{
	  if (peer->deleted)
	    {
	      // The peer has been marked as deleted - wipe it.
	      // (0xca poison fill to catch use-after-free.)
	      clib_memset (peer, 0xca, sizeof (*peer));
	      pool_put (ptd->rx_peers, peer);
	      continue;
	    }
	  return peer;
	}
    }

  /* No live peer: look up the interface configured for this sender. */
  index_t pvti_if_index0 =
    pvti_if_find_by_remote_ip_and_port (&remote_ip, remote_port);
  if (INDEX_INVALID == pvti_if_index0)
    {
      // no suitable interface found, bail
      return 0;
    }
  pvti_if_t *pvti_if0 = pvti_if_get (pvti_if_index0);

  /* Seed the new peer from the owning interface's endpoints. */
  pvti_rx_peer_t new_peer = {
    .local_ip = pvti_if0->local_ip,
    .local_port = pvti_if0->local_port,
    .remote_ip = remote_ip,
    .remote_port = remote_port,
    .pvti_if_index = pvti_if_index0,
    .rx_streams = { { 0 } },
  };
  pvti_rx_peer_t *rx_new_peer;
  pool_get (ptd->rx_peers, rx_new_peer);
  *rx_new_peer = new_peer;

  /* Mark all reassembly streams as having no buffer in progress. */
  int i;
  for (i = 0; i < MAX_RX_STREAMS; i++)
    {
      rx_new_peer->rx_streams[i].rx_bi0 = INDEX_INVALID;
      rx_new_peer->rx_streams[i].rx_bi0_first = INDEX_INVALID;
      rx_new_peer->rx_streams[i].rx_next0 = 0;
    }

  return rx_new_peer;
}
vlib_buffer_get_current (b0)) - 1; + pvti_rx_peer_t *pvti_rx_peer0 = + pvti_try_find_or_create_rx_peer (ptd, b0, is_ip6); + if (!pvti_rx_peer0) + { + b0->error = node->errors[PVTI_INPUT_ERROR_PEER]; + goto drop_and_maybe_trace; + } + + b0 = vlib_get_buffer (vm, bi0); + pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0); + u8 stream_index = pvti0->stream_index; + max_chunk_count = + pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS; + u16 pvti_packet_header_sz0 = + pvti0->pad_bytes + offsetof (pvti_packet_header_t, pad); + if (b0->current_length < pvti_packet_header_sz0) + { + b0->error = node->errors[PVTI_INPUT_ERROR_PACKET_TOO_SHORT]; + goto drop_and_maybe_trace; + } + vlib_buffer_advance (b0, pvti_packet_header_sz0); + + if (max_chunk_count == 0) + { + b0->error = node->errors[PVTI_INPUT_ERROR_NOCHUNKS]; + goto drop_and_maybe_trace; + } + if (pvti0->reass_chunk_count > max_chunk_count) + { + b0->error = node->errors[PVTI_INPUT_ERROR_TOOMANYREASS]; + goto drop_and_maybe_trace; + } + pvti_per_rx_stream_data_t *rx_stream0 = + &pvti_rx_peer0->rx_streams[stream_index]; + + u32 new_seq0 = clib_net_to_host_u32 (pvti0->seq); + if (new_seq0 == rx_stream0->last_rx_seq + 1) + { + /* Sequence# matches, we can attempt adding the leading chunks to + * reassembly */ + rx_stream0->last_rx_seq = new_seq0; + + while ((b0->current_length > 0) && + true_chunk_count < pvti0->reass_chunk_count) + { + /* attempt to either incorporate the first chunk into + * reassembly or skip it. 
*/ + pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0); + const u16 chunk_payload_length = + clib_net_to_host_u16 (pvc0->total_chunk_length) - + sizeof (*pvc0); + vlib_buffer_advance (b0, sizeof (*pvc0)); + + if (rx_stream0->rx_bi0 == INDEX_INVALID) + { + clib_warning ( + "RX internal error: not-first chunk but no wip block"); + } + else + { + + vlib_buffer_t *rb0 = + vlib_get_buffer (vm, rx_stream0->rx_bi0); + u16 allowed_length = + PVTI_RX_MAX_LENGTH - rb0->current_length; + if (allowed_length > chunk_payload_length) + { + // simple case - there is space in the buffer to fit + // the whole chunk + void *tail = + vlib_buffer_put_uninit (rb0, chunk_payload_length); + clib_memcpy (tail, vlib_buffer_get_current (b0), + chunk_payload_length); + } + else + { + // The current chunk can not fit - need to make two + // copies, one into the current buffer, and one into + // a newly allocated chained buffer. + void *tail = + vlib_buffer_put_uninit (rb0, allowed_length); + clib_memcpy (tail, vlib_buffer_get_current (b0), + allowed_length); + u16 remaining_payload_length = + chunk_payload_length - allowed_length; + u32 nrbi0 = pvti_get_new_buffer (vm); + if (INDEX_INVALID == nrbi0) + { + ASSERT (0); // FIXME what the recovery is + // supposed to look like ? 
+ } + else + { + // link up the new buffer and copy the remainder + // there + vlib_buffer_t *nrb0 = vlib_get_buffer (vm, nrbi0); + rb0->flags |= VLIB_BUFFER_NEXT_PRESENT; + rb0->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + rb0->next_buffer = nrbi0; + rx_stream0->rx_bi0 = nrbi0; + void *tail = vlib_buffer_put_uninit ( + nrb0, remaining_payload_length); + clib_memcpy (tail, + vlib_buffer_get_current (b0) + + allowed_length, + remaining_payload_length); + } + } + pvti_rx_peer0->rx_streams[stream_index] + .rx_received_inner_length += chunk_payload_length; + if (pvti_rx_peer0->rx_streams[stream_index] + .rx_received_inner_length == + pvti_rx_peer0->rx_streams[stream_index] + .rx_expected_inner_length) + { + next0 = rx_stream0->rx_next0; + pvti_enqueue_rx_bi_to_next_and_trace ( + vm, node, ptd, rx_stream0->rx_bi0_first, next0); + pkts_decapsulated += 1; + + // clean out the current reassemly state + rx_stream0->rx_bi0 = INDEX_INVALID; + rx_stream0->rx_bi0_first = INDEX_INVALID; + pvti_rx_peer0->rx_streams[stream_index] + .rx_received_inner_length = 0; + pvti_rx_peer0->rx_streams[stream_index] + .rx_expected_inner_length = 0; + rx_stream0->rx_next0 = 0; + } + } + chunks[true_chunk_count] = pvc0; + true_chunk_count += 1; + vlib_buffer_advance (b0, chunk_payload_length); + } + } + else + { + /* Sequence does not match, skip the reassembly chunks and reset + * the reassembly state */ + + while ((b0->current_length > 0) && + true_chunk_count < pvti0->reass_chunk_count) + { + /* skip the reassembly chunks */ + pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0); + chunks[true_chunk_count] = pvc0; + true_chunk_count += 1; + vlib_buffer_advance ( + b0, clib_net_to_host_u16 (pvc0->total_chunk_length)); + } + // FIXME: discard the current reassembly state, reset the seq# + if (rx_stream0->rx_bi0_first != INDEX_INVALID) + { + clib_warning ("RX PVTI: discard chunk being reassembled"); + vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first); + rx_stream0->rx_bi0 = INDEX_INVALID; + 
rx_stream0->rx_bi0_first = INDEX_INVALID; + rx_stream0->rx_received_inner_length = 0; + rx_stream0->rx_expected_inner_length = 0; + rx_stream0->rx_next0 = 0; + } + } + + while ((b0->current_length > 0) && true_chunk_count < max_chunk_count) + { + if (b0->current_length < sizeof (pvti_chunk_header_t)) + { + clib_warning ("RX ERR: length too short for a chunk"); + break; + } + pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0); + chunks[true_chunk_count] = pvc0; + true_chunk_count += 1; + u16 total_chunk_length = + clib_net_to_host_u16 (pvc0->total_chunk_length); + if (b0->current_length < total_chunk_length) + { + clib_warning ("RX ERR: length 0x%x too big for a chunk", + true_chunk_count); + break; + } + u8 *pkt = (u8 *) (pvc0 + 1); + u16 inner_length; + if (rx_stream0->rx_bi0_first != INDEX_INVALID) + { + vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first); + rx_stream0->rx_bi0 = INDEX_INVALID; + rx_stream0->rx_bi0_first = INDEX_INVALID; + rx_stream0->rx_received_inner_length = 0; + rx_stream0->rx_expected_inner_length = 0; + rx_stream0->rx_next0 = 0; + } + + switch (*pkt & 0xf0) + { + case 0x40: + next0 = PVTI_INPUT_NEXT_IP4_INPUT; + inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 2))); + break; + case 0x60: + next0 = PVTI_INPUT_NEXT_IP6_INPUT; + inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 4))) + + sizeof (ip6_header_t); + break; + default: + next0 = PVTI_INPUT_NEXT_DROP; + vlib_buffer_advance (b0, total_chunk_length); + continue; + } + vlib_buffer_advance (b0, sizeof (pvti_chunk_header_t)); + + if (inner_length + sizeof (pvti_chunk_header_t) > total_chunk_length) + { + /* FIXME: the packet size is larger than the chunk -> it's a + * first fragment */ + // enqueue the chunk and finish packet processing. + // There must be no active reassembly. 
+ ASSERT (rx_stream0->rx_bi0_first == INDEX_INVALID); + rx_stream0->rx_next0 = next0; + rx_stream0->rx_bi0 = bi0; + rx_stream0->rx_bi0_first = bi0; + rx_stream0->rx_expected_inner_length = inner_length; + rx_stream0->rx_received_inner_length = + total_chunk_length - sizeof (pvti_chunk_header_t); + rx_stream0->last_rx_seq = new_seq0; + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = ~0; + t->trace_type = PVTI_INPUT_TRACE_enqueue; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + goto continue_outer; + } + + u32 nbi0 = pvti_get_new_buffer (vm); + if (INDEX_INVALID == nbi0) + { + decap_failed_no_buffers += 1; + continue; + }; + vlib_buffer_t *nb0 = vlib_get_buffer (vm, nbi0); + pvti_if_t *pvti_if0 = pvti_if_get (pvti_rx_peer0->pvti_if_index); + vnet_buffer (nb0)->sw_if_index[VLIB_RX] = pvti_if0->sw_if_index; + void *new_packet = vlib_buffer_put_uninit (nb0, inner_length); + clib_memcpy (new_packet, pvc0 + 1, inner_length); + vlib_buffer_advance (b0, inner_length); + + pvti_enqueue_rx_bi_to_next_and_trace (vm, node, ptd, nbi0, next0); + pkts_decapsulated += 1; + } + /* we have processed all the chunks from the buffer, but the buffer + * remains. Free it. */ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = ~0; + t->trace_type = PVTI_INPUT_TRACE_free; + t->seq = clib_net_to_host_u32 (pvti0->seq); + t->chunk_count = pvti0->chunk_count; + u8 chunk_count = + pvti0->chunk_count < MAX_CHUNKS ? 
pvti0->chunk_count : MAX_CHUNKS; + for (int i = 0; i < chunk_count; i++) + { + t->chunks[i].total_chunk_length = + clib_net_to_host_u16 (chunks[i]->total_chunk_length); + } + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + vlib_buffer_free_one (vm, bi0); + + continue_outer: + pkts_processed += 1; + continue; + + drop_and_maybe_trace: + next0 = PVTI_INPUT_NEXT_DROP; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + int i; + pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->trace_type = PVTI_INPUT_TRACE_drop; + t->next_index = next0; + t->remote_ip.ip.ip4 = h0->ip4.src_address; + t->remote_ip.version = AF_IP4; + t->local_port = h0->udp.dst_port; + t->remote_port = h0->udp.src_port; + if (!pvti_rx_peer0) + { + t->seq = 0xdeaddead; + } + else + { + t->seq = clib_net_to_host_u32 (pvti0->seq); + t->chunk_count = pvti0->chunk_count; + u8 chunk_count = pvti0->chunk_count < MAX_CHUNKS ? 
+ pvti0->chunk_count : + MAX_CHUNKS; + for (i = 0; i < chunk_count; i++) + { + t->chunks[i].total_chunk_length = + clib_net_to_host_u16 (chunks[i]->total_chunk_length); + } + } + } + + pkts_processed += 1; + vec_add1 (ptd->pending_rx_buffers, bi0); + vec_add1 (ptd->pending_rx_nexts, next0); + } + + vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_rx_buffers, + &ptd->pending_rx_nexts, + vec_len (ptd->pending_rx_nexts)); + vec_reset_length (ptd->pending_rx_buffers); + vec_reset_length (ptd->pending_rx_nexts); + + vlib_node_increment_counter (vm, node->node_index, + PVTI_INPUT_ERROR_PROCESSED, pkts_processed); + vlib_node_increment_counter ( + vm, node->node_index, PVTI_INPUT_ERROR_DECAPSULATED, pkts_decapsulated); + vlib_node_increment_counter (vm, node->node_index, + PVTI_INPUT_ERROR_NO_BUFFERS, + decap_failed_no_buffers); + return frame->n_vectors; +} + +VLIB_NODE_FN (pvti4_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_input_node_common (vm, node, frame, 0); +} + +VLIB_NODE_FN (pvti6_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_input_node_common (vm, node, frame, 1); +} diff --git a/src/plugins/pvti/input.h b/src/plugins/pvti/input.h new file mode 100644 index 00000000000..02a186cde05 --- /dev/null +++ b/src/plugins/pvti/input.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_pvti_input_h__ +#define __included_pvti_input_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +typedef struct +{ + u16 total_chunk_length; +} pvti_input_chunk_t; + +#define MAX_CHUNKS 32 +#define PVTI_RX_MAX_LENGTH 2048 + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u32 seq; + pvti_input_chunk_t chunks[MAX_CHUNKS]; + u8 chunk_count; + u8 trace_type; + u8 packet_data[64]; +} pvti_input_trace_t; + +#define foreach_pvti_input_trace_type \ + _ (drop, "drop") \ + _ (decap, "decapsulate") \ + _ (free, "free") \ + _ (enqueue, "enqueue") + +typedef enum +{ +#define _(f, s) PVTI_INPUT_TRACE_##f, + foreach_pvti_input_trace_type +#undef _ + PVTI_INPUT_TRACE_N_TYPES, +} pvti_input_trace_type_t; + +#define foreach_pvti_input_error \ + _ (PROCESSED, "PVTI tunneled packets processed") \ + _ (DECAPSULATED, "PVTI inner packets decapsulated") \ + _ (PEER, "Could not find a peer") \ + _ (NOCHUNKS, "Packet has no chunks") \ + _ (NO_BUFFERS, "No buffers available to decapsulate") \ + _ (TOOMANYREASS, "Packet has more reassembly chunks than total") \ + _ (PACKET_TOO_SHORT, "Packet too short") + +typedef enum +{ +#define _(sym, str) PVTI_INPUT_ERROR_##sym, + foreach_pvti_input_error +#undef _ + PVTI_INPUT_N_ERROR, +} pvti_input_error_t; + +typedef enum +{ + PVTI_INPUT_NEXT_DROP, + PVTI_INPUT_NEXT_IP4_INPUT, + PVTI_INPUT_NEXT_IP6_INPUT, + PVTI_INPUT_NEXT_PUNT, + PVTI_INPUT_N_NEXT, +} pvti_input_next_t; + +#endif // pvti_input_h diff --git a/src/plugins/pvti/output-main.c b/src/plugins/pvti/output-main.c new file mode 100644 index 00000000000..ae4ae5f8e98 --- /dev/null +++ b/src/plugins/pvti/output-main.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <pvti/output.h> + +/* packet trace format function */ +static u8 * +format_pvti_output_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + pvti_output_trace_t *t = va_arg (*args, pvti_output_trace_t *); + + u32 indent = format_get_indent (s); + s = + format (s, "PVTI-OUT(%d): sw_if_index %d, next index %d, underlay_mtu %d,", + t->trace_type, t->sw_if_index, t->next_index, t->underlay_mtu); + s = format (s, "\n%U stream_index %d, bi0_max_current_length %d, tx_seq %d", + format_white_space, indent, t->stream_index, + t->bi0_max_current_length, t->tx_seq); + s = format (s, "\n%U%U", format_white_space, indent, + format_ip_adjacency_packet_data, t->packet_data, + sizeof (t->packet_data)); + + return s; +} + +vlib_node_registration_t pvti_output_node; + +static char *pvti_output_error_strings[] = { +#define _(sym, string) string, + foreach_pvti_output_error +#undef _ +}; + +VLIB_REGISTER_NODE (pvti4_output_node) = +{ + .name = "pvti4-output", + .vector_size = sizeof (u32), + .format_trace = format_pvti_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(pvti_output_error_strings), + .error_strings = pvti_output_error_strings, + + .n_next_nodes = PVTI_OUTPUT_N_NEXT, + + .next_nodes = { + [PVTI_OUTPUT_NEXT_DROP] = "error-drop", + [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = 
"adj-midchain-tx", + [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, + +}; +VLIB_REGISTER_NODE (pvti6_output_node) = +{ + .name = "pvti6-output", + .vector_size = sizeof (u32), + .format_trace = format_pvti_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(pvti_output_error_strings), + .error_strings = pvti_output_error_strings, + + .n_next_nodes = PVTI_OUTPUT_N_NEXT, + + .next_nodes = { + [PVTI_OUTPUT_NEXT_DROP] = "error-drop", + [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx", + [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, + +}; diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c new file mode 100644 index 00000000000..1939c6f585a --- /dev/null +++ b/src/plugins/pvti/output.c @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> +#include <pvti/output.h> + +static_always_inline u32 +ip6_vtcfl (u8 stream_index) +{ + u32 vtcfl = 0x6 << 28; + vtcfl |= stream_index; + + return (clib_host_to_net_u32 (vtcfl)); +} + +always_inline vlib_buffer_t * +pvti_alloc_new_tx_buffer (vlib_main_t *vm) +{ + u32 bi0 = INDEX_INVALID; + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + { + return 0; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + b0->current_data = 0; + b0->current_length = 0; + return b0; +} + +always_inline bool +pvti_find_or_try_create_tx_peer (vlib_main_t *vm, pvti_per_thread_data_t *ptd, + pvti_if_t *pvti_if0, ip_address_t *remote_ip, + u16 remote_port, u32 *out_index) +{ + + pvti_tx_peer_t *peer; + pool_foreach (peer, ptd->tx_peers) + { + if (peer->remote_port == remote_port && + 0 == ip_address_cmp (remote_ip, &peer->remote_ip)) + { + if (peer->deleted) + { + // Bad luck, the peer has been deleted. + u32 boi0 = vlib_get_buffer_index (vm, peer->bo0); + if (peer->bo0) + { + vlib_buffer_free (vm, &boi0, 1); + } + clib_memset (peer, 0xca, sizeof (*peer)); + pool_put (ptd->tx_peers, peer); + continue; + } + *out_index = peer - ptd->tx_peers; + return 1; + } + } + + ip_address_family_t dst_ver = ip_addr_version (&pvti_if0->remote_ip); + + u16 pvti_encap_overhead = (dst_ver == AF_IP6) ? 
+ sizeof (pvti_ip6_encap_header_t) : + sizeof (pvti_ip4_encap_header_t); + + u16 pvti_packet_overhead = + pvti_encap_overhead + sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES; + + ASSERT (pvti_if0->underlay_mtu > pvti_packet_overhead); + + u32 bo0_max_current_length = pvti_if0->underlay_mtu - pvti_packet_overhead; + + vlib_buffer_t *bo0 = pvti_alloc_new_tx_buffer (vm); + + if (!bo0) + { + return 0; + } + + pvti_tx_peer_t new_peer = { + .local_ip = pvti_if0->local_ip, + .remote_ip = *remote_ip, + .local_port = pvti_if0->local_port, + .remote_port = remote_port, + .underlay_mtu = pvti_if0->underlay_mtu, + .underlay_fib_index = pvti_if0->underlay_fib_index, + .bo0_max_current_length = bo0_max_current_length, + .pvti_if_index = pvti_if_get_index (pvti_if0), + .deleted = 0, + .bo0 = bo0, + .chunk_count = 0, + .reass_chunk_count = 0, + .current_tx_seq = 42, + }; + + pvti_tx_peer_t *tx_new_peer; + pool_get (ptd->tx_peers, tx_new_peer); + + *tx_new_peer = new_peer; + *out_index = tx_new_peer - ptd->tx_peers; + return 1; +} + +always_inline bool +pvti_try_get_tx_peer_index (vlib_main_t *vm, pvti_per_thread_data_t *ptd, + pvti_if_t *pvti_if0, vlib_buffer_t *b0, + bool is_ip6, u32 *out_index) +{ + if (pvti_if0->peer_address_from_payload) + { + ip_address_t remote_ip = { 0 }; + if (is_ip6) + { + ip6_header_t *ip6 = vlib_buffer_get_current (b0); + ip_address_set (&remote_ip, &ip6->dst_address, AF_IP6); + } + else + { + ip4_header_t *ip4 = vlib_buffer_get_current (b0); + ip_address_set (&remote_ip, &ip4->dst_address, AF_IP4); + } + return pvti_find_or_try_create_tx_peer ( + vm, ptd, pvti_if0, &remote_ip, pvti_if0->remote_port, out_index); + } + else + { + return pvti_find_or_try_create_tx_peer ( + vm, ptd, pvti_if0, &pvti_if0->remote_ip, pvti_if0->remote_port, + out_index); + } + /* not reached */ +} + +always_inline void +pvti_finalize_chunk (pvti_tx_peer_t *tx_peer, + pvti_chunk_header_t *chunk_header, u8 *tail, + bool is_reassembly_chunk) +{ + clib_memset (chunk_header, 
0xab, sizeof (pvti_chunk_header_t)); + chunk_header->total_chunk_length = + clib_host_to_net_u16 (tail - (u8 *) chunk_header); + tx_peer->chunk_count++; + if (is_reassembly_chunk) + { + tx_peer->reass_chunk_count++; + } +} + +always_inline pvti_output_next_t +encap_pvti_buffer_ip46 (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_tx_peer_t *tx_peer, int is_ip6) +{ + ip_address_family_t src_ver = ip_addr_version (&tx_peer->local_ip); + ip_address_family_t dst_ver = ip_addr_version (&tx_peer->remote_ip); + u8 stream_index = 0; + + ASSERT (src_ver == dst_ver); + bool is_ip6_encap = (AF_IP6 == src_ver); + + vlib_buffer_t *b0 = tx_peer->bo0; + vlib_buffer_advance (b0, + -(sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES)); + + pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0); + clib_memset (pvti0, 0xca, sizeof (*pvti0) + PVTI_ALIGN_BYTES); + pvti0->pad_bytes = PVTI_ALIGN_BYTES; + + pvti0->seq = clib_host_to_net_u32 (tx_peer->current_tx_seq); + pvti0->stream_index = stream_index; + pvti0->reass_chunk_count = tx_peer->reass_chunk_count; + pvti0->chunk_count = tx_peer->chunk_count; + pvti0->mandatory_flags_mask = 0; + pvti0->flags_value = 0; + + if (is_ip6_encap) + { + vlib_buffer_advance (b0, -(sizeof (pvti_ip6_encap_header_t))); + if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE) + { + // undo the change + vlib_buffer_advance (b0, (sizeof (pvti_ip6_encap_header_t))); + b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE]; + return PVTI_OUTPUT_NEXT_DROP; + } + pvti_ip6_encap_header_t *ve = vlib_buffer_get_current (b0); + + ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port); + ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port); + ve->udp.length = clib_host_to_net_u16 ( + b0->current_length - offsetof (pvti_ip6_encap_header_t, udp)); + ve->udp.checksum = 0; + + ve->ip6.ip_version_traffic_class_and_flow_label = + ip6_vtcfl (stream_index); + ve->ip6.payload_length = ve->udp.length; + ve->ip6.protocol = 17; + ve->ip6.hop_limit = 128; + 
ip_address_copy_addr (&ve->ip6.src_address, &tx_peer->local_ip); + ip_address_copy_addr (&ve->ip6.dst_address, &tx_peer->remote_ip); + } + else + { + vlib_buffer_advance (b0, -(sizeof (pvti_ip4_encap_header_t))); + if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE) + { + // undo the change + vlib_buffer_advance (b0, (sizeof (pvti_ip4_encap_header_t))); + b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE]; + return PVTI_OUTPUT_NEXT_DROP; + } + pvti_ip4_encap_header_t *ve = vlib_buffer_get_current (b0); + + ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port); + ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port); + ve->udp.length = clib_host_to_net_u16 ( + b0->current_length - offsetof (pvti_ip4_encap_header_t, udp)); + ve->udp.checksum = 0; + + ve->ip4.ip_version_and_header_length = 0x45; + ve->ip4.tos = 0; + ve->ip4.length = clib_host_to_net_u16 (b0->current_length); + ve->ip4.fragment_id = + clib_host_to_net_u16 (tx_peer->current_tx_seq & 0xffff); + ve->ip4.flags_and_fragment_offset = 0; + ve->ip4.ttl = 128; + ve->ip4.protocol = 17; + + ve->ip4.dst_address.as_u32 = ip_addr_v4 (&tx_peer->remote_ip).data_u32; + ve->ip4.src_address.as_u32 = ip_addr_v4 (&tx_peer->local_ip).data_u32; + ve->ip4.checksum = ip4_header_checksum (&ve->ip4); + } + + // This is important, if not reset, causes a crash + vnet_buffer (b0)->sw_if_index[VLIB_TX] = tx_peer->underlay_fib_index; + + // vnet_buffer (b0)->oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM; + return is_ip6_encap ? 
PVTI_OUTPUT_NEXT_IP6_LOOKUP : + PVTI_OUTPUT_NEXT_IP4_LOOKUP; +} + +always_inline void +pvti_enqueue_tx_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, vlib_buffer_t *b0, + u16 next0, u8 stream_index, pvti_tx_peer_t *tx_peer) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + tx_peer->is_bo0_traced)) + { + if (PREDICT_TRUE ( + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0))) + { + + pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->underlay_mtu = tx_peer->underlay_mtu; + t->stream_index = stream_index; + t->trace_type = 1; + t->bi0_max_current_length = tx_peer->bo0_max_current_length; + t->tx_seq = tx_peer->current_tx_seq; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + } + u32 bi0 = vlib_get_buffer_index (vm, b0); + vec_add1 (ptd->pending_tx_buffers, bi0); + vec_add1 (ptd->pending_tx_nexts, next0); +} + +always_inline void +pvti_enqueue_tx_drop_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, vlib_buffer_t *b0, + u8 stream_index) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = PVTI_OUTPUT_NEXT_DROP; + t->stream_index = stream_index; + t->trace_type = 0; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + u32 bi0 = vlib_get_buffer_index (vm, b0); + vec_add1 (ptd->pending_tx_buffers, bi0); + vec_add1 (ptd->pending_tx_nexts, PVTI_OUTPUT_NEXT_DROP); +} + +always_inline bool +pvti_flush_peer_and_recharge (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, u32 tx_peer_index, + u8 stream_index, const bool is_ip6) +{ + pvti_tx_peer_t *tx_peer = pool_elt_at_index (ptd->tx_peers, tx_peer_index); + u16 next0 = encap_pvti_buffer_ip46 (vm, node, tx_peer, is_ip6); + + 
pvti_enqueue_tx_and_trace (vm, node, ptd, tx_peer->bo0, next0, stream_index, + tx_peer); + + tx_peer->bo0 = pvti_alloc_new_tx_buffer (vm); + tx_peer->reass_chunk_count = 0; + tx_peer->chunk_count = 0; + tx_peer->current_tx_seq++; + + return 1; +} + +always_inline u16 +pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, const bool is_ip6) +{ + pvti_main_t *pvm = &pvti_main; + + u32 n_left_from, *from; + u32 pkts_encapsulated = 0; + u32 pkts_processed = 0; + u32 pkts_chopped = 0; + u32 pkts_overflow = 0; + u32 pkts_overflow_cantfit = 0; + + bool is_node_traced = (node->flags & VLIB_NODE_FLAG_TRACE) ? 1 : 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + u8 stream_index = pvti_get_stream_index (is_ip6); + + u32 thread_index = vlib_get_thread_index (); + pvti_per_thread_data_t *ptd = + vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index); + + vlib_buffer_t *ibufs[VLIB_FRAME_SIZE], **ib = ibufs; + + vlib_get_buffers (vm, from, ibufs, n_left_from); + + n_left_from = frame->n_vectors; + while (1 && n_left_from > 0) + { + n_left_from -= 1; + vlib_buffer_t *b0 = ib[0]; + ib++; + u32 bi0 = vlib_get_buffer_index (vm, b0); + bool is_b0_traced = + is_node_traced && ((b0->flags & VLIB_BUFFER_IS_TRACED) ? 
1 : 0); + pkts_processed += 1; + + u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + u32 pvti_index0 = pvti_if_find_by_sw_if_index (sw_if_index0); + if (pvti_index0 == INDEX_INVALID) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_PEER]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index); + continue; + } + pvti_if_t *pvti_if0 = pvti_if_get (pvti_index0); + u32 tx_peer_index; + if (!pvti_try_get_tx_peer_index (vm, ptd, pvti_if0, b0, is_ip6, + &tx_peer_index)) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_MAKE_PEER]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index); + continue; + } + pvti_tx_peer_t *tx_peer = &ptd->tx_peers[tx_peer_index]; + + u32 b0_len = vlib_buffer_length_in_chain (vm, b0); + u32 total_chunk_len = sizeof (pvti_chunk_header_t) + b0_len; + + if (tx_peer->bo0_max_current_length >= + tx_peer->bo0->current_length + total_chunk_len) + { + /* Happy case, we can fit the entire new chunk */ + pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit ( + tx_peer->bo0, sizeof (pvti_chunk_header_t)); + u8 *tail = vlib_buffer_put_uninit (tx_peer->bo0, b0_len); + vlib_buffer_t *b0_curr; + b0_curr = b0; + while (b0_len > 0) + { + clib_memcpy (tail, vlib_buffer_get_current (b0_curr), + b0_curr->current_length); + tail += b0_curr->current_length; + b0_len -= b0_curr->current_length; + ASSERT ((b0_len == 0) || + (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)); + if (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer); + } + } + tx_peer->is_bo0_traced |= is_b0_traced; + pvti_finalize_chunk (tx_peer, chunk_header, tail, false); + } + else + { + bool is_reassembly = false; + /* FIXME: here, flush a packet if we want to avoid fragmenting it */ +#define PVTI_TINY_PACKET_SZ 20 + int threshold_len = + sizeof (pvti_chunk_header_t) + PVTI_TINY_PACKET_SZ; + + /* Can we fit anything meaningful into bo0 ? 
if not - flush */ + if (tx_peer->bo0_max_current_length <= + tx_peer->bo0->current_length + threshold_len) + { + if (!pvti_flush_peer_and_recharge (vm, node, ptd, tx_peer_index, + stream_index, is_ip6)) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE0]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, + stream_index); + continue; + } + pkts_encapsulated += 1; + } + + pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit ( + tx_peer->bo0, sizeof (pvti_chunk_header_t)); + + u8 *tail; + vlib_buffer_t *b0_curr; + /* append the chained buffers and flush as necessary */ + b0_curr = b0; + + int curr_b0_start_offset = 0; + + while (b0_len > 0) + { + ASSERT (tx_peer->bo0_max_current_length > + tx_peer->bo0->current_length); + int copy_len = + clib_min (b0_curr->current_length - curr_b0_start_offset, + tx_peer->bo0_max_current_length - + tx_peer->bo0->current_length); + tail = vlib_buffer_put_uninit (tx_peer->bo0, copy_len); + clib_memcpy (tail, + (u8 *) vlib_buffer_get_current (b0_curr) + + curr_b0_start_offset, + copy_len); + tail += copy_len; + b0_len -= copy_len; + // Advance the start offset or reset it if we copied the entire + // block + curr_b0_start_offset = + curr_b0_start_offset + copy_len == b0_curr->current_length ? 
+ 0 : + curr_b0_start_offset + copy_len; + ASSERT ((b0_len == 0) || (curr_b0_start_offset > 0) || + (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)); + if (curr_b0_start_offset > 0) + { + pvti_finalize_chunk (tx_peer, chunk_header, tail, + is_reassembly); + tx_peer->is_bo0_traced |= is_b0_traced; + if (!pvti_flush_peer_and_recharge ( + vm, node, ptd, tx_peer_index, stream_index, is_ip6)) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE1]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, + stream_index); + continue; + } + pkts_encapsulated += 1; + /* next chunk(s) will be reassembly until the next block */ + is_reassembly = true; + chunk_header = vlib_buffer_put_uninit ( + tx_peer->bo0, sizeof (pvti_chunk_header_t)); + } + else + { + if ((b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer); + } + else + { + pvti_finalize_chunk (tx_peer, chunk_header, tail, + is_reassembly); + tx_peer->is_bo0_traced |= is_b0_traced; + } + } + } + } + vlib_buffer_free_one (vm, bi0); + } + + int i; + for (i = 0; i < vec_len (ptd->tx_peers); i++) + { + if (ptd->tx_peers[i].chunk_count) + { + pvti_flush_peer_and_recharge (vm, node, ptd, i, stream_index, + is_ip6); + pkts_encapsulated += 1; + } + } + + vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_tx_buffers, + &ptd->pending_tx_nexts, + vec_len (ptd->pending_tx_nexts)); + vec_reset_length (ptd->pending_tx_buffers); + vec_reset_length (ptd->pending_tx_nexts); + + vlib_node_increment_counter ( + vm, node->node_index, PVTI_OUTPUT_ERROR_ENCAPSULATED, pkts_encapsulated); + vlib_node_increment_counter (vm, node->node_index, + PVTI_OUTPUT_ERROR_PROCESSED, pkts_processed); + vlib_node_increment_counter (vm, node->node_index, PVTI_OUTPUT_ERROR_CHOPPED, + pkts_chopped); + vlib_node_increment_counter (vm, node->node_index, + PVTI_OUTPUT_ERROR_OVERFLOW, pkts_overflow); + vlib_node_increment_counter (vm, node->node_index, + PVTI_OUTPUT_ERROR_OVERFLOW_CANTFIT, + 
pkts_overflow_cantfit); + return frame->n_vectors; +} + +VLIB_NODE_FN (pvti4_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_output_node_common (vm, node, frame, 0); +} + +VLIB_NODE_FN (pvti6_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_output_node_common (vm, node, frame, 1); +} diff --git a/src/plugins/pvti/output.h b/src/plugins/pvti/output.h new file mode 100644 index 00000000000..95e78ba9720 --- /dev/null +++ b/src/plugins/pvti/output.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_pvti_output_h__ +#define __included_pvti_output_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 tx_seq; + u16 underlay_mtu; + u16 bi0_max_current_length; + u8 stream_index; + u8 trace_type; + u8 packet_data[96]; +} pvti_output_trace_t; + +#define foreach_pvti_output_error \ + _ (NONE, "No error") \ + _ (PROCESSED, "Packets processed") \ + _ (ENCAPSULATED, "Packets encapsulated") \ + _ (PEER, "No peer found") \ + _ (MAKE_PEER, "Could not make peer") \ + _ (RECHARGE0, "Could not recharge 0") \ + _ (RECHARGE1, "Could not recharge 1") \ + _ (NO_PRE_SPACE, "Not enought pre-data space") \ + _ (CHOPPED, "Packets chopped") \ + _ (OVERFLOW, "Packets overflowed") \ + _ (OVERFLOW_CANTFIT, "Packets overflowed and cant fit excess") + +typedef enum +{ +#define _(sym, str) PVTI_OUTPUT_ERROR_##sym, + foreach_pvti_output_error +#undef _ + PVTI_OUTPUT_N_ERROR, +} pvti_output_error_t; + +typedef enum +{ + PVTI_INDEPENDENT_CHUNK = 0, + PVTI_REASS_CHUNK, +} pvti_chunk_type_t; + +#define MAX_CURR_LEN_UNKNOWN 0xffff + +typedef enum +{ + PVTI_OUTPUT_NEXT_DROP, + PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT, + PVTI_OUTPUT_NEXT_IP4_LOOKUP, + PVTI_OUTPUT_NEXT_IP6_LOOKUP, + PVTI_OUTPUT_N_NEXT, +} pvti_output_next_t; + +#endif // pvti_output_h diff --git a/src/plugins/pvti/pvti.api b/src/plugins/pvti/pvti.api new file mode 100644 index 00000000000..859ed1ab6b0 --- /dev/null +++ b/src/plugins/pvti/pvti.api @@ -0,0 +1,111 @@ +/* + * pvti.api - binary API skeleton + * + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file pvti.api + * @brief VPP control-plane API messages. + * + * This file defines VPP control-plane binary API messages which are generally + * called through a shared memory interface. + */ + +/* Version and type recitations */ + +option version = "0.0.1"; +import "vnet/interface_types.api"; +import "vnet/ip/ip_types.api"; + +/** \brief A composite type uniquely defining a PVTI tunnel. + @param sw_if_index - ignored on create/delete, present in details. + @param src_ip - Source IP address + @param src_port - Source UDP port + @param dst_ip - Destination IP address + @param dst_port - Destination UDP port + @param underlay_mtu - Underlay MTU for packet splitting/coalescing + @param underlay_fib_index - Underlay FIB index to be used after encap +*/ +typedef pvti_tunnel +{ + vl_api_interface_index_t sw_if_index; + vl_api_address_t local_ip; + u16 local_port; + vl_api_address_t remote_ip; + bool peer_address_from_payload; + u16 remote_port; + u16 underlay_mtu; + u32 underlay_fib_index; +}; + + +/** @brief API to enable / disable pvti on an interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 to enable, 0 to disable the feature + @param sw_if_index - interface handle +*/ + +define pvti_interface_create +{ + option status="in_progress"; + + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + vl_api_pvti_tunnel_t 
interface; +}; + +define pvti_interface_create_reply +{ + option status="in_progress"; + u32 context; + i32 retval; + + /* Index for the newly created interface */ + vl_api_interface_index_t sw_if_index; +}; + +autoreply define pvti_interface_delete { + option status="in_progress"; + + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + + vl_api_interface_index_t sw_if_index; +}; + + +define pvti_interface_dump +{ + option status="in_progress"; + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; +}; + +define pvti_interface_details +{ + option status="in_progress"; + u32 context; + vl_api_pvti_tunnel_t interface; +}; + + diff --git a/src/plugins/pvti/pvti.c b/src/plugins/pvti/pvti.c new file mode 100644 index 00000000000..646276dec09 --- /dev/null +++ b/src/plugins/pvti/pvti.c @@ -0,0 +1,479 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <vnet/fib/fib_table.h> +#include <pvti/pvti.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vpp/app/version.h> +#include <stdbool.h> + +#include <pvti/pvti.api_enum.h> +#include <pvti/pvti.api_types.h> + +#include <pvti/pvti_if.h> + +#define REPLY_MSG_ID_BASE pmp->msg_id_base +#include <vlibapi/api_helper_macros.h> +#include <vnet/ip/ip_format_fns.h> + +pvti_main_t pvti_main; + +u8 * +format_pvti_tx_peer_ptr (u8 *s, va_list *args) +{ + pvti_tx_peer_t *peer = va_arg (*args, pvti_tx_peer_t *); + + s = format ( + s, + "[%p]%s local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d " + "pvti_idx:%d b0_max_clen:%d cseq:%d chunk_count:%d reass_chunk_count:%d", + peer, peer->deleted ? " DELETED" : "", format_ip46_address, + &peer->local_ip, IP46_TYPE_ANY, peer->local_port, format_ip46_address, + &peer->remote_ip, IP46_TYPE_ANY, peer->remote_port, peer->underlay_mtu, + peer->underlay_fib_index, peer->pvti_if_index, + peer->bo0_max_current_length, peer->current_tx_seq, peer->chunk_count, + peer->reass_chunk_count); + + return (s); +} + +u8 * +format_pvti_rx_peer_ptr (u8 *s, va_list *args) +{ + pvti_rx_peer_t *peer = va_arg (*args, pvti_rx_peer_t *); + + s = format (s, "[%p]%s local:%U:%d remote:%U:%d pvti_idx:%d", peer, + peer->deleted ? 
" DELETED" : "", format_ip46_address, + &peer->local_ip, IP46_TYPE_ANY, peer->local_port, + format_ip46_address, &peer->remote_ip, IP46_TYPE_ANY, + peer->remote_port, peer->pvti_if_index); + + return (s); +} + +void +pvti_verify_initialized (pvti_main_t *pvm) +{ + if (!pvm->is_initialized) + { + const int n_threads = vlib_get_n_threads (); + vec_validate (pvm->per_thread_data[0], n_threads - 1); + vec_validate (pvm->per_thread_data[1], n_threads - 1); + pvm->is_initialized = 1; + } +} + +void +vnet_int_pvti_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable) +{ + pvti_main_t *pvm = &pvti_main; + + if (pool_is_free_index (pvm->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return; + + pvti_verify_initialized (pvm); + + is_enable = !!is_enable; + + if (is_ip6) + { + if (clib_bitmap_get (pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index) != + is_enable) + { + vnet_feature_enable_disable ("ip6-unicast", "ip6-pvti-bypass", + sw_if_index, is_enable, 0, 0); + pvm->bm_ip6_bypass_enabled_by_sw_if = clib_bitmap_set ( + pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index, is_enable); + } + } + else + { + if (clib_bitmap_get (pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index) != + is_enable) + { + vnet_feature_enable_disable ("ip4-unicast", "ip4-pvti-bypass", + sw_if_index, is_enable, 0, 0); + pvm->bm_ip4_bypass_enabled_by_sw_if = clib_bitmap_set ( + pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index, is_enable); + } + } +} + +static clib_error_t * +set_ip_pvti_bypass (u32 is_ip6, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, is_enable; + + sw_if_index = ~0; + is_enable = 1; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else 
if (unformat (line_input, "del")) + is_enable = 0; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == sw_if_index) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, line_input); + goto done; + } + + vnet_int_pvti_bypass_mode (sw_if_index, is_ip6, is_enable); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +set_ip4_pvti_bypass (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + return set_ip_pvti_bypass (0, input, cmd); +} + +VLIB_CLI_COMMAND (set_interface_ip_pvti_bypass_command, static) = { + .path = "set interface ip pvti-bypass", + .function = set_ip4_pvti_bypass, + .short_help = "set interface ip pvti-bypass <interface> [del]", +}; + +static clib_error_t * +set_ip6_pvti_bypass (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + return set_ip_pvti_bypass (1, input, cmd); +} + +VLIB_CLI_COMMAND (set_interface_ip6_pvti_bypass_command, static) = { + .path = "set interface ip6 pvti-bypass", + .function = set_ip6_pvti_bypass, + .short_help = "set interface ip6 pvti-bypass <interface> [del]", +}; + +static clib_error_t * +pvti_interface_create_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + + // pvti_main_t * pmp = &pvti_main; + u32 sw_if_index = ~0; + int rv = 0; + ip_address_t peer_ip = { 0 }; + ip_address_t local_ip = { 0 }; + u32 peer_port = 0; + u32 local_port = 12345; + u32 underlay_mtu = 1500; + u32 underlay_fib_index = ~0; + u32 underlay_table_id = ~0; + pvti_peer_address_method_t peer_address_method = PVTI_PEER_ADDRESS_FIXED; + bool peer_set = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "peer %U %d %d", unformat_ip_address, &peer_ip, + &peer_port, &local_port)) + { + peer_set = 1; + } + else if (unformat (line_input, "underlay-mtu %d", &underlay_mtu)) + { + // MTU set + } + else if (unformat (line_input, "local-ip %U", unformat_ip_address, + &local_ip)) + { + // local IP set + } + else if (unformat (line_input, "underlay-fib %d", &underlay_fib_index)) + { + // underlay fib set + } + else if (unformat (line_input, "peer-address-from-payload")) + { + peer_address_method = PVTI_PEER_ADDRESS_FROM_PAYLOAD; + } + else if (unformat (line_input, "underlay-table %d", &underlay_table_id)) + { + fib_protocol_t fib_proto = FIB_PROTOCOL_IP4; + if (peer_ip.version == AF_IP6) + { + fib_proto = FIB_PROTOCOL_IP6; + } + u32 fib_index = fib_table_find (fib_proto, underlay_table_id); + + if (~0 == fib_index) + { + error = clib_error_return (0, "Nonexistent table id %d", + underlay_table_id); + goto done; + } + underlay_fib_index = fib_index; + } + else + break; + } + if (!peer_set) + { + error = clib_error_return (0, "Please specify a peer..."); + goto done; + } + + rv = pvti_if_create (&local_ip, local_port, &peer_ip, peer_port, + peer_address_method, underlay_mtu, underlay_fib_index, + &sw_if_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + error = clib_error_return (0, "Invalid interface"); + break; + + default: + error = clib_error_return (0, "pvti_if_create returned %d", rv); + } +done: + unformat_free (line_input); + return error; +} + +static clib_error_t * +pvti_interface_delete_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + // pvti_main_t * pmp = &pvti_main; + u32 sw_if_index = ~0; + int rv = 0; + bool if_index_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if 
(unformat (input, "if-index %d", &sw_if_index)) + { + if_index_set = 1; + } + else + break; + } + if (!if_index_set) + return clib_error_return (0, "Please specify a sw_if_index..."); + + rv = pvti_if_delete (sw_if_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "Invalid interface"); + break; + + default: + return clib_error_return (0, "pvti_if_delete returned %d", rv); + } + return 0; +} + +VLIB_CLI_COMMAND (pvti_interface_create_command, static) = { + .path = "pvti interface create", + .short_help = + "pvti interface create peer <remote-ip> <remote-port> <local-port> [ " + "local-ip <ip-addr> ][ underlay-mtu <MTU>][underlay-table " + "<table-index>][inderlay-fib <fib-index>]", + .function = pvti_interface_create_command_fn, +}; + +VLIB_CLI_COMMAND (pvti_interface_delete_command, static) = { + .path = "pvti interface delete", + .short_help = "pvti interface delete if-index <sw-ifindex>", + .function = pvti_interface_delete_command_fn, +}; + +static clib_error_t * +pvti_show_interface_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + pvti_if_t *pvti_if; + vec_foreach (pvti_if, pvti_main.if_pool) + { + int index = pvti_if - pvti_main.if_pool; + vlib_cli_output (vm, "%U", format_pvti_if, index); + }; + return 0; +} + +static clib_error_t * +pvti_show_tx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + pvti_per_thread_data_t *ptd; + int is_ip6; + for (is_ip6 = 0; is_ip6 <= 1; is_ip6++) + { + vec_foreach (ptd, pvti_main.per_thread_data[is_ip6]) + { + vlib_cli_output (vm, "thread %d (%s)", + ptd - pvti_main.per_thread_data[is_ip6], + is_ip6 ? 
"IPv6" : "IPv4"); + pvti_tx_peer_t *peer; + vec_foreach (peer, ptd->tx_peers) + { + vlib_cli_output (vm, " %U", format_pvti_tx_peer_ptr, peer); + } + } + } + return 0; +} + +static clib_error_t * +pvti_show_rx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + pvti_per_thread_data_t *ptd; + int is_ip6; + for (is_ip6 = 0; is_ip6 <= 1; is_ip6++) + { + vec_foreach (ptd, pvti_main.per_thread_data[is_ip6]) + { + vlib_cli_output (vm, "thread %d (%s)", + ptd - pvti_main.per_thread_data[is_ip6], + is_ip6 ? "IPv6" : "IPv4"); + pvti_rx_peer_t *peer; + vec_foreach (peer, ptd->rx_peers) + { + vlib_cli_output (vm, " %U", format_pvti_rx_peer_ptr, peer); + } + } + } + return 0; +} + +VLIB_CLI_COMMAND (pvti_show_interface_command, static) = { + .path = "show pvti interface", + .short_help = "show pvti interface", + .function = pvti_show_interface_command_fn, +}; + +VLIB_CLI_COMMAND (pvti_show_tx_peers_command, static) = { + .path = "show pvti tx peers", + .short_help = "show pvti tx peers", + .function = pvti_show_tx_peers_command_fn, +}; + +VLIB_CLI_COMMAND (pvti_show_rx_peers_command, static) = { + .path = "show pvti rx peers", + .short_help = "show pvti rx peers", + .function = pvti_show_rx_peers_command_fn, +}; + +void pvti_api_init (); + +VNET_FEATURE_INIT (pvti4_bypass, static) = { + .arc_name = "ip4-unicast", + .node_name = "ip4-pvti-bypass", + .runs_before = 0, +}; + +VNET_FEATURE_INIT (pvti6_bypass, static) = { + .arc_name = "ip6-unicast", + .node_name = "ip6-pvti-bypass", + .runs_before = 0, +}; + +static clib_error_t * +pvti_early_config (vlib_main_t *vm, unformat_input_t *input) +{ + u8 *runs_before = 0; + int rbi = 0; + if (vec_len (vnet_feat_pvti4_bypass.runs_before) == 0) + { + rbi = 0; + } + else + { + rbi = vec_len (vnet_feat_pvti4_bypass.runs_before) - 1; + } + vec_validate (vnet_feat_pvti4_bypass.runs_before, rbi); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "runs-before 
%v", &runs_before)) + { + vec_add1 (runs_before, 0); + vnet_feat_pvti4_bypass.runs_before[rbi] = (char *) runs_before; + vec_add1 (vnet_feat_pvti4_bypass.runs_before, 0); + } + else + return clib_error_return (0, "unknown input"); + } + + return NULL; +} + +VLIB_EARLY_CONFIG_FUNCTION (pvti_early_config, "pvti"); + +static clib_error_t * +pvti_init (vlib_main_t *vm) +{ + pvti_main_t *pmp = &pvti_main; + clib_error_t *error = 0; + + pmp->vlib_main = vm; + pmp->vnet_main = vnet_get_main (); + pmp->is_initialized = 0; + + pvti_api_init (); + return error; +} + +VLIB_INIT_FUNCTION (pvti_init); + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Packet Vector Tunnel Interface plugin", +}; diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h new file mode 100644 index 00000000000..ac097c5ecca --- /dev/null +++ b/src/plugins/pvti/pvti.h @@ -0,0 +1,257 @@ +/* + * pvti.h - skeleton vpp engine plug-in header file + * + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_pvti_h__ +#define __included_pvti_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> + +#define VPP_MAX_THREADS (1 << 8) + +#define MAX_RX_STREAMS 256 + +#define PVTI_ALIGN_BYTES 9 + +typedef CLIB_PACKED (struct { + u32 seq; + u8 stream_index; // set to the cpu# on the sending side + u8 chunk_count; + u8 reass_chunk_count; // number of chunks in the front that are related to + // previously started buffer + // mandatory_flags_mask highlights which of the flags cause packet drop if + // not understood, and which of them can be just ignored. + u8 mandatory_flags_mask; + u8 flags_value; + u8 pad_bytes; + u8 pad[0]; +}) pvti_packet_header_t; + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; + udp_header_t udp; + // not part of encap header pvti_packet_header_t pv; +}) pvti_ip4_encap_header_t; + +typedef CLIB_PACKED (struct { + ip6_header_t ip6; + udp_header_t udp; + // not part of encap header pvti_packet_header_t pv; +}) pvti_ip6_encap_header_t; + +typedef CLIB_PACKED (struct { + u16 total_chunk_length; + // More fragments: this chunk is not the last block fragment +#define CHUNK_FLAGS_MF (1 << 0) + // More blocks: this block has chained blocks that follow +#define CHUNK_FLAGS_MB (1 << 1) + u16 _pad0; + u32 _pad1; + u8 chunk_data[0]; +}) pvti_chunk_header_t; + +typedef struct +{ + // a buffer being built from the smaller packets + u32 bi0; + + // how big can this buffer grow + u32 bi0_max_current_length; + + // how many chunks are already in the buffer + u8 chunk_count; + // leading reassembly chunk count + u8 reass_chunk_count; + + u32 current_tx_seq; +} pvti_per_tx_stream_data_t; + +typedef struct +{ + /* The seq# that we last processed */ + u32 last_rx_seq; + + // a current buffer that is being reassembled + u32 rx_bi0; + // The root buffer, most of the times == rx_bi0 except in the case of chained + // buffers. 
+ u32 rx_bi0_first; + + // Next index for dispatch when the reassembly is done + u16 rx_next0; + // expected totall inner length for the packet + u16 rx_expected_inner_length; + u16 rx_received_inner_length; + +} pvti_per_rx_stream_data_t; + +typedef struct +{ + ip_address_t local_ip; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u16 underlay_mtu; + u32 underlay_fib_index; + + u32 pvti_if_index; + bool deleted; + bool is_bo0_traced; + + u32 bo0_max_current_length; + + u8 chunk_count; + u8 reass_chunk_count; + u32 current_tx_seq; + vlib_buffer_t *bo0; + +} pvti_tx_peer_t; + +typedef struct +{ + ip_address_t local_ip; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + + pvti_per_rx_stream_data_t rx_streams[MAX_RX_STREAMS]; + + u32 pvti_if_index; + bool deleted; +} pvti_rx_peer_t; + +typedef struct +{ + /* pool of destination-based structures which are used to build the packets + */ + pvti_tx_peer_t *tx_peers; + + /* vector of buffers to send */ + u32 *pending_tx_buffers; + u16 *pending_tx_nexts; + /* pool of source-based structures for the remote peers' data tracking + */ + pvti_rx_peer_t *rx_peers; + + /* vector of buffers being decapsulated */ + u32 *pending_rx_buffers; + u16 *pending_rx_nexts; + +} pvti_per_thread_data_t; + +typedef struct +{ + ip_address_t local_ip; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u16 underlay_mtu; + u32 underlay_fib_index; + bool peer_address_from_payload; + u64 created_at; + + u32 sw_if_index; + u32 hw_if_index; + + // per-stream data for TX + pvti_per_tx_stream_data_t tx_streams[256]; + pvti_per_rx_stream_data_t rx_streams[256]; + +} pvti_if_t; + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* have we initialized the data structures ? 
*/ + bool is_initialized; + + /* interface pool */ + pvti_if_t *if_pool; + + /* if_index in the pool above by sw_if_index */ + index_t *if_index_by_sw_if_index; + + /* indices by port */ + index_t **if_indices_by_port; + + /* per-thread data, ip4[0] and ip6[1] */ + pvti_per_thread_data_t *per_thread_data[2]; + + /* on/off switch for the periodic function */ + u8 periodic_timer_enabled; + /* Node index, non-zero if the periodic process has been created */ + u32 periodic_node_index; + + /* graph node state */ + uword *bm_ip4_bypass_enabled_by_sw_if; + uword *bm_ip6_bypass_enabled_by_sw_if; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ethernet_main_t *ethernet_main; +} pvti_main_t; + +extern pvti_main_t pvti_main; + +extern vlib_node_registration_t pvti_node; +extern vlib_node_registration_t pvti4_input_node; +extern vlib_node_registration_t pvti4_output_node; +extern vlib_node_registration_t pvti6_input_node; +extern vlib_node_registration_t pvti6_output_node; +extern vlib_node_registration_t pvti_periodic_node; + +always_inline u8 +pvti_get_stream_index (int is_ip6) +{ + u32 thread_index = vlib_get_thread_index (); + + ASSERT ((thread_index & 0xffffff80) == 0); + + u8 stream_index = (thread_index & 0x7f) | (is_ip6 ? 
0x80 : 0); + return stream_index; +} + +/* attempt to get a new buffer */ +always_inline u32 +pvti_get_new_buffer (vlib_main_t *vm) +{ + u32 bi0 = INDEX_INVALID; + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + { + return INDEX_INVALID; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + b0->current_data = 0; + b0->current_length = 0; + return bi0; +} + +/* Periodic function events */ +#define PVTI_EVENT1 1 +#define PVTI_EVENT2 2 +#define PVTI_EVENT_PERIODIC_ENABLE_DISABLE 3 + +void pvti_create_periodic_process (pvti_main_t *); +void pvti_verify_initialized (pvti_main_t *pvm); + +#endif /* __included_pvti_h__ */ diff --git a/src/plugins/pvti/pvti_if.c b/src/plugins/pvti/pvti_if.c new file mode 100644 index 00000000000..4f83994a1a4 --- /dev/null +++ b/src/plugins/pvti/pvti_if.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Copyright (c) 2020 Doc.ai and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/adj/adj_midchain.h> +#include <vnet/udp/udp.h> + +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +static u8 * +format_pvti_if_name (u8 *s, va_list *args) +{ + u32 dev_instance = va_arg (*args, u32); + // wg_if_t *wgi = wg_if_get (dev_instance); + return format (s, "pvti%d", dev_instance); +} + +u8 * +format_pvti_if (u8 *s, va_list *args) +{ + index_t pvtii = va_arg (*args, u32); + pvti_if_t *pvti_if = pvti_if_get (pvtii); + + s = format ( + s, "[%d] %U local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d", + pvtii, format_vnet_sw_if_index_name, vnet_get_main (), + pvti_if->sw_if_index, format_ip46_address, &pvti_if->local_ip, + IP46_TYPE_ANY, pvti_if->local_port, format_ip46_address, + &pvti_if->remote_ip, IP46_TYPE_ANY, pvti_if->remote_port, + pvti_if->underlay_mtu, pvti_if->underlay_fib_index); + + return (s); +} + +index_t +pvti_if_find_by_sw_if_index (u32 sw_if_index) +{ + if (vec_len (pvti_main.if_index_by_sw_if_index) <= sw_if_index) + return INDEX_INVALID; + u32 ti = pvti_main.if_index_by_sw_if_index[sw_if_index]; + if (ti == ~0) + return INDEX_INVALID; + + return (ti); +} + +index_t +pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4, + u16 remote_port) +{ + pvti_if_t *ifc; + pool_foreach (ifc, pvti_main.if_pool) + { + if ((ifc->remote_port == remote_port) && + (ifc->remote_ip.version == AF_IP4) && + ((ifc->remote_ip.ip.ip4.as_u32 == remote_ip4->as_u32) || + ifc->peer_address_from_payload)) + { + return (ifc - pvti_main.if_pool); + } + } + return INDEX_INVALID; +} + +index_t +pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6, + u16 remote_port) +{ + pvti_if_t *ifc; + pool_foreach (ifc, pvti_main.if_pool) + { + if ((ifc->remote_port == remote_port) && + (ifc->remote_ip.version == AF_IP6) && + ((0 == memcmp (&ifc->remote_ip.ip.ip6, remote_ip6, + sizeof (*remote_ip6))) || + ifc->peer_address_from_payload)) + { + return (ifc - pvti_main.if_pool); + } + } + return INDEX_INVALID; +} + +index_t 
+pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, u16 remote_port) +{ + pvti_if_t *ifc; + pool_foreach (ifc, pvti_main.if_pool) + { + if ((ifc->remote_port == remote_port) && + (ifc->peer_address_from_payload || + (0 == ip_address_cmp (remote_ip, &ifc->remote_ip)))) + { + return (ifc - pvti_main.if_pool); + } + } + return INDEX_INVALID; +} + +static void +pvti_add_tidx_by_port (index_t t_index, u16 port) +{ + pvti_main_t *pvm = &pvti_main; + vec_validate_init_empty (pvm->if_indices_by_port, port, NULL); + vec_add1 (pvm->if_indices_by_port[port], t_index); +} + +static void +pvti_del_tidx_by_port (index_t t_index, u16 port) +{ + pvti_main_t *pvm = &pvti_main; + index_t *ii; + if (!pvm->if_indices_by_port) + { + return; + } + if (port >= vec_len (pvm->if_indices_by_port)) + { + return; + } + if (vec_len (pvm->if_indices_by_port[port]) == 0) + { + ALWAYS_ASSERT (pvm->if_indices_by_port[port] > 0); + /* not reached */ + return; + } + + vec_foreach (ii, pvm->if_indices_by_port[port]) + { + if (*ii == t_index) + { + vec_del1 (pvm->if_indices_by_port[port], + pvm->if_indices_by_port[port] - ii); + break; + } + } +} + +static u32 +pvti_get_tunnel_count_by_port (u16 port) +{ + pvti_main_t *pvm = &pvti_main; + if (!pvm->if_indices_by_port) + { + return 0; + } + return vec_len (vec_elt (pvm->if_indices_by_port, port)); +} + +static clib_error_t * +pvti_if_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags) +{ + // vnet_hw_interface_t *hi; + u32 hw_flags; + + // hi = vnet_get_hw_interface (vnm, hw_if_index); + hw_flags = + (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? VNET_HW_INTERFACE_FLAG_LINK_UP : + 0); + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return (NULL); +} + +void +pvti_if_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai) +{ + + /* Convert any neighbour adjacency that has a next-hop reachable through + * the wg interface into a midchain. 
This is to avoid sending ARP/ND to + * resolve the next-hop address via the wg interface. Then, if one of the + * peers has matching prefix among allowed prefixes, the midchain will be + * updated to the corresponding one. + */ + adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL); + + // wgii = wg_if_find_by_sw_if_index (sw_if_index); + // wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai); +} + +VNET_DEVICE_CLASS (pvti_if_device_class) = { + .name = "Packet Vectorizer Tunnel", + .format_device_name = format_pvti_if_name, + .admin_up_down_function = pvti_if_admin_up_down, +}; + +VNET_HW_INTERFACE_CLASS (pvti_hw_interface_class) = { + .name = "PVTunnel", + .update_adjacency = pvti_if_update_adj, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, + // .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA, +}; + +int +pvti_if_create (ip_address_t *local_ip, u16 local_port, + ip_address_t *remote_ip, u16 remote_port, + pvti_peer_address_method_t peer_address_method, + u16 underlay_mtu, u32 underlay_fib_index, u32 *sw_if_indexp) +{ + vnet_main_t *vnm = vnet_get_main (); + pvti_main_t *pvm = &pvti_main; + u32 hw_if_index; + vnet_hw_interface_t *hi; + pvti_verify_initialized (pvm); + + pvti_if_t *pvti_if; + + ASSERT (sw_if_indexp); + + *sw_if_indexp = (u32) ~0; + + pool_get_zero (pvti_main.if_pool, pvti_if); + pvti_if->local_ip = *local_ip; + pvti_if->local_port = local_port; + pvti_if->remote_ip = *remote_ip; + if (peer_address_method == PVTI_PEER_ADDRESS_FROM_PAYLOAD) + { + pvti_if->peer_address_from_payload = 1; + } + pvti_if->remote_port = remote_port; + pvti_if->underlay_mtu = underlay_mtu; + pvti_if->underlay_fib_index = underlay_fib_index; + pvti_if->created_at = clib_cpu_time_now (); + + /* tunnel index (or instance) */ + u32 t_idx = pvti_if - pvti_main.if_pool; + + hw_if_index = + vnet_register_interface (vnm, pvti_if_device_class.index, t_idx, + pvti_hw_interface_class.index, t_idx); + + pvti_if->hw_if_index = hw_if_index; + + hi = 
vnet_get_hw_interface (vnm, hw_if_index); + pvti_if->sw_if_index = *sw_if_indexp = hi->sw_if_index; + + vec_validate_init_empty (pvm->if_index_by_sw_if_index, hi->sw_if_index, + INDEX_INVALID); + + vec_elt (pvm->if_index_by_sw_if_index, hi->sw_if_index) = t_idx; + pvti_if_t *pvti_if0 = pool_elt_at_index (pvti_main.if_pool, t_idx); + int i; + for (i = 0; i < 256; i++) + { + pvti_if0->tx_streams[i].bi0 = INDEX_INVALID; + pvti_if0->tx_streams[i].current_tx_seq = 42; + + pvti_if0->rx_streams[i].rx_bi0 = INDEX_INVALID; + pvti_if0->rx_streams[i].rx_bi0_first = INDEX_INVALID; + } + + /* + int is_ip6 = 0; + u32 encap_index = !is_ip6 ? + pvti4_output_node.index : pvti6_output_node.index; + vnet_set_interface_output_node (vnm, pvti_if->hw_if_index, encap_index); + */ + vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index, + (u8 *) "pvti4-output"); + + pvti_add_tidx_by_port (t_idx, local_port); + if (1 == pvti_get_tunnel_count_by_port (local_port)) + { + clib_warning ("Registering local port %d", local_port); + udp_register_dst_port (vlib_get_main (), local_port, + pvti4_input_node.index, UDP_IP4); + udp_register_dst_port (vlib_get_main (), local_port, + pvti6_input_node.index, UDP_IP6); + } + else + { + clib_warning ("Not registering the port"); + } + + vnet_hw_interface_set_flags (vnm, pvti_if->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + return 0; +} + +void +pvti_if_walk (pvti_if_walk_cb_t fn, void *data) +{ + index_t pvtii; + + pool_foreach_index (pvtii, pvti_main.if_pool) + { + if (WALK_STOP == fn (pvtii, data)) + break; + } +} + +int +pvti_if_delete (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + pvti_main_t *pvm = &pvti_main; + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == 0 || hw->dev_class_index != pvti_if_device_class.index) + return VNET_API_ERROR_INVALID_VALUE; + + 
pvti_if_t *ifc; + bool found = 0; + pool_foreach (ifc, pvm->if_pool) + { + if (ifc->sw_if_index == sw_if_index) + { + found = 1; + break; + } + } + if (!found) + { + return VNET_API_ERROR_INVALID_VALUE_2; + } + index_t tidx = ifc - pvm->if_pool; + + u16 local_port = ifc->local_port; + pvti_del_tidx_by_port (tidx, local_port); + pvm->if_index_by_sw_if_index[sw_if_index] = INDEX_INVALID; + + if (0 == pvti_get_tunnel_count_by_port (local_port)) + { + udp_unregister_dst_port (vlib_get_main (), local_port, 1); + udp_unregister_dst_port (vlib_get_main (), local_port, 0); + } + + vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index); + vnet_delete_hw_interface (vnm, hw->hw_if_index); + pool_put (pvti_main.if_pool, ifc); + + /* mark per-thread peers as deleted */ + pvti_per_thread_data_t *ptd; + + vec_foreach (ptd, pvm->per_thread_data[0]) + { + pvti_tx_peer_t *peer; + vec_foreach (peer, ptd->tx_peers) + { + if (tidx == peer->pvti_if_index) + { + peer->deleted = 1; + } + } + } + vec_foreach (ptd, pvm->per_thread_data[1]) + { + pvti_tx_peer_t *peer; + vec_foreach (peer, ptd->tx_peers) + { + if (tidx == peer->pvti_if_index) + { + peer->deleted = 1; + } + } + } + + return 0; +} diff --git a/src/plugins/pvti/pvti_if.h b/src/plugins/pvti/pvti_if.h new file mode 100644 index 00000000000..44bf22ce825 --- /dev/null +++ b/src/plugins/pvti/pvti_if.h @@ -0,0 +1,47 @@ +#ifndef PVTI_IF_H +#define PVTI_IF_H + +#include <vnet/interface_funcs.h> + +typedef enum +{ + PVTI_PEER_ADDRESS_FIXED = 0, + PVTI_PEER_ADDRESS_FROM_PAYLOAD +} pvti_peer_address_method_t; + +typedef walk_rc_t (*pvti_if_walk_cb_t) (index_t wgi, void *data); +void pvti_if_walk (pvti_if_walk_cb_t fn, void *data); + +int pvti_if_create (ip_address_t *local_ip, u16 local_port, + ip_address_t *remote_ip, u16 remote_port, + pvti_peer_address_method_t peer_address_method, + u16 underlay_mtu, u32 underlay_fib_index, + u32 *sw_if_indexp); +index_t pvti_if_find_by_sw_if_index (u32 sw_if_index); +index_t 
pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4, + u16 remote_port); +index_t pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip4, + u16 remote_port); + +index_t pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, + u16 remote_port); + +int pvti_if_delete (u32 sw_if_index); + +u8 *format_pvti_if (u8 *s, va_list *args); + +static_always_inline pvti_if_t * +pvti_if_get (index_t pvtii) +{ + if (INDEX_INVALID == pvtii) + return (NULL); + return (pool_elt_at_index (pvti_main.if_pool, pvtii)); +} + +static_always_inline index_t +pvti_if_get_index (pvti_if_t *pvti_if) +{ + return pvti_if - pvti_main.if_pool; +} + +#endif diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c index 60d4ac21c19..3797cd2b4ea 100644 --- a/src/plugins/quic/quic.c +++ b/src/plugins/quic/quic.c @@ -1058,6 +1058,8 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream) svm_fifo_add_want_deq_ntf (stream_session->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL | SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY); + svm_fifo_init_ooo_lookup (stream_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (stream_session->tx_fifo, 1 /* ooo deq */); stream_session->session_state = SESSION_STATE_ACCEPTING; if ((rv = app_worker_accept_notify (app_wrk, stream_session))) @@ -1302,6 +1304,8 @@ quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep) return app_worker_connect_notify (app_wrk, NULL, rv, sep->opaque); } + svm_fifo_init_ooo_lookup (stream_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (stream_session->tx_fifo, 1 /* ooo deq */); svm_fifo_add_want_deq_ntf (stream_session->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL | SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY); @@ -1328,14 +1332,16 @@ quic_connect_connection (session_endpoint_cfg_t * sep) quic_ctx_t *ctx; app_worker_t *app_wrk; application_t *app; + transport_endpt_ext_cfg_t *ext_cfg; int error; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, 
TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return SESSION_E_NOEXTCFG; /* Use pool on thread 1 if we have workers because of UDP */ thread_index = transport_cl_thread (); - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; clib_memset (cargs, 0, sizeof (*cargs)); ctx_index = quic_ctx_alloc (thread_index); @@ -1471,13 +1477,15 @@ quic_start_listen (u32 quic_listen_session_index, quic_ctx_t *lctx; u32 lctx_index; app_listener_t *app_listener; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return SESSION_E_NOEXTCFG; - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; app_wrk = app_worker_get (sep->app_wrk_index); app = application_get (app_wrk->app_index); QUIC_DBG (2, "Called quic_start_listen for app %d", app_wrk->app_index); @@ -1679,6 +1687,9 @@ quic_on_quic_session_connected (quic_ctx_t * ctx) return; } + svm_fifo_init_ooo_lookup (quic_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (quic_session->tx_fifo, 1 /* ooo deq */); + quic_session->session_state = SESSION_STATE_CONNECTING; if ((rv = app_worker_connect_notify (app_wrk, quic_session, SESSION_E_NONE, ctx->client_opaque))) @@ -2137,6 +2148,9 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx) return; } + svm_fifo_init_ooo_lookup (quic_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (quic_session->tx_fifo, 1 /* ooo deq */); + app_wrk = app_worker_get (quic_session->app_wrk_index); quic_session->session_state = SESSION_STATE_ACCEPTING; if ((rv = app_worker_accept_notify (app_wrk, quic_session))) diff --git a/src/plugins/snort/CMakeLists.txt b/src/plugins/snort/CMakeLists.txt index bd9dcdc4fdd..3fc2bd625a4 100644 --- a/src/plugins/snort/CMakeLists.txt +++ b/src/plugins/snort/CMakeLists.txt @@ -7,6 +7,10 @@ add_vpp_plugin(snort dequeue.c main.c cli.c + snort_api.c + + API_FILES + snort.api MULTIARCH_SOURCES 
enqueue.c diff --git a/src/plugins/snort/cli.c b/src/plugins/snort/cli.c index 08740f41b37..4b6dbc742a7 100644 --- a/src/plugins/snort/cli.c +++ b/src/plugins/snort/cli.c @@ -25,6 +25,7 @@ snort_create_instance_command_fn (vlib_main_t *vm, unformat_input_t *input, u8 *name = 0; u32 queue_size = 1024; u8 drop_on_diconnect = 1; + int rv = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -60,8 +61,30 @@ snort_create_instance_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - err = snort_instance_create (vm, (char *) name, min_log2 (queue_size), - drop_on_diconnect); + rv = snort_instance_create (vm, (char *) name, min_log2 (queue_size), + drop_on_diconnect); + + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_ENTRY_ALREADY_EXISTS: + err = clib_error_return (0, "instance '%s' already exists", name); + break; + case VNET_API_ERROR_SYSCALL_ERROR_1: + err = clib_error_return (0, "memory fd failure: %U", format_clib_error, + clib_mem_get_last_error ()); + break; + case VNET_API_ERROR_SYSCALL_ERROR_2: + err = clib_error_return (0, "ftruncate failure"); + break; + case VNET_API_ERROR_SYSCALL_ERROR_3: + err = clib_error_return (0, "mmap failure"); + break; + default: + err = clib_error_return (0, "snort_instance_create returned %d", rv); + break; + } done: vec_free (name); @@ -77,6 +100,118 @@ VLIB_CLI_COMMAND (snort_create_instance_command, static) = { }; static clib_error_t * +snort_disconnect_instance_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *err = 0; + u8 *name = 0; + snort_instance_t *si; + int rv = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "please specify instance name"); + + if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + unformat (line_input, "%s", &name); + + if (!name) + { + err = clib_error_return (0, 
"please specify instance name"); + goto done; + } + + si = snort_get_instance_by_name ((char *) name); + if (!si) + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + else + rv = snort_instance_disconnect (vm, si->index); + + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + err = clib_error_return (0, "unknown instance '%s'", name); + break; + case VNET_API_ERROR_FEATURE_DISABLED: + err = clib_error_return (0, "instance '%s' is not connected", name); + break; + case VNET_API_ERROR_INVALID_VALUE: + err = clib_error_return (0, "failed to disconnect a broken client"); + break; + default: + err = clib_error_return (0, "snort_instance_disconnect returned %d", rv); + break; + } + +done: + vec_free (name); + unformat_free (line_input); + return err; +} + +VLIB_CLI_COMMAND (snort_disconnect_instance_command, static) = { + .path = "snort disconnect instance", + .short_help = "snort disconnect instance <name>", + .function = snort_disconnect_instance_command_fn, +}; + +static clib_error_t * +snort_delete_instance_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *err = 0; + u8 *name = 0; + int rv = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "please specify instance name"); + + if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + unformat (line_input, "%s", &name); + + if (!name) + { + err = clib_error_return (0, "please specify instance name"); + goto done; + } + + snort_instance_t *si = snort_get_instance_by_name ((char *) name); + if (!si) + err = clib_error_return (0, "unknown instance '%s' requested", name); + else + rv = snort_instance_delete (vm, si->index); + + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + err = clib_error_return (0, "instance '%s' deletion failure", name); + break; + case VNET_API_ERROR_INSTANCE_IN_USE: + err = clib_error_return (0, 
"instance '%s' has connected client", name); + break; + default: + err = clib_error_return (0, "snort_instance_delete returned %d", rv); + break; + } + +done: + vec_free (name); + unformat_free (line_input); + return err; +} + +VLIB_CLI_COMMAND (snort_delete_instance_command, static) = { + .path = "snort delete instance", + .short_help = "snort delete instance <name>", + .function = snort_delete_instance_command_fn, +}; + +static clib_error_t * snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { @@ -86,6 +221,7 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input, u8 *name = 0; u32 sw_if_index = ~0; snort_attach_dir_t dir = SNORT_INOUT; + int rv = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -124,8 +260,29 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - err = - snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir); + rv = snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir); + + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_FEATURE_ALREADY_ENABLED: + /* already attached to same instance */ + break; + case VNET_API_ERROR_INSTANCE_IN_USE: + err = clib_error_return (0, + "interface %U already assigned to " + "an instance", + format_vnet_sw_if_index_name, vnm, sw_if_index); + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + err = clib_error_return (0, "unknown instance '%s'", name); + break; + default: + err = clib_error_return (0, "snort_interface_enable_disable returned %d", + rv); + break; + } done: vec_free (name); @@ -148,6 +305,7 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input, vnet_main_t *vnm = vnet_get_main (); clib_error_t *err = 0; u32 sw_if_index = ~0; + int rv = 0; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -172,7 +330,23 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - err = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT); + rv = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT); + + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_INVALID_INTERFACE: + err = clib_error_return (0, + "interface %U is not assigned to snort " + "instance!", + format_vnet_sw_if_index_name, vnm, sw_if_index); + break; + default: + err = clib_error_return (0, "snort_interface_enable_disable returned %d", + rv); + break; + } done: unformat_free (line_input); @@ -213,7 +387,7 @@ snort_show_interfaces_command_fn (vlib_main_t *vm, unformat_input_t *input, snort_instance_t *si; u32 *index; - vlib_cli_output (vm, "interface\tsnort instance"); + vlib_cli_output (vm, "interface\t\tsnort instance"); vec_foreach (index, sm->instance_by_sw_if_index) { if (index[0] != ~0) @@ -237,7 +411,18 @@ snort_show_clients_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { snort_main_t *sm = &snort_main; - vlib_cli_output (vm, "number of clients: %d", pool_elts (sm->clients)); + u32 n_clients = pool_elts (sm->clients); + snort_client_t *c; + snort_instance_t *si; + + vlib_cli_output (vm, "number of clients: %d", n_clients); + if (n_clients) + vlib_cli_output (vm, "client snort instance"); + pool_foreach (c, sm->clients) + { + si = vec_elt_at_index (sm->instances, c->instance_index); + vlib_cli_output (vm, "%6d %s", c - sm->clients, si->name); + } return 0; } @@ -251,14 +436,16 @@ static clib_error_t * snort_mode_polling_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { - return snort_set_node_mode (vm, VLIB_NODE_STATE_POLLING); + snort_set_node_mode (vm, VLIB_NODE_STATE_POLLING); + return 0; } static clib_error_t * snort_mode_interrupt_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t 
*cmd) { - return snort_set_node_mode (vm, VLIB_NODE_STATE_INTERRUPT); + snort_set_node_mode (vm, VLIB_NODE_STATE_INTERRUPT); + return 0; } VLIB_CLI_COMMAND (snort_mode_polling_command, static) = { diff --git a/src/plugins/snort/daq_vpp.c b/src/plugins/snort/daq_vpp.c index 386092a0382..6fc0bf5506a 100644 --- a/src/plugins/snort/daq_vpp.c +++ b/src/plugins/snort/daq_vpp.c @@ -10,6 +10,7 @@ #include <sys/socket.h> #include <sys/un.h> #include <sys/mman.h> +#include <sys/time.h> #include <errno.h> #include <sys/epoll.h> @@ -521,6 +522,7 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp, { uint32_t n_recv, n_left; uint32_t head, next, mask = qp->queue_size - 1; + struct timeval tv; if (max_recv == 0) return 0; @@ -535,11 +537,14 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp, n_left = n_recv = max_recv; } + gettimeofday (&tv, NULL); while (n_left--) { uint32_t desc_index = qp->enq_ring[next & mask]; daq_vpp_desc_t *d = qp->descs + desc_index; VPPDescData *dd = qp->desc_data + desc_index; + dd->pkthdr.ts.tv_sec = tv.tv_sec; + dd->pkthdr.ts.tv_usec = tv.tv_usec; dd->pkthdr.pktlen = d->length; dd->pkthdr.address_space_id = d->address_space_id; dd->msg.data = vc->bpools[d->buffer_pool].base + d->offset; diff --git a/src/plugins/snort/dequeue.c b/src/plugins/snort/dequeue.c index 31745de404c..bc301f6888b 100644 --- a/src/plugins/snort/dequeue.c +++ b/src/plugins/snort/dequeue.c @@ -307,7 +307,7 @@ snort_deq_node_polling (vlib_main_t *vm, vlib_node_runtime_t *node, snort_qpair_t *qp; snort_instance_t *si; - vec_foreach (si, sm->instances) + pool_foreach (si, sm->instances) { qp = vec_elt_at_index (si->qpairs, vm->thread_index); u32 ready = __atomic_load_n (&qp->ready, __ATOMIC_ACQUIRE); diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c index 409c0e49078..ce4f34491ec 100644 --- a/src/plugins/snort/enqueue.c +++ b/src/plugins/snort/enqueue.c @@ -133,7 +133,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t 
*node, nexts, n_processed); } - vec_foreach (si, sm->instances) + pool_foreach (si, sm->instances) { u32 head, freelist_len, n_pending, n_enq, mask; u64 ctr = 1; diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c index 2430fcdc5c2..50bff027a13 100644 --- a/src/plugins/snort/main.c +++ b/src/plugins/snort/main.c @@ -3,10 +3,24 @@ */ #include <vlib/vlib.h> +#include <vlibapi/api_types.h> #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> #include <snort/snort.h> +#include <snort/snort.api_enum.h> +#include <snort/snort.api_types.h> + +#include <vnet/ip/ip_types_api.h> +#include <vnet/format_fns.h> + +#include <vlibapi/api_helper_macros.h> + +#include <vnet/vnet.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> + #include <sys/eventfd.h> snort_main_t snort_main; @@ -18,6 +32,12 @@ VLIB_REGISTER_LOG_CLASS (snort_log, static) = { #define log_debug(fmt, ...) vlib_log_debug (snort_log.class, fmt, __VA_ARGS__) #define log_err(fmt, ...) vlib_log_err (snort_log.class, fmt, __VA_ARGS__) +snort_main_t * +snort_get_main () +{ + return &snort_main; +} + static void snort_client_disconnect (clib_file_t *uf) { @@ -45,7 +65,38 @@ snort_client_disconnect (clib_file_t *uf) pool_put (sm->clients, c); } -static snort_instance_t * +int +snort_instance_disconnect (vlib_main_t *vm, u32 instance_index) +{ + snort_main_t *sm = &snort_main; + snort_instance_t *si; + snort_client_t *client; + clib_file_main_t *fm = &file_main; + clib_file_t *uf = 0; + int rv = 0; + + si = snort_get_instance_by_index (instance_index); + if (!si) + return VNET_API_ERROR_NO_SUCH_ENTRY; + if (si->client_index == ~0) + return VNET_API_ERROR_FEATURE_DISABLED; + + client = pool_elt_at_index (sm->clients, si->client_index); + uf = clib_file_get (fm, client->file_index); + if (uf) + snort_client_disconnect (uf); + else + { + log_err ("failed to disconnect a broken client from" + "instance '%s'", + si->name); + rv = VNET_API_ERROR_INVALID_VALUE; + } + + return rv; +} + 
+snort_instance_t * snort_get_instance_by_name (char *name) { snort_main_t *sm = &snort_main; @@ -54,7 +105,16 @@ snort_get_instance_by_name (char *name) return 0; return vec_elt_at_index (sm->instances, p[0]); - ; +} + +snort_instance_t * +snort_get_instance_by_index (u32 instance_index) +{ + snort_main_t *sm = &snort_main; + + if (pool_is_free_index (sm->instances, instance_index)) + return 0; + return pool_elt_at_index (sm->instances, instance_index); } static clib_error_t * @@ -110,6 +170,8 @@ snort_conn_fd_read_ready (clib_file_t *uf) snort_client_disconnect (uf); return 0; } + snort_freelist_init (qp->freelist); + *qp->enq_head = *qp->deq_head = qp->next_desc = 0; } base = (u8 *) si->shm_base; @@ -281,14 +343,13 @@ snort_listener_init (vlib_main_t *vm) return 0; } -clib_error_t * +int snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, u8 drop_on_disconnect) { vlib_thread_main_t *tm = vlib_get_thread_main (); snort_main_t *sm = &snort_main; snort_instance_t *si; - clib_error_t *err = 0; u32 index, i; u8 *base = CLIB_MEM_VM_MAP_FAILED; u32 size; @@ -296,9 +357,10 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, u32 qpair_mem_sz = 0; u32 qsz = 1 << log2_queue_sz; u8 align = CLIB_CACHE_LINE_BYTES; + int rv = 0; if (snort_get_instance_by_name (name)) - return clib_error_return (0, "instance already exists"); + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; /* descriptor table */ qpair_mem_sz += round_pow2 (qsz * sizeof (daq_vpp_desc_t), align); @@ -316,14 +378,13 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, if (fd == -1) { - err = clib_error_return (0, "memory fd failure: %U", format_clib_error, - clib_mem_get_last_error ()); + rv = VNET_API_ERROR_SYSCALL_ERROR_1; goto done; } if ((ftruncate (fd, size)) == -1) { - err = clib_error_return (0, "ftruncate failure"); + rv = VNET_API_ERROR_SYSCALL_ERROR_2; goto done; } @@ -331,7 +392,7 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 
log2_queue_sz, if (base == CLIB_MEM_VM_MAP_FAILED) { - err = clib_error_return (0, "mmap failure"); + rv = VNET_API_ERROR_SYSCALL_ERROR_3; goto done; } @@ -399,17 +460,17 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, sm->input_mode); done: - if (err) + if (rv) { if (base != CLIB_MEM_VM_MAP_FAILED) clib_mem_vm_unmap (base); if (fd != -1) close (fd); } - return err; + return rv; } -clib_error_t * +int snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, u32 sw_if_index, int is_enable, snort_attach_dir_t snort_dir) @@ -417,16 +478,16 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, snort_main_t *sm = &snort_main; vnet_main_t *vnm = vnet_get_main (); snort_instance_t *si; - clib_error_t *err = 0; u64 fa_data; u32 index; + int rv = 0; if (is_enable) { if ((si = snort_get_instance_by_name (instance_name)) == 0) { - err = clib_error_return (0, "unknown instance '%s'", instance_name); - goto done; + log_err ("unknown instance '%s'", instance_name); + return VNET_API_ERROR_NO_SUCH_ENTRY; } vec_validate_init_empty (sm->instance_by_sw_if_index, sw_if_index, ~0); @@ -434,12 +495,13 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, index = sm->instance_by_sw_if_index[sw_if_index]; if (index != ~0) { + if (index == si->index) + rv = VNET_API_ERROR_FEATURE_ALREADY_ENABLED; + else + rv = VNET_API_ERROR_INSTANCE_IN_USE; si = vec_elt_at_index (sm->instances, index); - err = clib_error_return (0, - "interface %U already assgined to " - "instance '%s'", - format_vnet_sw_if_index_name, vnm, - sw_if_index, si->name); + log_err ("interface %U already assgined to instance '%s'", + format_vnet_sw_if_index_name, vnm, sw_if_index, si->name); goto done; } @@ -462,11 +524,9 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, if (sw_if_index >= vec_len (sm->instance_by_sw_if_index) || sm->instance_by_sw_if_index[sw_if_index] == ~0) { - err = - clib_error_return (0, - "interface %U is 
not assigned to snort " - "instance!", - format_vnet_sw_if_index_name, vnm, sw_if_index); + rv = VNET_API_ERROR_INVALID_INTERFACE; + log_err ("interface %U is not assigned to snort instance!", + format_vnet_sw_if_index_name, vnm, sw_if_index); goto done; } index = sm->instance_by_sw_if_index[sw_if_index]; @@ -488,12 +548,66 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, } done: - if (err) - log_err ("%U", format_clib_error, err); - return 0; + return rv; } -clib_error_t * +static int +snort_strip_instance_interfaces (vlib_main_t *vm, u32 instance_index) +{ + snort_main_t *sm = &snort_main; + u32 *index; + int rv = 0; + + vec_foreach (index, sm->instance_by_sw_if_index) + { + if (*index == instance_index) + rv = snort_interface_enable_disable ( + vm, NULL, index - sm->instance_by_sw_if_index, 0, 0); + if (rv) + break; + } + + return rv; +} + +int +snort_instance_delete (vlib_main_t *vm, u32 instance_index) +{ + snort_main_t *sm = &snort_main; + snort_instance_t *si; + snort_qpair_t *qp; + int rv = 0; + + si = snort_get_instance_by_index (instance_index); + if (!si) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (si->client_index != ~0) + return VNET_API_ERROR_INSTANCE_IN_USE; + + if ((rv = snort_strip_instance_interfaces (vm, si->index))) + return rv; + + hash_unset_mem (sm->instance_by_name, si->name); + + clib_mem_vm_unmap (si->shm_base); + close (si->shm_fd); + + vec_foreach (qp, si->qpairs) + { + clib_file_del_by_index (&file_main, qp->deq_fd_file_index); + } + + log_debug ("deleting instance '%s'", si->name); + + vec_free (si->qpairs); + vec_free (si->name); + pool_put (sm->instances, si); + + return rv; +} + +int snort_set_node_mode (vlib_main_t *vm, u32 mode) { int i; diff --git a/src/plugins/snort/snort.api b/src/plugins/snort/snort.api new file mode 100644 index 00000000000..5c65f79e68a --- /dev/null +++ b/src/plugins/snort/snort.api @@ -0,0 +1,226 @@ +option version = "1.0.0"; + +import "vnet/interface_types.api"; +import 
"vnet/ip/ip_types.api"; + +define snort_instance_create { + u32 client_index; + u32 context; + u32 queue_size; + u8 drop_on_disconnect; + string name[]; +}; + +define snort_instance_create_reply { + u32 context; + i32 retval; + u32 instance_index; +}; + +define snort_instance_delete { + u32 client_index; + u32 context; + u32 instance_index; +}; + +define snort_instance_delete_reply { + u32 context; + i32 retval; +}; + +define snort_client_disconnect { + u32 client_index; + u32 context; + u32 snort_client_index; +}; + +define snort_client_disconnect_reply { + u32 context; + i32 retval; +}; + +define snort_instance_disconnect { + u32 client_index; + u32 context; + u32 instance_index; +}; + +define snort_instance_disconnect_reply { + u32 context; + i32 retval; +}; + +define snort_interface_attach { + u32 client_index; + u32 context; + u32 instance_index; + u32 sw_if_index; + u8 snort_dir; +}; + +define snort_interface_attach_reply { + u32 context; + i32 retval; +}; + +define snort_interface_detach { + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +define snort_interface_detach_reply { + u32 context; + i32 retval; +}; + +define snort_input_mode_get { + u32 client_index; + u32 context; +}; + +define snort_input_mode_get_reply { + u32 context; + i32 retval; + u32 snort_mode; +}; + +define snort_input_mode_set { + u32 client_index; + u32 context; + u8 input_mode; +}; + +define snort_input_mode_set_reply { + u32 context; + i32 retval; +}; + +service { + rpc snort_instance_get returns snort_instance_get_reply + stream snort_instance_details; +}; + +/** \brief Get snort instance(s). + @param client_index - opaque cookie to identify the sender. + @param context - sender context + @param cursor - current iterator value (all requested). + @param instance_index - instance index (~0 for all). +*/ +define snort_instance_get +{ + u32 client_index; + u32 context; + u32 cursor; + u32 instance_index; +}; + +/** \brief Reply for snort instance(s). 
+ @param context - sender context + @param retval - return code for the request. + @param cursor - iterator value to continue with (if there is more). +*/ +define snort_instance_get_reply +{ + u32 context; + i32 retval; + u32 cursor; +}; + +/** \brief Details of a snort instance. + @param context - sender context + @param instance - snort instance info. +*/ +define snort_instance_details { + u32 context; + u32 instance_index; + u32 shm_size; + u32 shm_fd; + u8 drop_on_disconnect; + u32 snort_client_index; + string name[]; +}; + +service { + rpc snort_interface_get returns snort_interface_get_reply + stream snort_interface_details; +}; + +/** \brief Get snort interface(s). + @param client_index - opaque cookie to identify the sender. + @param context - sender context + @param cursor - current iterator value (all requested). + @param sw_if_index - sw if index (~0 for all). +*/ +define snort_interface_get +{ + u32 client_index; + u32 context; + u32 cursor; + u32 sw_if_index; +}; + +/** \brief Reply for snort interface(s). + @param context - sender context + @param retval - return code for the request. + @param cursor - iterator value to continue with (if there is more). +*/ +define snort_interface_get_reply +{ + u32 context; + i32 retval; + u32 cursor; +}; + +/** \brief Details of a snort interface. + @param context - sender context + @param sw_if_index - interface index + @param instance_index - snort instance the interface is attached to. +*/ +define snort_interface_details { + u32 context; + u32 sw_if_index; + u32 instance_index; +}; + +service { + rpc snort_client_get returns snort_client_get_reply + stream snort_client_details; +}; + +/** \brief Get snort clients. + @param client_index - opaque cookie to identify the sender. + @param context - sender context + @param cursor - current iterator value (all requested). + @param client_index (~0 for all). 
+*/ +define snort_client_get +{ + u32 client_index; + u32 context; + u32 cursor; + u32 snort_client_index; +}; + +/** \brief Reply for snort clients. + @param context - sender context + @param retval - return code for the request. + @param cursor - iterator value to continue with (if there is more). +*/ +define snort_client_get_reply +{ + u32 context; + i32 retval; + u32 cursor; +}; + +/** \brief Details of a snort client. + @param context - sender context + @param client index + @param instance_index - snort instance of the client. +*/ +define snort_client_details { + u32 context; + u32 client_index; + u32 instance_index; +}; diff --git a/src/plugins/snort/snort.h b/src/plugins/snort/snort.h index 79299aa6d91..c7e856c0127 100644 --- a/src/plugins/snort/snort.h +++ b/src/plugins/snort/snort.h @@ -7,6 +7,7 @@ #include <vppinfra/error.h> #include <vppinfra/socket.h> +#include <vppinfra/file.h> #include <vlib/vlib.h> #include <snort/daq_vpp.h> @@ -78,8 +79,11 @@ typedef struct snort_per_thread_data_t *per_thread_data; u32 input_mode; u8 *socket_name; + /* API message ID base */ + u16 msg_id_base; } snort_main_t; +extern clib_file_main_t file_main; extern snort_main_t snort_main; extern vlib_node_registration_t snort_enq_node; extern vlib_node_registration_t snort_deq_node; @@ -103,13 +107,17 @@ typedef enum } /* functions */ -clib_error_t *snort_instance_create (vlib_main_t *vm, char *name, - u8 log2_queue_sz, u8 drop_on_disconnect); -clib_error_t *snort_interface_enable_disable (vlib_main_t *vm, - char *instance_name, - u32 sw_if_index, int is_enable, - snort_attach_dir_t dir); -clib_error_t *snort_set_node_mode (vlib_main_t *vm, u32 mode); +snort_main_t *snort_get_main (); +snort_instance_t *snort_get_instance_by_index (u32 instance_index); +snort_instance_t *snort_get_instance_by_name (char *name); +int snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, + u8 drop_on_disconnect); +int snort_interface_enable_disable (vlib_main_t *vm, char 
*instance_name, + u32 sw_if_index, int is_enable, + snort_attach_dir_t dir); +int snort_set_node_mode (vlib_main_t *vm, u32 mode); +int snort_instance_delete (vlib_main_t *vm, u32 instance_index); +int snort_instance_disconnect (vlib_main_t *vm, u32 instance_index); always_inline void snort_freelist_init (u32 *fl) diff --git a/src/plugins/snort/snort_api.c b/src/plugins/snort/snort_api.c new file mode 100644 index 00000000000..adad0d8763f --- /dev/null +++ b/src/plugins/snort/snort_api.c @@ -0,0 +1,405 @@ +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <snort/snort.h> +#include <vlibapi/api_types.h> + +#include <snort/snort.api_enum.h> +#include <snort/snort.api_types.h> + +#include <vlibmemory/api.h> +#include <vnet/ip/ip_types_api.h> +#include <vnet/format_fns.h> +#include <vnet/api_errno.h> + +/** + * Base message ID fot the plugin + */ +static u32 snort_base_msg_id; +#define REPLY_MSG_ID_BASE snort_base_msg_id + +#include <vlibapi/api_helper_macros.h> + +#include <vnet/vnet.h> + +#include <vlibapi/api.h> +#include <sys/eventfd.h> + +VLIB_REGISTER_LOG_CLASS (snort_log, static) = { + .class_name = "snort", +}; + +#define log_debug(fmt, ...) vlib_log_debug (snort_log.class, fmt, __VA_ARGS__) +#define log_err(fmt, ...) 
vlib_log_err (snort_log.class, fmt, __VA_ARGS__) + +static void +vl_api_snort_instance_create_t_handler (vl_api_snort_instance_create_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_snort_instance_create_reply_t *rmp; + char *name = vl_api_from_api_to_new_c_string (&mp->name); + u32 queue_sz = clib_net_to_host_u32 (mp->queue_size); + u8 drop_on_disconnect = mp->drop_on_disconnect; + int rv = 0; + u32 instance_index = ~0; + snort_instance_t *si; + + rv = + snort_instance_create (vm, name, min_log2 (queue_sz), drop_on_disconnect); + + if ((si = snort_get_instance_by_name (name))) + { + instance_index = si->index; + } + + REPLY_MACRO2 (VL_API_SNORT_INSTANCE_CREATE_REPLY, ({ + rmp->instance_index = clib_host_to_net_u32 (instance_index); + })); +} + +static void +vl_api_snort_instance_delete_t_handler (vl_api_snort_instance_delete_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_snort_instance_delete_reply_t *rmp; + u32 instance_index = clib_net_to_host_u32 (mp->instance_index); + int rv; + + rv = snort_instance_delete (vm, instance_index); + + REPLY_MACRO (VL_API_SNORT_INSTANCE_DELETE_REPLY); +} + +static void +vl_api_snort_interface_attach_t_handler (vl_api_snort_interface_attach_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_snort_interface_attach_reply_t *rmp; + u32 instance_index = clib_net_to_host_u32 (mp->instance_index); + snort_instance_t *instance = 0; + u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); + u8 snort_dir = mp->snort_dir; + int rv = VNET_API_ERROR_NO_SUCH_ENTRY; + + if (sw_if_index == INDEX_INVALID) + rv = VNET_API_ERROR_NO_MATCHING_INTERFACE; + else + { + instance = snort_get_instance_by_index (instance_index); + if (instance) + rv = snort_interface_enable_disable (vm, (char *) instance->name, + sw_if_index, 1 /* is_enable */, + snort_dir); + } + + REPLY_MACRO (VL_API_SNORT_INTERFACE_ATTACH_REPLY); +} + +static void +send_snort_instance_details (const snort_instance_t *instance, + vl_api_registration_t *rp, u32 
context) +{ + vl_api_snort_instance_details_t *rmp; + u32 name_len = vec_len (instance->name); + + REPLY_MACRO_DETAILS5 ( + VL_API_SNORT_INSTANCE_DETAILS, name_len, rp, context, ({ + rmp->instance_index = clib_host_to_net_u32 (instance->index); + vl_api_vec_to_api_string (instance->name, &rmp->name); + rmp->snort_client_index = clib_host_to_net_u32 (instance->client_index); + rmp->shm_size = clib_host_to_net_u32 (instance->shm_size); + rmp->shm_fd = clib_host_to_net_u32 (instance->shm_fd); + rmp->drop_on_disconnect = instance->drop_on_disconnect; + })); +} + +static void +vl_api_snort_instance_get_t_handler (vl_api_snort_instance_get_t *mp) +{ + snort_main_t *sm = snort_get_main (); + snort_instance_t *instance = 0; + vl_api_snort_instance_get_reply_t *rmp; + u32 instance_index; + int rv = 0; + + instance_index = clib_net_to_host_u32 (mp->instance_index); + + if (instance_index == INDEX_INVALID) + { + /* clang-format off */ + REPLY_AND_DETAILS_MACRO ( + VL_API_SNORT_INSTANCE_GET_REPLY, sm->instances, ({ + instance = pool_elt_at_index (sm->instances, cursor); + send_snort_instance_details (instance, rp, mp->context); + })); + /* clang-format on */ + } + else + { + instance = snort_get_instance_by_index (instance_index); + + if (instance) + { + vl_api_registration_t *rp = + vl_api_client_index_to_registration (mp->client_index); + + if (rp == NULL) + { + return; + } + + send_snort_instance_details (instance, rp, mp->context); + } + else + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + } + + /* clang-format off */ + REPLY_MACRO2 (VL_API_SNORT_INSTANCE_GET_REPLY, ({ + rmp->cursor = INDEX_INVALID; + })); + /* clang-format on */ + } +} + +static void +send_snort_interface_details (u32 sw_if_index, u32 instance_index, + vl_api_registration_t *rp, u32 context) +{ + vl_api_snort_interface_details_t *rmp; + + if (instance_index != ~0) + { + REPLY_MACRO_DETAILS4 (VL_API_SNORT_INTERFACE_DETAILS, rp, context, ({ + rmp->instance_index = + clib_host_to_net_u32 (instance_index); + 
rmp->sw_if_index = + clib_host_to_net_u32 (sw_if_index); + })); + } +} + +static void +vl_api_snort_interface_get_t_handler (vl_api_snort_interface_get_t *mp) +{ + snort_main_t *sm = snort_get_main (); + vl_api_snort_interface_get_reply_t *rmp; + u32 sw_if_index; + u32 *index; + int rv = 0; + + sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); + + if (sw_if_index == INDEX_INVALID) + { + /* clang-format off */ + if (vec_len (sm->instance_by_sw_if_index) == 0) + { + REPLY_MACRO2 (VL_API_SNORT_INTERFACE_GET_REPLY, ({ rmp->cursor = ~0; })); + return; + } + + REPLY_AND_DETAILS_VEC_MACRO( + VL_API_SNORT_INTERFACE_GET_REPLY, + sm->instance_by_sw_if_index, + mp, rmp, rv, ({ + index = vec_elt_at_index (sm->instance_by_sw_if_index, cursor); + send_snort_interface_details (cursor, *index, rp, mp->context); + })) + /* clang-format on */ + } + else + { + index = vec_elt_at_index (sm->instance_by_sw_if_index, sw_if_index); + if (snort_get_instance_by_index (index[0])) + { + vl_api_registration_t *rp = + vl_api_client_index_to_registration (mp->client_index); + + if (rp == NULL) + { + return; + } + + send_snort_interface_details (sw_if_index, *index, rp, mp->context); + } + else + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + } + + /* clang-format off */ + REPLY_MACRO2 (VL_API_SNORT_INTERFACE_GET_REPLY, ({ + rmp->cursor = INDEX_INVALID; + })); + /* clang-format on */ + } +} + +static void +send_snort_client_details (const snort_client_t *client, + vl_api_registration_t *rp, u32 context) +{ + snort_main_t *sm = snort_get_main (); + vl_api_snort_client_details_t *rmp; + snort_instance_t *instance; + + if (client->instance_index == ~0) + { + return; + } + + instance = pool_elt_at_index (sm->instances, client->instance_index); + if (instance) + { + REPLY_MACRO_DETAILS4 (VL_API_SNORT_CLIENT_DETAILS, rp, context, ({ + rmp->instance_index = + clib_host_to_net_u32 (client->instance_index); + rmp->client_index = + clib_host_to_net_u32 (client - sm->clients); + })); + } +} + +static void 
+vl_api_snort_client_get_t_handler (vl_api_snort_client_get_t *mp) +{ + snort_main_t *sm = snort_get_main (); + snort_client_t *client; + vl_api_snort_client_get_reply_t *rmp; + u32 client_index; + int rv = 0; + + client_index = clib_net_to_host_u32 (mp->snort_client_index); + + if (client_index == INDEX_INVALID) + { + /* clang-format off */ + REPLY_AND_DETAILS_MACRO ( + VL_API_SNORT_CLIENT_GET_REPLY, sm->clients, ({ + client = pool_elt_at_index (sm->clients, cursor); + send_snort_client_details (client, rp, mp->context); + })); + /* clang-format on */ + } + else + { + client = pool_elt_at_index (sm->clients, client_index); + + if (client) + { + vl_api_registration_t *rp = + vl_api_client_index_to_registration (mp->client_index); + + if (rp == NULL) + { + return; + } + + send_snort_client_details (client, rp, mp->context); + } + else + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + } + + /* clang-format off */ + REPLY_MACRO2 (VL_API_SNORT_CLIENT_GET_REPLY, ({ + rmp->cursor = INDEX_INVALID; + })); + /* clang-format on */ + } +} + +static void +vl_api_snort_client_disconnect_t_handler (vl_api_snort_client_disconnect_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + snort_main_t *sm = snort_get_main (); + snort_client_t *client; + vl_api_snort_client_disconnect_reply_t *rmp; + u32 client_index = clib_net_to_host_u32 (mp->snort_client_index); + int rv = 0; + + if (pool_is_free_index (sm->clients, client_index)) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + } + else + { + client = pool_elt_at_index (sm->clients, client_index); + rv = snort_instance_disconnect (vm, client->instance_index); + } + + REPLY_MACRO (VL_API_SNORT_CLIENT_DISCONNECT_REPLY); +} + +static void +vl_api_snort_instance_disconnect_t_handler ( + vl_api_snort_instance_disconnect_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_snort_instance_disconnect_reply_t *rmp; + u32 instance_index = clib_net_to_host_u32 (mp->instance_index); + int rv = snort_instance_disconnect (vm, instance_index); + + REPLY_MACRO 
(VL_API_SNORT_INSTANCE_DISCONNECT_REPLY); +} + +static void +vl_api_snort_interface_detach_t_handler (vl_api_snort_interface_detach_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_snort_interface_detach_reply_t *rmp; + u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); + int rv = VNET_API_ERROR_NO_MATCHING_INTERFACE; + + if (sw_if_index != INDEX_INVALID) + rv = snort_interface_enable_disable (vm, NULL, sw_if_index, + 0 /* is_enable */, SNORT_INOUT); + + REPLY_MACRO (VL_API_SNORT_INTERFACE_DETACH_REPLY); +} + +static void +vl_api_snort_input_mode_get_t_handler (vl_api_snort_input_mode_get_t *mp) +{ + snort_main_t *sm = &snort_main; + vl_api_snort_input_mode_get_reply_t *rmp; + int rv = 0; + + REPLY_MACRO2 (VL_API_SNORT_INPUT_MODE_GET_REPLY, ({ + rmp->snort_mode = clib_host_to_net_u32 (sm->input_mode); + })); +} + +static void +vl_api_snort_input_mode_set_t_handler (vl_api_snort_input_mode_set_t *mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_snort_input_mode_set_reply_t *rmp; + u8 mode = mp->input_mode; + int rv = 0; + + if (mode != VLIB_NODE_STATE_INTERRUPT && mode != VLIB_NODE_STATE_POLLING) + { + clib_error_return (0, "invalid input mode %u", mode); + } + snort_set_node_mode (vm, mode); + + REPLY_MACRO (VL_API_SNORT_INPUT_MODE_SET_REPLY); +} + +/* API definitions */ +#include <snort/snort.api.c> + +clib_error_t * +snort_init_api (vlib_main_t *vm) +{ + /* Add our API messages to the global name_crc hash table */ + snort_base_msg_id = setup_message_id_table (); + + return NULL; +} + +VLIB_INIT_FUNCTION (snort_init_api); diff --git a/src/plugins/srmpls/CMakeLists.txt b/src/plugins/srmpls/CMakeLists.txt new file mode 100644 index 00000000000..25905d31e1b --- /dev/null +++ b/src/plugins/srmpls/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) 2024 Cisco and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(srmpls + SOURCES + sr_mpls_policy.c + sr_mpls_steering.c + sr_mpls_api.c + plugin.c + + INSTALL_HEADERS + sr_mpls.h + + API_FILES + sr_mpls.api + + # This might need to be VAT_AUTO_TEST? Not documented + API_TEST_SOURCES + sr_mpls_test.c +) diff --git a/src/plugins/srmpls/FEATURE.yaml b/src/plugins/srmpls/FEATURE.yaml new file mode 100644 index 00000000000..c5b958224c7 --- /dev/null +++ b/src/plugins/srmpls/FEATURE.yaml @@ -0,0 +1,9 @@ +--- +name: Segment Routing for MPLS +maintainer: Pablo Camarillo <pcamaril@cisco.com> +features: + - SR Policy support + - Automated steering (SR steering based on NextHop/Color) +description: "SR-MPLS" +state: production +properties: [API, CLI, MULTITHREAD] diff --git a/src/plugins/srmpls/dir.dox b/src/plugins/srmpls/dir.dox new file mode 100644 index 00000000000..76ec1d6a41b --- /dev/null +++ b/src/plugins/srmpls/dir.dox @@ -0,0 +1,22 @@ +/* + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + @dir + @brief Segment Routing MPLS code + + An implementation of Segment Routing for the MPLS dataplane. + +*/
\ No newline at end of file diff --git a/src/plugins/marvell/plugin.c b/src/plugins/srmpls/plugin.c index ed90776ba95..af87607764f 100644 --- a/src/plugins/marvell/plugin.c +++ b/src/plugins/srmpls/plugin.c @@ -1,6 +1,7 @@ /* - *------------------------------------------------------------------ - * Copyright (c) 2018 Cisco and/or its affiliates. + * plugin.c: srmpls + * + * Copyright (c) 2024 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -12,22 +13,14 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - *------------------------------------------------------------------ */ #include <vlib/vlib.h> #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> +// register a plugin VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, - .description = "Marvell PP2 Device Driver", + .description = "Segment Routing for MPLS plugin", }; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/srmpls/sr_doc.rst b/src/plugins/srmpls/sr_doc.rst new file mode 100644 index 00000000000..ed847fa0d42 --- /dev/null +++ b/src/plugins/srmpls/sr_doc.rst @@ -0,0 +1,215 @@ +.. _srmpls_doc: + +SR-MPLS: Segment Routing for MPLS +================================= + +This is a memo intended to contain documentation of the VPP SR-MPLS +implementation. Everything that is not directly obvious should come +here. 
For any feedback on content that should be explained please +mailto:pcamaril@cisco.com + +Segment Routing +--------------- + +Segment routing is a network technology focused on addressing the +limitations of existing IP and Multiprotocol Label Switching (MPLS) +networks in terms of simplicity, scale, and ease of operation. It is a +foundation for application engineered routing as it prepares the +networks for new business models where applications can control the +network behavior. + +Segment routing seeks the right balance between distributed intelligence +and centralized optimization and programming. It was built for the +software-defined networking (SDN) era. + +Segment routing enhances packet forwarding behavior by enabling a +network to transport unicast packets through a specific forwarding path, +different from the normal path that a packet usually takes (IGP shortest +path or BGP best path). This capability benefits many use cases, and one +can build those specific paths based on application requirements. + +Segment routing uses the source routing paradigm. A node, usually a +router but also a switch, a trusted server, or a virtual forwarder +running on a hypervisor, steers a packet through an ordered list of +instructions, called segments. A segment can represent any instruction, +topological or service-based. A segment can have a local semantic to a +segment-routing node or global within a segment-routing network. Segment +routing allows an operator to enforce a flow through any topological +path and service chain while maintaining per-flow state only at the +ingress node to the segment-routing network. Segment routing also +supports equal-cost multipath (ECMP) by design. + +Segment routing can operate with either an MPLS or an IPv6 data plane. 
+All the currently available MPLS services, such as Layer 3 VPN (L3VPN), +L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services +[VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet +VPN [PBB-EVPN]), can run on top of a segment-routing transport network. + +**The implementation of Segment Routing in VPP covers both the IPv6 data +plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page +contains the SR-MPLS documentation.** + +Segment Routing terminology +--------------------------- + +- SegmentID (SID): is an MPLS label. +- Segment List (SL) (SID List): is the sequence of SIDs that the packet + will traverse. +- SR Policy: is a set of candidate paths (SID list+weight). An SR + policy is uniquely identified by its Binding SID and associated with + a weighted set of Segment Lists. In case several SID lists are + defined, traffic steered into the policy is unevenly load-balanced + among them according to their respective weights. +- BindingSID: a BindingSID is a SID (only one) associated one-one with + an SR Policy. If a packet arrives with MPLS label corresponding to a + BindingSID, then the SR policy will be applied to such packet. + (BindingSID is popped first.) + +SR-MPLS features in VPP +----------------------- + +The SR-MPLS implementation is focused on the SR policies, as well on its +steering. Others SR-MPLS features, such as for example AdjSIDs, can be +achieved using the regular VPP MPLS implementation. + +The Segment Routing Policy +(*draft-filsfils-spring-segment-routing-policy*) defines SR Policies. + +Creating a SR Policy +-------------------- + +An SR Policy is defined by a Binding SID and a weighted set of Segment +Lists. + +A new SR policy is created with a first SID list using: + +:: + + sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5) + +- The weight parameter is only used if more than one SID list is + associated with the policy. 
+ +An SR policy is deleted with: + +:: + + sr mpls policy del bsid 40001 + +The existing SR policies are listed with: + +:: + + show sr mpls policies + +Adding/Removing SID Lists from an SR policy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +An additional SID list is associated with an existing SR policy with: + +:: + + sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3) + +Conversely, a SID list can be removed from an SR policy with: + +:: + + sr mpls policy mod bsid 4001 del sl index 1 + +Note that this CLI cannot be used to remove the last SID list of a +policy. Instead the SR policy delete CLI must be used. + +The weight of a SID list can also be modified with: + +:: + + sr mpls policy mod bsid 40001 mod sl index 1 weight 4 + +SR Policies: Spray policies +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spray policies are a specific type of SR policies where the packet is +replicated on all the SID lists, rather than load-balanced among them. + +SID list weights are ignored with this type of policies. + +A Spray policy is instantiated by appending the keyword **spray** to a +regular SR-MPLS policy command, as in: + +:: + + sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray + +Spray policies are used for removing multicast state from a network core +domain, and instead send a linear unicast copy to every access node. The +last SID in each list accesses the multicast tree within the access +node. + +Steering packets into a SR Policy +--------------------------------- + +Segment Routing supports three methods of steering traffic into an SR +policy. + +Local steering +~~~~~~~~~~~~~~ + +In this variant incoming packets match a routing policy which directs +them on a local SR policy. + +In order to achieve this behavior the user needs to create an ‘sr +steering policy via sr policy bsid’. 
+ +:: + + sr mpls steer l3 2001::/64 via sr policy bsid 40001 + sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3 + sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 + sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 vpn-label 500 + +Remote steering +~~~~~~~~~~~~~~~ + +In this variant incoming packets have an active SID matching a local +BSID at the head-end. + +In order to achieve this behavior the packets should simply arrive with +an active SID equal to the Binding SID of a locally instantiated SR +policy. + +Automated steering +~~~~~~~~~~~~~~~~~~ + +In this variant incoming packets match a BGP/Service route which +recurses on the BSID of a local policy. + +In order to achieve this behavior the user first needs to color the SR +policies. He can do so by using the CLI: + +:: + + sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234 + +Notice that an SR policy can have a single endpoint and a single color. +Notice that the *endpoint* value is an IP46 address and the color a u32. + +Then, for any BGP/Service route the user has to use the API to steer +prefixes: + +:: + + sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 + sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500 + +Notice that *co* refers to the CO-bits (values [0|1|2|3]). + +Notice also that a given prefix might be steered over several colors +(same next-hop and same co-bit value). In order to add new colors just +execute the API several times (or with the del parameter to delete the +color). + +This variant is meant to be used in conjunction with a control plane +agent that uses the underlying binary API bindings of +*sr_mpls_steering_policy_add*/*sr_mpls_steering_policy_del* for any BGP +service route received. 
diff --git a/src/plugins/srmpls/sr_mpls.api b/src/plugins/srmpls/sr_mpls.api new file mode 100644 index 00000000000..742f135d493 --- /dev/null +++ b/src/plugins/srmpls/sr_mpls.api @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. Licensed under the + * Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +option version = "3.0.0"; + +import "vnet/interface_types.api"; +import "vnet/ip/ip_types.api"; +import "vnet/srv6/sr_types.api"; + +/** \brief MPLS SR policy add + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid - is the bindingSID of the SR Policy. MPLS label (20bit) + @param weight - is the weight of the sid list. optional. + @param is_spray - is the type of the SR policy. (0.Default // 1.Spray) + @param segments - vector of labels (20bit) composing the segment list +*/ +autoreply define sr_mpls_policy_add +{ + u32 client_index; + u32 context; + u32 bsid; + u32 weight; + bool is_spray; + u8 n_segments; + u32 segments[n_segments]; +}; + +/** \brief MPLS SR policy modification + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy. 
MPLS label (20bit) + @param sr_policy_index is the index of the SR policy + @param fib_table is the VRF where to install the FIB entry for the BSID + @param operation is the operation to perform (among the top ones) + @param segments is a vector of MPLS labels composing the segment list + @param sl_index is the index of the Segment List to modify/delete + @param weight is the weight of the sid list. optional. + @param is_encap Mode. Encapsulation or SRH insertion. +*/ +autoreply define sr_mpls_policy_mod +{ + u32 client_index; + u32 context; + u32 bsid; + vl_api_sr_policy_op_t operation; + u32 sl_index; + u32 weight; + u8 n_segments; + u32 segments[n_segments]; +}; + +/** \brief MPLS SR policy deletion + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy. MPLS label (20bit) +*/ +autoreply define sr_mpls_policy_del +{ + u32 client_index; + u32 context; + u32 bsid; +}; + +/** \brief MPLS SR steering add/del + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_del + @param bsid - is the bindingSID of the SR Policy (~0 is no bsid) + @param table_id - is the VRF where to install the FIB entry for the BSID + @param prefix - is the IPv4/v6 address for L3 traffic type. + @param mask_width - is the mask for L3 traffic type + @param next_hop - describes the next_hop (in case no BSID) + @param color - describes the color + @param co_bits - are the CO_bits of the steering policy + @param vpn_label - is an additonal last VPN label. 
(~0 is no label) +*/ +autoreply define sr_mpls_steering_add_del +{ + u32 client_index; + u32 context; + bool is_del[default = false]; + u32 bsid; + u32 table_id; + vl_api_prefix_t prefix; + u32 mask_width; + vl_api_address_t next_hop; + u32 color; + u8 co_bits; + u32 vpn_label; +}; + +/** \brief MPLS SR steering add/del + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param endpoint is the endpoint of the SR policy + @param color is the color of the sr policy +*/ +autoreply define sr_mpls_policy_assign_endpoint_color +{ + u32 client_index; + u32 context; + u32 bsid; + vl_api_address_t endpoint; + u32 color; +}; + +/* + * fd.io coding-style-patch-verification: ON Local Variables: eval: + * (c-set-style "gnu") End: + */ diff --git a/src/plugins/srmpls/sr_mpls.h b/src/plugins/srmpls/sr_mpls.h new file mode 100644 index 00000000000..a8f9494428f --- /dev/null +++ b/src/plugins/srmpls/sr_mpls.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. Licensed under the Apache + * License, Version 2.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * @file + * @brief Segment Routing MPLS data structures definitions + * + */ + +#ifndef included_vnet_srmpls_h +#define included_vnet_srmpls_h + +#include <vnet/vnet.h> +#include <vnet/mpls/packet.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/lookup.h> +#include <vnet/dpo/dpo.h> +#include <vnet/dpo/replicate_dpo.h> + +#include <stdlib.h> +#include <string.h> + +/* SR policy types */ +#define SR_POLICY_TYPE_DEFAULT 0 +#define SR_POLICY_TYPE_SPRAY 1 + +#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 + +#define SR_STEER_IPV4 4 +#define SR_STEER_IPV6 6 + +#define SR_TE_CO_BITS_00 0 +#define SR_TE_CO_BITS_01 1 +#define SR_TE_CO_BITS_10 2 +#define SR_TE_CO_BITS_11 3 + +/** + * @brief SR Segment List (SID list) + */ +typedef struct +{ + /* SIDs (key) */ + mpls_label_t *segments; + + /* SID list weight (wECMP / UCMP) */ + u32 weight; + +} mpls_sr_sl_t; + +typedef struct +{ + u32 *segments_lists; /**< Pool of SID lists indexes */ + + mpls_label_t bsid; /**< BindingSID (key) */ + + u8 type; /**< Type (default is 0) */ + /* SR Policy specific DPO */ + /* IF Type = DEFAULT Then Load-Balancer DPO among SID lists */ + /* IF Type = SPRAY then Spray DPO with all SID lists */ + + ip46_address_t endpoint; /**< Optional NH for SR TE */ + u8 endpoint_type; + u32 color; /**< Optional color for SR TE */ +} mpls_sr_policy_t; + +/** + * @brief Steering db key + * + * L3 is IPv4/IPv6 + mask + */ +typedef struct +{ + ip46_address_t prefix; /**< IP address of the prefix */ + u32 mask_width; /**< Mask width of the prefix */ + u32 fib_table; /**< VRF of the prefix */ + u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */ + u8 padding[3]; +} sr_mpls_steering_key_t; + +typedef struct +{ + sr_mpls_steering_key_t classify; /**< Traffic classification */ + mpls_label_t bsid; /**< SR Policy index */ + ip46_address_t next_hop; /**< SR TE NH */ + char nh_type; + u32 *color; /**< Vector of SR TE colors */ + char co_bits; /**< Color-Only bits */ + mpls_label_t 
vpn_label; +} mpls_sr_steering_policy_t; + +/** + * @brief Segment Routing main datastructure + */ +typedef struct +{ + /* SR SID lists */ + mpls_sr_sl_t *sid_lists; + + /* SR MPLS policies */ + mpls_sr_policy_t *sr_policies; + + /* Hash table mapping BindingSID to SR MPLS policy */ + uword *sr_policies_index_hash; + + /* Pool of SR steer policies instances */ + mpls_sr_steering_policy_t *steer_policies; + + /* MHash table mapping steering rules to SR steer instance */ + mhash_t sr_steer_policies_hash; + + /** SR TE **/ + /* Hash table mapping (Color->Endpoint->BSID) for SR policies */ + mhash_t sr_policies_c2e2eclabel_hash; + /* SR TE (internal) fib table (Endpoint, Color) */ + u32 fib_table_EC; + /* Pool of (Endpoint, Color) hidden labels */ + u32 *ec_labels; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} mpls_sr_main_t; + +extern mpls_sr_main_t sr_mpls_main; + +extern int +sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments, + u8 behavior, u32 weight); + +extern int +sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, + mpls_label_t * segments, u32 sl_index, u32 weight); + +extern int sr_mpls_policy_del (mpls_label_t bsid); + +extern int +sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid, + ip46_address_t * endpoint, + u8 endpoint_type, u32 color); + +extern int +sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id, + ip46_address_t * prefix, u32 mask_width, + u8 traffic_type, ip46_address_t * next_hop, + u8 nh_type, u32 color, char co_bits, + mpls_label_t vpn_label); + +extern int +sr_mpls_steering_policy_del (ip46_address_t * prefix, + u32 mask_width, u8 traffic_type, u32 table_id, + u32 color); + +extern u32 find_or_create_internal_label (ip46_address_t endpoint, u32 color); + +extern void internal_label_lock (ip46_address_t endpoint, u32 color); + +extern void internal_label_unlock (ip46_address_t endpoint, u32 color); + +#endif /* included_vnet_sr_mpls_h */ + +/* + * fd.io 
coding-style-patch-verification: ON + * + * Local Variables: eval: (c-set-style "gnu") End: + */ diff --git a/src/plugins/srmpls/sr_mpls_api.c b/src/plugins/srmpls/sr_mpls_api.c new file mode 100644 index 00000000000..3e89017dbc1 --- /dev/null +++ b/src/plugins/srmpls/sr_mpls_api.c @@ -0,0 +1,257 @@ +/* + * ------------------------------------------------------------------ + * sr_api.c - ipv6 segment routing api + * + * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache + * License, Version 2.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * ------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include "sr_mpls.h" +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/feature/feature.h> +#include <vnet/ip/ip_types_api.h> + +#include <vnet/format_fns.h> +#include <plugins/srmpls/sr_mpls.api_enum.h> +#include <plugins/srmpls/sr_mpls.api_types.h> + +#define vl_api_version(n, v) static u32 api_version = v; +#include <plugins/srmpls/sr_mpls.api.h> +#undef vl_api_version + +#define vl_endianfun +#include <plugins/srmpls/sr_mpls.api.h> +#undef vl_endianfun + +#define vl_calcsizefun +#include <plugins/srmpls/sr_mpls.api.h> +#undef vl_calcsizefun + +#define vl_printfun +#include <plugins/srmpls/sr_mpls.api.h> +#undef vl_printfun + +#define vl_msg_name_crc_list +#include <plugins/srmpls/sr_mpls.api.h> +#undef vl_msg_name_crc_list + +#define REPLY_MSG_ID_BASE msg_id_base +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(SR_MPLS_POLICY_DEL, sr_mpls_policy_del) \ +_(SR_MPLS_STEERING_ADD_DEL, sr_mpls_steering_add_del) \ +_(SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR, sr_mpls_policy_assign_endpoint_color) + +static u16 msg_id_base; + +static void +vl_api_sr_mpls_policy_add_t_handler (vl_api_sr_mpls_policy_add_t * mp) +{ + vl_api_sr_mpls_policy_add_reply_t *rmp; + + mpls_label_t *segments = 0, *seg; + mpls_label_t this_address = 0; + + int i; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + this_address = ntohl (mp->segments[i]); + clib_memcpy (seg, &this_address, sizeof (this_address)); + } + + int rv = 0; + rv = sr_mpls_policy_add (ntohl (mp->bsid), + segments, mp->is_spray, ntohl (mp->weight)); + vec_free (segments); + + REPLY_MACRO (VL_API_SR_MPLS_POLICY_ADD_REPLY); +} + +static void +vl_api_sr_mpls_policy_mod_t_handler (vl_api_sr_mpls_policy_mod_t * mp) +{ + vl_api_sr_mpls_policy_mod_reply_t *rmp; + + mpls_label_t *segments = 0, *seg; + mpls_label_t this_address = 0; + 
+ int i; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + this_address = ntohl (mp->segments[i]); + clib_memcpy (seg, &this_address, sizeof (this_address)); + } + + int rv = 0; + rv = sr_mpls_policy_mod (ntohl (mp->bsid), + ntohl (mp->operation), segments, + ntohl (mp->sl_index), ntohl (mp->weight)); + vec_free (segments); + + REPLY_MACRO (VL_API_SR_MPLS_POLICY_MOD_REPLY); +} + +static void +vl_api_sr_mpls_policy_del_t_handler (vl_api_sr_mpls_policy_del_t * mp) +{ + vl_api_sr_mpls_policy_del_reply_t *rmp; + int rv = 0; + rv = sr_mpls_policy_del (ntohl (mp->bsid)); + + REPLY_MACRO (VL_API_SR_MPLS_POLICY_DEL_REPLY); +} + +static void vl_api_sr_mpls_steering_add_del_t_handler + (vl_api_sr_mpls_steering_add_del_t * mp) +{ + vl_api_sr_mpls_steering_add_del_reply_t *rmp; + fib_prefix_t prefix; + ip46_address_t next_hop; + clib_memset (&prefix, 0, sizeof (ip46_address_t)); + + ip_prefix_decode (&mp->prefix, &prefix); + ip_address_decode (&mp->next_hop, &next_hop); + + int rv = 0; + if (mp->is_del) + rv = sr_mpls_steering_policy_del (&prefix.fp_addr, + prefix.fp_len, + ip46_address_is_ip4 (&prefix.fp_addr) ? + SR_STEER_IPV4 : SR_STEER_IPV6, + ntohl (mp->table_id), + ntohl (mp->color)); + else + rv = sr_mpls_steering_policy_add (ntohl (mp->bsid), + ntohl (mp->table_id), + &prefix.fp_addr, + prefix.fp_len, + ip46_address_is_ip4 (&prefix.fp_addr) ? + SR_STEER_IPV4 : SR_STEER_IPV6, + &next_hop, + ip46_address_is_ip4 (&next_hop) ? 
+ SR_STEER_IPV4 : SR_STEER_IPV6, + ntohl (mp->color), mp->co_bits, + ntohl (mp->vpn_label)); + + REPLY_MACRO (VL_API_SR_MPLS_STEERING_ADD_DEL_REPLY); +} + +static void vl_api_sr_mpls_policy_assign_endpoint_color_t_handler + (vl_api_sr_mpls_policy_assign_endpoint_color_t * mp) +{ + vl_api_sr_mpls_policy_assign_endpoint_color_reply_t *rmp; + int rv = 0; + + ip46_address_t endpoint; + clib_memset (&endpoint, 0, sizeof (ip46_address_t)); + ip_address_decode (&mp->endpoint, &endpoint); + + rv = sr_mpls_policy_assign_endpoint_color (ntohl (mp->bsid), + &endpoint, + ip46_address_is_ip4 (&endpoint) ? + SR_STEER_IPV4 : SR_STEER_IPV6, + ntohl (mp->color)); + + REPLY_MACRO (VL_API_SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR_REPLY); +} + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id, n, crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + REPLY_MSG_ID_BASE); + foreach_vl_msg_name_crc_sr_mpls; +#undef _ +} + +static clib_error_t * +sr_mpls_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = vlibapi_get_main (); + + u8 *name = format (0, "sr_mpls_%08x%c", api_version, 0); + REPLY_MSG_ID_BASE = + vl_msg_api_get_msg_ids ((char *) name, VL_MSG_SR_MPLS_LAST); + vec_free (name); + +#define _(N, n) \ + vl_msg_api_config (&(vl_msg_api_msg_config_t){ \ + .id = REPLY_MSG_ID_BASE + VL_API_##N, \ + .name = #n, \ + .handler = vl_api_##n##_t_handler, \ + .endian = vl_api_##n##_t_endian, \ + .format_fn = vl_api_##n##_t_format, \ + .size = sizeof (vl_api_##n##_t), \ + .traced = 1, \ + .tojson = vl_api_##n##_t_tojson, \ + .fromjson = vl_api_##n##_t_fromjson, \ + .calc_size = vl_api_##n##_t_calc_size, \ + }); + foreach_vpe_api_msg; +#undef _ + + /* + * Manually register the sr policy add msg, so we trace enough bytes + * to capture a typical segment list + */ + vl_msg_api_config (&(vl_msg_api_msg_config_t){ + .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD, + .name = "sr_mpls_policy_add", + .handler = vl_api_sr_mpls_policy_add_t_handler, + .endian = 
vl_api_sr_mpls_policy_add_t_endian, + .format_fn = vl_api_sr_mpls_policy_add_t_format, + .size = 256, + .traced = 1, + .tojson = vl_api_sr_mpls_policy_add_t_tojson, + .fromjson = vl_api_sr_mpls_policy_add_t_fromjson, + .calc_size = vl_api_sr_mpls_policy_add_t_calc_size, + }); + /* + * Manually register the sr policy mod msg, so we trace enough bytes + * to capture a typical segment list + */ + vl_msg_api_config (&(vl_msg_api_msg_config_t){ + .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD, + .name = "sr_mpls_policy_mod", + .handler = vl_api_sr_mpls_policy_mod_t_handler, + .endian = vl_api_sr_mpls_policy_mod_t_endian, + .format_fn = vl_api_sr_mpls_policy_mod_t_format, + .size = 256, + .traced = 1, + .tojson = vl_api_sr_mpls_policy_mod_t_tojson, + .fromjson = vl_api_sr_mpls_policy_mod_t_fromjson, + .calc_size = vl_api_sr_mpls_policy_mod_t_calc_size, + }); + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (sr_mpls_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: eval: (c-set-style "gnu") End: + */ diff --git a/src/plugins/srmpls/sr_mpls_policy.c b/src/plugins/srmpls/sr_mpls_policy.c new file mode 100644 index 00000000000..af24acd8cf6 --- /dev/null +++ b/src/plugins/srmpls/sr_mpls_policy.c @@ -0,0 +1,903 @@ +/* + * sr_mpls_policy.c: SR-MPLS policies + * + * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache + * License, Version 2.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +/** + * @file + * @brief SR MPLS policy creation and application + * + * Create an SR policy. + * An SR policy can be either of 'default' type or 'spray' type + * An SR policy has attached a list of SID lists. + * In case the SR policy is a default one it will load balance among them. + * An SR policy has associated a BindingSID. + * In case any packet arrives with MPLS_label == BindingSID then the SR policy + * associated to such bindingSID will be applied to such packet. + * Also, a BSID can be associated with a (Next-Hop, Color) + * + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include "sr_mpls.h" +#include <vnet/fib/mpls_fib.h> +#include <vnet/dpo/dpo.h> +#include <vnet/ip/ip.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +mpls_sr_main_t sr_mpls_main; + +/*************************** SR LB helper functions **************************/ +/** + * @brief Creates a Segment List and adds it to an SR policy + * + * Creates a Segment List and adds it to the SR policy. Notice that the SL are + * not necessarily unique. Hence there might be two Segment List within the + * same SR Policy with exactly the same segments and same weight. 
+ * + * @param sr_policy is the SR policy where the SL will be added + * @param sl is a vector of IPv6 addresses composing the Segment List + * @param weight is the weight of the SegmentList (for load-balancing purposes) + * @param is_encap represents the mode (SRH insertion vs Encapsulation) + * + * @return pointer to the just created segment list + */ +static inline mpls_sr_sl_t * +create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_sl_t *segment_list; + u32 ii; + + pool_get (sm->sid_lists, segment_list); + clib_memset (segment_list, 0, sizeof (*segment_list)); + + vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists); + + /* Fill in segment list */ + segment_list->weight = + (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT); + segment_list->segments = vec_dup (sl); + + mpls_eos_bit_t eos; + FOR_EACH_MPLS_EOS_BIT (eos) + { + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = NULL, + .frp_local_label = sl[0], + }; + + if (vec_len (sl) > 1) + { + vec_validate (path.frp_label_stack, vec_len (sl) - 2); + for (ii = 1; ii < vec_len (sl); ii++) + { + path.frp_label_stack[ii - 1].fml_value = sl[ii]; + } + } + else + { + /* + * add an impliciet NULL label to allow non-eos recursion + */ + fib_mpls_label_t lbl = { + .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL, + }; + vec_add1 (path.frp_label_stack, lbl); + } + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + fib_table_entry_path_add2 (0, + &pfx, + FIB_SOURCE_SR, + (sr_policy->type == SR_POLICY_TYPE_DEFAULT ? 
+ FIB_ENTRY_FLAG_NONE : + FIB_ENTRY_FLAG_MULTICAST), paths); + vec_free (paths); + } + + return segment_list; +} + +/******************************* SR rewrite API *******************************/ +/* + * Three functions for handling sr policies: -> sr_mpls_policy_add -> + * sr_mpls_policy_del -> sr_mpls_policy_mod All of them are API. CLI function + * on sr_policy_command_fn + */ + +/** + * @brief Create a new SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param segments is a vector of MPLS labels composing the segment list + * @param behavior is the behavior of the SR policy. (default//spray) + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param weight is the weight of this specific SID list + * + * @return 0 if correct, else error + */ +int +sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments, + u8 behavior, u32 weight) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + uword *p; + + if (!sm->sr_policies_index_hash) + sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t)); + + /* MPLS SR policies cannot be created unless the MPLS table is present */ + if (~0 == fib_table_find (FIB_PROTOCOL_MPLS, MPLS_FIB_DEFAULT_TABLE_ID)) + return (VNET_API_ERROR_NO_SUCH_TABLE); + + /* Search for existing keys (BSID) */ + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + { + /* Add SR policy that already exists; complain */ + return -12; + } + /* Add an SR policy object */ + pool_get (sm->sr_policies, sr_policy); + clib_memset (sr_policy, 0, sizeof (*sr_policy)); + + /* the first policy needs to lock the MPLS table so it doesn't + * disappear with policies in it */ + if (1 == pool_elts (sm->sr_policies)) + fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID, + FIB_SOURCE_SR); + sr_policy->bsid = bsid; + sr_policy->type = behavior; + sr_policy->endpoint_type = 0; + ip6_address_set_zero (&sr_policy->endpoint.ip6); + sr_policy->color = 
(u32) ~ 0; + + /* Copy the key */ + hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies); + + /* Create a segment list and add the index to the SR policy */ + create_sl (sr_policy, segments, weight); + + return 0; +} + +/** + * @brief Delete a SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * + * @return 0 if correct, else error + */ +int +sr_mpls_policy_del (mpls_label_t bsid) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + mpls_sr_sl_t *segment_list; + mpls_eos_bit_t eos; + u32 *sl_index; + uword *p; + + if (!sm->sr_policies_index_hash) + sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t)); + + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + + /* Clean SID Lists */ + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_local_label = segment_list->segments[0], + }; + + vec_add (path.frp_label_stack, segment_list + 1, + vec_len (segment_list) - 1); + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + /* remove each of the MPLS routes */ + FOR_EACH_MPLS_EOS_BIT (eos) + { + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); + } + vec_free (paths); + vec_free (segment_list->segments); + pool_put_index (sm->sid_lists, *sl_index); + } + + /* If there is still traces of TE, make sure locks are released */ + if (sr_policy->endpoint_type != 0 && sr_policy->color != (u32) ~ 0) + { + sr_mpls_policy_assign_endpoint_color 
(bsid, NULL, 0, (u32) ~ 0); + } + + /* Remove SR policy entry */ + hash_unset (sm->sr_policies_index_hash, sr_policy->bsid); + pool_put (sm->sr_policies, sr_policy); + + if (0 == pool_elts (sm->sr_policies)) + fib_table_unlock (MPLS_FIB_DEFAULT_TABLE_ID, + FIB_PROTOCOL_MPLS, FIB_SOURCE_SR); + + return 0; +} + +/** + * @brief Modify an existing SR policy + * + * The possible modifications are adding a new Segment List, modifying an + * existing Segment List (modify the weight only) and delete a given + * Segment List from the SR Policy. + * + * @param bsid is the bindingSID of the SR Policy + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param operation is the operation to perform (among the top ones) + * @param segments is a vector of IPv6 address composing the segment list + * @param sl_index is the index of the Segment List to modify/delete + * @param weight is the weight of the sid list. optional. + * + * @return 0 ok, >0 index of SL, <0 error + */ +int +sr_mpls_policy_mod (mpls_label_t bsid, u8 operation, + mpls_label_t * segments, u32 sl_index, u32 weight) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + mpls_sr_sl_t *segment_list; + u32 *sl_index_iterate; + uword *p; + + if (!sm->sr_policies_index_hash) + sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t)); + + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + + if (operation == 1) + { /* Add SR List to an existing SR policy */ + /* Create the new SL */ + segment_list = create_sl (sr_policy, segments, weight); + return segment_list - sm->sid_lists; + } + else if (operation == 2) + { /* Delete SR List from an existing SR + * policy */ + /* Check that currently there are more than one SID list */ + if (vec_len (sr_policy->segments_lists) == 1) + return -21; + + /* + * Check that the SR list does exist and is assigned to the + * sr policy + */ + 
vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -22; + + /* Remove the lucky SR list that is being kicked out */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + + mpls_eos_bit_t eos; + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_local_label = segment_list->segments[0], + }; + + vec_add (path.frp_label_stack, segment_list + 1, + vec_len (segment_list) - 1); + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + FOR_EACH_MPLS_EOS_BIT (eos) + { + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); + } + + vec_free (paths); + vec_free (segment_list->segments); + pool_put_index (sm->sid_lists, sl_index); + vec_del1 (sr_policy->segments_lists, + sl_index_iterate - sr_policy->segments_lists); + } + else if (operation == 3) + { /* Modify the weight of an existing + * SR List */ + /* Find the corresponding SL */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -32; + + /* Change the weight */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + + /* Update LB */ + mpls_eos_bit_t eos; + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_local_label = segment_list->segments[0], + }; + + vec_add (path.frp_label_stack, segment_list + 1, + vec_len (segment_list) - 1); + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + FOR_EACH_MPLS_EOS_BIT (eos) + { + fib_prefix_t pfx = 
{ + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); + } + + segment_list->weight = weight; + + path.frp_weight = segment_list->weight; + + vec_free (paths); + paths = NULL; + vec_add1 (paths, path); + + FOR_EACH_MPLS_EOS_BIT (eos) + { + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + fib_table_entry_path_add2 (0, + &pfx, + FIB_SOURCE_SR, + (sr_policy->type == + SR_POLICY_TYPE_DEFAULT ? + FIB_ENTRY_FLAG_NONE : + FIB_ENTRY_FLAG_MULTICAST), paths); + } + } + return 0; +} + +/** + * @brief CLI for 'sr mpls policies' command family + */ +static clib_error_t * +sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv = -1; + char is_del = 0, is_add = 0, is_mod = 0; + char policy_set = 0; + mpls_label_t bsid, next_label; + u32 sl_index = (u32) ~ 0; + u32 weight = (u32) ~ 0; + mpls_label_t *segments = 0; + u8 operation = 0; + u8 is_spray = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!is_add && !is_mod && !is_del && unformat (input, "add")) + is_add = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "del")) + is_del = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "mod")) + is_mod = 1; + else if (!policy_set + && unformat (input, "bsid %U", unformat_mpls_unicast_label, + &bsid)) + policy_set = 1; + else if (unformat (input, "weight %d", &weight)); + else if (unformat + (input, "next %U", unformat_mpls_unicast_label, &next_label)) + { + vec_add (segments, &next_label, 1); + } + else if (unformat (input, "add sl")) + operation = 1; + else if (unformat (input, "del sl index %d", &sl_index)) + operation = 2; + else if (unformat (input, "mod sl index %d", &sl_index)) + operation = 3; + else 
if (unformat (input, "spray")) + is_spray = 1; + else + break; + } + + if (!is_add && !is_mod && !is_del) + return clib_error_return (0, "Incorrect CLI"); + + if (!policy_set) + return clib_error_return (0, "No SR policy BSID or index specified"); + + if (is_add) + { + if (vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + + rv = sr_mpls_policy_add (bsid, segments, + (is_spray ? SR_POLICY_TYPE_SPRAY : + SR_POLICY_TYPE_DEFAULT), weight); + vec_free (segments); + } + else if (is_del) + rv = sr_mpls_policy_del (bsid); + else if (is_mod) + { + if (!operation) + return clib_error_return (0, "No SL modification specified"); + if (operation != 1 && sl_index == (u32) ~ 0) + return clib_error_return (0, "No Segment List index specified"); + if (operation == 1 && vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + if (operation == 3 && weight == (u32) ~ 0) + return clib_error_return (0, "No new weight for the SL specified"); + rv = sr_mpls_policy_mod (bsid, operation, segments, sl_index, weight); + vec_free (segments); + } + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -12: + return clib_error_return (0, + "There is already a FIB entry for the BindingSID address.\n" + "The SR policy could not be created."); + case -21: + return clib_error_return (0, + "The selected SR policy only contains ONE segment list. " + "Please remove the SR policy instead"); + case -22: + return clib_error_return (0, + "Could not delete the segment list. " + "It is not associated with that SR policy."); + case -23: + return clib_error_return (0, + "Could not delete the segment list. " + "It is not associated with that SR policy."); + case -32: + return clib_error_return (0, + "Could not modify the segment list. 
" + "The given SL is not associated with such SR policy."); + case VNET_API_ERROR_NO_SUCH_TABLE: + return clib_error_return (0, "the Default MPLS table is not present"); + default: + return clib_error_return (0, "BUG: sr policy returns %d", rv); + } + return 0; +} + +VLIB_CLI_COMMAND(sr_mpls_policy_command, static)= +{ + .path = "sr mpls policy", + .short_help = "sr mpls policy [add||del||mod] bsid 2999 " + "next 10 next 20 next 30 (weight 1) (spray)", + .long_help = "TBD.\n", + .function = sr_mpls_policy_command_fn, +}; + +/** + * @brief CLI to display onscreen all the SR MPLS policies + */ +static clib_error_t * +show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_sl_t *segment_list = 0; + mpls_sr_policy_t *sr_policy = 0; + mpls_sr_policy_t **vec_policies = 0; + mpls_label_t *label; + u32 *sl_index; + u8 *s; + int i = 0; + + vlib_cli_output (vm, "SR MPLS policies:"); + + pool_foreach (sr_policy, sm->sr_policies) { + vec_add1(vec_policies, sr_policy); + } + + vec_foreach_index (i, vec_policies) + { + sr_policy = vec_policies[i]; + vlib_cli_output (vm, "[%u].-\tBSID: %U", + (u32) (sr_policy - sm->sr_policies), + format_mpls_unicast_label, sr_policy->bsid); + switch (sr_policy->endpoint_type) + { + case SR_STEER_IPV6: + vlib_cli_output (vm, "\tEndpoint: %U", format_ip6_address, + &sr_policy->endpoint.ip6); + vlib_cli_output (vm, "\tColor: %u", sr_policy->color); + break; + case SR_STEER_IPV4: + vlib_cli_output (vm, "\tEndpoint: %U", format_ip4_address, + &sr_policy->endpoint.ip4); + vlib_cli_output (vm, "\tColor: %u", sr_policy->color); + break; + default: + vlib_cli_output (vm, "\tTE disabled"); + } + vlib_cli_output (vm, "\tType: %s", + (sr_policy->type == + SR_POLICY_TYPE_DEFAULT ? 
"Default" : "Spray")); + vlib_cli_output (vm, "\tSegment Lists:"); + vec_foreach (sl_index, sr_policy->segments_lists) + { + s = NULL; + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + s = format (s, "\t[%u].- ", *sl_index); + s = format (s, "< "); + vec_foreach (label, segment_list->segments) + { + s = format (s, "%U, ", format_mpls_unicast_label, *label); + } + s = format (s, "\b\b > "); + vlib_cli_output (vm, " %s", s); + } + vlib_cli_output (vm, "-----------"); + } + vec_free (vec_policies); + return 0; +} + +VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)= +{ + .path = "show sr mpls policies", + .short_help = "show sr mpls policies", + .function = show_sr_mpls_policies_command_fn, +}; + +/** + * @brief Update the Endpoint,Color tuple of an SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param endpoint represents the IP46 of the endpoint + * @param color represents the color (u32) + * + * To reset to NULL use ~0 as parameters. + * + * @return 0 if correct, else error + */ +int +sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid, + ip46_address_t * endpoint, + u8 endpoint_type, u32 color) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + uword *endpoint_table, *p, *old_value; + + ip46_address_t any; + any.as_u64[0] = any.as_u64[1] = (u64) ~ 0; + + if (!sm->sr_policies_index_hash) + sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t)); + + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + + /* If previous Endpoint, color existed, remove (NH,C) and (ANY,C) */ + if (sr_policy->endpoint_type) + { + endpoint_table = + mhash_get (&sm->sr_policies_c2e2eclabel_hash, &sr_policy->color); + if (!endpoint_table) + return -2; + old_value = + mhash_get ((mhash_t *) endpoint_table, &sr_policy->endpoint); + + /* CID 180995 This should never be NULL unless the two hash tables + * get out of sync */ + 
ALWAYS_ASSERT (old_value != NULL); + + fib_prefix_t pfx = { 0 }; + pfx.fp_proto = FIB_PROTOCOL_MPLS; + pfx.fp_len = 21; + pfx.fp_label = (u32) * old_value; + + mpls_eos_bit_t eos; + FOR_EACH_MPLS_EOS_BIT (eos) + { + pfx.fp_eos = eos; + fib_table_entry_path_remove (sm->fib_table_EC, + &pfx, + FIB_SOURCE_SR, + DPO_PROTO_MPLS, + NULL, + ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE); + } + + old_value = mhash_get ((mhash_t *) endpoint_table, &any); + pfx.fp_label = (u32) * old_value; + + FOR_EACH_MPLS_EOS_BIT (eos) + { + pfx.fp_eos = eos; + fib_table_entry_path_remove (sm->fib_table_EC, + &pfx, + FIB_SOURCE_SR, + DPO_PROTO_MPLS, + NULL, + ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE); + } + + /* Release the lock on (NH, Color) and (ANY, Color) */ + internal_label_unlock (sr_policy->endpoint, sr_policy->color); + internal_label_unlock (any, sr_policy->color); + + /* Reset the values on the SR policy */ + sr_policy->endpoint_type = 0; + sr_policy->endpoint.as_u64[0] = sr_policy->endpoint.as_u64[1] = + (u64) ~ 0; + sr_policy->color = (u32) ~ 0; + } + + if (endpoint_type) + { + sr_policy->endpoint_type = endpoint_type; + sr_policy->endpoint.as_u64[0] = endpoint->as_u64[0]; + sr_policy->endpoint.as_u64[1] = endpoint->as_u64[1]; + sr_policy->color = color; + + u32 label = find_or_create_internal_label (*endpoint, color); + internal_label_lock (*endpoint, sr_policy->color); + + /* If FIB doesnt exist, create them */ + if (sm->fib_table_EC == (u32) ~ 0) + { + sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS, + FIB_SOURCE_SR, + "SR-MPLS Traffic Engineering (NextHop,Color)"); + + fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS, + FIB_SOURCE_SPECIAL); + } + + fib_prefix_t pfx = { 0 }; + pfx.fp_proto = FIB_PROTOCOL_MPLS; + pfx.fp_len = 21; + + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = 0 + }; + path.frp_local_label = sr_policy->bsid; + + 
//Add the entry to ANY,Color + u32 any_label = find_or_create_internal_label (any, color); + internal_label_lock (any, sr_policy->color); + + pfx.fp_eos = MPLS_EOS; + path.frp_eos = MPLS_EOS; + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + pfx.fp_label = label; + fib_table_entry_update (sm->fib_table_EC, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + + pfx.fp_label = any_label; + fib_table_entry_update (sm->fib_table_EC, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + + fib_mpls_label_t fml = { + .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL, + }; + + vec_add1 (path.frp_label_stack, fml); + pfx.fp_eos = MPLS_NON_EOS; + path.frp_eos = MPLS_NON_EOS; + + paths = NULL; + vec_add1 (paths, path); + + pfx.fp_label = label; + fib_table_entry_update (sm->fib_table_EC, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + + pfx.fp_label = any_label; + fib_table_entry_update (sm->fib_table_EC, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + } + return 0; +} + +/** + * @brief CLI to modify the Endpoint,Color of an SR policy + */ +static clib_error_t * +cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip46_address_t endpoint; + u32 color = (u32) ~ 0; + mpls_label_t bsid; + u8 endpoint_type = 0; + char clear = 0, color_set = 0, bsid_set = 0; + + clib_memset (&endpoint, 0, sizeof (ip46_address_t)); + + int rv; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!endpoint_type + && unformat (input, "endpoint %U", unformat_ip6_address, + &endpoint.ip6)) + endpoint_type = SR_STEER_IPV6; + else if (!endpoint_type + && unformat (input, "endpoint %U", unformat_ip4_address, + &endpoint.ip4)) + endpoint_type = SR_STEER_IPV4; + else if (!color_set && unformat (input, "color %u", &color)) + color_set = 1; + else if (!bsid_set + && unformat (input, "bsid %U", unformat_mpls_unicast_label, + &bsid)) + bsid_set 
= 1; + else if (!clear && unformat (input, "clear")) + clear = 1; + else + break; + } + + if (!bsid_set) + return clib_error_return (0, "No BSID specified"); + if (!endpoint_type && !clear) + return clib_error_return (0, "No Endpoint specified"); + if (!color_set && !clear) + return clib_error_return (0, "No Color set"); + + /* In case its a cleanup */ + if (clear) + { + ip6_address_set_zero (&endpoint.ip6); + color = (u32) ~ 0; + } + rv = + sr_mpls_policy_assign_endpoint_color (bsid, &endpoint, endpoint_type, + color); + + if (rv) + clib_error_return (0, "Error on Endpoint,Color"); + + return 0; +} + +VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)= +{ + .path = "sr mpls policy te", + .short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234", + .function = cli_sr_mpls_policy_ec_command_fn, +}; + +/********************* SR MPLS Policy initialization ***********************/ +/** + * @brief SR MPLS Policy initialization + */ +clib_error_t * +sr_mpls_policy_rewrite_init (vlib_main_t * vm) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + + /* Init memory for sr policy keys (bsid <-> ip6_address_t) */ + sm->sr_policies_index_hash = NULL; + sm->sr_policies_c2e2eclabel_hash.hash = NULL; + return 0; +} + +VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: eval: (c-set-style "gnu") End: + */ diff --git a/src/plugins/srmpls/sr_mpls_steering.c b/src/plugins/srmpls/sr_mpls_steering.c new file mode 100644 index 00000000000..24c8b0e2d9f --- /dev/null +++ b/src/plugins/srmpls/sr_mpls_steering.c @@ -0,0 +1,897 @@ +/* + * sr_steering.c: ipv6 segment routing steering into SR policy + * + * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache + * License, Version 2.0 (the "License"); you may not use this file except in + * compliance with the License. 
You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +/** + * @file + * @brief Packet steering into SR-MPLS Policies + * + * This file is in charge of handling the FIB appropiatly to steer packets + * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here + * we are only doing steering. SR policy application is done in + * sr_policy_rewrite.c + * + * Supports: + * - Steering of IPv6 traffic Destination Address based through BSID + * - Steering of IPv4 traffic Destination Address based through BSID + * - Steering of IPv4 and IPv6 traffic through N,C (SR CP) + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include "sr_mpls.h" +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/fib/mpls_fib.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#define SRMPLS_TE_OFFSET 50 + +/** + * @brief function to sort the colors in descending order + */ +int +sort_color_descent (const u32 * x, u32 * y) +{ + return *y - *x; +} + +/********************* Internal (NH, C) labels *******************************/ +/** + * @brief find the corresponding label for (endpoint, color) and lock it + * endpoint might be NULL or ANY + * NULL = 0, ANY=~0 + */ +u32 +find_or_create_internal_label (ip46_address_t endpoint, u32 color) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + uword *color_table, *result_label; + + if (!sm->sr_policies_c2e2eclabel_hash.hash) + mhash_init (&sm->sr_policies_c2e2eclabel_hash, sizeof (mhash_t), + sizeof (u32)); + + color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color); + if (!color_table) + { + mhash_t color_t; + clib_memset 
(&color_t, 0, sizeof (mhash_t)); + mhash_init (&color_t, sizeof (u32), sizeof (ip46_address_t)); + mhash_set_mem (&sm->sr_policies_c2e2eclabel_hash, &color, + (uword *) & color_t, NULL); + color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color); + } + + result_label = mhash_get ((mhash_t *) color_table, &endpoint); + + if (result_label) + return (u32) * result_label; + + /* Create and set a new internal label */ + u32 *new_internal_label = 0; + pool_get (sm->ec_labels, new_internal_label); + *new_internal_label = 0; + mhash_set ((mhash_t *) color_table, &endpoint, + (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET, NULL); + + return (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET; +} + +always_inline void +internal_label_lock_co (ip46_address_t endpoint, u32 color, char co_bits) +{ + ip46_address_t zero, any; + ip46_address_reset (&zero); + any.as_u64[0] = any.as_u64[1] = (u64) ~ 0; + switch (co_bits) + { + case SR_TE_CO_BITS_10: + internal_label_lock (endpoint, color); + internal_label_lock (zero, color); + internal_label_lock (any, color); + break; + case SR_TE_CO_BITS_01: + internal_label_lock (endpoint, color); + internal_label_lock (zero, color); + break; + case SR_TE_CO_BITS_00: + case SR_TE_CO_BITS_11: + internal_label_lock (endpoint, color); + break; + } +} + +/** + * @brief lock the label for (NH, C) + * endpoint might be NULL or ANY + * NULL = 0, ANY=~0 + */ +void +internal_label_lock (ip46_address_t endpoint, u32 color) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + uword *color_table, *result_label; + + if (!sm->sr_policies_c2e2eclabel_hash.hash) + return; + + color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color); + if (!color_table) + return; + + result_label = mhash_get ((mhash_t *) color_table, &endpoint); + + if (!result_label) + return; + + /* Lock it */ + u32 *label_lock = + pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET); + (*label_lock)++; +} + + +always_inline void 
+internal_label_unlock_co (ip46_address_t endpoint, u32 color, char co_bits) +{ + ip46_address_t zero, any; + ip46_address_reset (&zero); + any.as_u64[0] = any.as_u64[1] = (u64) ~ 0; + switch (co_bits) + { + case SR_TE_CO_BITS_10: + internal_label_unlock (endpoint, color); + internal_label_unlock (zero, color); + internal_label_unlock (any, color); + break; + case SR_TE_CO_BITS_01: + internal_label_unlock (endpoint, color); + internal_label_unlock (zero, color); + break; + case SR_TE_CO_BITS_00: + case SR_TE_CO_BITS_11: + internal_label_unlock (endpoint, color); + break; + } +} + +/** + * @brief Release lock on label for (endpoint, color) + * endpoint might be NULL or ANY + * NULL = 0, ANY=~0 + */ +void +internal_label_unlock (ip46_address_t endpoint, u32 color) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + uword *color_table, *result_label; + + if (!sm->sr_policies_c2e2eclabel_hash.hash) + return; + + color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color); + if (!color_table) + return; + + result_label = mhash_get ((mhash_t *) color_table, &endpoint); + + if (!result_label) + return; + + u32 *label_lock = + pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET); + (*label_lock)--; + + if (*label_lock == 0) + { + pool_put (sm->ec_labels, label_lock); + mhash_unset ((mhash_t *) color_table, &endpoint, NULL); + if (mhash_elts ((mhash_t *) color_table) == 0) + { + mhash_free ((mhash_t *) color_table); + mhash_unset (&sm->sr_policies_c2e2eclabel_hash, &color, NULL); + if (mhash_elts (&sm->sr_policies_c2e2eclabel_hash) == 0) + { + mhash_free (&sm->sr_policies_c2e2eclabel_hash); + sm->sr_policies_c2e2eclabel_hash.hash = NULL; + fib_table_unlock (sm->fib_table_EC, FIB_PROTOCOL_MPLS, + FIB_SOURCE_SR); + sm->fib_table_EC = (u32) ~ 0; + } + } + } +} + +/********************* steering computation *********************************/ +/** + * @brief function to update the FIB + */ +void +compute_sr_te_automated_steering_fib_entry (mpls_sr_steering_policy_t 
* + steer_pl) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + fib_prefix_t pfx = { 0 }; + + u32 *internal_labels = 0; + ip46_address_t zero, any; + ip46_address_reset (&zero); + any.as_u64[0] = any.as_u64[1] = (u64) ~ 0; + + u32 *color_i = NULL; + vec_foreach (color_i, steer_pl->color) + { + switch (steer_pl->co_bits) + { + case SR_TE_CO_BITS_10: + vec_add1 (internal_labels, + find_or_create_internal_label (steer_pl->next_hop, + *color_i)); + vec_add1 (internal_labels, + find_or_create_internal_label (zero, *color_i)); + vec_add1 (internal_labels, + find_or_create_internal_label (any, *color_i)); + break; + case SR_TE_CO_BITS_01: + vec_add1 (internal_labels, + find_or_create_internal_label (steer_pl->next_hop, + *color_i)); + vec_add1 (internal_labels, + find_or_create_internal_label (zero, *color_i)); + break; + case SR_TE_CO_BITS_00: + case SR_TE_CO_BITS_11: + vec_add1 (internal_labels, + find_or_create_internal_label (steer_pl->next_hop, + *color_i)); + break; + } + } + + /* Does hidden FIB already exist? 
*/ + if (sm->fib_table_EC == (u32) ~ 0) + { + sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS, + FIB_SOURCE_SR, + "SR-MPLS Traffic Engineering (NextHop,Color)"); + + fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS, + FIB_SOURCE_SPECIAL); + } + + /* Add the corresponding FIB entries */ + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_eos = MPLS_EOS, + .frp_sw_if_index = ~0, + .frp_fib_index = sm->fib_table_EC, + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = 0 + }; + fib_route_path_t *paths = NULL; + + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + } + + if (steer_pl->vpn_label != (u32) ~ 0) + { + fib_mpls_label_t fml = { + .fml_value = steer_pl->vpn_label, + }; + vec_add1 (path.frp_label_stack, fml); + path.frp_eos = MPLS_NON_EOS; + } + + u32 label_i; + vec_foreach_index (label_i, internal_labels) + { + path.frp_local_label = internal_labels[label_i]; + path.frp_preference = label_i; + vec_add1 (paths, path); + } + + /* Finally we must add to FIB IGP to N */ + clib_memcpy (&path.frp_addr, &steer_pl->next_hop, + sizeof (steer_pl->next_hop)); + path.frp_preference = vec_len (internal_labels); + path.frp_label_stack = NULL; + + if (steer_pl->nh_type == SR_STEER_IPV6) + { + path.frp_proto = DPO_PROTO_IP6; + path.frp_fib_index = + fib_table_find (FIB_PROTOCOL_IP6, + (steer_pl->classify.fib_table != + (u32) ~ 0 ? steer_pl->classify.fib_table : 0)); + } + else if (steer_pl->nh_type == SR_STEER_IPV4) + { + path.frp_proto = DPO_PROTO_IP4; + path.frp_fib_index = + fib_table_find (FIB_PROTOCOL_IP4, + (steer_pl->classify.fib_table != + (u32) ~ 0 ? 
steer_pl->classify.fib_table : 0)); + } + + vec_add1 (paths, path); + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + fib_table_entry_update (fib_table_find + (FIB_PROTOCOL_IP6, + (steer_pl->classify.fib_table != + (u32) ~ 0 ? steer_pl->classify.fib_table : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + fib_table_entry_update (fib_table_find + (FIB_PROTOCOL_IP4, + (steer_pl->classify.fib_table != + (u32) ~ 0 ? steer_pl->classify.fib_table : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + + vec_free (paths); + paths = NULL; +} + +/** + * @brief Steer traffic L3 traffic through a given SR-MPLS policy + * + * @param is_del + * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + * @param sr_policy is the index of the SR Policy (alt to bsid) + * @param table_id is the VRF where to install the FIB entry for the BSID + * @param prefix is the IPv4/v6 address for L3 traffic type + * @param mask_width is the mask for L3 traffic type + * @param traffic_type describes the type of traffic + * @param next_hop SR TE Next-Hop + * @param nh_type is the AF of Next-Hop + * @param color SR TE color + * @param co_bits SR TE color-only bits + * + * @return 0 if correct, else error + */ +int +sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id, + ip46_address_t * prefix, u32 mask_width, + u8 traffic_type, ip46_address_t * next_hop, + u8 nh_type, u32 color, char co_bits, + mpls_label_t vpn_label) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + sr_mpls_steering_key_t key; + mpls_sr_steering_policy_t *steer_pl; + fib_prefix_t pfx = { 0 }; + + mpls_sr_policy_t *sr_policy = 0; + uword *p = 0; + + clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t)); + + if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6) + return -1; + + /* Compute the steer policy key */ + key.prefix.as_u64[0] = prefix->as_u64[0]; + key.prefix.as_u64[1] = 
prefix->as_u64[1]; + key.mask_width = mask_width; + key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0); + key.traffic_type = traffic_type; + + /* + * Search for steering policy. If already exists we are adding a new + * color. + */ + if (!sm->sr_steer_policies_hash.hash) + mhash_init (&sm->sr_steer_policies_hash, sizeof (uword), + sizeof (sr_mpls_steering_key_t)); + + p = mhash_get (&sm->sr_steer_policies_hash, &key); + if (p) + { + steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); + if (steer_pl->bsid != (u32) ~ 0) + return -1; //Means we are rewritting the steering. Not allowed. + + /* Means we are adding a color. Check that NH match. */ + if (ip46_address_cmp (&steer_pl->next_hop, next_hop)) + return -2; + if (vec_search (steer_pl->color, color) != ~0) + return -3; + if (steer_pl->co_bits != co_bits) + return -4; /* CO colors should be the same */ + if (steer_pl->vpn_label != vpn_label) + return -5; /* VPN label should be the same */ + + /* Remove the steering and ReDo it */ + vec_add1 (steer_pl->color, color); + vec_sort_with_function (steer_pl->color, sort_color_descent); + compute_sr_te_automated_steering_fib_entry (steer_pl); + internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits); + return 0; + } + + /* Create a new steering policy */ + pool_get (sm->steer_policies, steer_pl); + clib_memset (steer_pl, 0, sizeof (*steer_pl)); + clib_memcpy (&steer_pl->classify.prefix, prefix, sizeof (ip46_address_t)); + clib_memcpy (&steer_pl->next_hop, next_hop, sizeof (ip46_address_t)); + steer_pl->nh_type = nh_type; + steer_pl->co_bits = co_bits; + steer_pl->classify.mask_width = mask_width; + steer_pl->classify.fib_table = (table_id != (u32) ~ 0 ? 
table_id : 0); + steer_pl->classify.traffic_type = traffic_type; + steer_pl->color = NULL; + steer_pl->vpn_label = vpn_label; + + /* Create and store key */ + mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies, + NULL); + + /* Local steering */ + if (bsid != (u32) ~ 0) + { + if (!sm->sr_policies_index_hash) + sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t)); + steer_pl->bsid = bsid; + p = hash_get (sm->sr_policies_index_hash, bsid); + if (!p) + return -1; + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + + fib_route_path_t path = { + .frp_proto = DPO_PROTO_MPLS, + .frp_local_label = sr_policy->bsid, + .frp_eos = MPLS_EOS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = 0 + }; + fib_route_path_t *paths = NULL; + + if (steer_pl->vpn_label != (u32) ~ 0) + { + fib_mpls_label_t fml = { + .fml_value = steer_pl->vpn_label, + }; + vec_add1 (path.frp_label_stack, fml); + } + + /* FIB API calls - Recursive route through the BindingSID */ + if (traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + path.frp_fib_index = 0; + path.frp_preference = 0; + vec_add1 (paths, path); + fib_table_entry_path_add2 (fib_table_find + (FIB_PROTOCOL_IP6, + (table_id != (u32) ~ 0 ? table_id : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + vec_free (paths); + } + else if (traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + path.frp_fib_index = 0; + path.frp_preference = 0; + vec_add1 (paths, path); + fib_table_entry_path_add2 (fib_table_find + (FIB_PROTOCOL_IP4, + (table_id != (u32) ~ 0 ? 
table_id : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + vec_free (paths); + } + } + /* Automated steering */ + else + { + steer_pl->bsid = (u32) ~ 0; + vec_add1 (steer_pl->color, color); + compute_sr_te_automated_steering_fib_entry (steer_pl); + internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits); + } + return 0; +} + +/** + * @brief Delete steering rule for an SR-MPLS policy + * + * @param is_del + * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + * @param sr_policy is the index of the SR Policy (alt to bsid) + * @param table_id is the VRF where to install the FIB entry for the BSID + * @param prefix is the IPv4/v6 address for L3 traffic type + * @param mask_width is the mask for L3 traffic type + * @param traffic_type describes the type of traffic + * @param next_hop SR TE Next-HOP + * @param nh_type is the AF of Next-Hop + * @param color SR TE color + * + * @return 0 if correct, else error + */ +int +sr_mpls_steering_policy_del (ip46_address_t * prefix, u32 mask_width, + u8 traffic_type, u32 table_id, u32 color) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + sr_mpls_steering_key_t key; + mpls_sr_steering_policy_t *steer_pl; + fib_prefix_t pfx = { 0 }; + uword *p = 0; + + clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t)); + + /* Compute the steer policy key */ + if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6) + return -1; + + key.prefix.as_u64[0] = prefix->as_u64[0]; + key.prefix.as_u64[1] = prefix->as_u64[1]; + key.mask_width = mask_width; + key.fib_table = (table_id != (u32) ~ 0 ? 
table_id : 0); + key.traffic_type = traffic_type; + + if (!sm->sr_steer_policies_hash.hash) + mhash_init (&sm->sr_steer_policies_hash, sizeof (uword), + sizeof (sr_mpls_steering_key_t)); + + /* Search for the item */ + p = mhash_get (&sm->sr_steer_policies_hash, &key); + + if (!p) + return -1; + + /* Retrieve Steer Policy function */ + steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); + + if (steer_pl->bsid == (u32) ~ 0) + { + /* Remove the color from the color vector */ + vec_del1 (steer_pl->color, vec_search (steer_pl->color, color)); + + if (vec_len (steer_pl->color)) + { + /* Reorder Colors */ + vec_sort_with_function (steer_pl->color, sort_color_descent); + compute_sr_te_automated_steering_fib_entry (steer_pl); + /* Remove all the locks for this ones... */ + internal_label_unlock_co (steer_pl->next_hop, color, + steer_pl->co_bits); + return 0; + } + else + { + vec_free (steer_pl->color); + /* Remove FIB entry */ + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP6, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP4, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + } + /* Remove all the locks for this ones... 
*/ + internal_label_unlock_co (steer_pl->next_hop, color, + steer_pl->co_bits); + } + } + else //Remove by BSID + { + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP6, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP4, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + } + } + /* Delete SR steering policy entry */ + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + if (mhash_elts (&sm->sr_steer_policies_hash) == 0) + { + mhash_free (&sm->sr_steer_policies_hash); + sm->sr_steer_policies_hash.hash = NULL; + } + return 0; +} + +static clib_error_t * +sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0; + + ip46_address_t prefix, nh; + u32 dst_mask_width = 0; + u8 traffic_type = 0; + u8 nh_type = 0; + u32 fib_table = (u32) ~ 0, color = (u32) ~ 0; + u32 co_bits = 0; + + mpls_label_t bsid, vpn_label = (u32) ~ 0; + + u8 sr_policy_set = 0; + + clib_memset (&prefix, 0, sizeof (ip46_address_t)); + clib_memset (&nh, 0, sizeof (ip46_address_t)); + + int rv; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip6_address, + &prefix.ip6, &dst_mask_width)) + traffic_type = SR_STEER_IPV6; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip4_address, + &prefix.ip4, &dst_mask_width)) + traffic_type = SR_STEER_IPV4; + else if (!sr_policy_set + && 
unformat (input, "via sr policy bsid %U", + unformat_mpls_unicast_label, &bsid)) + sr_policy_set = 1; + else if (!sr_policy_set + && unformat (input, "via next-hop %U color %d co %d", + unformat_ip4_address, &nh.ip4, &color, &co_bits)) + { + sr_policy_set = 1; + nh_type = SR_STEER_IPV4; + } + else if (!sr_policy_set + && unformat (input, "via next-hop %U color %d co %d", + unformat_ip6_address, &nh.ip6, &color, &co_bits)) + { + sr_policy_set = 1; + nh_type = SR_STEER_IPV6; + } + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else if (unformat (input, "vpn-label %U", + unformat_mpls_unicast_label, &vpn_label)); + else + break; + } + + if (!traffic_type) + return clib_error_return (0, "No L3 traffic specified"); + if (!sr_policy_set) + return clib_error_return (0, "No SR policy specified"); + + /* Make sure that the prefixes are clean */ + if (traffic_type == SR_STEER_IPV4) + { + u32 mask = + (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0); + prefix.ip4.as_u32 &= mask; + } + else if (traffic_type == SR_STEER_IPV6) + { + ip6_address_t mask; + ip6_address_mask_from_width (&mask, dst_mask_width); + ip6_address_mask (&prefix.ip6, &mask); + } + + if (nh_type) + bsid = (u32) ~ 0; + + if (is_del) + rv = + sr_mpls_steering_policy_del (&prefix, dst_mask_width, + traffic_type, fib_table, color); + + else + rv = + sr_mpls_steering_policy_add (bsid, fib_table, &prefix, dst_mask_width, + traffic_type, &nh, nh_type, color, co_bits, + vpn_label); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, "Incorrect API usage."); + case -2: + return clib_error_return (0, "The Next-Hop does not match."); + case -3: + return clib_error_return (0, "The color already exists."); + case -4: + return clib_error_return (0, "The co-bits do not match."); + case -5: + return clib_error_return (0, "The VPN-labels do not match."); + default: + return clib_error_return (0, "BUG: sr steer policy 
returns %d", rv); + } + return 0; +} + +VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)= +{ + .path = "sr mpls steer", + .short_help = "sr mpls steer (del) l3 <ip_addr/mask> " + "via [sr policy bsid <mpls_label> || next-hop <ip46_addr> color <u32> co <0|1|2|3> ](fib-table <fib_table_index>)(vpn-label 500)", + .long_help = + "\tSteer L3 traffic through an existing SR policy.\n" + "\tExamples:\n" + "\t\tsr steer l3 2001::/64 via sr_policy bsid 29999\n" + "\t\tsr steer del l3 2001::/64 via sr_policy bsid 29999\n" + "\t\tsr steer l3 2001::/64 via next-hop 1.1.1.1 color 1234 co 0\n" + "\t\tsr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n", + .function = sr_mpls_steer_policy_command_fn, +}; + +static clib_error_t * +show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_steering_policy_t **steer_policies = 0; + mpls_sr_steering_policy_t *steer_pl; + + int i; + + vlib_cli_output (vm, "SR MPLS steering policies:"); + pool_foreach (steer_pl, sm->steer_policies) { + vec_add1(steer_policies, steer_pl); + } + for (i = 0; i < vec_len (steer_policies); i++) + { + vlib_cli_output (vm, "=========================="); + steer_pl = steer_policies[i]; + if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + vlib_cli_output (vm, "Prefix: %U/%d via:", + format_ip4_address, + &steer_pl->classify.prefix.ip4, + steer_pl->classify.mask_width); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + vlib_cli_output (vm, "Prefix: %U/%d via:", + format_ip6_address, + &steer_pl->classify.prefix.ip6, + steer_pl->classify.mask_width); + } + + if (steer_pl->bsid != (u32) ~ 0) + { + vlib_cli_output (vm, "· BSID %U", + format_mpls_unicast_label, steer_pl->bsid); + } + else + { + if (steer_pl->nh_type == SR_STEER_IPV4) + { + vlib_cli_output (vm, "· Next-hop %U", + format_ip4_address, &steer_pl->next_hop.ip4); + } + else if 
(steer_pl->nh_type == SR_STEER_IPV6) + { + vlib_cli_output (vm, "· Next-hop %U", + format_ip6_address, &steer_pl->next_hop.ip6); + } + + u32 *color_i = 0; + u8 *s = NULL; + s = format (s, "[ "); + vec_foreach (color_i, steer_pl->color) + { + s = format (s, "%d, ", *color_i); + } + s = format (s, "\b\b ]"); + vlib_cli_output (vm, "· Color %s", s); + + switch (steer_pl->co_bits) + { + case SR_TE_CO_BITS_00: + vlib_cli_output (vm, "· CO-bits: 00"); + break; + case SR_TE_CO_BITS_01: + vlib_cli_output (vm, "· CO-bits: 01"); + break; + case SR_TE_CO_BITS_10: + vlib_cli_output (vm, "· CO-bits: 10"); + break; + case SR_TE_CO_BITS_11: + vlib_cli_output (vm, "· CO-bits: 11"); + break; + } + } + } + return 0; +} + +VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)= +{ + .path = "show sr mpls steering policies", + .short_help = "show sr mpls steering policies", + .function = show_sr_mpls_steering_policies_command_fn, +}; + +clib_error_t * +sr_mpls_steering_init (vlib_main_t * vm) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + + /* Init memory for function keys */ + sm->sr_steer_policies_hash.hash = NULL; + + sm->fib_table_EC = (u32) ~ 0; + sm->ec_labels = 0; + + return 0; +} + +VLIB_INIT_FUNCTION(sr_mpls_steering_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: eval: (c-set-style "gnu") End: + */ diff --git a/src/plugins/srmpls/sr_mpls_test.c b/src/plugins/srmpls/sr_mpls_test.c new file mode 100644 index 00000000000..7aff4c32b06 --- /dev/null +++ b/src/plugins/srmpls/sr_mpls_test.c @@ -0,0 +1,174 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2021 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vppinfra/error.h> +#include <vpp/api/types.h> + +#define __plugin_msg_base sr_mpls_test_main.msg_id_base +#include <vlibapi/vat_helper_macros.h> + +/* Declare message IDs */ +#include <vnet/format_fns.h> +#include <plugins/srmpls/sr_mpls.api_enum.h> +#include <plugins/srmpls/sr_mpls.api_types.h> + +#define vl_endianfun /* define message structures */ +#include <plugins/srmpls/sr_mpls.api.h> +#undef vl_endianfun + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} sr_mpls_test_main_t; + +static sr_mpls_test_main_t sr_mpls_test_main; + +static int +api_sr_mpls_policy_mod (vat_main_t *vam) +{ + return -1; +} + +static int +api_sr_mpls_steering_add_del (vat_main_t *vam) +{ + return -1; +} + +static int +api_sr_mpls_policy_assign_endpoint_color (vat_main_t *vam) +{ + return -1; +} + +static int +api_sr_mpls_policy_add (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + vl_api_sr_mpls_policy_add_t *mp; + u32 bsid = 0; + u32 weight = 1; + u8 type = 0; + u8 n_segments = 0; + u32 sid; + u32 *segments = NULL; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "bsid %d", &bsid)) + ; + else if (unformat (i, "weight %d", &weight)) + ; + else if (unformat (i, "spray")) + type = 1; + else if (unformat (i, "next %d", &sid)) + { + n_segments += 1; + 
vec_add1 (segments, htonl (sid)); + } + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (bsid == 0) + { + errmsg ("bsid not set"); + return -99; + } + + if (n_segments == 0) + { + errmsg ("no sid in segment stack"); + return -99; + } + + /* Construct the API message */ + M2 (SR_MPLS_POLICY_ADD, mp, sizeof (u32) * n_segments); + + mp->bsid = htonl (bsid); + mp->weight = htonl (weight); + mp->is_spray = type; + mp->n_segments = n_segments; + memcpy (mp->segments, segments, sizeof (u32) * n_segments); + vec_free (segments); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_sr_mpls_policy_del (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + vl_api_sr_mpls_policy_del_t *mp; + u32 bsid = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "bsid %d", &bsid)) + ; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (bsid == 0) + { + errmsg ("bsid not set"); + return -99; + } + + /* Construct the API message */ + M (SR_MPLS_POLICY_DEL, mp); + + mp->bsid = htonl (bsid); + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +#include <plugins/srmpls/sr_mpls.api_test.c> + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c index bb54e672918..6862301d2d2 100644 --- a/src/plugins/srtp/srtp.c +++ b/src/plugins/srtp/srtp.c @@ -641,10 +641,12 @@ srtp_connect (transport_endpoint_cfg_t *tep) application_t *app; srtp_tc_t *ctx; u32 ctx_index; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_NONE); + if (!ext_cfg) return SESSION_E_NOEXTCFG; app_wrk = app_worker_get (sep->app_wrk_index); @@ -658,7 +660,7 @@ srtp_connect (transport_endpoint_cfg_t *tep) ctx->srtp_ctx_handle = ctx_index; ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data); + srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) ext_cfg->data); clib_memcpy_fast (&cargs->sep, sep, sizeof (session_endpoint_t)); cargs->sep.transport_proto = TRANSPORT_PROTO_UDP; @@ -723,9 +725,11 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) app_listener_t *al; srtp_tc_t *lctx; u32 lctx_index; + transport_endpt_ext_cfg_t *ext_cfg; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_NONE); + if (!ext_cfg) return SESSION_E_NOEXTCFG; app_wrk = app_worker_get (sep->app_wrk_index); @@ -756,7 +760,7 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lctx->c_s_index = app_listener_index; lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data); + srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) ext_cfg->data); SRTP_DBG (1, "Started listening %d", lctx_index); return lctx_index; diff --git 
a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c index af04f1adeb0..2f4757e28a1 100644 --- a/src/plugins/tlsmbedtls/tls_mbedtls.c +++ b/src/plugins/tlsmbedtls/tls_mbedtls.c @@ -396,6 +396,8 @@ mbedtls_ctx_handshake_rx (tls_ctx_t * ctx) if (mc->ssl.state != MBEDTLS_SSL_HANDSHAKE_OVER) return 0; + ctx->flags |= TLS_CONN_F_HS_DONE; + /* * Handshake complete */ @@ -532,17 +534,10 @@ mbedtls_ctx_read (tls_ctx_t * ctx, session_t * tls_session) return enq; } -static u8 -mbedtls_handshake_is_over (tls_ctx_t * ctx) -{ - mbedtls_ctx_t *mc = (mbedtls_ctx_t *) ctx; - return (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER); -} - static int mbedtls_transport_close (tls_ctx_t * ctx) { - if (!mbedtls_handshake_is_over (ctx)) + if (!(ctx->flags & TLS_CONN_F_HS_DONE)) { session_close (session_get_from_handle (ctx->tls_session_handle)); return 0; @@ -554,7 +549,7 @@ mbedtls_transport_close (tls_ctx_t * ctx) static int mbedtls_transport_reset (tls_ctx_t *ctx) { - if (!mbedtls_handshake_is_over (ctx)) + if (!(ctx->flags & TLS_CONN_F_HS_DONE)) { session_close (session_get_from_handle (ctx->tls_session_handle)); return 0; @@ -590,7 +585,6 @@ const static tls_engine_vft_t mbedtls_engine = { .ctx_init_client = mbedtls_ctx_init_client, .ctx_write = mbedtls_ctx_write, .ctx_read = mbedtls_ctx_read, - .ctx_handshake_is_over = mbedtls_handshake_is_over, .ctx_start_listen = mbedtls_start_listen, .ctx_stop_listen = mbedtls_stop_listen, .ctx_transport_close = mbedtls_transport_close, diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c index 5d172a0adcf..c8e685f20c5 100644 --- a/src/plugins/tlsopenssl/tls_openssl.c +++ b/src/plugins/tlsopenssl/tls_openssl.c @@ -1037,15 +1037,6 @@ openssl_ctx_init_server (tls_ctx_t * ctx) return 0; } -static u8 -openssl_handshake_is_over (tls_ctx_t * ctx) -{ - openssl_ctx_t *mc = (openssl_ctx_t *) ctx; - if (!mc->ssl) - return 0; - return SSL_is_init_finished (mc->ssl); -} - static int 
openssl_transport_close (tls_ctx_t * ctx) { @@ -1054,7 +1045,7 @@ openssl_transport_close (tls_ctx_t * ctx) return 0; #endif - if (!openssl_handshake_is_over (ctx)) + if (!(ctx->flags & TLS_CONN_F_HS_DONE)) { openssl_handle_handshake_failure (ctx); return 0; @@ -1066,7 +1057,7 @@ openssl_transport_close (tls_ctx_t * ctx) static int openssl_transport_reset (tls_ctx_t *ctx) { - if (!openssl_handshake_is_over (ctx)) + if (!(ctx->flags & TLS_CONN_F_HS_DONE)) { openssl_handle_handshake_failure (ctx); return 0; @@ -1166,7 +1157,6 @@ const static tls_engine_vft_t openssl_engine = { .ctx_init_client = openssl_ctx_init_client, .ctx_write = openssl_ctx_write, .ctx_read = openssl_ctx_read, - .ctx_handshake_is_over = openssl_handshake_is_over, .ctx_start_listen = openssl_start_listen, .ctx_stop_listen = openssl_stop_listen, .ctx_transport_close = openssl_transport_close, @@ -1286,7 +1276,10 @@ tls_openssl_set_command_fn (vlib_main_t * vm, unformat_input_t * input, } else { - vnet_session_enable_disable (vm, 1); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); if (openssl_engine_register (engine_name, engine_alg, async) < 0) { return clib_error_return (0, "Failed to register %s polling", diff --git a/src/plugins/tlspicotls/tls_picotls.c b/src/plugins/tlspicotls/tls_picotls.c index 7375b928206..9459cb776b5 100644 --- a/src/plugins/tlspicotls/tls_picotls.c +++ b/src/plugins/tlspicotls/tls_picotls.c @@ -88,14 +88,6 @@ picotls_lctx_get (u32 lctx_index) return pool_elt_at_index (picotls_main.lctx_pool, lctx_index); } -static u8 -picotls_handshake_is_over (tls_ctx_t * ctx) -{ - picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx; - assert (ptls_ctx->tls); - return ptls_handshake_is_complete (ptls_ctx->tls); -} - static int picotls_try_handshake_write (picotls_ctx_t * ptls_ctx, session_t * tls_session, ptls_buffer_t * buf) @@ -194,7 +186,7 @@ picotls_confirm_app_close (tls_ctx_t * ctx) 
static int picotls_transport_close (tls_ctx_t * ctx) { - if (!picotls_handshake_is_over (ctx)) + if (!(ctx->flags & TLS_CONN_F_HS_DONE)) { picotls_handle_handshake_failure (ctx); return 0; @@ -206,7 +198,7 @@ picotls_transport_close (tls_ctx_t * ctx) static int picotls_transport_reset (tls_ctx_t *ctx) { - if (!picotls_handshake_is_over (ctx)) + if (!(ctx->flags & TLS_CONN_F_HS_DONE)) { picotls_handle_handshake_failure (ctx); return 0; @@ -435,7 +427,7 @@ picotls_ctx_read (tls_ctx_t *ctx, session_t *tcp_session) if (PREDICT_FALSE (!ptls_handshake_is_complete (ptls_ctx->tls))) { picotls_do_handshake (ptls_ctx, tcp_session); - if (picotls_handshake_is_over (ctx)) + if (ctx->flags & TLS_CONN_F_HS_DONE) { if (ptls_is_server (ptls_ctx->tls)) { @@ -750,7 +742,6 @@ const static tls_engine_vft_t picotls_engine = { .ctx_free = picotls_ctx_free, .ctx_get = picotls_ctx_get, .ctx_get_w_thread = picotls_ctx_get_w_thread, - .ctx_handshake_is_over = picotls_handshake_is_over, .ctx_start_listen = picotls_start_listen, .ctx_stop_listen = picotls_stop_listen, .ctx_init_server = picotls_ctx_init_server, diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c index fbac809d726..491d135322c 100644 --- a/src/plugins/unittest/fib_test.c +++ b/src/plugins/unittest/fib_test.c @@ -10264,7 +10264,57 @@ fib_test_inherit (void) &l99_o_10_10_10_3), "%U via interposer label", format_fib_prefix,&pfx_10_10_10_21_s_32); + fib_table_entry_special_remove(0, + &pfx_10_10_10_0_s_24, + FIB_SOURCE_SPECIAL); + + const ip46_address_t nh_0_0_0_0 = { + .ip4.as_u32 = clib_host_to_net_u32(0x00000000), + }; + const fib_prefix_t pfx_0_0_0_0_s_0 = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = nh_0_0_0_0, + }; + /* we have prio(API) < prio(hi_src) < prio(SPECIAL) */ + /* Add/remove an interposer source from the top of the subtrie. The + * interposer source is inherited. 
+ */ + fib_table_entry_special_dpo_add(0, + &pfx_0_0_0_0_s_0, + hi_src, + (FIB_ENTRY_FLAG_COVERED_INHERIT | + FIB_ENTRY_FLAG_INTERPOSE), + &interposer); + /* + * Add/remove an interposer source from the top of the subtrie. The + * interposer source is inherited, the previous inheritance is discarded. + */ + fib_table_entry_special_dpo_add(0, + &pfx_10_10_10_0_s_24, + FIB_SOURCE_SPECIAL, + (FIB_ENTRY_FLAG_COVERED_INHERIT | + FIB_ENTRY_FLAG_INTERPOSE), + &interposer); + /* force a tree walk */ + fib_table_entry_update_one_path(0, + &pfx_0_0_0_0_s_0, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + DPO_PROTO_IP4, + &nh_10_10_10_3, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_special_remove(0, + &pfx_10_10_10_0_s_24, + FIB_SOURCE_SPECIAL); + fib_table_entry_special_remove(0, + &pfx_0_0_0_0_s_0, + hi_src); /* * cleanup */ @@ -10275,6 +10325,7 @@ fib_test_inherit (void) fib_table_entry_delete(0, &pfx_10_10_10_0_s_24, FIB_SOURCE_API); fib_table_entry_delete(0, &pfx_10_10_0_0_s_16, FIB_SOURCE_API); fib_table_entry_delete(0, &pfx_10_10_10_0_s_24, FIB_SOURCE_SPECIAL); + fib_table_entry_delete(0, &pfx_0_0_0_0_s_0, FIB_SOURCE_API); adj_unlock(ai_10_10_10_1); adj_unlock(ai_10_10_10_2); adj_unlock(ai_10_10_10_3); diff --git a/src/plugins/unittest/policer_test.c b/src/plugins/unittest/policer_test.c index 2b14bf687bf..41f769960a3 100644 --- a/src/plugins/unittest/policer_test.c +++ b/src/plugins/unittest/policer_test.c @@ -21,7 +21,7 @@ policer_test (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd_arg) { int policer_index, i; - uint rate_kbps, burst, num_pkts; + unsigned int rate_kbps, burst, num_pkts; double total_bytes, cpu_ticks_per_pkt, time = 0; double cpu_speed, cpu_ticks_per_byte; policer_result_e result, input_colour = POLICE_CONFORM; diff --git a/src/plugins/unittest/segment_manager_test.c b/src/plugins/unittest/segment_manager_test.c index a106470ee48..29da662e007 100644 --- 
a/src/plugins/unittest/segment_manager_test.c +++ b/src/plugins/unittest/segment_manager_test.c @@ -739,8 +739,11 @@ segment_manager_test (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd_arg) { int res = 0; + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; - vnet_session_enable_disable (vm, 1); + vnet_session_enable_disable (vm, &args); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c index b7627acc129..7702e817070 100644 --- a/src/plugins/unittest/session_test.c +++ b/src/plugins/unittest/session_test.c @@ -13,13 +13,11 @@ * limitations under the License. */ -#include <vnet/session/application_namespace.h> -#include <vnet/session/application_interface.h> +#include <arpa/inet.h> #include <vnet/session/application.h> #include <vnet/session/session.h> -#include <vnet/session/session_rules_table.h> -#include <vnet/tcp/tcp.h> #include <sys/epoll.h> +#include <vnet/session/session_rules_table.h> #define SESSION_TEST_I(_cond, _comment, _args...) 
\ ({ \ @@ -133,7 +131,8 @@ session_create_lookpback (u32 table_id, u32 * sw_if_index, if (table_id != 0) { - ip_table_create (FIB_PROTOCOL_IP4, table_id, 0, 0); + ip_table_create (FIB_PROTOCOL_IP4, table_id, 0 /* is_api */, + 1 /* create_mfib */, 0); ip_table_bind (FIB_PROTOCOL_IP4, *sw_if_index, table_id); } @@ -774,10 +773,37 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) return 0; } +static void +session_test_disable_rt_backend_engine (vlib_main_t *vm) +{ + session_enable_disable_args_t args = { .is_en = 0, + .rt_engine_type = + RT_BACKEND_ENGINE_DISABLE }; + vnet_session_enable_disable (vm, &args); +} + +static void +session_test_enable_rule_table_engine (vlib_main_t *vm) +{ + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, &args); +} + +static void +session_test_enable_sdl_engine (vlib_main_t *vm) +{ + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_SDL }; + vnet_session_enable_disable (vm, &args); +} + static int session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) { - session_rules_table_t _srt, *srt = &_srt; + session_table_t *st = session_table_alloc (); u16 lcl_port = 1234, rmt_port = 4321; u32 action_index = 1, res; ip4_address_t lcl_lkup, rmt_lkup; @@ -795,8 +821,13 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) } } - clib_memset (srt, 0, sizeof (*srt)); - session_rules_table_init (srt); + session_test_disable_rt_backend_engine (vm); + session_test_enable_rule_table_engine (vm); + + session_table_init (st, FIB_PROTOCOL_MAX); + vec_add1 (st->appns_index, + app_namespace_index (app_namespace_get_default ())); + session_rules_table_init (st, FIB_PROTOCOL_MAX); ip4_address_t lcl_ip = { .as_u32 = clib_host_to_net_u32 (0x01020304), @@ -835,12 +866,13 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) .action_index = action_index++, .is_add = 
1, }; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 1.2.3.4/16 1234 5.6.7.8/16 4321 action %d", action_index - 1); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 1), "Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 1: %d", res); @@ -851,13 +883,15 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.lcl.fp_addr.ip4 = lcl_ip; args.lcl.fp_len = 24; args.action_index = action_index++; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 1.2.3.4/24 1234 5.6.7.8/16 4321 action %d", action_index - 1); args.rmt.fp_addr.ip4 = rmt_ip; args.rmt.fp_len = 24; args.action_index = action_index++; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 1.2.3.4/24 1234 5.6.7.8/24 4321 action %d", action_index - 1); @@ -869,13 +903,15 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.rmt.fp_addr.ip4 = rmt_ip2; args.rmt.fp_len = 16; args.action_index = action_index++; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 2.2.2.2/24 1234 6.6.6.6/16 4321 action %d", action_index - 1); args.lcl.fp_addr.ip4 = lcl_ip3; args.rmt.fp_addr.ip4 = rmt_ip3; args.action_index = action_index++; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 3.3.3.3/24 1234 7.7.7.7/16 4321 
action %d", action_index - 1); @@ -885,7 +921,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.lcl.fp_addr.ip4 = lcl_ip3; args.rmt.fp_addr.ip4 = rmt_ip3; args.action_index = action_index++; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "overwrite 3.3.3.3/24 1234 7.7.7.7/16 4321 " "action %d", action_index - 1); @@ -893,23 +930,22 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) * Lookup 1.2.3.4/32 1234 5.6.7.8/32 4321, 1.2.2.4/32 1234 5.6.7.9/32 4321 * and 3.3.3.3 1234 7.7.7.7 4321 */ - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 3), "Lookup 1.2.3.4 1234 5.6.7.8 4321 action " "should be 3: %d", res); lcl_lkup.as_u32 = clib_host_to_net_u32 (0x01020204); rmt_lkup.as_u32 = clib_host_to_net_u32 (0x05060709); - res = - session_rules_table_lookup4 (srt, &lcl_lkup, - &rmt_lkup, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_lkup, &rmt_lkup, lcl_port, rmt_port); SESSION_TEST ((res == 1), "Lookup 1.2.2.4 1234 5.6.7.9 4321, action " "should be 1: %d", res); - res = - session_rules_table_lookup4 (srt, &lcl_ip3, &rmt_ip3, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip3, &rmt_ip3, lcl_port, rmt_port); SESSION_TEST ((res == 6), "Lookup 3.3.3.3 1234 7.7.7.7 4321, action " "should be 6 (updated): %d", res); @@ -925,17 +961,17 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.lcl_port = 0; args.rmt_port = 0; args.action_index = action_index++; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error 
== 0), "Add 1.2.3.4/24 * 5.6.7.8/24 * action %d", action_index - 1); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 7), "Lookup 1.2.3.4 1234 5.6.7.8 4321, action should" " be 7 (lpm dst): %d", res); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, - lcl_port + 1, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port + 1, rmt_port); SESSION_TEST ((res == 7), "Lookup 1.2.3.4 1235 5.6.7.8 4321, action should " "be 7: %d", res); @@ -947,7 +983,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) * 1.2.3.4 1235 5.6.7.8 4322 */ args.is_add = 0; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Del 1.2.3.4/24 * 5.6.7.8/24 *"); args.lcl.fp_addr.ip4 = lcl_ip; @@ -958,7 +995,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.rmt_port = 0; args.action_index = action_index++; args.is_add = 1; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 1.2.3.4/16 * 5.6.7.8/16 * action %d", action_index - 1); @@ -970,27 +1008,28 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.rmt_port = rmt_port; args.action_index = action_index++; args.is_add = 1; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Add 1.2.3.4/24 1235 5.6.7.8/24 4321 action %d", action_index - 1); if (verbose) - session_rules_table_cli_dump (vm, srt, FIB_PROTOCOL_IP4); + session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP, + 
FIB_PROTOCOL_IP4); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 3), "Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 3: %d", res); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, - lcl_port + 1, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port + 1, rmt_port); SESSION_TEST ((res == 9), "Lookup 1.2.3.4 1235 5.6.7.8 4321, action should " "be 9: %d", res); res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, - lcl_port + 1, rmt_port + 1); + session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, &lcl_ip, + &rmt_ip, lcl_port + 1, rmt_port + 1); SESSION_TEST ((res == 8), "Lookup 1.2.3.4 1235 5.6.7.8 4322, action should " "be 8: %d", res); @@ -1004,10 +1043,11 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.lcl.fp_len = 16; args.rmt.fp_len = 16; args.is_add = 0; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Del 1.2.0.0/16 1234 5.6.0.0/16 4321"); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 3), "Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 3: %d", res); @@ -1015,10 +1055,11 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.lcl_port = 0; args.rmt_port = 0; args.is_add = 0; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Del 1.2.0.0/16 * 5.6.0.0/16 *"); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, 
rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 3), "Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 3: %d", res); @@ -1033,12 +1074,15 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) args.lcl_port = 1234; args.rmt_port = 4321; args.is_add = 0; - error = session_rules_table_add_del (srt, &args); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); SESSION_TEST ((error == 0), "Del 1.2.3.4/24 1234 5.6.7.5/24"); - res = - session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); SESSION_TEST ((res == 2), "Action should be 2: %d", res); + session_table_free (st, FIB_PROTOCOL_MAX); + return 0; } @@ -1074,6 +1118,9 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) } } + session_test_disable_rt_backend_engine (vm); + session_test_enable_rule_table_engine (vm); + server_sep.is_ip4 = 1; server_sep.port = placeholder_port; clib_memset (options, 0, sizeof (options)); @@ -2073,13 +2120,330 @@ session_test_mq_basic (vlib_main_t * vm, unformat_input_t * input) return 0; } +static f32 +session_get_memory_usage (void) +{ + clib_mem_heap_t *heap = clib_mem_get_per_cpu_heap (); + u8 *s = 0; + char *ss; + f32 used = 0.0; + + s = format (s, "%U\n", format_clib_mem_heap, heap, 0); + ss = strstr ((char *) s, "used:"); + if (ss) + sscanf (ss, "used: %f", &used); + else + clib_warning ("substring 'used:' not found from show memory"); + vec_free (s); + return (used); +} + +static int +session_test_enable_disable (vlib_main_t *vm, unformat_input_t *input) +{ + u32 iteration = 100, i; + uword was_enabled; + f32 was_using, now_using; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "repeat %d", &iteration)) + ; + else + { + vlib_cli_output 
(vm, "parse error: '%U'", format_unformat_error, + input); + return -1; + } + } + + was_enabled = clib_mem_trace_enable_disable (0); + /* warm up */ + for (i = 0; i < 10; i++) + { + session_test_disable_rt_backend_engine (vm); + session_test_enable_sdl_engine (vm); + session_test_disable_rt_backend_engine (vm); + session_test_enable_rule_table_engine (vm); + } + was_using = session_get_memory_usage (); + + for (i = 0; i < iteration; i++) + { + session_test_disable_rt_backend_engine (vm); + session_test_enable_sdl_engine (vm); + session_test_disable_rt_backend_engine (vm); + session_test_enable_rule_table_engine (vm); + } + now_using = session_get_memory_usage (); + + clib_mem_trace_enable_disable (was_enabled); + SESSION_TEST ((was_using == now_using), "was using %.2fM, now using %.2fM", + was_using, now_using); + + return 0; +} + +static int +session_test_sdl (vlib_main_t *vm, unformat_input_t *input) +{ + session_table_t *st = session_table_alloc (); + u16 lcl_port = 0, rmt_port = 0; + u32 action_index = 1, res; + int verbose = 0, error; + ip4_address_t rmt_ip; + const char ip_str_1234[] = "1.2.3.4"; + inet_pton (AF_INET, ip_str_1234, &rmt_ip); + ip4_address_t lcl_ip = { + .as_u32 = clib_host_to_net_u32 (0x0), + }; + ip6_address_t lcl_ip6 = { + .as_u64 = { 0, 0 }, + }; + fib_prefix_t rmt_pref = { + .fp_addr.ip4.as_u32 = rmt_ip.as_u32, + .fp_len = 16, + .fp_proto = FIB_PROTOCOL_IP4, + }; + fib_prefix_t lcl_pref = { + .fp_addr.ip4.as_u32 = lcl_ip.as_u32, + .fp_len = 0, + .fp_proto = 0, + }; + session_rule_table_add_del_args_t args = { + .lcl = lcl_pref, + .rmt = rmt_pref, + .lcl_port = lcl_port, + .rmt_port = rmt_port, + .action_index = action_index++, + .is_add = 1, + }; + const char ip_str_1200[] = "1.2.0.0"; + const char ip_str_1230[] = "1.2.3.0"; + const char ip_str_1111[] = "1.1.1.1"; + const char ip6_str[] = "2501:0db8:85a3:0000:0000:8a2e:0371:1"; + const char ip6_str2[] = "2501:0db8:85a3:0000:0000:8a2e:0372:1"; + + while (unformat_check_input (input) != 
UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + { + vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, + input); + return -1; + } + } + + session_test_disable_rt_backend_engine (vm); + session_test_enable_sdl_engine (vm); + + session_table_init (st, FIB_PROTOCOL_MAX); + vec_add1 (st->appns_index, + app_namespace_index (app_namespace_get_default ())); + session_rules_table_init (st, FIB_PROTOCOL_MAX); + + /* Add 1.2.0.0/16 */ + args.rmt.fp_len = 16; + inet_pton (AF_INET, ip_str_1200, &args.rmt.fp_addr.ip4.as_u32); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == 0), "Add %s/%d action %d", ip_str_1200, + args.rmt.fp_len, action_index - 1); + + /* Lookup 1.2.3.4 */ + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); + SESSION_TEST ((res == action_index - 1), + "Lookup %s, action should " + "be 1: %d", + ip_str_1234, action_index - 1); + + /* + * Add 1.2.3.0/24 + */ + args.rmt.fp_len = 24; + inet_pton (AF_INET, ip_str_1230, &args.rmt.fp_addr.ip4.as_u32); + args.action_index = action_index++; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == 0), "Add %s/%d action %d", ip_str_1230, + args.rmt.fp_len, action_index - 1); + + /* Lookup 1.2.3.4 */ + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); + SESSION_TEST ((res == action_index - 1), + "Lookup %s, action should " + "be 2: %d", + ip_str_1234, action_index - 1); + + /* look up 1.1.1.1, should be -1 (invalid index) */ + inet_pton (AF_INET, ip_str_1111, &rmt_ip); + res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip, &rmt_ip, lcl_port, rmt_port); + SESSION_TEST ((res == SESSION_TABLE_INVALID_INDEX), + "Lookup %s, action should " + "be -1: %d", + ip_str_1111, res); + + /* Add again 
1.2.0.0/16, should be rejected */ + args.rmt.fp_len = 16; + inet_pton (AF_INET, ip_str_1200, &args.rmt.fp_addr.ip4.as_u32); + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == SESSION_E_IPINUSE), "Add %s/%d action %d", + ip_str_1200, args.rmt.fp_len, error); + /* + * Add 0.0.0.0/0, should get an error + */ + args.rmt.fp_len = 0; + args.rmt.fp_addr.ip4.as_u32 = 0; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == SESSION_E_IPINUSE), "Add 0.0.0.0/%d action %d", + args.rmt.fp_len, error); + + /* delete 0.0.0.0 should be rejected */ + args.is_add = 0; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == SESSION_E_NOROUTE), "Del 0.0.0.0/%d action %d", + args.rmt.fp_len, error); + if (verbose) + session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP, + FIB_PROTOCOL_IP4); + + /* + * Clean up + * Delete 1.2.0.0/16 + * Delete 1.2.3.0/24 + */ + inet_pton (AF_INET, ip_str_1200, &args.rmt.fp_addr.ip4.as_u32); + args.rmt.fp_len = 16; + args.is_add = 0; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == 0), "Del %s/%d should 0: %d", ip_str_1200, + args.rmt.fp_len, error); + + inet_pton (AF_INET, ip_str_1230, &args.rmt.fp_addr.ip4.as_u32); + args.rmt.fp_len = 24; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == 0), "Del %s/%d, should be 0: %d", ip_str_1230, + args.rmt.fp_len, error); + if (verbose) + session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP, + FIB_PROTOCOL_IP4); + + /* ip6 tests */ + + /* + * Add ip6 2001:0db8:85a3:0000:0000:8a2e:0371:1/124 + */ + ip6_address_t lcl_lkup; + inet_pton (AF_INET6, ip6_str, &args.rmt.fp_addr.ip6); + args.rmt.fp_len = 124; + args.rmt.fp_proto = FIB_PROTOCOL_IP6; + args.action_index = action_index++; + 
args.is_add = 1; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == 0), "Add %s/%d action %d", ip6_str, args.rmt.fp_len, + action_index - 1); + if (verbose) + session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP, + FIB_PROTOCOL_IP6); + + /* Lookup 2001:0db8:85a3:0000:0000:8a2e:0371:1 */ + res = session_rules_table_lookup6 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip6, &args.rmt.fp_addr.ip6, lcl_port, + rmt_port); + SESSION_TEST ((res == action_index - 1), + "Lookup %s action should " + "be 3: %d", + ip6_str, action_index - 1); + + /* Lookup 2001:0db8:85a3:0000:0000:8a2e:0372:1 */ + inet_pton (AF_INET6, ip6_str2, &lcl_lkup); + res = session_rules_table_lookup6 (st->srtg_handle, TRANSPORT_PROTO_TCP, + &lcl_ip6, &lcl_lkup, lcl_port, rmt_port); + SESSION_TEST ((res == SESSION_TABLE_INVALID_INDEX), + "Lookup %s action should " + "be -1: %d", + ip6_str2, res); + + /* + * del ip6 2001:0db8:85a3:0000:0000:8a2e:0371:1/124 + */ + args.is_add = 0; + args.rmt.fp_len = 124; + error = + session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args); + SESSION_TEST ((error == 0), "del %s/%d, should be 0: %d", ip6_str, + args.rmt.fp_len, error); + if (verbose) + session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP, + FIB_PROTOCOL_IP6); + + session_table_free (st, FIB_PROTOCOL_MAX); + + return 0; +} + +static int +session_test_ext_cfg (vlib_main_t *vm, unformat_input_t *input) +{ + session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; + transport_endpt_ext_cfg_t *ext_cfg; + + ext_cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_HTTP, + sizeof (ext_cfg->opaque)); + ext_cfg->opaque = 60; + + ext_cfg = + session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = 1; + + ext_cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_NONE, + sizeof (ext_cfg->opaque)); 
+ ext_cfg->opaque = 345; + + ext_cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); + SESSION_TEST ((ext_cfg != 0), + "TRANSPORT_ENDPT_EXT_CFG_HTTP should be present"); + SESSION_TEST ((ext_cfg->opaque == 60), + "TRANSPORT_ENDPT_EXT_CFG_HTTP opaque value should be 60: %u", + ext_cfg->opaque); + ext_cfg = + session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + SESSION_TEST ((ext_cfg != 0), + "TRANSPORT_ENDPT_EXT_CFG_CRYPTO should be present"); + SESSION_TEST ( + (ext_cfg->crypto.ckpair_index == 1), + "TRANSPORT_ENDPT_EXT_CFG_HTTP ckpair_index value should be 1: %u", + ext_cfg->crypto.ckpair_index); + ext_cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_NONE); + SESSION_TEST ((ext_cfg != 0), + "TRANSPORT_ENDPT_EXT_CFG_NONE should be present"); + SESSION_TEST ((ext_cfg->opaque == 345), + "TRANSPORT_ENDPT_EXT_CFG_HTTP opaque value should be 345: %u", + ext_cfg->opaque); + session_endpoint_free_ext_cfgs (&sep); + + return 0; +} + static clib_error_t * session_test (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd_arg) { int res = 0; - vnet_session_enable_disable (vm, 1); + session_test_enable_rule_table_engine (vm); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -2099,6 +2463,12 @@ session_test (vlib_main_t * vm, res = session_test_mq_speed (vm, input); else if (unformat (input, "mq-basic")) res = session_test_mq_basic (vm, input); + else if (unformat (input, "enable-disable")) + res = session_test_enable_disable (vm, input); + else if (unformat (input, "sdl")) + res = session_test_sdl (vm, input); + else if (unformat (input, "ext-cfg")) + res = session_test_ext_cfg (vm, input); else if (unformat (input, "all")) { if ((res = session_test_basic (vm, input))) @@ -2117,6 +2487,12 @@ session_test (vlib_main_t * vm, goto done; if ((res = session_test_mq_basic (vm, input))) goto done; + if ((res = session_test_sdl (vm, input))) + goto done; + if ((res = session_test_ext_cfg (vm, 
input))) + goto done; + if ((res = session_test_enable_disable (vm, input))) + goto done; } else break; diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 34033a0b622..bd39474ce93 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -1550,8 +1550,11 @@ tcp_test (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd_arg) { int res = 0; + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; - vnet_session_enable_disable (vm, 1); + vnet_session_enable_disable (vm, &args); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/plugins/unittest/util_test.c b/src/plugins/unittest/util_test.c index 53384e55494..5b7e30bc21f 100644 --- a/src/plugins/unittest/util_test.c +++ b/src/plugins/unittest/util_test.c @@ -101,6 +101,36 @@ VLIB_CLI_COMMAND (test_hash_command, static) = .function = test_hash_command_fn, }; +static void * +leak_memory_fn (void *args) +{ + u8 *p = 0; + vec_validate (p, 100); + p = 0; + return 0; +} + +static clib_error_t * +test_mem_leak_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + /* do memory leak from thread, so no 'unix_cli' in traceback */ + pthread_t thread; + int rv = pthread_create (&thread, NULL, leak_memory_fn, 0); + if (rv) + { + return clib_error_return (0, "pthread_create failed"); + } + + return 0; +} + +VLIB_CLI_COMMAND (test_mem_leak_command, static) = { + .path = "test mem-leak", + .short_help = "leak some memory", + .function = test_mem_leak_command_fn, +}; + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/urpf/CMakeLists.txt b/src/plugins/urpf/CMakeLists.txt index 2f44e3b2344..f665d30b0bb 100644 --- a/src/plugins/urpf/CMakeLists.txt +++ b/src/plugins/urpf/CMakeLists.txt @@ -22,6 +22,10 @@ add_vpp_plugin(urpf ip4_urpf.c ip6_urpf.c + INSTALL_HEADERS + urpf_dp.h + urpf.h + API_FILES urpf.api ) diff --git 
a/src/plugins/urpf/urpf.c b/src/plugins/urpf/urpf.c index e5209caafb4..1e7d6c0fb91 100644 --- a/src/plugins/urpf/urpf.c +++ b/src/plugins/urpf/urpf.c @@ -60,7 +60,17 @@ static const char *urpf_feats[N_AF][VLIB_N_DIR][URPF_N_MODES] = urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR]; u8 * -format_urpf_mode (u8 * s, va_list * a) +format_urpf_trace (u8 *s, va_list *va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + urpf_trace_t *t = va_arg (*va, urpf_trace_t *); + + return format (s, "uRPF:%d fib:%d", t->urpf, t->fib_index); +} + +__clib_export u8 * +format_urpf_mode (u8 *s, va_list *a) { urpf_mode_t mode = va_arg (*a, int); @@ -76,8 +86,8 @@ format_urpf_mode (u8 * s, va_list * a) return (format (s, "unknown")); } -static uword -unformat_urpf_mode (unformat_input_t * input, va_list * args) +__clib_export uword +unformat_urpf_mode (unformat_input_t *input, va_list *args) { urpf_mode_t *mode = va_arg (*args, urpf_mode_t *); @@ -94,7 +104,16 @@ unformat_urpf_mode (unformat_input_t * input, va_list * args) return 0; } -int +__clib_export int +urpf_feature_enable_disable (ip_address_family_t af, vlib_dir_t dir, + urpf_mode_t mode, u32 sw_if_index, int enable) +{ + return vnet_feature_enable_disable (urpf_feat_arcs[af][dir], + urpf_feats[af][dir][mode], sw_if_index, + enable, 0, 0); +} + +__clib_export int urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af, vlib_dir_t dir, u32 table_id) { diff --git a/src/plugins/urpf/urpf.h b/src/plugins/urpf/urpf.h index 6983a2b440c..a40a25df16b 100644 --- a/src/plugins/urpf/urpf.h +++ b/src/plugins/urpf/urpf.h @@ -32,7 +32,15 @@ typedef enum urpf_mode_t_ #define URPF_N_MODES (URPF_MODE_STRICT+1) -extern u8 *format_urpf_mode (u8 * s, va_list * a); +typedef struct +{ + index_t urpf; + u32 fib_index; +} urpf_trace_t; + +u8 *format_urpf_trace (u8 *s, va_list *va); +u8 *format_urpf_mode (u8 *s, va_list *a); +uword unformat_urpf_mode 
(unformat_input_t *input, va_list *args); typedef struct { @@ -43,8 +51,8 @@ typedef struct extern urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR]; -extern int urpf_update (urpf_mode_t mode, u32 sw_if_index, - ip_address_family_t af, vlib_dir_t dir, u32 table_id); +int urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af, + vlib_dir_t dir, u32 table_id); #endif diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h index 816d8b70b90..b17fed7e04b 100644 --- a/src/plugins/urpf/urpf_dp.h +++ b/src/plugins/urpf/urpf_dp.h @@ -53,22 +53,6 @@ * * This file contains the interface unicast source check. */ -typedef struct -{ - index_t urpf; -} urpf_trace_t; - -static u8 * -format_urpf_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - urpf_trace_t *t = va_arg (*va, urpf_trace_t *); - - s = format (s, "uRPF:%d", t->urpf); - - return s; -} #define foreach_urpf_error \ _(DROP, "uRPF Drop") \ @@ -87,10 +71,157 @@ typedef enum URPF_N_NEXT, } urpf_next_t; +static_always_inline u32 +urpf_get_fib_index (vlib_buffer_t *b, ip_address_family_t af, vlib_dir_t dir) +{ + u32 sw_if_index = vnet_buffer (b)->sw_if_index[dir]; + return vec_elt (urpf_cfgs[af][dir], sw_if_index).fib_index; +} + +static_always_inline void +urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir, + urpf_mode_t mode, vlib_buffer_t *b, const u8 *h, + u32 fib_index, load_balance_t **lb, u32 *pass) +{ + load_balance_t *llb; + u32 lpass; + u32 lb_index; + + ASSERT (fib_index != ~0); + + if (AF_IP4 == af) + { + const ip4_header_t *ip; + + ip = (ip4_header_t *) h; + + lb_index = ip4_fib_forwarding_lookup (fib_index, &ip->src_address); + + /* Pass multicast. 
*/ + lpass = (ip4_address_is_multicast (&ip->src_address) || + ip4_address_is_global_broadcast (&ip->src_address)); + } + else + { + const ip6_header_t *ip; + + ip = (ip6_header_t *) h; + + lb_index = ip6_fib_table_fwding_lookup (fib_index, &ip->src_address); + lpass = ip6_address_is_multicast (&ip->src_address); + } + + llb = load_balance_get (lb_index); + + if (URPF_MODE_STRICT == mode) + { + int res; + + res = fib_urpf_check (llb->lb_urpf, vnet_buffer (b)->sw_if_index[dir]); + if (VLIB_RX == dir) + lpass |= res; + else + { + lpass |= !res && fib_urpf_check_size (llb->lb_urpf); + lpass |= b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + } + else + lpass |= fib_urpf_check_size (llb->lb_urpf); + + *lb = llb; + *pass = lpass; +} + +static_always_inline void +urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir, + urpf_mode_t mode, vlib_buffer_t *b0, vlib_buffer_t *b1, + const u8 *h0, const u8 *h1, u32 fib_index0, + u32 fib_index1, load_balance_t **lb0, + load_balance_t **lb1, u32 *pass0, u32 *pass1) +{ + load_balance_t *llb0, *llb1; + u32 lpass0, lpass1; + u32 lb_index0, lb_index1; + + ASSERT (fib_index0 != ~0); + ASSERT (fib_index1 != ~0); + + if (AF_IP4 == af) + { + const ip4_header_t *ip0, *ip1; + + ip0 = (ip4_header_t *) h0; + ip1 = (ip4_header_t *) h1; + + ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, &ip0->src_address, + &ip1->src_address, &lb_index0, &lb_index1); + /* Pass multicast. 
*/ + lpass0 = (ip4_address_is_multicast (&ip0->src_address) || + ip4_address_is_global_broadcast (&ip0->src_address)); + lpass1 = (ip4_address_is_multicast (&ip1->src_address) || + ip4_address_is_global_broadcast (&ip1->src_address)); + } + else + { + const ip6_header_t *ip0, *ip1; + + ip0 = (ip6_header_t *) h0; + ip1 = (ip6_header_t *) h1; + + lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, &ip0->src_address); + lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, &ip1->src_address); + lpass0 = ip6_address_is_multicast (&ip0->src_address); + lpass1 = ip6_address_is_multicast (&ip1->src_address); + } + + llb0 = load_balance_get (lb_index0); + llb1 = load_balance_get (lb_index1); + + if (URPF_MODE_STRICT == mode) + { + /* for RX the check is: would this source adddress be + * forwarded out of the interface on which it was recieved, + * if yes allow. For TX it's; would this source address be + * forwarded out of the interface through which it is being + * sent, if yes drop. + */ + int res0, res1; + + res0 = + fib_urpf_check (llb0->lb_urpf, vnet_buffer (b0)->sw_if_index[dir]); + res1 = + fib_urpf_check (llb1->lb_urpf, vnet_buffer (b1)->sw_if_index[dir]); + + if (VLIB_RX == dir) + { + lpass0 |= res0; + lpass1 |= res1; + } + else + { + lpass0 |= !res0 && fib_urpf_check_size (llb0->lb_urpf); + lpass1 |= !res1 && fib_urpf_check_size (llb1->lb_urpf); + + /* allow locally generated */ + lpass0 |= b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + lpass1 |= b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + } + else + { + lpass0 |= fib_urpf_check_size (llb0->lb_urpf); + lpass1 |= fib_urpf_check_size (llb1->lb_urpf); + } + + *lb0 = llb0; + *lb1 = llb1; + *pass0 = lpass0; + *pass1 = lpass1; +} + static_always_inline uword -urpf_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, +urpf_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, ip_address_family_t af, vlib_dir_t dir, urpf_mode_t mode) { vlib_buffer_t 
*bufs[VLIB_FRAME_SIZE], **b; @@ -106,8 +237,8 @@ urpf_inline (vlib_main_t * vm, while (n_left >= 4) { - u32 pass0, lb_index0, pass1, lb_index1; - const load_balance_t *lb0, *lb1; + u32 pass0, pass1; + load_balance_t *lb0 = 0, *lb1 = 0; u32 fib_index0, fib_index1; const u8 *h0, *h1; @@ -121,87 +252,32 @@ urpf_inline (vlib_main_t * vm, h0 = (u8 *) vlib_buffer_get_current (b[0]); h1 = (u8 *) vlib_buffer_get_current (b[1]); - if (VLIB_TX == dir) { h0 += vnet_buffer (b[0])->ip.save_rewrite_length; h1 += vnet_buffer (b[1])->ip.save_rewrite_length; } - fib_index0 = - urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index; - fib_index1 = - urpf_cfgs[af][dir][vnet_buffer (b[1])->sw_if_index[dir]].fib_index; + fib_index0 = urpf_get_fib_index (b[0], af, dir); + fib_index1 = urpf_get_fib_index (b[1], af, dir); + urpf_perform_check_x2 (af, dir, mode, b[0], b[1], h0, h1, fib_index0, + fib_index1, &lb0, &lb1, &pass0, &pass1); - if (AF_IP4 == af) - { - const ip4_header_t *ip0, *ip1; - - ip0 = (ip4_header_t *) h0; - ip1 = (ip4_header_t *) h1; - - ip4_fib_forwarding_lookup_x2 (fib_index0, - fib_index1, - &ip0->src_address, - &ip1->src_address, - &lb_index0, &lb_index1); - /* Pass multicast. 
*/ - pass0 = (ip4_address_is_multicast (&ip0->src_address) || - ip4_address_is_global_broadcast (&ip0->src_address)); - pass1 = (ip4_address_is_multicast (&ip1->src_address) || - ip4_address_is_global_broadcast (&ip1->src_address)); - } - else + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) { - const ip6_header_t *ip0, *ip1; - - ip0 = (ip6_header_t *) h0; - ip1 = (ip6_header_t *) h1; - - lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, - &ip0->src_address); - lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, - &ip1->src_address); - pass0 = ip6_address_is_multicast (&ip0->src_address); - pass1 = ip6_address_is_multicast (&ip1->src_address); - } - - lb0 = load_balance_get (lb_index0); - lb1 = load_balance_get (lb_index1); + urpf_trace_t *t; - if (URPF_MODE_STRICT == mode) - { - /* for RX the check is: would this source adddress be forwarded - * out of the interface on which it was recieved, if yes allow. - * For TX it's; would this source address be forwarded out of the - * interface through which it is being sent, if yes drop. - */ - int res0, res1; - - res0 = fib_urpf_check (lb0->lb_urpf, - vnet_buffer (b[0])->sw_if_index[dir]); - res1 = fib_urpf_check (lb1->lb_urpf, - vnet_buffer (b[1])->sw_if_index[dir]); - - if (VLIB_RX == dir) - { - pass0 |= res0; - pass1 |= res1; - } - else - { - pass0 |= !res0 && fib_urpf_check_size (lb0->lb_urpf); - pass1 |= !res1 && fib_urpf_check_size (lb1->lb_urpf); - - /* allow locally generated */ - pass0 |= b[0]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; - pass1 |= b[1]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; - } + t = vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->urpf = lb0 ? lb0->lb_urpf : ~0; + t->fib_index = fib_index0; } - else + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) { - pass0 |= fib_urpf_check_size (lb0->lb_urpf); - pass1 |= fib_urpf_check_size (lb1->lb_urpf); + urpf_trace_t *t; + + t = vlib_add_trace (vm, node, b[1], sizeof (*t)); + t->urpf = lb1 ? 
lb1->lb_urpf : ~0; + t->fib_index = fib_index1; } if (PREDICT_TRUE (pass0)) @@ -218,22 +294,6 @@ urpf_inline (vlib_main_t * vm, next[1] = URPF_NEXT_DROP; b[1]->error = node->errors[URPF_ERROR_DROP]; } - - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - urpf_trace_t *t; - - t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->urpf = lb0->lb_urpf; - } - if (b[1]->flags & VLIB_BUFFER_IS_TRACED) - { - urpf_trace_t *t; - - t = vlib_add_trace (vm, node, b[1], sizeof (*t)); - t->urpf = lb1->lb_urpf; - } - b += 2; next += 2; n_left -= 2; @@ -241,8 +301,8 @@ urpf_inline (vlib_main_t * vm, while (n_left) { - u32 pass0, lb_index0, fib_index0; - const load_balance_t *lb0; + u32 pass0, fib_index0; + load_balance_t *lb0 = 0; const u8 *h0; h0 = (u8 *) vlib_buffer_get_current (b[0]); @@ -250,51 +310,18 @@ urpf_inline (vlib_main_t * vm, if (VLIB_TX == dir) h0 += vnet_buffer (b[0])->ip.save_rewrite_length; - fib_index0 = - urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index; - - if (AF_IP4 == af) - { - const ip4_header_t *ip0; - - ip0 = (ip4_header_t *) h0; - - lb_index0 = ip4_fib_forwarding_lookup (fib_index0, - &ip0->src_address); + fib_index0 = urpf_get_fib_index (b[0], af, dir); + urpf_perform_check_x1 (af, dir, mode, b[0], h0, fib_index0, &lb0, + &pass0); - /* Pass multicast. 
*/ - pass0 = (ip4_address_is_multicast (&ip0->src_address) || - ip4_address_is_global_broadcast (&ip0->src_address)); - } - else + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) { - const ip6_header_t *ip0; - - ip0 = (ip6_header_t *) h0; - - lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, - &ip0->src_address); - pass0 = ip6_address_is_multicast (&ip0->src_address); - } - - lb0 = load_balance_get (lb_index0); + urpf_trace_t *t; - if (URPF_MODE_STRICT == mode) - { - int res0; - - res0 = fib_urpf_check (lb0->lb_urpf, - vnet_buffer (b[0])->sw_if_index[dir]); - if (VLIB_RX == dir) - pass0 |= res0; - else - { - pass0 |= !res0 && fib_urpf_check_size (lb0->lb_urpf); - pass0 |= b[0]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; - } + t = vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->urpf = lb0 ? lb0->lb_urpf : ~0; + t->fib_index = fib_index0; } - else - pass0 |= fib_urpf_check_size (lb0->lb_urpf); if (PREDICT_TRUE (pass0)) vnet_feature_next_u16 (&next[0], b[0]); @@ -303,14 +330,6 @@ urpf_inline (vlib_main_t * vm, next[0] = URPF_NEXT_DROP; b[0]->error = node->errors[URPF_ERROR_DROP]; } - - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - urpf_trace_t *t; - - t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->urpf = lb0->lb_urpf; - } b++; next++; n_left--; diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c index 0dd7908d2e2..ad644ff6cb8 100644 --- a/src/plugins/wireguard/wireguard_chachapoly.c +++ b/src/plugins/wireguard/wireguard_chachapoly.c @@ -72,11 +72,11 @@ wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst, u64 h_nonce; clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce)); - h_nonce = le64toh (h_nonce); + h_nonce = clib_little_to_host_u64 (h_nonce); hchacha20 (derived_key, nonce, key); for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++) - (derived_key[i]) = htole32 ((derived_key[i])); + (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i])); uint32_t key_idx; @@ 
-102,11 +102,11 @@ wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst, u64 h_nonce; clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce)); - h_nonce = le64toh (h_nonce); + h_nonce = clib_little_to_host_u64 (h_nonce); hchacha20 (derived_key, nonce, key); for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++) - (derived_key[i]) = htole32 ((derived_key[i])); + (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i])); uint32_t key_idx; diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c index 5fe2e44b03b..c3f28f442f5 100644 --- a/src/plugins/wireguard/wireguard_noise.c +++ b/src/plugins/wireguard/wireguard_noise.c @@ -751,8 +751,8 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN]) unix_nanosec &= REJECT_INTERVAL_MASK; /* https://cr.yp.to/libtai/tai64.html */ - sec = htobe64 (0x400000000000000aULL + unix_sec); - nsec = htobe32 (unix_nanosec); + sec = clib_host_to_big_u64 (0x400000000000000aULL + unix_sec); + nsec = clib_host_to_big_u32 (unix_nanosec); /* memcpy to output buffer, assuming output could be unaligned. */ clib_memcpy (output, &sec, sizeof (sec)); |