diff options
-rw-r--r-- | vnet/vnet/devices/dpdk/cli.c | 1027 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/device.c | 1311 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/dpdk.h | 329 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/dpdk_priv.h | 175 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/format.c | 539 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/init.c | 1621 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/node.c | 704 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/vhost_user.c | 2075 |
8 files changed, 4146 insertions, 3635 deletions
diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c index a40dcc7c9ad..f257a8bb892 100644 --- a/vnet/vnet/devices/dpdk/cli.c +++ b/vnet/vnet/devices/dpdk/cli.c @@ -27,199 +27,204 @@ static clib_error_t * pcap_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, vlib_cli_command_t * cmd) { - dpdk_main_t * dm = &dpdk_main; - u8 * filename; + dpdk_main_t *dm = &dpdk_main; + u8 *filename; u32 max; int matched = 0; - clib_error_t * error = 0; + clib_error_t *error = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "on")) - { - if (dm->tx_pcap_enable == 0) - { - if (dm->pcap_filename == 0) - dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); - - memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); - dm->pcap_main.file_name = (char *) dm->pcap_filename; - dm->pcap_main.n_packets_to_capture = 100; - if (dm->pcap_pkts_to_capture) - dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; - - dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; - dm->tx_pcap_enable = 1; - matched = 1; - vlib_cli_output (vm, "pcap tx capture on..."); - } - else - { - vlib_cli_output (vm, "pcap tx capture already on..."); - } - matched = 1; - } + { + if (dm->tx_pcap_enable == 0) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = 100; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + matched = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + else + { + vlib_cli_output (vm, "pcap tx capture already on..."); + } + matched = 1; + } else if (unformat (input, "off")) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, "captured %d pkts...", - dm->pcap_main.n_packets_captured+1); - if (dm->pcap_main.n_packets_captured) - { - dm->pcap_main.n_packets_to_capture = - dm->pcap_main.n_packets_captured; - error = pcap_write (&dm->pcap_main); - if (error) - clib_error_report (error); - else - vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); - } - } - else - { - vlib_cli_output (vm, "pcap tx capture already off..."); - } - - dm->tx_pcap_enable = 0; - matched = 1; - } + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + dm->pcap_main.n_packets_captured + 1); + if (dm->pcap_main.n_packets_captured) + { + dm->pcap_main.n_packets_to_capture = + dm->pcap_main.n_packets_captured; + error = pcap_write (&dm->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); + } + } + else + { + vlib_cli_output (vm, "pcap tx capture already off..."); + } + + dm->tx_pcap_enable = 0; + matched = 1; + } else if (unformat (input, "max %d", &max)) - { - dm->pcap_pkts_to_capture = max; - matched = 1; - } + { + dm->pcap_pkts_to_capture = max; + matched = 1; + } else if (unformat (input, "intfc %U", - unformat_vnet_sw_interface, dm->vnet_main, - &dm->pcap_sw_if_index)) - matched = 1; + unformat_vnet_sw_interface, dm->vnet_main, + &dm->pcap_sw_if_index)) + matched = 1; else if (unformat (input, "intfc any")) - { - dm->pcap_sw_if_index = 0; - matched = 1; - } + { + dm->pcap_sw_if_index = 0; + matched = 1; + } else if (unformat (input, "file %s", &filename)) - { - u8 * chroot_filename; - /* Brain-police user path input */ - if (strstr((char *)filename, "..") || index((char *)filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - continue; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - vec_free (filename); - - if (dm->pcap_filename) - vec_free (dm->pcap_filename); - vec_add1 (filename, 0); - dm->pcap_filename = chroot_filename; - matched = 1; - } + { + u8 *chroot_filename; + /* Brain-police user path input */ + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + continue; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free (filename); + + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (filename, 0); + dm->pcap_filename = chroot_filename; + matched = 1; + } else if (unformat (input, "status")) - { - if (dm->tx_pcap_enable == 0) - { - vlib_cli_output (vm, "pcap tx capture is off..."); - continue; - } - - vlib_cli_output (vm, "pcap tx capture: %d of %d pkts...", - dm->pcap_main.n_packets_captured, - dm->pcap_main.n_packets_to_capture); - matched = 1; - } + { + if (dm->tx_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap tx capture is off..."); + continue; + } + + vlib_cli_output (vm, "pcap tx capture: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + matched = 1; + } else - break; + break; } if (matched == 0) return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + format_unformat_error, input); return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (pcap_trace_command, static) = { .path = "pcap tx trace", .short_help = "pcap tx trace on off max <nn> intfc <intfc> file <name> status", .function = pcap_trace_command_fn, }; +/* *INDENT-ON* */ static clib_error_t * show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) + vlib_cli_command_t * cmd) { - struct rte_mempool * rmp; + struct rte_mempool *rmp; int i; - for(i = 0; i < vec_len(vm->buffer_main->pktmbuf_pools); i++) + for (i = 0; i < vec_len (vm->buffer_main->pktmbuf_pools); i++) { rmp = vm->buffer_main->pktmbuf_pools[i]; if (rmp) - { + { #if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 0) - unsigned count = rte_mempool_avail_count(rmp); - unsigned free_count = rte_mempool_in_use_count(rmp); + unsigned count = rte_mempool_avail_count (rmp); + unsigned free_count = rte_mempool_in_use_count (rmp); #else - unsigned count = rte_mempool_count(rmp); - unsigned free_count = rte_mempool_free_count(rmp); + unsigned count = rte_mempool_count (rmp); + unsigned free_count = rte_mempool_free_count (rmp); #endif - vlib_cli_output(vm, "name=\"%s\" available = %7d allocated = %7d total = %7d\n", - rmp->name, (u32)count, (u32)free_count, - (u32)(count+free_count)); - } + vlib_cli_output (vm, + "name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32) count, (u32) free_count, + (u32) (count + free_count)); + } else - { - vlib_cli_output(vm, "rte_mempool is NULL (!)\n"); - } + { + vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); + } } return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { .path = "show dpdk buffer", .short_help = "show dpdk buffer state", .function = show_dpdk_buffer, .is_mp_safe = 1, }; +/* *INDENT-ON* */ static clib_error_t * test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) + vlib_cli_command_t * cmd) { - static u32 * allocated_buffers; + static u32 *allocated_buffers; u32 n_alloc = 0; u32 n_free = 0; u32 first, actual_alloc; - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "allocate %d", &n_alloc)) - ; + ; else if (unformat (input, "free %d", &n_free)) - ; + ; else - break; + break; } if (n_free) { if (vec_len (allocated_buffers) < n_free) - return clib_error_return (0, "Can't free %d, only %d allocated", - n_free, vec_len (allocated_buffers)); + return clib_error_return (0, "Can't free %d, only %d allocated", + n_free, vec_len (allocated_buffers)); - first = vec_len(allocated_buffers) - n_free; + first = vec_len (allocated_buffers) - n_free; vlib_buffer_free (vm, allocated_buffers + first, n_free); _vec_len (allocated_buffers) = first; } @@ -227,186 +232,199 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, { first = vec_len (allocated_buffers); vec_validate (allocated_buffers, - vec_len (allocated_buffers) + n_alloc - 1); + vec_len (allocated_buffers) + n_alloc - 1); actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, - n_alloc); + n_alloc); _vec_len (allocated_buffers) = first + actual_alloc; if (actual_alloc < n_alloc) - vlib_cli_output (vm, "WARNING: only allocated %d buffers", - actual_alloc); + vlib_cli_output (vm, "WARNING: only allocated %d buffers", + actual_alloc); } vlib_cli_output (vm, "Currently %d buffers allocated", - vec_len (allocated_buffers)); + vec_len (allocated_buffers)); - if (allocated_buffers && vec_len(allocated_buffers) == 0) - vec_free(allocated_buffers); + if (allocated_buffers && vec_len (allocated_buffers) == 0) + vec_free (allocated_buffers); return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { .path = "test dpdk buffer", .short_help = "test dpdk buffer [allocate <nn>][free <nn>]", .function = test_dpdk_buffer, .is_mp_safe = 1, }; +/* *INDENT-ON* */ static void show_dpdk_device_stats (vlib_main_t * vm, dpdk_device_t * xd) { - vlib_cli_output(vm, - "device_index %d\n" - " last_burst_sz %d\n" - " max_burst_sz %d\n" - " full_frames_cnt %u\n" - " consec_full_frames_cnt %u\n" - " congestion_cnt %d\n" - " last_poll_time %llu\n" - " max_poll_delay %llu\n" - " discard_cnt %u\n" - " total_packet_cnt %u\n", - xd->device_index, - xd->efd_agent.last_burst_sz, - xd->efd_agent.max_burst_sz, - xd->efd_agent.full_frames_cnt, - xd->efd_agent.consec_full_frames_cnt, - xd->efd_agent.congestion_cnt, - xd->efd_agent.last_poll_time, - xd->efd_agent.max_poll_delay, - xd->efd_agent.discard_cnt, - xd->efd_agent.total_packet_cnt); - - u32 device_queue_sz = rte_eth_rx_queue_count(xd->device_index, - 0 /* queue_id */); - vlib_cli_output(vm, - " device_queue_sz %u\n", - device_queue_sz); + vlib_cli_output (vm, + "device_index %d\n" + " last_burst_sz %d\n" + " max_burst_sz %d\n" + " full_frames_cnt %u\n" + " consec_full_frames_cnt %u\n" + " congestion_cnt %d\n" + " last_poll_time %llu\n" + " max_poll_delay %llu\n" + " discard_cnt %u\n" + " total_packet_cnt %u\n", + xd->device_index, + xd->efd_agent.last_burst_sz, + xd->efd_agent.max_burst_sz, + xd->efd_agent.full_frames_cnt, + xd->efd_agent.consec_full_frames_cnt, + xd->efd_agent.congestion_cnt, + xd->efd_agent.last_poll_time, + xd->efd_agent.max_poll_delay, + xd->efd_agent.discard_cnt, xd->efd_agent.total_packet_cnt); + + u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index, + 0 /* queue_id */ ); + vlib_cli_output (vm, " device_queue_sz %u\n", device_queue_sz); } static void show_efd_config (vlib_main_t * vm) { - vlib_thread_main_t * tm = vlib_get_thread_main(); - dpdk_main_t * dm = &dpdk_main; - - vlib_cli_output(vm, - "dpdk: (0x%04x) enabled:%d monitor:%d drop_all:%d\n" - " dpdk_queue_hi_thresh %d\n" - " consec_full_frames_hi_thresh %d\n" - "---------\n" - "worker: (0x%04x) enabled:%d monitor:%d\n" - " worker_queue_hi_thresh %d\n", - dm->efd.enabled, - ((dm->efd.enabled & DPDK_EFD_DISCARD_ENABLED) ? 1:0), - ((dm->efd.enabled & DPDK_EFD_MONITOR_ENABLED) ? 1:0), - ((dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED) ? 1:0), - dm->efd.queue_hi_thresh, - dm->efd.consec_full_frames_hi_thresh, - tm->efd.enabled, - ((tm->efd.enabled & VLIB_EFD_DISCARD_ENABLED) ? 1:0), - ((dm->efd.enabled & VLIB_EFD_MONITOR_ENABLED) ? 1:0), - tm->efd.queue_hi_thresh); - vlib_cli_output(vm, - "---------\n" - "ip_prec_bitmap 0x%02x\n" - "mpls_exp_bitmap 0x%02x\n" - "vlan_cos_bitmap 0x%02x\n", - tm->efd.ip_prec_bitmap, - tm->efd.mpls_exp_bitmap, - tm->efd.vlan_cos_bitmap); + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + + vlib_cli_output (vm, + "dpdk: (0x%04x) enabled:%d monitor:%d drop_all:%d\n" + " dpdk_queue_hi_thresh %d\n" + " consec_full_frames_hi_thresh %d\n" + "---------\n" + "worker: (0x%04x) enabled:%d monitor:%d\n" + " worker_queue_hi_thresh %d\n", + dm->efd.enabled, + ((dm->efd.enabled & DPDK_EFD_DISCARD_ENABLED) ? 1 : 0), + ((dm->efd.enabled & DPDK_EFD_MONITOR_ENABLED) ? 1 : 0), + ((dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED) ? 1 : 0), + dm->efd.queue_hi_thresh, + dm->efd.consec_full_frames_hi_thresh, + tm->efd.enabled, + ((tm->efd.enabled & VLIB_EFD_DISCARD_ENABLED) ? 1 : 0), + ((dm->efd.enabled & VLIB_EFD_MONITOR_ENABLED) ? 1 : 0), + tm->efd.queue_hi_thresh); + vlib_cli_output (vm, + "---------\n" + "ip_prec_bitmap 0x%02x\n" + "mpls_exp_bitmap 0x%02x\n" + "vlan_cos_bitmap 0x%02x\n", + tm->efd.ip_prec_bitmap, + tm->efd.mpls_exp_bitmap, tm->efd.vlan_cos_bitmap); } static clib_error_t * show_efd (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, vlib_cli_command_t * cmd) { - if (unformat(input, "config")) { - show_efd_config(vm); - } else if (unformat(input, "dpdk")) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; - u32 device_id = ~0; - - (void) unformat(input, "device %d", &device_id); - vec_foreach (xd, dm->devices) { - if ((xd->device_index == device_id) || (device_id == ~0)) { + if (unformat (input, "config")) + { + show_efd_config (vm); + } + else if (unformat (input, "dpdk")) + { + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + u32 device_id = ~0; + + (void) unformat (input, "device %d", &device_id); + /* *INDENT-OFF* */ + vec_foreach (xd, dm->devices) + { + if ((xd->device_index == device_id) || (device_id == ~0)) + { show_dpdk_device_stats(vm, xd); - } - } - } else if (unformat(input, "worker")) { - vlib_thread_main_t * tm = vlib_get_thread_main(); - vlib_frame_queue_t *fq; - vlib_thread_registration_t * tr; - int thread_id; - u32 num_workers = 0; - u32 first_worker_index = 0; - uword * p; - - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - ASSERT (p); - tr = (vlib_thread_registration_t *) p[0]; - if (tr) - { - num_workers = tr->count; - first_worker_index = tr->first_index; + } } - - vlib_cli_output(vm, - "num_workers %d\n" - "first_worker_index %d\n" - "vlib_frame_queues[%d]:\n", - num_workers, - first_worker_index, - tm->n_vlib_mains); - - for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++) { - fq = vlib_frame_queues[thread_id]; - if (fq) { - vlib_cli_output(vm, - "%2d: frames_queued %u\n" - " frames_queued_hint %u\n" - " enqueue_full_events %u\n" - " enqueue_efd_discards %u\n", - thread_id, - (fq->tail - fq->head), - (fq->tail - fq->head_hint), - fq->enqueue_full_events, - fq->enqueue_efd_discards); - } - } - } else if (unformat(input, "help")) { - vlib_cli_output(vm, "Usage: show efd config | " - "dpdk [device <id>] | worker\n"); - } else { - show_efd_config(vm); + /* *INDENT-ON* */ + } + else if (unformat (input, "worker")) + { + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_frame_queue_t *fq; + vlib_thread_registration_t *tr; + int thread_id; + u32 num_workers = 0; + u32 first_worker_index = 0; + uword *p; + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + ASSERT (p); + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + num_workers = tr->count; + first_worker_index = tr->first_index; + } + + vlib_cli_output (vm, + "num_workers %d\n" + "first_worker_index %d\n" + "vlib_frame_queues[%d]:\n", + num_workers, first_worker_index, tm->n_vlib_mains); + + for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++) + { + fq = vlib_frame_queues[thread_id]; + if (fq) + { + vlib_cli_output (vm, + "%2d: frames_queued %u\n" + " frames_queued_hint %u\n" + " enqueue_full_events %u\n" + " enqueue_efd_discards %u\n", + thread_id, + (fq->tail - fq->head), + (fq->tail - fq->head_hint), + fq->enqueue_full_events, + fq->enqueue_efd_discards); + } + } + } + else if (unformat (input, "help")) + { + vlib_cli_output (vm, "Usage: show efd config | " + "dpdk [device <id>] | worker\n"); + } + else + { + show_efd_config (vm); } - return 0; + return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_efd_command, static) = { .path = "show efd", .short_help = "Show efd [device <id>] | [config]", .function = show_efd, }; +/* *INDENT-ON* */ static clib_error_t * clear_efd (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, vlib_cli_command_t * cmd) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; - vlib_thread_main_t * tm = vlib_get_thread_main(); - vlib_frame_queue_t *fq; - int thread_id; - - vec_foreach (xd, dm->devices) { + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_frame_queue_t *fq; + int thread_id; + + /* *INDENT-OFF* */ + vec_foreach (xd, dm->devices) + { xd->efd_agent.last_burst_sz = 0; xd->efd_agent.max_burst_sz = 0; xd->efd_agent.full_frames_cnt = 0; @@ -416,211 +434,280 @@ clear_efd (vlib_main_t * vm, xd->efd_agent.max_poll_delay = 0; xd->efd_agent.discard_cnt = 0; xd->efd_agent.total_packet_cnt = 0; - } + } + /* *INDENT-ON* */ - for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++) { - fq = vlib_frame_queues[thread_id]; - if (fq) { - fq->enqueue_full_events = 0; - fq->enqueue_efd_discards = 0; - } + for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++) + { + fq = vlib_frame_queues[thread_id]; + if (fq) + { + fq->enqueue_full_events = 0; + fq->enqueue_efd_discards = 0; + } } - return 0; + return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_efd_command,static) = { .path = "clear efd", .short_help = "Clear early-fast-discard counters", .function = clear_efd, }; +/* *INDENT-ON* */ static clib_error_t * -parse_op_and_prec (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd, - char *prec_type, u8 *prec_bitmap) +parse_op_and_prec (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd, + char *prec_type, u8 * prec_bitmap) { - clib_error_t * error = NULL; - u8 op = 0; - u8 prec = 0; - - if (unformat(input, "ge")) { - op = EFD_OPERATION_GREATER_OR_EQUAL; - } else if (unformat(input, "lt")) { - op = EFD_OPERATION_LESS_THAN; - } else if (unformat(input, "help")) { - vlib_cli_output(vm, - "enter operation [ge | lt] and precedence <0-7>)"); - return (error); - } else { - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + clib_error_t *error = NULL; + u8 op = 0; + u8 prec = 0; + + if (unformat (input, "ge")) + { + op = EFD_OPERATION_GREATER_OR_EQUAL; + } + else if (unformat (input, "lt")) + { + op = EFD_OPERATION_LESS_THAN; + } + else if (unformat (input, "help")) + { + vlib_cli_output (vm, "enter operation [ge | lt] and precedence <0-7>)"); + return (error); + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); } - if (unformat (input, "%u", &prec)) { - if (prec > 7) { - return clib_error_return(0, "precedence %d is out of range <0-7>", - prec); - } - } else { - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + if (unformat (input, "%u", &prec)) + { + if (prec > 7) + { + return clib_error_return (0, "precedence %d is out of range <0-7>", + prec); + } + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); } - set_efd_bitmap(prec_bitmap, prec, op); + set_efd_bitmap (prec_bitmap, prec, op); - vlib_cli_output(vm, - "EFD will be set for %s precedence %s%u%s.", - prec_type, - (op == EFD_OPERATION_LESS_THAN) ? "less than " : "", - prec, - (op == EFD_OPERATION_GREATER_OR_EQUAL) ? " and greater" : ""); + vlib_cli_output (vm, + "EFD will be set for %s precedence %s%u%s.", + prec_type, + (op == EFD_OPERATION_LESS_THAN) ? "less than " : "", + prec, + (op == + EFD_OPERATION_GREATER_OR_EQUAL) ? " and greater" : ""); - return (error); + return (error); } static clib_error_t * -set_efd (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd) +set_efd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - dpdk_main_t * dm = &dpdk_main; - vlib_thread_main_t * tm = vlib_get_thread_main(); - clib_error_t * error = NULL; - vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index); - - if (unformat(input, "enable")) { - if (unformat(input, "dpdk")) { - dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; - } else if (unformat(input, "worker")) { - tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; - } else if (unformat(input, "monitor")) { - dm->efd.enabled |= DPDK_EFD_MONITOR_ENABLED; - tm->efd.enabled |= VLIB_EFD_MONITOR_ENABLED; - } else if (unformat(input, "drop_all")) { - dm->efd.enabled |= DPDK_EFD_DROPALL_ENABLED; - } else if (unformat(input, "default")) { - dm->efd.enabled = (DPDK_EFD_DISCARD_ENABLED | - DPDK_EFD_MONITOR_ENABLED); - tm->efd.enabled = (VLIB_EFD_DISCARD_ENABLED | - VLIB_EFD_MONITOR_ENABLED); - } else { - return clib_error_return(0, "Usage: set efd enable [dpdk | " - "worker | monitor | drop_all | default]"); - } - } else if (unformat(input, "disable")) { - if (unformat(input, "dpdk")) { - dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; - } else if (unformat(input, "worker")) { - tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; - } else if (unformat(input, "monitor")) { - dm->efd.enabled &= ~DPDK_EFD_MONITOR_ENABLED; - tm->efd.enabled &= ~VLIB_EFD_MONITOR_ENABLED; - } else if (unformat(input, "drop_all")) { - dm->efd.enabled &= ~DPDK_EFD_DROPALL_ENABLED; - } else if (unformat(input, "all")) { - dm->efd.enabled = 0; - tm->efd.enabled = 0; - } else { - return clib_error_return(0, "Usage: set efd disable [dpdk | " - "worker | monitor | drop_all | all]"); - } - } else if (unformat(input, "worker_queue_hi_thresh")) { - u32 mark; - if (unformat (input, "%u", &mark)) { - tm->efd.queue_hi_thresh = mark; - } else { - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); - } - } else if (unformat(input, "dpdk_device_hi_thresh")) { - u32 thresh; - if (unformat (input, "%u", &thresh)) { - dm->efd.queue_hi_thresh = thresh; - } else { - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); - } - } else if (unformat(input, "consec_full_frames_hi_thresh")) { - u32 thresh; - if (unformat (input, "%u", &thresh)) { - dm->efd.consec_full_frames_hi_thresh = thresh; - } else { - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); - } - } else if (unformat(input, "ip-prec")) { - return (parse_op_and_prec(vm, input, cmd, - "ip", &tm->efd.ip_prec_bitmap)); - } else if (unformat(input, "mpls-exp")) { - return (parse_op_and_prec(vm, input, cmd, - "mpls", &tm->efd.mpls_exp_bitmap)); - } else if (unformat(input, "vlan-cos")) { - return (parse_op_and_prec(vm, input, cmd, - "vlan", &tm->efd.vlan_cos_bitmap)); - } else if (unformat(input, "help")) { - vlib_cli_output(vm, - "Usage:\n" - " set efd enable <dpdk | worker | monitor | drop_all | default> |\n" - " set efd disable <dpdk | worker | monitor | drop_all | all> |\n" - " set efd <ip-prec | mpls-exp | vlan-cos> <ge | lt> <0-7>\n" - " set efd worker_queue_hi_thresh <0-32> |\n" - " set efd dpdk_device_hi_thresh <0-%d> |\n" - " set efd consec_full_frames_hi_thresh <count> |\n", - DPDK_NB_RX_DESC_10GE); - } else { - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + clib_error_t *error = NULL; + vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, dpdk_input_node.index); + + if (unformat (input, "enable")) + { + if (unformat (input, "dpdk")) + { + dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; + } + else if (unformat (input, "worker")) + { + tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; + } + else if (unformat (input, "monitor")) + { + dm->efd.enabled |= DPDK_EFD_MONITOR_ENABLED; + tm->efd.enabled |= VLIB_EFD_MONITOR_ENABLED; + } + else if (unformat (input, "drop_all")) + { + dm->efd.enabled |= DPDK_EFD_DROPALL_ENABLED; + } + else if (unformat (input, "default")) + { + dm->efd.enabled = (DPDK_EFD_DISCARD_ENABLED | + DPDK_EFD_MONITOR_ENABLED); + tm->efd.enabled = (VLIB_EFD_DISCARD_ENABLED | + VLIB_EFD_MONITOR_ENABLED); + } + else + { + return clib_error_return (0, "Usage: set efd enable [dpdk | " + "worker | monitor | drop_all | default]"); + } + } + else if (unformat (input, "disable")) + { + if (unformat (input, "dpdk")) + { + dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; + } + else if (unformat (input, "worker")) + { + tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; + } + else if (unformat (input, "monitor")) + { + dm->efd.enabled &= ~DPDK_EFD_MONITOR_ENABLED; + tm->efd.enabled &= ~VLIB_EFD_MONITOR_ENABLED; + } + else if (unformat (input, "drop_all")) + { + dm->efd.enabled &= ~DPDK_EFD_DROPALL_ENABLED; + } + else if (unformat (input, "all")) + { + dm->efd.enabled = 0; + tm->efd.enabled = 0; + } + else + { + return clib_error_return (0, "Usage: set efd disable [dpdk | " + "worker | monitor | drop_all | all]"); + } + } + else if (unformat (input, "worker_queue_hi_thresh")) + { + u32 mark; + if (unformat (input, "%u", &mark)) + { + tm->efd.queue_hi_thresh = mark; + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + else if (unformat (input, "dpdk_device_hi_thresh")) + { + u32 thresh; + if (unformat (input, "%u", &thresh)) + { + dm->efd.queue_hi_thresh = thresh; + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + else if (unformat (input, "consec_full_frames_hi_thresh")) + { + u32 thresh; + if (unformat (input, "%u", &thresh)) + { + dm->efd.consec_full_frames_hi_thresh = thresh; + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + else if (unformat (input, "ip-prec")) + { + return (parse_op_and_prec (vm, input, cmd, + "ip", &tm->efd.ip_prec_bitmap)); + } + else if (unformat (input, "mpls-exp")) + { + return (parse_op_and_prec (vm, input, cmd, + "mpls", &tm->efd.mpls_exp_bitmap)); + } + else if (unformat (input, "vlan-cos")) + { + return (parse_op_and_prec (vm, input, cmd, + "vlan", &tm->efd.vlan_cos_bitmap)); + } + else if (unformat (input, "help")) + { + vlib_cli_output (vm, + "Usage:\n" + " set efd enable <dpdk | worker | monitor | drop_all | default> |\n" + " set efd disable <dpdk | worker | monitor | drop_all | all> |\n" + " set efd <ip-prec | mpls-exp | vlan-cos> <ge | lt> <0-7>\n" + " set efd worker_queue_hi_thresh <0-32> |\n" + " set efd dpdk_device_hi_thresh <0-%d> |\n" + " set efd consec_full_frames_hi_thresh <count> |\n", + DPDK_NB_RX_DESC_10GE); + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); } - if (dm->efd.enabled) - rt->function = dpdk_input_efd_multiarch_select(); - else if (dm->use_rss) - rt->function = dpdk_input_rss_multiarch_select(); - else - rt->function = dpdk_input_multiarch_select(); + if (dm->efd.enabled) + rt->function = dpdk_input_efd_multiarch_select (); + else if (dm->use_rss) + rt->function = dpdk_input_rss_multiarch_select (); + else + rt->function = dpdk_input_multiarch_select (); - return error; + return error; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_efd,static) = { .path = "set efd", .short_help = "set early-fast-discard commands", .function = set_efd, }; +/* *INDENT-ON* */ static clib_error_t * -set_dpdk_if_desc (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd) +set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) { - unformat_input_t _line_input, * line_input = &_line_input; - dpdk_main_t * dm = &dpdk_main; - vnet_hw_interface_t * hw; - dpdk_device_t * xd; - u32 hw_if_index = (u32) ~0; - u32 nb_rx_desc = (u32) ~0; - u32 nb_tx_desc = (u32) ~0; - clib_error_t * rv; - - if (! unformat_user (input, unformat_line_input, line_input)) + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 nb_rx_desc = (u32) ~ 0; + u32 nb_tx_desc = (u32) ~ 0; + clib_error_t *rv; + + if (!unformat_user (input, unformat_line_input, line_input)) return 0; - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "tx %d", &nb_tx_desc)) - ; - else if (unformat (line_input, "rx %d", &nb_rx_desc)) - ; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - } + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "tx %d", &nb_tx_desc)) + ; + else if (unformat (line_input, "rx %d", &nb_rx_desc)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } unformat_free (line_input); - if (hw_if_index == (u32) ~0) + if (hw_if_index == (u32) ~ 0) return clib_error_return (0, "please specify valid interface name"); hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); @@ -628,69 +715,75 @@ set_dpdk_if_desc (vlib_main_t *vm, unformat_input_t *input, if (xd->dev_type != VNET_DPDK_DEV_ETH) return clib_error_return (0, "number of descriptors can be set only for " - "physical devices"); + "physical devices"); - if ((nb_rx_desc == (u32) ~0 || nb_rx_desc == xd->nb_rx_desc) && - (nb_tx_desc == (u32) ~0 || nb_tx_desc == xd->nb_tx_desc)) + if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && + (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) return clib_error_return (0, "nothing changed"); - if (nb_rx_desc != (u32) ~0) - xd->nb_rx_desc = nb_rx_desc; + if (nb_rx_desc != (u32) ~ 0) + xd->nb_rx_desc = nb_rx_desc; - if (nb_tx_desc != (u32) ~0) - xd->nb_rx_desc = nb_rx_desc; + if (nb_tx_desc != (u32) ~ 0) + xd->nb_rx_desc = nb_rx_desc; - rv = dpdk_port_setup(dm, xd); + rv = dpdk_port_setup (dm, xd); return rv; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { .path = "set dpdk interface descriptors", .short_help = "set dpdk interface descriptors <if-name> [rx <n>] [tx <n>]", .function = set_dpdk_if_desc, }; +/* *INDENT-ON* */ static clib_error_t * -show_dpdk_if_placement (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd) +show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) { - vlib_thread_main_t * tm = vlib_get_thread_main(); - dpdk_main_t * dm = &dpdk_main; - dpdk_device_and_queue_t * dq; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; int cpu; if (tm->n_vlib_mains == 1) - vlib_cli_output(vm, "All interfaces are handled by main thread"); + vlib_cli_output (vm, "All interfaces are handled by main thread"); - for(cpu = 0; cpu < vec_len(dm->devices_by_cpu); cpu++) + for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) { - if (vec_len(dm->devices_by_cpu[cpu])) - vlib_cli_output(vm, "Thread %u (%s at lcore %u):", cpu, - vlib_worker_threads[cpu].name, - vlib_worker_threads[cpu].dpdk_lcore_id); + if (vec_len (dm->devices_by_cpu[cpu])) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].dpdk_lcore_id); + /* *INDENT-OFF* */ vec_foreach(dq, dm->devices_by_cpu[cpu]) { u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); } + /* *INDENT-ON* */ } return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { .path = "show dpdk interface placement", .short_help = "show dpdk interface placement", .function = show_dpdk_if_placement, }; +/* *INDENT-ON* */ static int -dpdk_device_queue_sort(void * a1, void * a2) +dpdk_device_queue_sort (void *a1, void *a2) { - dpdk_device_and_queue_t * dq1 = a1; - dpdk_device_and_queue_t * dq2 = a2; + dpdk_device_and_queue_t *dq1 = a1; + dpdk_device_and_queue_t *dq2 = a2; if (dq1->device > dq2->device) return 1; @@ -705,38 +798,40 @@ dpdk_device_queue_sort(void * a1, void * a2) } static clib_error_t * -set_dpdk_if_placement (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd) +set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) { - unformat_input_t _line_input, * line_input = &_line_input; - dpdk_main_t * dm = &dpdk_main; - dpdk_device_and_queue_t * dq; - vnet_hw_interface_t * hw; - dpdk_device_t * xd; - u32 hw_if_index = (u32) ~0; + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; u32 queue = (u32) 0; - u32 cpu = (u32) ~0; + u32 cpu = (u32) ~ 0; int i; - if (! unformat_user (input, unformat_line_input, line_input)) + if (!unformat_user (input, unformat_line_input, line_input)) return 0; - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "queue %d", &queue)) - ; - else if (unformat (line_input, "thread %d", &cpu)) - ; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - } + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } unformat_free (line_input); - if (hw_if_index == (u32) ~0) + if (hw_if_index == (u32) ~ 0) return clib_error_return (0, "please specify valid interface name"); if (cpu < dm->input_cpu_first_index || @@ -746,8 +841,9 @@ set_dpdk_if_placement (vlib_main_t *vm, unformat_input_t *input, hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); - for(i = 0; i < vec_len(dm->devices_by_cpu); i++) + for (i = 0; i < vec_len (dm->devices_by_cpu); i++) { + /* *INDENT-OFF* */ vec_foreach(dq, dm->devices_by_cpu[i]) { if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && @@ -780,16 +876,19 @@ set_dpdk_if_placement (vlib_main_t *vm, unformat_input_t *input, return 0; } } + /* *INDENT-ON* */ } return clib_error_return (0, "not found"); } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { .path = "set dpdk interface placement", .short_help = "set dpdk interface placement <if-name> [queue <n>] thread <n>", .function = set_dpdk_if_placement, }; +/* *INDENT-ON* */ clib_error_t * dpdk_cli_init (vlib_main_t * vm) @@ -798,3 +897,11 @@ dpdk_cli_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (dpdk_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/device.c b/vnet/vnet/devices/dpdk/device.c index 61774f0f3e2..f00b80b6def 100644 --- a/vnet/vnet/devices/dpdk/device.c +++ b/vnet/vnet/devices/dpdk/device.c @@ -30,84 +30,92 @@ _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ _(REPL_FAIL, "Tx packet drops (replication failure)") -typedef enum { +typedef enum +{ #define _(f,s) DPDK_TX_FUNC_ERROR_##f, foreach_dpdk_tx_func_error #undef _ - DPDK_TX_FUNC_N_ERROR, + DPDK_TX_FUNC_N_ERROR, } dpdk_tx_func_error_t; -static char * dpdk_tx_func_error_strings[] = { +static char *dpdk_tx_func_error_strings[] = { #define _(n,s) s, - foreach_dpdk_tx_func_error + foreach_dpdk_tx_func_error #undef _ }; clib_error_t * -dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address) +dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) { - int error; - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error=rte_eth_dev_default_mac_addr_set(xd->device_index, - (struct ether_addr *) address); - - if (error) { - return clib_error_return (0, "mac address set failed: %d", error); - } else { - return NULL; - } + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) address); + + if (error) + { + return clib_error_return (0, "mac address set failed: %d", error); + } + else + { + return NULL; + } } clib_error_t * dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr) + struct ether_addr mc_addr_vec[], int naddr) { int error; - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - error=rte_eth_dev_set_mc_addr_list(xd->device_index, mc_addr_vec, naddr); + error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); - if (error) { - return clib_error_return (0, "mc addr list failed: %d", error); - } else { - return NULL; - } + if (error) + { + return clib_error_return (0, "mc addr list failed: %d", error); + } + else + { + return NULL; + } } -struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) +struct rte_mbuf * +dpdk_replicate_packet_mb (vlib_buffer_t * b) { - vlib_main_t * vm = vlib_get_main(); - vlib_buffer_main_t * bm = vm->buffer_main; - struct rte_mbuf * first_mb = 0, * new_mb, * pkt_mb, ** prev_mb_next = 0; + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_main_t *bm = vm->buffer_main; + struct rte_mbuf *first_mb = 0, *new_mb, *pkt_mb, **prev_mb_next = 0; u8 nb_segs, nb_segs_left; u32 copy_bytes; - unsigned socket_id = rte_socket_id(); + unsigned socket_id = rte_socket_id (); ASSERT (bm->pktmbuf_pools[socket_id]); - pkt_mb = rte_mbuf_from_vlib_buffer(b); + pkt_mb = rte_mbuf_from_vlib_buffer (b); nb_segs = pkt_mb->nb_segs; for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--) { - if (PREDICT_FALSE(pkt_mb == 0)) + if (PREDICT_FALSE (pkt_mb == 0)) { clib_warning ("Missing %d mbuf chain segment(s): " "(nb_segs = %d, nb_segs_left = %d)!", nb_segs - nb_segs_left, nb_segs, nb_segs_left); if (first_mb) - rte_pktmbuf_free(first_mb); + rte_pktmbuf_free (first_mb); return NULL; } new_mb = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); - if (PREDICT_FALSE(new_mb == 0)) + if (PREDICT_FALSE (new_mb == 0)) { if (first_mb) - rte_pktmbuf_free(first_mb); + rte_pktmbuf_free (first_mb); return NULL; } - + /* * Copy packet info into 1st segment. */ @@ -117,7 +125,7 @@ struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len; first_mb->nb_segs = pkt_mb->nb_segs; first_mb->port = pkt_mb->port; -#ifdef DAW_FIXME // TX Offload support TBD +#ifdef DAW_FIXME // TX Offload support TBD first_mb->vlan_macip = pkt_mb->vlan_macip; first_mb->hash = pkt_mb->hash; first_mb->ol_flags = pkt_mb->ol_flags @@ -125,58 +133,59 @@ struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) } else { - ASSERT(prev_mb_next != 0); + ASSERT (prev_mb_next != 0); *prev_mb_next = new_mb; } - + /* * Copy packet segment data into new mbuf segment. */ rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len; copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM; - ASSERT(copy_bytes <= pkt_mb->buf_len); - clib_memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes); + ASSERT (copy_bytes <= pkt_mb->buf_len); + clib_memcpy (new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes); prev_mb_next = &new_mb->next; pkt_mb = pkt_mb->next; } - ASSERT(pkt_mb == 0); - __rte_mbuf_sanity_check(first_mb, 1); + ASSERT (pkt_mb == 0); + __rte_mbuf_sanity_check (first_mb, 1); return first_mb; } -struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b) +struct rte_mbuf * +dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b) { - vlib_main_t * vm = vlib_get_main(); - vlib_buffer_main_t * bm = vm->buffer_main; - struct rte_mbuf * first_mb = 0, * new_mb, * pkt_mb, ** prev_mb_next = 0; + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_main_t *bm = vm->buffer_main; + struct rte_mbuf *first_mb = 0, *new_mb, *pkt_mb, **prev_mb_next = 0; u8 nb_segs, nb_segs_left; - unsigned socket_id = rte_socket_id(); + unsigned socket_id = rte_socket_id (); ASSERT (bm->pktmbuf_pools[socket_id]); - pkt_mb = rte_mbuf_from_vlib_buffer(b); + pkt_mb = rte_mbuf_from_vlib_buffer (b); nb_segs = pkt_mb->nb_segs; for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--) { - if (PREDICT_FALSE(pkt_mb == 0)) + if (PREDICT_FALSE (pkt_mb == 0)) { clib_warning ("Missing %d mbuf chain segment(s): " "(nb_segs = %d, nb_segs_left = %d)!", nb_segs - nb_segs_left, nb_segs, nb_segs_left); if (first_mb) - rte_pktmbuf_free(first_mb); + rte_pktmbuf_free (first_mb); return NULL; } - new_mb = rte_pktmbuf_clone(pkt_mb, bm->pktmbuf_pools[socket_id]); - if (PREDICT_FALSE(new_mb == 0)) + new_mb = rte_pktmbuf_clone (pkt_mb, bm->pktmbuf_pools[socket_id]); + if (PREDICT_FALSE (new_mb == 0)) { if (first_mb) - rte_pktmbuf_free(first_mb); + rte_pktmbuf_free (first_mb); return NULL; } - + /* * Copy packet info into 1st segment. */ @@ -186,7 +195,7 @@ struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b) rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len; first_mb->nb_segs = pkt_mb->nb_segs; first_mb->port = pkt_mb->port; -#ifdef DAW_FIXME // TX Offload support TBD +#ifdef DAW_FIXME // TX Offload support TBD first_mb->vlan_macip = pkt_mb->vlan_macip; first_mb->hash = pkt_mb->hash; first_mb->ol_flags = pkt_mb->ol_flags @@ -194,10 +203,10 @@ struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b) } else { - ASSERT(prev_mb_next != 0); + ASSERT (prev_mb_next != 0); *prev_mb_next = new_mb; } - + /* * Copy packet segment data into new mbuf segment. */ @@ -207,8 +216,8 @@ struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b) pkt_mb = pkt_mb->next; } - ASSERT(pkt_mb == 0); - __rte_mbuf_sanity_check(first_mb, 1); + ASSERT (pkt_mb == 0); + __rte_mbuf_sanity_check (first_mb, 1); return first_mb; @@ -219,48 +228,47 @@ static void dpdk_tx_trace_buffer (dpdk_main_t * dm, vlib_node_runtime_t * node, dpdk_device_t * xd, - u16 queue_id, - u32 buffer_index, - vlib_buffer_t * buffer) + u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) { - vlib_main_t * vm = vlib_get_main(); - dpdk_tx_dma_trace_t * t0; - struct rte_mbuf * mb; + vlib_main_t *vm = vlib_get_main (); + dpdk_tx_dma_trace_t *t0; + struct rte_mbuf *mb; - mb = rte_mbuf_from_vlib_buffer(buffer); + mb = rte_mbuf_from_vlib_buffer (buffer); t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); t0->queue_index = queue_id; t0->device_index = xd->device_index; t0->buffer_index = buffer_index; clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data)); + clib_memcpy (&t0->buffer, buffer, + sizeof (buffer[0]) - sizeof (buffer->pre_data)); clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, - sizeof (t0->buffer.pre_data)); + sizeof (t0->buffer.pre_data)); } /* * This function calls the dpdk's tx_burst function to transmit the packets * on the tx_vector. It manages a lock per-device if the device does not - * support multiple queues. It returns the number of packets untransmitted - * on the tx_vector. If all packets are transmitted (the normal case), the + * support multiple queues. It returns the number of packets untransmitted + * on the tx_vector. If all packets are transmitted (the normal case), the * function returns 0. - * - * The tx_burst function may not be able to transmit all packets because the + * + * The tx_burst function may not be able to transmit all packets because the * dpdk ring is full. If a flowcontrol callback function has been configured - * then the function simply returns. If no callback has been configured, the - * function will retry calling tx_burst with the remaining packets. This will + * then the function simply returns. If no callback has been configured, the + * function will retry calling tx_burst with the remaining packets. This will * continue until all packets are transmitted or tx_burst indicates no packets * could be transmitted. (The caller can drop the remaining packets.) * * The function assumes there is at least one packet on the tx_vector. */ static_always_inline -u32 tx_burst_vector_internal (vlib_main_t * vm, - dpdk_device_t * xd, - struct rte_mbuf ** tx_vector) + u32 tx_burst_vector_internal (vlib_main_t * vm, + dpdk_device_t * xd, + struct rte_mbuf **tx_vector) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; u32 n_packets; u32 tx_head; u32 tx_tail; @@ -269,7 +277,7 @@ u32 tx_burst_vector_internal (vlib_main_t * vm, int queue_id; tx_ring_hdr_t *ring; - ring = vec_header(tx_vector, sizeof(*ring)); + ring = vec_header (tx_vector, sizeof (*ring)); n_packets = ring->tx_head - ring->tx_tail; @@ -279,7 +287,7 @@ u32 tx_burst_vector_internal (vlib_main_t * vm, * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to * unpredictable results. */ - ASSERT(n_packets > 0); + ASSERT (n_packets > 0); /* * Check for tx_vector overflow. If this fails it is a system configuration @@ -288,229 +296,255 @@ u32 tx_burst_vector_internal (vlib_main_t * vm, * a bit because it decreases the probability of having to issue two tx_burst * calls due to a ring wrap. */ - ASSERT(n_packets < DPDK_TX_RING_SIZE); + ASSERT (n_packets < DPDK_TX_RING_SIZE); /* * If there is no flowcontrol callback, there is only temporary buffering * on the tx_vector and so the tail should always be 0. */ - ASSERT(dm->flowcontrol_callback || ring->tx_tail == 0); + ASSERT (dm->flowcontrol_callback || ring->tx_tail == 0); /* - * If there is a flowcontrol callback, don't retry any incomplete tx_bursts. + * If there is a flowcontrol callback, don't retry any incomplete tx_bursts. * Apply backpressure instead. If there is no callback, keep retrying until - * a tx_burst sends no packets. n_retry of 255 essentially means no retry + * a tx_burst sends no packets. n_retry of 255 essentially means no retry * limit. */ n_retry = dm->flowcontrol_callback ? 0 : 255; queue_id = vm->cpu_index; - do { + do + { /* start the burst at the tail */ tx_tail = ring->tx_tail % DPDK_TX_RING_SIZE; - /* + /* * This device only supports one TX queue, * and we're running multi-threaded... */ - if (PREDICT_FALSE(xd->dev_type != VNET_DPDK_DEV_VHOST_USER && - xd->lockp != 0)) - { - queue_id = queue_id % xd->tx_q_used; - while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) - /* zzzz */ - queue_id = (queue_id + 1) % xd->tx_q_used; - } - - if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH)) - { - if (PREDICT_TRUE(tx_head > tx_tail)) - { - /* no wrap, transmit in one burst */ - rv = rte_eth_tx_burst(xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (tx_head-tx_tail)); - } - else - { - /* - * This can only happen if there is a flowcontrol callback. - * We need to split the transmit into two calls: one for - * the packets up to the wrap point, and one to continue - * at the start of the ring. - * Transmit pkts up to the wrap point. - */ - rv = rte_eth_tx_burst(xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (DPDK_TX_RING_SIZE - tx_tail)); - - /* - * If we transmitted everything we wanted, then allow 1 retry - * so we can try to transmit the rest. If we didn't transmit - * everything, stop now. - */ - n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; - } - } + if (PREDICT_FALSE (xd->dev_type != VNET_DPDK_DEV_VHOST_USER && + xd->lockp != 0)) + { + queue_id = queue_id % xd->tx_q_used; + while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) + /* zzzz */ + queue_id = (queue_id + 1) % xd->tx_q_used; + } + + if (PREDICT_TRUE (xd->dev_type == VNET_DPDK_DEV_ETH)) + { + if (PREDICT_TRUE (tx_head > tx_tail)) + { + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else + { + /* + * This can only happen if there is a flowcontrol callback. + * We need to split the transmit into two calls: one for + * the packets up to the wrap point, and one to continue + * at the start of the ring. + * Transmit pkts up to the wrap point. + */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (DPDK_TX_RING_SIZE - + tx_tail)); + + /* + * If we transmitted everything we wanted, then allow 1 retry + * so we can try to transmit the rest. If we didn't transmit + * everything, stop now. + */ + n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; + } + } #if DPDK_VHOST_USER else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) - { - u32 offset = 0; - if (xd->need_txlock) { - queue_id = 0; - while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)); - } - else { - dpdk_device_and_queue_t * dq; - vec_foreach (dq, dm->devices_by_cpu[vm->cpu_index]) - { - if (xd->device_index == dq->device) - break; - } - assert (dq); - offset = dq->queue_id * VIRTIO_QNUM; - } - if (PREDICT_TRUE(tx_head > tx_tail)) - { - int i; u32 bytes = 0; - struct rte_mbuf **pkts = &tx_vector[tx_tail]; - for (i = 0; i < (tx_head - tx_tail); i++) { - struct rte_mbuf *buff = pkts[i]; - bytes += rte_pktmbuf_data_len(buff); - } - - /* no wrap, transmit in one burst */ - rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, offset + VIRTIO_RXQ, - &tx_vector[tx_tail], - (uint16_t) (tx_head-tx_tail)); - if (PREDICT_TRUE(rv > 0)) - { - dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]); - vring->packets += rv; - vring->bytes += bytes; - - if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) { - vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]); - vring->n_since_last_int += rv; - - f64 now = vlib_time_now (vm); - if (vring->int_deadline < now || - vring->n_since_last_int > dm->conf->vhost_coalesce_frames) - dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ); - } - - int c = rv; - while(c--) - rte_pktmbuf_free (tx_vector[tx_tail+c]); - } - } - else - { - /* - * If we transmitted everything we wanted, then allow 1 retry - * so we can try to transmit the rest. If we didn't transmit - * everything, stop now. - */ - int i; u32 bytes = 0; - struct rte_mbuf **pkts = &tx_vector[tx_tail]; - for (i = 0; i < (DPDK_TX_RING_SIZE - tx_tail); i++) { - struct rte_mbuf *buff = pkts[i]; - bytes += rte_pktmbuf_data_len(buff); - } - rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, offset + VIRTIO_RXQ, - &tx_vector[tx_tail], - (uint16_t) (DPDK_TX_RING_SIZE - tx_tail)); - - if (PREDICT_TRUE(rv > 0)) - { - dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]); - vring->packets += rv; - vring->bytes += bytes; - - if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) { - vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]); - vring->n_since_last_int += rv; - - f64 now = vlib_time_now (vm); - if (vring->int_deadline < now || - vring->n_since_last_int > dm->conf->vhost_coalesce_frames) - dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ); - } - - int c = rv; - while(c--) - rte_pktmbuf_free (tx_vector[tx_tail+c]); - } - - n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; - } - - if (xd->need_txlock) - *xd->lockp[queue_id] = 0; - } + { + u32 offset = 0; + if (xd->need_txlock) + { + queue_id = 0; + while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)); + } + else + { + dpdk_device_and_queue_t *dq; + vec_foreach (dq, dm->devices_by_cpu[vm->cpu_index]) + { + if (xd->device_index == dq->device) + break; + } + assert (dq); + offset = dq->queue_id * VIRTIO_QNUM; + } + if (PREDICT_TRUE (tx_head > tx_tail)) + { + int i; + u32 bytes = 0; + struct rte_mbuf **pkts = &tx_vector[tx_tail]; + for (i = 0; i < (tx_head - tx_tail); i++) + { + struct rte_mbuf *buff = pkts[i]; + bytes += rte_pktmbuf_data_len (buff); + } + + /* no wrap, transmit in one burst */ + rv = + rte_vhost_enqueue_burst (&xd->vu_vhost_dev, + offset + VIRTIO_RXQ, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + if (PREDICT_TRUE (rv > 0)) + { + dpdk_vu_vring *vring = + &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]); + vring->packets += rv; + vring->bytes += bytes; + + if (dpdk_vhost_user_want_interrupt + (xd, offset + VIRTIO_RXQ)) + { + vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]); + vring->n_since_last_int += rv; + + f64 now = vlib_time_now (vm); + if (vring->int_deadline < now || + vring->n_since_last_int > + dm->conf->vhost_coalesce_frames) + dpdk_vhost_user_send_interrupt (vm, xd, + offset + VIRTIO_RXQ); + } + + int c = rv; + while (c--) + rte_pktmbuf_free (tx_vector[tx_tail + c]); + } + } + else + { + /* + * If we transmitted everything we wanted, then allow 1 retry + * so we can try to transmit the rest. If we didn't transmit + * everything, stop now. + */ + int i; + u32 bytes = 0; + struct rte_mbuf **pkts = &tx_vector[tx_tail]; + for (i = 0; i < (DPDK_TX_RING_SIZE - tx_tail); i++) + { + struct rte_mbuf *buff = pkts[i]; + bytes += rte_pktmbuf_data_len (buff); + } + rv = + rte_vhost_enqueue_burst (&xd->vu_vhost_dev, + offset + VIRTIO_RXQ, + &tx_vector[tx_tail], + (uint16_t) (DPDK_TX_RING_SIZE - + tx_tail)); + + if (PREDICT_TRUE (rv > 0)) + { + dpdk_vu_vring *vring = + &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]); + vring->packets += rv; + vring->bytes += bytes; + + if (dpdk_vhost_user_want_interrupt + (xd, offset + VIRTIO_RXQ)) + { + vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]); + vring->n_since_last_int += rv; + + f64 now = vlib_time_now (vm); + if (vring->int_deadline < now || + vring->n_since_last_int > + dm->conf->vhost_coalesce_frames) + dpdk_vhost_user_send_interrupt (vm, xd, + offset + VIRTIO_RXQ); + } + + int c = rv; + while (c--) + rte_pktmbuf_free (tx_vector[tx_tail + c]); + } + + n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; + } + + if (xd->need_txlock) + *xd->lockp[queue_id] = 0; + } #endif #if RTE_LIBRTE_KNI else if (xd->dev_type == VNET_DPDK_DEV_KNI) - { - if (PREDICT_TRUE(tx_head > tx_tail)) - { - /* no wrap, transmit in one burst */ - rv = rte_kni_tx_burst(xd->kni, - &tx_vector[tx_tail], - (uint16_t) (tx_head-tx_tail)); - } - else - { - /* - * This can only happen if there is a flowcontrol callback. - * We need to split the transmit into two calls: one for - * the packets up to the wrap point, and one to continue - * at the start of the ring. - * Transmit pkts up to the wrap point. - */ - rv = rte_kni_tx_burst(xd->kni, - &tx_vector[tx_tail], - (uint16_t) (DPDK_TX_RING_SIZE - tx_tail)); - - /* - * If we transmitted everything we wanted, then allow 1 retry - * so we can try to transmit the rest. If we didn't transmit - * everything, stop now. - */ - n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; - } - } + { + if (PREDICT_TRUE (tx_head > tx_tail)) + { + /* no wrap, transmit in one burst */ + rv = rte_kni_tx_burst (xd->kni, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else + { + /* + * This can only happen if there is a flowcontrol callback. + * We need to split the transmit into two calls: one for + * the packets up to the wrap point, and one to continue + * at the start of the ring. + * Transmit pkts up to the wrap point. + */ + rv = rte_kni_tx_burst (xd->kni, + &tx_vector[tx_tail], + (uint16_t) (DPDK_TX_RING_SIZE - + tx_tail)); + + /* + * If we transmitted everything we wanted, then allow 1 retry + * so we can try to transmit the rest. If we didn't transmit + * everything, stop now. + */ + n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0; + } + } #endif else - { - ASSERT(0); - rv = 0; - } - - if (PREDICT_FALSE(xd->dev_type != VNET_DPDK_DEV_VHOST_USER && - xd->lockp != 0)) - *xd->lockp[queue_id] = 0; - - if (PREDICT_FALSE(rv < 0)) - { - // emit non-fatal message, bump counter - vnet_main_t * vnm = dm->vnet_main; - vnet_interface_main_t * im = &vnm->interface_main; - u32 node_index; - - node_index = vec_elt_at_index(im->hw_interfaces, - xd->vlib_hw_if_index)->tx_node_index; - - vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); - clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, rv); - return n_packets; // untransmitted packets - } - ring->tx_tail += (u16)rv; + { + ASSERT (0); + rv = 0; + } + + if (PREDICT_FALSE (xd->dev_type != VNET_DPDK_DEV_VHOST_USER && + xd->lockp != 0)) + *xd->lockp[queue_id] = 0; + + if (PREDICT_FALSE (rv < 0)) + { + // emit non-fatal message, bump counter + vnet_main_t *vnm = dm->vnet_main; + vnet_interface_main_t *im = &vnm->interface_main; + u32 node_index; + + node_index = vec_elt_at_index (im->hw_interfaces, + xd->vlib_hw_if_index)->tx_node_index; + + vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); + clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, + rv); + return n_packets; // untransmitted packets + } + ring->tx_tail += (u16) rv; n_packets -= (uint16_t) rv; - } while (rv && n_packets && (n_retry>0)); + } + while (rv && n_packets && (n_retry > 0)); return n_packets; } @@ -518,20 +552,21 @@ u32 tx_burst_vector_internal (vlib_main_t * vm, /* * This function transmits any packets on the interface's tx_vector and returns - * the number of packets untransmitted on the tx_vector. If the tx_vector is - * empty the function simply returns 0. + * the number of packets untransmitted on the tx_vector. If the tx_vector is + * empty the function simply returns 0. * * It is intended to be called by a traffic manager which has flowed-off an * interface to see if the interface can be flowed-on again. */ -u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) +u32 +dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; int queue_id; - struct rte_mbuf ** tx_vector; + struct rte_mbuf **tx_vector; tx_ring_hdr_t *ring; - + /* param is dev_instance and not hw_if_index to save another lookup */ xd = vec_elt_at_index (dm->devices, dev_instance); @@ -539,8 +574,8 @@ u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) tx_vector = xd->tx_vectors[queue_id]; /* If no packets on the ring, don't bother calling tx function */ - ring = vec_header(tx_vector, sizeof(*ring)); - if (ring->tx_head == ring->tx_tail) + ring = vec_header (tx_vector, sizeof (*ring)); + if (ring->tx_head == ring->tx_tail) { return 0; } @@ -550,14 +585,14 @@ u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) /* * Transmits the packets on the frame to the interface associated with the - * node. It first copies packets on the frame to a tx_vector containing the - * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal + * node. It first copies packets on the frame to a tx_vector containing the + * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal * which calls the dpdk tx_burst function. * * The tx_vector is treated slightly differently depending on whether or * not a flowcontrol callback function has been configured. If there is no * callback, the tx_vector is a temporary array of rte_mbuf packet pointers. - * Its entries are written and consumed before the function exits. + * Its entries are written and consumed before the function exits. * * If there is a callback then the transmit is being invoked in the presence * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf @@ -569,16 +604,15 @@ u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) */ static uword dpdk_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * f) + vlib_node_runtime_t * node, vlib_frame_t * f) { - dpdk_main_t * dm = &dpdk_main; - vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, rd->dev_instance); + dpdk_main_t *dm = &dpdk_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); u32 n_packets = f->n_vectors; u32 n_left; - u32 * from; - struct rte_mbuf ** tx_vector; + u32 *from; + struct rte_mbuf **tx_vector; int i; int queue_id; u32 my_cpu; @@ -591,42 +625,42 @@ dpdk_interface_tx (vlib_main_t * vm, queue_id = my_cpu; tx_vector = xd->tx_vectors[queue_id]; - ring = vec_header(tx_vector, sizeof(*ring)); + ring = vec_header (tx_vector, sizeof (*ring)); n_on_ring = ring->tx_head - ring->tx_tail; from = vlib_frame_vector_args (f); - ASSERT(n_packets <= VLIB_FRAME_SIZE); + ASSERT (n_packets <= VLIB_FRAME_SIZE); - if (PREDICT_FALSE(n_on_ring + n_packets > DPDK_TX_RING_SIZE)) + if (PREDICT_FALSE (n_on_ring + n_packets > DPDK_TX_RING_SIZE)) { /* - * Overflowing the ring should never happen. + * Overflowing the ring should never happen. * If it does then drop the whole frame. */ vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, - n_packets); - - while (n_packets--) - { - u32 bi0 = from[n_packets]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer(b0); - rte_pktmbuf_free (mb0); - } + n_packets); + + while (n_packets--) + { + u32 bi0 = from[n_packets]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); + rte_pktmbuf_free (mb0); + } return n_on_ring; } - if (PREDICT_FALSE(dm->tx_pcap_enable)) + if (PREDICT_FALSE (dm->tx_pcap_enable)) { n_left = n_packets; while (n_left > 0) - { + { u32 bi0 = from[0]; - vlib_buffer_t * b0 = vlib_get_buffer (vm, bi0); + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (dm->pcap_sw_if_index == 0 || - dm->pcap_sw_if_index == vnet_buffer(b0)->sw_if_index [VLIB_TX]) - pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); + dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) + pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); from++; n_left--; } @@ -640,10 +674,10 @@ dpdk_interface_tx (vlib_main_t * vm, { u32 bi0, bi1; u32 pi0, pi1; - struct rte_mbuf * mb0, * mb1; - struct rte_mbuf * prefmb0, * prefmb1; - vlib_buffer_t * b0, * b1; - vlib_buffer_t * pref0, * pref1; + struct rte_mbuf *mb0, *mb1; + struct rte_mbuf *prefmb0, *prefmb1; + vlib_buffer_t *b0, *b1; + vlib_buffer_t *pref0, *pref1; i16 delta0, delta1; u16 new_data_len0, new_data_len1; u16 new_pkt_len0, new_pkt_len1; @@ -654,33 +688,32 @@ dpdk_interface_tx (vlib_main_t * vm, pref0 = vlib_get_buffer (vm, pi0); pref1 = vlib_get_buffer (vm, pi1); - prefmb0 = rte_mbuf_from_vlib_buffer(pref0); - prefmb1 = rte_mbuf_from_vlib_buffer(pref1); + prefmb0 = rte_mbuf_from_vlib_buffer (pref0); + prefmb1 = rte_mbuf_from_vlib_buffer (pref1); - CLIB_PREFETCH(prefmb0, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH(pref0, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH(prefmb1, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH(pref1, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (prefmb0, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (pref0, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (prefmb1, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (pref1, CLIB_CACHE_LINE_BYTES, LOAD); bi0 = from[0]; bi1 = from[1]; from += 2; - + b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); - mb0 = rte_mbuf_from_vlib_buffer(b0); - mb1 = rte_mbuf_from_vlib_buffer(b1); + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb1 = rte_mbuf_from_vlib_buffer (b1); any_clone = (b0->flags & VLIB_BUFFER_RECYCLE) - | (b1->flags & VLIB_BUFFER_RECYCLE); - if (PREDICT_FALSE(any_clone != 0)) - { - if (PREDICT_FALSE - ((b0->flags & VLIB_BUFFER_RECYCLE) != 0)) + | (b1->flags & VLIB_BUFFER_RECYCLE); + if (PREDICT_FALSE (any_clone != 0)) + { + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_RECYCLE) != 0)) { - struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0); - if (PREDICT_FALSE(mb0_new == 0)) + struct rte_mbuf *mb0_new = dpdk_replicate_packet_mb (b0); + if (PREDICT_FALSE (mb0_new == 0)) { vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); @@ -690,11 +723,10 @@ dpdk_interface_tx (vlib_main_t * vm, mb0 = mb0_new; vec_add1 (dm->recycle[my_cpu], bi0); } - if (PREDICT_FALSE - ((b1->flags & VLIB_BUFFER_RECYCLE) != 0)) + if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_RECYCLE) != 0)) { - struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1); - if (PREDICT_FALSE(mb1_new == 0)) + struct rte_mbuf *mb1_new = dpdk_replicate_packet_mb (b1); + if (PREDICT_FALSE (mb1_new == 0)) { vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); @@ -706,15 +738,15 @@ dpdk_interface_tx (vlib_main_t * vm, } } - delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : + delta0 = PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len; - delta1 = PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : + delta1 = PREDICT_FALSE (b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len; - - new_data_len0 = (u16)((i16) mb0->data_len + delta0); - new_data_len1 = (u16)((i16) mb1->data_len + delta1); - new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0); - new_pkt_len1 = (u16)((i16) mb1->pkt_len + delta1); + + new_data_len0 = (u16) ((i16) mb0->data_len + delta0); + new_data_len1 = (u16) ((i16) mb1->data_len + delta1); + new_pkt_len0 = (u16) ((i16) mb0->pkt_len + delta0); + new_pkt_len1 = (u16) ((i16) mb1->pkt_len + delta1); b0->current_length = new_data_len0; b1->current_length = new_data_len1; @@ -723,62 +755,62 @@ dpdk_interface_tx (vlib_main_t * vm, mb0->pkt_len = new_pkt_len0; mb1->pkt_len = new_pkt_len1; - mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ? - mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data); - mb1->data_off = (PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL)) ? - mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data); + mb0->data_off = (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) ? + mb0->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data); + mb1->data_off = (PREDICT_FALSE (b1->flags & VLIB_BUFFER_REPL_FAIL)) ? + mb1->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b1->current_data); - if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE)) + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) { - if (b0->flags & VLIB_BUFFER_IS_TRACED) + if (b0->flags & VLIB_BUFFER_IS_TRACED) dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - if (b1->flags & VLIB_BUFFER_IS_TRACED) + if (b1->flags & VLIB_BUFFER_IS_TRACED) dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); } - if (PREDICT_TRUE(any_clone == 0)) - { + if (PREDICT_TRUE (any_clone == 0)) + { tx_vector[i % DPDK_TX_RING_SIZE] = mb0; - i++; + i++; tx_vector[i % DPDK_TX_RING_SIZE] = mb1; - i++; - } + i++; + } else - { - /* cloning was done, need to check for failure */ - if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - { + { + /* cloning was done, need to check for failure */ + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { tx_vector[i % DPDK_TX_RING_SIZE] = mb0; - i++; - } - if (PREDICT_TRUE((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - { + i++; + } + if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { tx_vector[i % DPDK_TX_RING_SIZE] = mb1; - i++; - } - } - + i++; + } + } + n_left -= 2; } while (n_left > 0) { u32 bi0; - struct rte_mbuf * mb0; - vlib_buffer_t * b0; + struct rte_mbuf *mb0; + vlib_buffer_t *b0; i16 delta0; u16 new_data_len0; u16 new_pkt_len0; bi0 = from[0]; from++; - + b0 = vlib_get_buffer (vm, bi0); - mb0 = rte_mbuf_from_vlib_buffer(b0); - if (PREDICT_FALSE((b0->flags & VLIB_BUFFER_RECYCLE) != 0)) + mb0 = rte_mbuf_from_vlib_buffer (b0); + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_RECYCLE) != 0)) { - struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0); - if (PREDICT_FALSE(mb0_new == 0)) + struct rte_mbuf *mb0_new = dpdk_replicate_packet_mb (b0); + if (PREDICT_FALSE (mb0_new == 0)) { vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); @@ -789,27 +821,27 @@ dpdk_interface_tx (vlib_main_t * vm, vec_add1 (dm->recycle[my_cpu], bi0); } - delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : + delta0 = PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 : vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len; - - new_data_len0 = (u16)((i16) mb0->data_len + delta0); - new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0); - + + new_data_len0 = (u16) ((i16) mb0->data_len + delta0); + new_pkt_len0 = (u16) ((i16) mb0->pkt_len + delta0); + b0->current_length = new_data_len0; mb0->data_len = new_data_len0; mb0->pkt_len = new_pkt_len0; - mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ? - mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data); + mb0->data_off = (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) ? + mb0->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data); - if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE)) + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) if (b0->flags & VLIB_BUFFER_IS_TRACED) dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - { + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { tx_vector[i % DPDK_TX_RING_SIZE] = mb0; - i++; - } + i++; + } n_left--; } @@ -824,86 +856,92 @@ dpdk_interface_tx (vlib_main_t * vm, * tx_pkts is the number of packets successfully transmitted * This is the number originally on ring minus the number remaining on ring */ - tx_pkts = n_on_ring - n_packets; + tx_pkts = n_on_ring - n_packets; - if (PREDICT_FALSE(dm->flowcontrol_callback != 0)) + if (PREDICT_FALSE (dm->flowcontrol_callback != 0)) { - if (PREDICT_FALSE(n_packets)) - { - /* Callback may want to enable flowcontrol */ - dm->flowcontrol_callback(vm, xd->vlib_hw_if_index, ring->tx_head - ring->tx_tail); - } - else - { - /* Reset head/tail to avoid unnecessary wrap */ - ring->tx_head = 0; - ring->tx_tail = 0; - } + if (PREDICT_FALSE (n_packets)) + { + /* Callback may want to enable flowcontrol */ + dm->flowcontrol_callback (vm, xd->vlib_hw_if_index, + ring->tx_head - ring->tx_tail); + } + else + { + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } } - else + else { /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE(n_packets)) - { - vlib_simple_counter_main_t * cm; - vnet_main_t * vnm = vnet_get_main(); + if (PREDICT_FALSE (n_packets)) + { + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, n_packets); + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + n_packets); - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, n_packets); - while (n_packets--) - rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); - } + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } - /* Reset head/tail to avoid unnecessary wrap */ + /* Reset head/tail to avoid unnecessary wrap */ ring->tx_head = 0; ring->tx_tail = 0; } /* Recycle replicated buffers */ - if (PREDICT_FALSE(vec_len(dm->recycle[my_cpu]))) + if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) { - vlib_buffer_free (vm, dm->recycle[my_cpu], vec_len(dm->recycle[my_cpu])); - _vec_len(dm->recycle[my_cpu]) = 0; + vlib_buffer_free (vm, dm->recycle[my_cpu], + vec_len (dm->recycle[my_cpu])); + _vec_len (dm->recycle[my_cpu]) = 0; } - ASSERT(ring->tx_head >= ring->tx_tail); + ASSERT (ring->tx_head >= ring->tx_tail); return tx_pkts; } -static int dpdk_device_renumber (vnet_hw_interface_t * hi, - u32 new_dev_instance) +static int +dpdk_device_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance) { #if DPDK_VHOST_USER - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - if (!xd || xd->dev_type != VNET_DPDK_DEV_VHOST_USER) { - clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)", - hi->sw_if_index); - return 0; - } + if (!xd || xd->dev_type != VNET_DPDK_DEV_VHOST_USER) + { + clib_warning + ("cannot renumber non-vhost-user interface (sw_if_index: %d)", + hi->sw_if_index); + return 0; + } xd->vu_if_id = new_dev_instance; #endif return 0; } -static void dpdk_clear_hw_interface_counters (u32 instance) +static void +dpdk_clear_hw_interface_counters (u32 instance) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, instance); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); /* - * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * DAW-FIXME: VMXNET3 device stop/start doesn't work, * therefore fake the stop in the dpdk driver by - * silently dropping all of the incoming pkts instead of + * silently dropping all of the incoming pkts instead of * stopping the driver / hardware. */ if (xd->admin_up != 0xff) @@ -914,10 +952,10 @@ static void dpdk_clear_hw_interface_counters (u32 instance) */ dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); - clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof(xd->stats)); + clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); clib_memcpy (xd->last_cleared_xstats, xd->xstats, - vec_len(xd->last_cleared_xstats) * - sizeof(xd->last_cleared_xstats[0])); + vec_len (xd->last_cleared_xstats) * + sizeof (xd->last_cleared_xstats[0])); } else { @@ -926,60 +964,68 @@ static void dpdk_clear_hw_interface_counters (u32 instance) * so we're only calling xstats_reset() here. */ rte_eth_xstats_reset (xd->device_index); - memset (&xd->stats, 0, sizeof(xd->stats)); + memset (&xd->stats, 0, sizeof (xd->stats)); memset (&xd->last_stats, 0, sizeof (xd->last_stats)); } #if DPDK_VHOST_USER - if (PREDICT_FALSE(xd->dev_type == VNET_DPDK_DEV_VHOST_USER)) { - int i; - for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) { - xd->vu_intf->vrings[i].packets = 0; - xd->vu_intf->vrings[i].bytes = 0; + if (PREDICT_FALSE (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)) + { + int i; + for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) + { + xd->vu_intf->vrings[i].packets = 0; + xd->vu_intf->vrings[i].bytes = 0; + } } - } #endif } #ifdef RTE_LIBRTE_KNI static int -kni_config_network_if(u8 port_id, u8 if_up) +kni_config_network_if (u8 port_id, u8 if_up) { - vnet_main_t * vnm = vnet_get_main(); - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; uword *p; p = hash_get (dm->dpdk_device_by_kni_port_id, port_id); - if (p == 0) { - clib_warning("unknown interface"); - return 0; - } else { - xd = vec_elt_at_index (dm->devices, p[0]); - } + if (p == 0) + { + clib_warning ("unknown interface"); + return 0; + } + else + { + xd = vec_elt_at_index (dm->devices, p[0]); + } vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, - if_up ? VNET_HW_INTERFACE_FLAG_LINK_UP | - ETH_LINK_FULL_DUPLEX : 0); + if_up ? VNET_HW_INTERFACE_FLAG_LINK_UP | + ETH_LINK_FULL_DUPLEX : 0); return 0; } static int -kni_change_mtu(u8 port_id, unsigned new_mtu) +kni_change_mtu (u8 port_id, unsigned new_mtu) { - vnet_main_t * vnm = vnet_get_main(); - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; uword *p; - vnet_hw_interface_t * hif; + vnet_hw_interface_t *hif; p = hash_get (dm->dpdk_device_by_kni_port_id, port_id); - if (p == 0) { - clib_warning("unknown interface"); - return 0; - } else { - xd = vec_elt_at_index (dm->devices, p[0]); - } + if (p == 0) + { + clib_warning ("unknown interface"); + return 0; + } + else + { + xd = vec_elt_at_index (dm->devices, p[0]); + } hif = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); hif->max_packet_bytes = new_mtu; @@ -991,68 +1037,69 @@ kni_change_mtu(u8 port_id, unsigned new_mtu) static clib_error_t * dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { - vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index); + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hif->dev_instance); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); int rv = 0; #ifdef RTE_LIBRTE_KNI if (xd->dev_type == VNET_DPDK_DEV_KNI) - { + { if (is_up) - { - struct rte_kni_conf conf; - struct rte_kni_ops ops; - vlib_main_t * vm = vlib_get_main(); - vlib_buffer_main_t * bm = vm->buffer_main; - memset(&conf, 0, sizeof(conf)); - snprintf(conf.name, RTE_KNI_NAMESIZE, "vpp%u", xd->kni_port_id); - conf.mbuf_size = VLIB_BUFFER_DATA_SIZE; - memset(&ops, 0, sizeof(ops)); - ops.port_id = xd->kni_port_id; - ops.change_mtu = kni_change_mtu; - ops.config_network_if = kni_config_network_if; - - xd->kni = rte_kni_alloc(bm->pktmbuf_pools[rte_socket_id()], &conf, &ops); - if (!xd->kni) - { - clib_warning("failed to allocate kni interface"); - } - else - { - hif->max_packet_bytes = 1500; /* kni interface default value */ - xd->admin_up = 1; - } - } + { + struct rte_kni_conf conf; + struct rte_kni_ops ops; + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_main_t *bm = vm->buffer_main; + memset (&conf, 0, sizeof (conf)); + snprintf (conf.name, RTE_KNI_NAMESIZE, "vpp%u", xd->kni_port_id); + conf.mbuf_size = VLIB_BUFFER_DATA_SIZE; + memset (&ops, 0, sizeof (ops)); + ops.port_id = xd->kni_port_id; + ops.change_mtu = kni_change_mtu; + ops.config_network_if = kni_config_network_if; + + xd->kni = + rte_kni_alloc (bm->pktmbuf_pools[rte_socket_id ()], &conf, &ops); + if (!xd->kni) + { + clib_warning ("failed to allocate kni interface"); + } + else + { + hif->max_packet_bytes = 1500; /* kni interface default value */ + xd->admin_up = 1; + } + } else - { - xd->admin_up = 0; - int kni_rv; - - kni_rv = rte_kni_release(xd->kni); - if (kni_rv < 0) - clib_warning ("rte_kni_release returned %d", kni_rv); - } + { + xd->admin_up = 0; + int kni_rv; + + kni_rv = rte_kni_release (xd->kni); + if (kni_rv < 0) + clib_warning ("rte_kni_release returned %d", kni_rv); + } return 0; - } + } #endif #if DPDK_VHOST_USER if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) { if (is_up) - { - if (xd->vu_is_running) - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP | - ETH_LINK_FULL_DUPLEX ); - xd->admin_up = 1; - } + { + if (xd->vu_is_running) + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP | + ETH_LINK_FULL_DUPLEX); + xd->admin_up = 1; + } else - { - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); - xd->admin_up = 0; - } + { + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + xd->admin_up = 0; + } return 0; } @@ -1064,18 +1111,18 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) f64 now = vlib_time_now (dm->vlib_main); /* - * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * DAW-FIXME: VMXNET3 device stop/start doesn't work, * therefore fake the stop in the dpdk driver by - * silently dropping all of the incoming pkts instead of + * silently dropping all of the incoming pkts instead of * stopping the driver / hardware. */ if (xd->admin_up == 0) rv = rte_eth_dev_start (xd->device_index); if (xd->promisc) - rte_eth_promiscuous_enable(xd->device_index); + rte_eth_promiscuous_enable (xd->device_index); else - rte_eth_promiscuous_disable(xd->device_index); + rte_eth_promiscuous_disable (xd->device_index); rte_eth_allmulticast_enable (xd->device_index); xd->admin_up = 1; @@ -1091,38 +1138,37 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) * stopping the driver / hardware. */ if (xd->pmd != VNET_DPDK_PMD_VMXNET3) - xd->admin_up = 0; + xd->admin_up = 0; else - xd->admin_up = ~0; + xd->admin_up = ~0; rte_eth_allmulticast_disable (xd->device_index); vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); /* - * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * DAW-FIXME: VMXNET3 device stop/start doesn't work, * therefore fake the stop in the dpdk driver by - * silently dropping all of the incoming pkts instead of + * silently dropping all of the incoming pkts instead of * stopping the driver / hardware. */ if (xd->pmd != VNET_DPDK_PMD_VMXNET3) - rte_eth_dev_stop (xd->device_index); + rte_eth_dev_stop (xd->device_index); /* For bonded interface, stop slave links */ - if (xd->pmd == VNET_DPDK_PMD_BOND) - { - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get(xd->device_index, slink, 16); - while (nlink >=1) + if (xd->pmd == VNET_DPDK_PMD_BOND) + { + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); + while (nlink >= 1) { u8 dpdk_port = slink[--nlink]; rte_eth_dev_stop (dpdk_port); } - } + } } if (rv < 0) - clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop", - rv); + clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop", rv); return /* no error */ 0; } @@ -1131,76 +1177,83 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) * Dynamically redirect all pkts from a specific interface * to the specified node */ -static void dpdk_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index, - u32 node_index) +static void +dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) { - dpdk_main_t * xm = &dpdk_main; + dpdk_main_t *xm = &dpdk_main; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance); - + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + /* Shut off redirection */ if (node_index == ~0) { xd->per_interface_next_index = node_index; return; } - - xd->per_interface_next_index = + + xd->per_interface_next_index = vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); } static clib_error_t * dpdk_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t * st, - int is_add) + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) { - dpdk_main_t * xm = &dpdk_main; + dpdk_main_t *xm = &dpdk_main; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance); - vnet_sw_interface_t * t = (vnet_sw_interface_t *) st; + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; int r, vlan_offload; u32 prev_subifs = xd->vlan_subifs; - if (is_add) xd->vlan_subifs++; - else if (xd->vlan_subifs) xd->vlan_subifs--; + if (is_add) + xd->vlan_subifs++; + else if (xd->vlan_subifs) + xd->vlan_subifs--; if (xd->dev_type != VNET_DPDK_DEV_ETH) - return 0; + return 0; /* currently we program VLANS only for IXGBE VF and I40E VF */ - if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && - (xd->pmd != VNET_DPDK_PMD_I40EVF)) - return 0; + if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) + return 0; if (t->sub.eth.flags.no_tags == 1) - return 0; + return 0; - if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1 )) { - xd->vlan_subifs = prev_subifs; - return clib_error_return (0, "unsupported VLAN setup"); - } + if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) + { + xd->vlan_subifs = prev_subifs; + return clib_error_return (0, "unsupported VLAN setup"); + } - vlan_offload = rte_eth_dev_get_vlan_offload(xd->device_index); + vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; - if ((r = rte_eth_dev_set_vlan_offload(xd->device_index, vlan_offload))) { - xd->vlan_subifs = prev_subifs; - return clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", - xd->device_index, r); - } + if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) + { + xd->vlan_subifs = prev_subifs; + return clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", + xd->device_index, r); + } - if ((r = rte_eth_dev_vlan_filter(xd->device_index, t->sub.eth.outer_vlan_id, is_add))) { - xd->vlan_subifs = prev_subifs; - return clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", - xd->device_index, r); - } + if ((r = + rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, + is_add))) + { + xd->vlan_subifs = prev_subifs; + return clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", + xd->device_index, r); + } return 0; } +/* *INDENT-OFF* */ VNET_DEVICE_CLASS (dpdk_device_class) = { .name = "dpdk", .tx_function = dpdk_interface_tx, @@ -1217,11 +1270,12 @@ VNET_DEVICE_CLASS (dpdk_device_class) = { .name_renumber = dpdk_device_renumber, }; -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, - dpdk_interface_tx) +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) +/* *INDENT-ON* */ -void dpdk_set_flowcontrol_callback (vlib_main_t *vm, - dpdk_flowcontrol_callback_t callback) +void +dpdk_set_flowcontrol_callback (vlib_main_t * vm, + dpdk_flowcontrol_callback_t callback) { dpdk_main.flowcontrol_callback = callback; } @@ -1229,72 +1283,79 @@ void dpdk_set_flowcontrol_callback (vlib_main_t *vm, #define UP_DOWN_FLAG_EVENT 1 -u32 dpdk_get_admin_up_down_in_progress (void) +u32 +dpdk_get_admin_up_down_in_progress (void) { return dpdk_main.admin_up_down_in_progress; } uword admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, - vlib_frame_t * f) + vlib_node_runtime_t * rt, vlib_frame_t * f) { - clib_error_t * error = 0; + clib_error_t *error = 0; uword event_type; uword *event_data = 0; u32 sw_if_index; u32 flags; - while (1) - { + while (1) + { vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); dpdk_main.admin_up_down_in_progress = 1; - switch (event_type) { - case UP_DOWN_FLAG_EVENT: - { - if (vec_len(event_data) == 2) { - sw_if_index = event_data[0]; - flags = event_data[1]; - error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags); - clib_error_report(error); - } - } - break; - } + switch (event_type) + { + case UP_DOWN_FLAG_EVENT: + { + if (vec_len (event_data) == 2) + { + sw_if_index = event_data[0]; + flags = event_data[1]; + error = + vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, + flags); + clib_error_report (error); + } + } + break; + } vec_reset_length (event_data); dpdk_main.admin_up_down_in_progress = 0; } - return 0; /* or not */ + return 0; /* or not */ } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { .function = admin_up_down_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "admin-up-down-process", .process_log2_n_stack_bytes = 17, // 256KB }; +/* *INDENT-ON* */ /* - * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down - * process. Useful for avoiding long blocking delays (>150ms) in the dpdk + * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down + * process. Useful for avoiding long blocking delays (>150ms) in the dpdk * drivers. * WARNING: when posting this event, no other interface-related calls should * be made (e.g. vnet_create_sw_interface()) while the event is being - * processed (admin_up_down_in_progress). This is required in order to avoid + * processed (admin_up_down_in_progress). This is required in order to avoid * race conditions in manipulating interface data structures. */ -void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags) +void +post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, u32 flags) { - uword * d = vlib_process_signal_event_data - (vm, admin_up_down_process_node.index, - UP_DOWN_FLAG_EVENT, 2, sizeof(u32)); + uword *d = vlib_process_signal_event_data + (vm, admin_up_down_process_node.index, + UP_DOWN_FLAG_EVENT, 2, sizeof (u32)); d[0] = sw_if_index; d[1] = flags; } @@ -1302,33 +1363,37 @@ void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags) /* * Return a copy of the DPDK port stats in dest. */ -clib_error_t* -dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest) +clib_error_t * +dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats *dest) { - dpdk_main_t * dm = &dpdk_main; - vnet_main_t * vnm = vnet_get_main(); - vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - if (!dest) { - return clib_error_return (0, "Missing or NULL argument"); - } - if (!xd) { - return clib_error_return (0, "Unable to get DPDK device from HW interface"); - } + dpdk_main_t *dm = &dpdk_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + if (!dest) + { + return clib_error_return (0, "Missing or NULL argument"); + } + if (!xd) + { + return clib_error_return (0, + "Unable to get DPDK device from HW interface"); + } dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); - clib_memcpy(dest, &xd->stats, sizeof(xd->stats)); + clib_memcpy (dest, &xd->stats, sizeof (xd->stats)); return (0); } /* * Return the number of dpdk mbufs */ -u32 dpdk_num_mbufs (void) +u32 +dpdk_num_mbufs (void) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; return dm->conf->num_mbufs; } @@ -1336,10 +1401,11 @@ u32 dpdk_num_mbufs (void) /* * Return the pmd type for a given hardware interface */ -dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi) +dpdk_pmd_t +dpdk_get_pmd_type (vnet_hw_interface_t * hi) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; assert (hi); @@ -1353,16 +1419,25 @@ dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi) /* * Return the cpu socket for a given hardware interface */ -i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi) +i8 +dpdk_get_cpu_socket (vnet_hw_interface_t * hi) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; assert (hi); - xd = vec_elt_at_index(dm->devices, hi->dev_instance); + xd = vec_elt_at_index (dm->devices, hi->dev_instance); assert (xd); return xd->cpu_socket; } + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h index fba32317b8c..90a50dafa43 100644 --- a/vnet/vnet/devices/dpdk/dpdk.h +++ b/vnet/vnet/devices/dpdk/dpdk.h @@ -75,11 +75,12 @@ extern vnet_device_class_t dpdk_device_class; extern vlib_node_registration_t dpdk_input_node; extern vlib_node_registration_t handoff_dispatch_node; -typedef enum { - VNET_DPDK_DEV_ETH = 1, /* Standard DPDK PMD driver */ - VNET_DPDK_DEV_KNI, /* Kernel NIC Interface */ +typedef enum +{ + VNET_DPDK_DEV_ETH = 1, /* Standard DPDK PMD driver */ + VNET_DPDK_DEV_KNI, /* Kernel NIC Interface */ VNET_DPDK_DEV_VHOST_USER, - VNET_DPDK_DEV_UNKNOWN, /* must be last */ + VNET_DPDK_DEV_UNKNOWN, /* must be last */ } dpdk_device_type_t; #define foreach_dpdk_pmd \ @@ -100,15 +101,17 @@ typedef enum { _ ("rte_cxgbe_pmd", CXGBE) \ _ ("rte_dpaa2_dpni", DPAA2) -typedef enum { +typedef enum +{ VNET_DPDK_PMD_NONE, #define _(s,f) VNET_DPDK_PMD_##f, foreach_dpdk_pmd #undef _ - VNET_DPDK_PMD_UNKNOWN, /* must be last */ + VNET_DPDK_PMD_UNKNOWN, /* must be last */ } dpdk_pmd_t; -typedef enum { +typedef enum +{ VNET_DPDK_PORT_TYPE_ETH_1G, VNET_DPDK_PORT_TYPE_ETH_10G, VNET_DPDK_PORT_TYPE_ETH_40G, @@ -118,14 +121,16 @@ typedef enum { VNET_DPDK_PORT_TYPE_UNKNOWN, } dpdk_port_type_t; -typedef struct { +typedef struct +{ f64 deadline; - vlib_frame_t * frame; + vlib_frame_t *frame; } dpdk_frame_t; #define DPDK_EFD_MAX_DISCARD_RATE 10 -typedef struct { +typedef struct +{ u16 last_burst_sz; u16 max_burst_sz; u32 full_frames_cnt; @@ -138,7 +143,8 @@ typedef struct { } dpdk_efd_agent_t; #if DPDK_VHOST_USER -typedef struct { +typedef struct +{ int callfd; int kickfd; int errfd; @@ -150,7 +156,8 @@ typedef struct { u64 bytes; } dpdk_vu_vring; -typedef struct { +typedef struct +{ u32 is_up; u32 unix_fd; u32 unix_file_index; @@ -169,22 +176,23 @@ typedef struct { } dpdk_vu_intf_t; #endif -typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t *vm, - u32 hw_if_index, - u32 n_packets); +typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t * vm, + u32 hw_if_index, u32 n_packets); /* * The header for the tx_vector in dpdk_device_t. * Head and tail are indexes into the tx_vector and are of type * u64 so they never overflow. */ -typedef struct { +typedef struct +{ u64 tx_head; u64 tx_tail; } tx_ring_hdr_t; -typedef struct { - CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); volatile u32 **lockp; /* Instance ID */ @@ -197,11 +205,11 @@ typedef struct { u32 per_interface_next_index; /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ - struct rte_mbuf *** tx_vectors; /* one per worker thread */ - struct rte_mbuf *** rx_vectors; + struct rte_mbuf ***tx_vectors; /* one per worker thread */ + struct rte_mbuf ***rx_vectors; /* vector of traced contexts, per device */ - u32 * d_trace_buffers; + u32 *d_trace_buffers; /* number of sub-interfaces */ u16 vlan_subifs; @@ -213,16 +221,16 @@ typedef struct { u8 admin_up; u8 promisc; - CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - u8 * interface_name_suffix; + u8 *interface_name_suffix; /* PMD related */ u16 tx_q_used; u16 rx_q_used; u16 nb_rx_desc; u16 nb_tx_desc; - u16 * cpu_socket_id_by_queue; + u16 *cpu_socket_id_by_queue; struct rte_eth_conf port_conf; struct rte_eth_txconf tx_conf; @@ -233,7 +241,7 @@ typedef struct { #if DPDK_VHOST_USER /* vhost-user related */ u32 vu_if_id; - struct virtio_net vu_vhost_dev; + struct virtio_net vu_vhost_dev; u32 vu_is_running; dpdk_vu_intf_t *vu_intf; #endif @@ -248,36 +256,38 @@ typedef struct { struct rte_eth_stats last_stats; struct rte_eth_stats last_cleared_stats; #if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 0) - struct rte_eth_xstat * xstats; - struct rte_eth_xstat * last_cleared_xstats; + struct rte_eth_xstat *xstats; + struct rte_eth_xstat *last_cleared_xstats; #else - struct rte_eth_xstats * xstats; - struct rte_eth_xstats * last_cleared_xstats; + struct rte_eth_xstats *xstats; + struct rte_eth_xstats *last_cleared_xstats; #endif f64 time_last_stats_update; dpdk_port_type_t port_type; dpdk_efd_agent_t efd_agent; - u8 need_txlock; /* Used by VNET_DPDK_DEV_VHOST_USER */ + u8 need_txlock; /* Used by VNET_DPDK_DEV_VHOST_USER */ } dpdk_device_t; #define DPDK_TX_RING_SIZE (4 * 1024) #define DPDK_STATS_POLL_INTERVAL (10.0) -#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ +#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ #define DPDK_LINK_POLL_INTERVAL (3.0) -#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ +#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ -typedef struct { - CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); /* total input packet counter */ u64 aggregate_rx_packets; } dpdk_worker_t; -typedef struct { +typedef struct +{ u32 device; u16 queue_id; } dpdk_device_and_queue_t; @@ -291,7 +301,8 @@ typedef struct { #define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT 90 #define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH 6 -typedef struct dpdk_efd_t { +typedef struct dpdk_efd_t +{ u16 enabled; u16 queue_hi_thresh; u16 consec_full_frames_hi_thresh; @@ -305,10 +316,11 @@ typedef struct dpdk_efd_t { _ (num_tx_desc) \ _ (rss_fn) -typedef struct { - vlib_pci_addr_t pci_addr; - u8 is_blacklisted; - u8 vlan_strip_offload; +typedef struct +{ + vlib_pci_addr_t pci_addr; + u8 is_blacklisted; + u8 vlan_strip_offload; #define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 #define DPDK_DEVICE_VLAN_STRIP_OFF 1 #define DPDK_DEVICE_VLAN_STRIP_ON 2 @@ -319,12 +331,13 @@ typedef struct { clib_bitmap_t * workers; } dpdk_device_config_t; -typedef struct { +typedef struct +{ /* Config stuff */ - u8 ** eal_init_args; - u8 * eal_init_args_str; - u8 * uio_driver_name; + u8 **eal_init_args; + u8 *eal_init_args_str; + u8 *uio_driver_name; u8 no_multi_seg; u8 enable_tcp_udp_checksum; @@ -334,7 +347,7 @@ typedef struct { u32 coremask; u32 nchannels; u32 num_mbufs; - u8 num_kni;/* while kni_init allows u32, port_id in callback fn is only u8 */ + u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ /* * format interface names ala xxxEthernet%d/%d/%d instead of @@ -351,21 +364,22 @@ typedef struct { /* per-device config */ dpdk_device_config_t default_devconf; - dpdk_device_config_t * dev_confs; - uword * device_config_index_by_pci_addr; + dpdk_device_config_t *dev_confs; + uword *device_config_index_by_pci_addr; } dpdk_config_main_t; dpdk_config_main_t dpdk_config_main; -typedef struct { +typedef struct +{ /* Devices */ - dpdk_device_t * devices; - dpdk_device_and_queue_t ** devices_by_cpu; + dpdk_device_t *devices; + dpdk_device_and_queue_t **devices_by_cpu; /* per-thread recycle lists */ - u32 ** recycle; + u32 **recycle; /* buffer flags template, configurable to enable/disable tcp / udp cksum */ u32 buffer_flags_template; @@ -377,7 +391,7 @@ typedef struct { u32 vlib_buffer_free_list_index; /* dpdk worker "threads" */ - dpdk_worker_t * workers; + dpdk_worker_t *workers; /* Ethernet input node index */ @@ -386,15 +400,15 @@ typedef struct { /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ int tx_pcap_enable; pcap_main_t pcap_main; - u8 * pcap_filename; + u8 *pcap_filename; u32 pcap_sw_if_index; u32 pcap_pkts_to_capture; /* hashes */ - uword * dpdk_device_by_kni_port_id; - uword * vu_sw_if_index_by_listener_fd; - uword * vu_sw_if_index_by_sock_fd; - u32 * vu_inactive_interfaces_device_index; + uword *dpdk_device_by_kni_port_id; + uword *vu_sw_if_index_by_listener_fd; + uword *vu_sw_if_index_by_sock_fd; + u32 *vu_inactive_interfaces_device_index; u32 next_vu_if_id; @@ -421,14 +435,15 @@ typedef struct { u32 poll_sleep; /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; - dpdk_config_main_t * conf; + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + dpdk_config_main_t *conf; } dpdk_main_t; dpdk_main_t dpdk_main; -typedef enum { +typedef enum +{ DPDK_RX_NEXT_IP4_INPUT, DPDK_RX_NEXT_IP6_INPUT, DPDK_RX_NEXT_MPLS_INPUT, @@ -437,7 +452,8 @@ typedef enum { DPDK_RX_N_NEXT, } dpdk_rx_next_t; -typedef struct { +typedef struct +{ u32 buffer_index; u16 device_index; u8 queue_index; @@ -446,37 +462,38 @@ typedef struct { vlib_buffer_t buffer; } dpdk_tx_dma_trace_t; -typedef struct { +typedef struct +{ u32 buffer_index; u16 device_index; u16 queue_index; struct rte_mbuf mb; - vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ - u8 data[256]; /* First 256 data bytes, used for hexdump */ + vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ + u8 data[256]; /* First 256 data bytes, used for hexdump */ } dpdk_rx_dma_trace_t; void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); void dpdk_set_next_node (dpdk_rx_next_t, char *); -clib_error_t * dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address); +clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); -clib_error_t * dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr); +clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr); void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); -clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); +clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); -void dpdk_set_flowcontrol_callback (vlib_main_t *vm, - dpdk_flowcontrol_callback_t callback); +void dpdk_set_flowcontrol_callback (vlib_main_t * vm, + dpdk_flowcontrol_callback_t callback); u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); -void set_efd_bitmap (u8 *bitmap, u32 value, u32 op); +void set_efd_bitmap (u8 * bitmap, u32 value, u32 op); -struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b); -struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); +struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); +struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); #define foreach_dpdk_error \ _(NONE, "no error") \ @@ -492,113 +509,113 @@ struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); _(MPLS_EFD_DROP_PKTS, "MPLS Early Fast Discard rx drops") \ _(VLAN_EFD_DROP_PKTS, "VLAN Early Fast Discard rx drops") -typedef enum { +typedef enum +{ #define _(f,s) DPDK_ERROR_##f, foreach_dpdk_error #undef _ - DPDK_N_ERROR, + DPDK_N_ERROR, } dpdk_error_t; /* * Increment EFD drop counter */ -static_always_inline -void increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count) +static_always_inline void +increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count) { - vlib_node_t *my_n; + vlib_node_t *my_n; - my_n = vlib_get_node (vm, dpdk_input_node.index); - vm->error_main.counters[my_n->error_heap_index+counter_index] += count; + my_n = vlib_get_node (vm, dpdk_input_node.index); + vm->error_main.counters[my_n->error_heap_index + counter_index] += count; } int dpdk_set_stat_poll_interval (f64 interval); int dpdk_set_link_state_poll_interval (f64 interval); void dpdk_update_link_state (dpdk_device_t * xd, f64 now); -void dpdk_device_lock_init(dpdk_device_t * xd); -void dpdk_device_lock_free(dpdk_device_t * xd); -void dpdk_efd_update_counters(dpdk_device_t *xd, u32 n_buffers, u16 enabled); -u32 is_efd_discardable(vlib_thread_main_t *tm, - vlib_buffer_t * b0, - struct rte_mbuf *mb); +void dpdk_device_lock_init (dpdk_device_t * xd); +void dpdk_device_lock_free (dpdk_device_t * xd); +void dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers, + u16 enabled); +u32 is_efd_discardable (vlib_thread_main_t * tm, vlib_buffer_t * b0, + struct rte_mbuf *mb); #if DPDK_VHOST_USER /* dpdk vhost-user interrupt management */ -u8 dpdk_vhost_user_want_interrupt (dpdk_device_t *xd, int idx); +u8 dpdk_vhost_user_want_interrupt (dpdk_device_t * xd, int idx); void dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd, - int idx); + int idx); #endif -static inline u64 vnet_get_aggregate_rx_packets (void) +static inline u64 +vnet_get_aggregate_rx_packets (void) { - dpdk_main_t * dm = &dpdk_main; - u64 sum = 0; - dpdk_worker_t * dw; + dpdk_main_t *dm = &dpdk_main; + u64 sum = 0; + dpdk_worker_t *dw; - vec_foreach(dw, dm->workers) - sum += dw->aggregate_rx_packets; + vec_foreach (dw, dm->workers) sum += dw->aggregate_rx_packets; - return sum; + return sum; } void dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, - u32 * buffers, - uword n_buffers); + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers); #define EFD_OPERATION_LESS_THAN 0 #define EFD_OPERATION_GREATER_OR_EQUAL 1 -void efd_config(u32 enabled, - u32 ip_prec, u32 ip_op, - u32 mpls_exp, u32 mpls_op, - u32 vlan_cos, u32 vlan_op); +void efd_config (u32 enabled, + u32 ip_prec, u32 ip_op, + u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op); -void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags); +void post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, + u32 flags); #if DPDK_VHOST_USER typedef struct vhost_user_memory vhost_user_memory_t; void dpdk_vhost_user_process_init (void **ctx); void dpdk_vhost_user_process_cleanup (void *ctx); -uword dpdk_vhost_user_process_if (vlib_main_t *vm, dpdk_device_t *xd, void *ctx); +uword dpdk_vhost_user_process_if (vlib_main_t * vm, dpdk_device_t * xd, + void *ctx); // vhost-user calls int dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, - const char * sock_filename, - u8 is_server, - u32 * sw_if_index, - u64 feature_mask, - u8 renumber, u32 custom_dev_instance, - u8 *hwaddr); + const char *sock_filename, + u8 is_server, + u32 * sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance, + u8 * hwaddr); int dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, - const char * sock_filename, - u8 is_server, - u32 sw_if_index, - u64 feature_mask, - u8 renumber, u32 custom_dev_instance); + const char *sock_filename, + u8 is_server, + u32 sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance); int dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, - u32 sw_if_index); + u32 sw_if_index); int dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, - vhost_user_intf_details_t **out_vuids); + vhost_user_intf_details_t ** out_vuids); #endif u32 dpdk_get_admin_up_down_in_progress (void); u32 dpdk_num_mbufs (void); -dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi); +dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t * hi); -i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi); +i8 dpdk_get_cpu_socket (vnet_hw_interface_t * hi); -void * dpdk_input_multiarch_select(); -void * dpdk_input_rss_multiarch_select(); -void * dpdk_input_efd_multiarch_select(); +void *dpdk_input_multiarch_select (); +void *dpdk_input_rss_multiarch_select (); +void *dpdk_input_efd_multiarch_select (); -clib_error_t* -dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest); +clib_error_t *dpdk_get_hw_interface_stats (u32 hw_if_index, + struct rte_eth_stats *dest); format_function_t format_dpdk_device_name; format_function_t format_dpdk_device; @@ -607,11 +624,11 @@ format_function_t format_dpdk_rx_dma_trace; format_function_t format_dpdk_rte_mbuf; format_function_t format_dpdk_rx_rte_mbuf; unformat_function_t unformat_socket_mem; -clib_error_t * unformat_rss_fn(unformat_input_t * input, uword * rss_fn); +clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); static inline void -dpdk_pmd_constructor_init() +dpdk_pmd_constructor_init () { /* Add references to DPDK Driver Constructor functions to get the dynamic * loader to pull in the driver library & run the constructors. @@ -626,58 +643,43 @@ dpdk_pmd_constructor_init() #ifdef RTE_LIBRTE_EM_PMD _(em_pmd_drv) #endif - #ifdef RTE_LIBRTE_IGB_PMD - _(pmd_igb_drv) + _(pmd_igb_drv) #endif - #ifdef RTE_LIBRTE_IXGBE_PMD - _(rte_ixgbe_driver) + _(rte_ixgbe_driver) #endif - #ifdef RTE_LIBRTE_I40E_PMD - _(rte_i40e_driver) - _(rte_i40evf_driver) + _(rte_i40e_driver) _(rte_i40evf_driver) #endif - #ifdef RTE_LIBRTE_FM10K_PMD - _(rte_fm10k_driver) + _(rte_fm10k_driver) #endif - #ifdef RTE_LIBRTE_VIRTIO_PMD - _(rte_virtio_driver) + _(rte_virtio_driver) #endif - #ifdef RTE_LIBRTE_VMXNET3_PMD - _(rte_vmxnet3_driver) + _(rte_vmxnet3_driver) #endif - #ifdef RTE_LIBRTE_VICE_PMD - _(rte_vice_driver) + _(rte_vice_driver) #endif - #ifdef RTE_LIBRTE_ENIC_PMD - _(rte_enic_driver) + _(rte_enic_driver) #endif - #ifdef RTE_LIBRTE_PMD_AF_PACKET - _(pmd_af_packet_drv) + _(pmd_af_packet_drv) #endif - #ifdef RTE_LIBRTE_CXGBE_PMD - _(rte_cxgbe_driver) + _(rte_cxgbe_driver) #endif - #ifdef RTE_LIBRTE_PMD_BOND - _(bond_drv) + _(bond_drv) #endif - #ifdef RTE_LIBRTE_DPAA2_PMD - _(pmd_dpaa2_drv) + _(pmd_dpaa2_drv) #endif - #undef _ - /* * At the moment, the ThunderX NIC driver doesn't have * an entry point named "devinitfn_rte_xxx_driver" @@ -688,17 +690,22 @@ dpdk_pmd_constructor_init() __attribute__((unused)) void (* volatile pf)(void); \ pf = d; \ } while(0); - #ifdef RTE_LIBRTE_THUNDERVNIC_PMD - _(rte_nicvf_pmd_init) + _(rte_nicvf_pmd_init) #endif #undef _ - } uword admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, - vlib_frame_t * f); + vlib_node_runtime_t * rt, vlib_frame_t * f); #endif /* __included_dpdk_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/dpdk_priv.h b/vnet/vnet/devices/dpdk/dpdk_priv.h index d4d14222de3..4ecb1b82502 100644 --- a/vnet/vnet/devices/dpdk/dpdk_priv.h +++ b/vnet/vnet/devices/dpdk/dpdk_priv.h @@ -56,121 +56,24 @@ _(proc-type) \ _(file-prefix) \ _(vdev) -static inline u32 -dpdk_rx_burst ( dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) -{ - u32 n_buffers; - u32 n_left; - u32 n_this_chunk; - - n_left = VLIB_FRAME_SIZE; - n_buffers = 0; - - if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH)) - { - while (n_left) - { - n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, - xd->rx_vectors[queue_id] + n_buffers, n_left); - n_buffers += n_this_chunk; - n_left -= n_this_chunk; - - /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ - if (n_this_chunk < 32) - break; - } - } -#if DPDK_VHOST_USER - else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) - { - vlib_main_t * vm = vlib_get_main(); - vlib_buffer_main_t * bm = vm->buffer_main; - unsigned socket_id = rte_socket_id(); - u32 offset = 0; - - offset = queue_id * VIRTIO_QNUM; - - struct vhost_virtqueue *vq = - xd->vu_vhost_dev.virtqueue[offset + VIRTIO_TXQ]; - - if (PREDICT_FALSE(!vq->enabled)) - return 0; - - struct rte_mbuf **pkts = xd->rx_vectors[queue_id]; - while (n_left) { - n_this_chunk = rte_vhost_dequeue_burst(&xd->vu_vhost_dev, - offset + VIRTIO_TXQ, - bm->pktmbuf_pools[socket_id], - pkts + n_buffers, - n_left); - n_buffers += n_this_chunk; - n_left -= n_this_chunk; - if (n_this_chunk == 0) - break; - } - - int i; u32 bytes = 0; - for (i = 0; i < n_buffers; i++) { - struct rte_mbuf *buff = pkts[i]; - bytes += rte_pktmbuf_data_len(buff); - } - - f64 now = vlib_time_now (vm); - - dpdk_vu_vring *vring = NULL; - /* send pending interrupts if needed */ - if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_TXQ)) { - vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]); - vring->n_since_last_int += n_buffers; - - if ((vring->n_since_last_int && (vring->int_deadline < now)) - || (vring->n_since_last_int > dm->conf->vhost_coalesce_frames)) - dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_TXQ); - } - - vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]); - vring->packets += n_buffers; - vring->bytes += bytes; - - if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) { - if (vring->n_since_last_int && (vring->int_deadline < now)) - dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ); - } - - } -#endif -#ifdef RTE_LIBRTE_KNI - else if (xd->dev_type == VNET_DPDK_DEV_KNI) - { - n_buffers = rte_kni_rx_burst(xd->kni, xd->rx_vectors[queue_id], VLIB_FRAME_SIZE); - rte_kni_handle_request(xd->kni); - } -#endif - else - { - ASSERT(0); - } - - return n_buffers; -} - - static inline void dpdk_get_xstats (dpdk_device_t * xd) { int len; - if ((len = rte_eth_xstats_get(xd->device_index, NULL, 0)) > 0) + if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) { - vec_validate(xd->xstats, len - 1); - vec_validate(xd->last_cleared_xstats, len - 1); + vec_validate (xd->xstats, len - 1); + vec_validate (xd->last_cleared_xstats, len - 1); - len = rte_eth_xstats_get(xd->device_index, xd->xstats, vec_len(xd->xstats)); + len = + rte_eth_xstats_get (xd->device_index, xd->xstats, + vec_len (xd->xstats)); - ASSERT(vec_len(xd->xstats) == len); - ASSERT(vec_len(xd->last_cleared_xstats) == len); + ASSERT (vec_len (xd->xstats) == len); + ASSERT (vec_len (xd->last_cleared_xstats) == len); - _vec_len(xd->xstats) = len; - _vec_len(xd->last_cleared_xstats) = len; + _vec_len (xd->xstats) = len; + _vec_len (xd->last_cleared_xstats) = len; } } @@ -179,9 +82,9 @@ dpdk_get_xstats (dpdk_device_t * xd) static inline void dpdk_update_counters (dpdk_device_t * xd, f64 now) { - vlib_simple_counter_main_t * cm; - vnet_main_t * vnm = vnet_get_main(); - u32 my_cpu = os_get_cpu_number(); + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + u32 my_cpu = os_get_cpu_number (); u64 rxerrors, last_rxerrors; /* only update counters for PMD interfaces */ @@ -202,37 +105,45 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) /* maybe bump interface rx no buffer counter */ if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_NO_BUF); + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_NO_BUF); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.rx_nombuf - - xd->last_stats.rx_nombuf); - } + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.rx_nombuf - + xd->last_stats.rx_nombuf); + } /* missed pkt counter */ if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_MISS); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.imissed - - xd->last_stats.imissed); - } + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_MISS); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.imissed - + xd->last_stats.imissed); + } rxerrors = xd->stats.ierrors; last_rxerrors = xd->last_stats.ierrors; if (PREDICT_FALSE (rxerrors != last_rxerrors)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_ERROR); + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_ERROR); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - rxerrors - last_rxerrors); - } + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + rxerrors - last_rxerrors); + } } - dpdk_get_xstats(xd); + dpdk_get_xstats (xd); } + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/format.c b/vnet/vnet/devices/dpdk/format.c index 075f35777b5..1f401a9e01b 100644 --- a/vnet/vnet/devices/dpdk/format.c +++ b/vnet/vnet/devices/dpdk/format.c @@ -150,14 +150,15 @@ #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ -u8 * format_dpdk_device_name (u8 * s, va_list * args) +u8 * +format_dpdk_device_name (u8 * s, va_list * args) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; char *devname_format; char *device_name; u32 i = va_arg (*args, u32); struct rte_eth_dev_info dev_info; - u8 * ret; + u8 *ret; if (dm->conf->interface_name_format_decimal) devname_format = "%s%d/%d/%d"; @@ -165,14 +166,17 @@ u8 * format_dpdk_device_name (u8 * s, va_list * args) devname_format = "%s%x/%x/%x"; #ifdef RTE_LIBRTE_KNI - if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) { - return format(s, "kni%d", dm->devices[i].kni_port_id); - } else + if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) + { + return format (s, "kni%d", dm->devices[i].kni_port_id); + } + else #endif #if DPDK_VHOST_USER - if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) { - return format(s, "VirtualEthernet0/0/%d", dm->devices[i].vu_if_id); - } + if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) + { + return format (s, "VirtualEthernet0/0/%d", dm->devices[i].vu_if_id); + } #endif switch (dm->devices[i].port_type) { @@ -189,15 +193,15 @@ u8 * format_dpdk_device_name (u8 * s, va_list * args) break; case VNET_DPDK_PORT_TYPE_ETH_BOND: - return format(s, "BondEthernet%d", dm->devices[i].device_index); + return format (s, "BondEthernet%d", dm->devices[i].device_index); case VNET_DPDK_PORT_TYPE_ETH_SWITCH: device_name = "EthernetSwitch"; break; case VNET_DPDK_PORT_TYPE_AF_PACKET: - rte_eth_dev_info_get(i, &dev_info); - return format(s, "af_packet%d", dm->devices[i].af_packet_port_id); + rte_eth_dev_info_get (i, &dev_info); + return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); default: case VNET_DPDK_PORT_TYPE_UNKNOWN: @@ -205,105 +209,110 @@ u8 * format_dpdk_device_name (u8 * s, va_list * args) break; } - rte_eth_dev_info_get(i, &dev_info); + rte_eth_dev_info_get (i, &dev_info); ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, - dev_info.pci_dev->addr.devid, - dev_info.pci_dev->addr.function); + dev_info.pci_dev->addr.devid, + dev_info.pci_dev->addr.function); if (dm->devices[i].interface_name_suffix) return format (ret, "/%s", dm->devices[i].interface_name_suffix); return ret; } -static u8 * format_dpdk_device_type (u8 * s, va_list * args) +static u8 * +format_dpdk_device_type (u8 * s, va_list * args) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; char *dev_type; u32 i = va_arg (*args, u32); - if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) { - return format(s, "Kernel NIC Interface"); - } else if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) { - return format(s, "vhost-user interface"); - } + if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) + { + return format (s, "Kernel NIC Interface"); + } + else if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) + { + return format (s, "vhost-user interface"); + } switch (dm->devices[i].pmd) { case VNET_DPDK_PMD_E1000EM: - dev_type = "Intel 82540EM (e1000)"; - break; + dev_type = "Intel 82540EM (e1000)"; + break; case VNET_DPDK_PMD_IGB: - dev_type = "Intel e1000"; - break; + dev_type = "Intel e1000"; + break; case VNET_DPDK_PMD_I40E: - dev_type = "Intel X710/XL710 Family"; - break; + dev_type = "Intel X710/XL710 Family"; + break; case VNET_DPDK_PMD_I40EVF: - dev_type = "Intel X710/XL710 Family VF"; - break; + dev_type = "Intel X710/XL710 Family VF"; + break; case VNET_DPDK_PMD_FM10K: - dev_type = "Intel FM10000 Family Ethernet Switch"; - break; + dev_type = "Intel FM10000 Family Ethernet Switch"; + break; case VNET_DPDK_PMD_IGBVF: - dev_type = "Intel e1000 VF"; - break; + dev_type = "Intel e1000 VF"; + break; case VNET_DPDK_PMD_VIRTIO: - dev_type = "Red Hat Virtio"; - break; + dev_type = "Red Hat Virtio"; + break; case VNET_DPDK_PMD_IXGBEVF: - dev_type = "Intel 82599 VF"; - break; + dev_type = "Intel 82599 VF"; + break; case VNET_DPDK_PMD_IXGBE: - dev_type = "Intel 82599"; - break; + dev_type = "Intel 82599"; + break; case VNET_DPDK_PMD_ENIC: - dev_type = "Cisco VIC"; - break; + dev_type = "Cisco VIC"; + break; case VNET_DPDK_PMD_CXGBE: - dev_type = "Chelsio T4/T5"; - break; + dev_type = "Chelsio T4/T5"; + break; case VNET_DPDK_PMD_VMXNET3: - dev_type = "VMware VMXNET3"; - break; + dev_type = "VMware VMXNET3"; + break; case VNET_DPDK_PMD_AF_PACKET: - dev_type = "af_packet"; - break; + dev_type = "af_packet"; + break; case VNET_DPDK_PMD_BOND: - dev_type = "Ethernet Bonding"; - break; + dev_type = "Ethernet Bonding"; + break; case VNET_DPDK_PMD_DPAA2: - dev_type = "NXP DPAA2 Mac"; - break; + dev_type = "NXP DPAA2 Mac"; + break; default: case VNET_DPDK_PMD_UNKNOWN: - dev_type = "### UNKNOWN ###"; - break; + dev_type = "### UNKNOWN ###"; + break; } return format (s, dev_type); } -static u8 * format_dpdk_link_status (u8 * s, va_list * args) +static u8 * +format_dpdk_link_status (u8 * s, va_list * args) { - dpdk_device_t * xd = va_arg (*args, dpdk_device_t *); - struct rte_eth_link * l = &xd->link; - vnet_main_t * vnm = vnet_get_main(); - vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); + struct rte_eth_link *l = &xd->link; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); s = format (s, "%s ", l->link_status ? "up" : "down"); if (l->link_status) @@ -311,7 +320,7 @@ static u8 * format_dpdk_link_status (u8 * s, va_list * args) u32 promisc = rte_eth_promiscuous_get (xd->device_index); s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? - "full" : "half"); + "full" : "half"); s = format (s, "speed %u mtu %d %s\n", l->link_speed, hi->max_packet_bytes, promisc ? " promisc" : ""); } @@ -331,56 +340,54 @@ if (bitmap & v) { \ s = format(s, "%s ", str); \ } -static u8 * format_dpdk_rss_hf_name(u8 * s, va_list * args) +static u8 * +format_dpdk_rss_hf_name (u8 * s, va_list * args) { u64 bitmap = va_arg (*args, u64); int next_split = _line_len; int indent = format_get_indent (s); if (!bitmap) - return format(s, "none"); + return format (s, "none"); - foreach_dpdk_rss_hf - - return s; + foreach_dpdk_rss_hf return s; } -static u8 * format_dpdk_rx_offload_caps(u8 * s, va_list * args) +static u8 * +format_dpdk_rx_offload_caps (u8 * s, va_list * args) { u32 bitmap = va_arg (*args, u32); int next_split = _line_len; int indent = format_get_indent (s); if (!bitmap) - return format(s, "none"); - - foreach_dpdk_rx_offload_caps + return format (s, "none"); - return s; + foreach_dpdk_rx_offload_caps return s; } -static u8 * format_dpdk_tx_offload_caps(u8 * s, va_list * args) +static u8 * +format_dpdk_tx_offload_caps (u8 * s, va_list * args) { u32 bitmap = va_arg (*args, u32); int next_split = _line_len; int indent = format_get_indent (s); if (!bitmap) - return format(s, "none"); - - foreach_dpdk_tx_offload_caps + return format (s, "none"); - return s; + foreach_dpdk_tx_offload_caps return s; } #undef _line_len #undef _ -u8 * format_dpdk_device (u8 * s, va_list * args) +u8 * +format_dpdk_device (u8 * s, va_list * args) { u32 dev_instance = va_arg (*args, u32); int verbose = va_arg (*args, int); - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, dev_instance); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); uword indent = format_get_indent (s); f64 now = vlib_time_now (dm->vlib_main); struct rte_eth_dev_info di; @@ -390,77 +397,78 @@ u8 * format_dpdk_device (u8 * s, va_list * args) s = format (s, "%U\n%Ucarrier %U", format_dpdk_device_type, xd->device_index, - format_white_space, indent + 2, - format_dpdk_link_status, xd); + format_white_space, indent + 2, format_dpdk_link_status, xd); - rte_eth_dev_info_get(xd->device_index, &di); + rte_eth_dev_info_get (xd->device_index, &di); if (verbose > 1 && xd->dev_type == VNET_DPDK_DEV_ETH) { - struct rte_pci_device * pci; + struct rte_pci_device *pci; struct rte_eth_rss_conf rss_conf; int vlan_off; int retval; rss_conf.rss_key = 0; - retval = rte_eth_dev_rss_hash_conf_get(xd->device_index, &rss_conf); + retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); if (retval < 0) - clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); + clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); pci = di.pci_dev; if (pci) - s = format(s, "%Upci id: device %04x:%04x subsystem %04x:%04x\n" - "%Upci address: %04x:%02x:%02x.%02x\n", - format_white_space, indent + 2, - pci->id.vendor_id, pci->id.device_id, - pci->id.subsystem_vendor_id, - pci->id.subsystem_device_id, - format_white_space, indent + 2, - pci->addr.domain, pci->addr.bus, - pci->addr.devid, pci->addr.function); - s = format(s, "%Umax rx packet len: %d\n", - format_white_space, indent + 2, di.max_rx_pktlen); - s = format(s, "%Umax num of queues: rx %d tx %d\n", - format_white_space, indent + 2, di.max_rx_queues, di.max_tx_queues); - s = format(s, "%Upromiscuous: unicast %s all-multicast %s\n", - format_white_space, indent + 2, - rte_eth_promiscuous_get(xd->device_index) ? "on" : "off", - rte_eth_promiscuous_get(xd->device_index) ? "on" : "off"); - vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index); - s = format(s, "%Uvlan offload: strip %s filter %s qinq %s\n", - format_white_space, indent + 2, - vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off"); - s = format(s, "%Urx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_rx_offload_caps, di.rx_offload_capa); - s = format(s, "%Utx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_tx_offload_caps, di.tx_offload_capa); - s = format(s, "%Urss active: %U\n" - "%Urss supported: %U\n", - format_white_space, indent + 2, - format_dpdk_rss_hf_name, rss_conf.rss_hf, - format_white_space, indent + 2, - format_dpdk_rss_hf_name, di.flow_type_rss_offloads); + s = + format (s, + "%Upci id: device %04x:%04x subsystem %04x:%04x\n" + "%Upci address: %04x:%02x:%02x.%02x\n", + format_white_space, indent + 2, pci->id.vendor_id, + pci->id.device_id, pci->id.subsystem_vendor_id, + pci->id.subsystem_device_id, format_white_space, indent + 2, + pci->addr.domain, pci->addr.bus, pci->addr.devid, + pci->addr.function); + s = + format (s, "%Umax rx packet len: %d\n", format_white_space, + indent + 2, di.max_rx_pktlen); + s = + format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, + indent + 2, di.max_rx_queues, di.max_tx_queues); + s = + format (s, "%Upromiscuous: unicast %s all-multicast %s\n", + format_white_space, indent + 2, + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", + format_white_space, indent + 2, + vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off"); + s = format (s, "%Urx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_rx_offload_caps, di.rx_offload_capa); + s = format (s, "%Utx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_tx_offload_caps, di.tx_offload_capa); + s = format (s, "%Urss active: %U\n" + "%Urss supported: %U\n", + format_white_space, indent + 2, + format_dpdk_rss_hf_name, rss_conf.rss_hf, + format_white_space, indent + 2, + format_dpdk_rss_hf_name, di.flow_type_rss_offloads); } - if (verbose && xd->dev_type == VNET_DPDK_DEV_VHOST_USER) { - s = format(s, "%Uqueue size (max): rx %d (%d) tx %d (%d)\n", - format_white_space, indent + 2, - xd->rx_q_used, xd->rx_q_used, - xd->tx_q_used, xd->tx_q_used); + if (verbose && xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + s = format (s, "%Uqueue size (max): rx %d (%d) tx %d (%d)\n", + format_white_space, indent + 2, + xd->rx_q_used, xd->rx_q_used, xd->tx_q_used, xd->tx_q_used); } s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", - format_white_space, indent + 2, - xd->rx_q_used, xd->nb_rx_desc, - xd->tx_q_used, xd->nb_tx_desc); + format_white_space, indent + 2, + xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); if (xd->cpu_socket > -1) s = format (s, "%Ucpu socket %d\n", - format_white_space, indent + 2, xd->cpu_socket); + format_white_space, indent + 2, xd->cpu_socket); /* $$$ MIB counters */ { @@ -476,20 +484,21 @@ u8 * format_dpdk_device (u8 * s, va_list * args) #undef _ } - u8 * xs = 0; + u8 *xs = 0; u32 i = 0; #if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 0) - struct rte_eth_xstats * xstat, * last_xstat; + struct rte_eth_xstats *xstat, *last_xstat; #else - struct rte_eth_xstat * xstat, * last_xstat; - struct rte_eth_xstat_name * xstat_names = 0; + struct rte_eth_xstat *xstat, *last_xstat; + struct rte_eth_xstat_name *xstat_names = 0; int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); vec_validate (xstat_names, len - 1); rte_eth_xstats_get_names (xd->device_index, xstat_names, len); #endif - ASSERT(vec_len(xd->xstats) == vec_len(xd->last_cleared_xstats)); + ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); + /* *INDENT-OFF* */ vec_foreach_index(i, xd->xstats) { u64 delta = 0; @@ -511,70 +520,81 @@ u8 * format_dpdk_device (u8 * s, va_list * args) vec_free(name); } } + /* *INDENT-ON* */ #if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 0) vec_free (xstat_names); #endif #if DPDK_VHOST_USER - if (verbose && xd->dev_type == VNET_DPDK_DEV_VHOST_USER) { - int i; - for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) { - u8 * name; - if (verbose == 2 || xd->vu_intf->vrings[i].packets) { - if (i & 1) { - name = format(NULL, "tx q%d packets", i >> 1); - } else { - name = format(NULL, "rx q%d packets", i >> 1); - } - xs = format(xs, "\n%U%-38U%16Ld", - format_white_space, indent + 4, - format_c_identifier, name, xd->vu_intf->vrings[i].packets); - vec_free(name); - - if (i & 1) { - name = format(NULL, "tx q%d bytes", i >> 1); - } else { - name = format(NULL, "rx q%d bytes", i >> 1); - } - xs = format(xs, "\n%U%-38U%16Ld", - format_white_space, indent + 4, - format_c_identifier, name, xd->vu_intf->vrings[i].bytes); - vec_free(name); - } - } + if (verbose && xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + int i; + for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) + { + u8 *name; + if (verbose == 2 || xd->vu_intf->vrings[i].packets) + { + if (i & 1) + { + name = format (NULL, "tx q%d packets", i >> 1); + } + else + { + name = format (NULL, "rx q%d packets", i >> 1); + } + xs = format (xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, + xd->vu_intf->vrings[i].packets); + vec_free (name); + + if (i & 1) + { + name = format (NULL, "tx q%d bytes", i >> 1); + } + else + { + name = format (NULL, "rx q%d bytes", i >> 1); + } + xs = format (xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, + xd->vu_intf->vrings[i].bytes); + vec_free (name); + } + } } #endif if (xs) { - s = format(s, "\n%Uextended stats:%v", - format_white_space, indent + 2, xs); - vec_free(xs); + s = format (s, "\n%Uextended stats:%v", + format_white_space, indent + 2, xs); + vec_free (xs); } return s; } -u8 * format_dpdk_tx_dma_trace (u8 * s, va_list * va) +u8 * +format_dpdk_tx_dma_trace (u8 * s, va_list * va) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main(); - dpdk_tx_dma_trace_t * t = va_arg (*va, dpdk_tx_dma_trace_t *); - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, t->device_index); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); uword indent = format_get_indent (s); - vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); s = format (s, "%U tx queue %d", - format_vnet_sw_interface_name, vnm, sw, - t->queue_index); + format_vnet_sw_interface_name, vnm, sw, t->queue_index); s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent, - t->buffer_index, - format_vlib_buffer, &t->buffer); + t->buffer_index, format_vlib_buffer, &t->buffer); s = format (s, "\n%U%U", format_white_space, indent, format_ethernet_header_with_length, t->buffer.pre_data, @@ -583,26 +603,25 @@ u8 * format_dpdk_tx_dma_trace (u8 * s, va_list * va) return s; } -u8 * format_dpdk_rx_dma_trace (u8 * s, va_list * va) +u8 * +format_dpdk_rx_dma_trace (u8 * s, va_list * va) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main(); - dpdk_rx_dma_trace_t * t = va_arg (*va, dpdk_rx_dma_trace_t *); - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, t->device_index); - format_function_t * f; + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + format_function_t *f; uword indent = format_get_indent (s); - vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); s = format (s, "%U rx queue %d", - format_vnet_sw_interface_name, vnm, sw, - t->queue_index); + format_vnet_sw_interface_name, vnm, sw, t->queue_index); s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent, - t->buffer_index, - format_vlib_buffer, &t->buffer); + t->buffer_index, format_vlib_buffer, &t->buffer); #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS s = format (s, "\n%U%U", @@ -616,10 +635,11 @@ u8 * format_dpdk_rx_dma_trace (u8 * s, va_list * va) if (vm->trace_main.verbose) { s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, - t->mb.data_len > sizeof(t->data) ? " (truncated)": ""); + t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); s = format (s, "\n%U%U", format_white_space, indent + 4, format_hexdump, &t->data, - t->mb.data_len > sizeof(t->data) ? sizeof(t->data) : t->mb.data_len); + t->mb.data_len > + sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); } f = node->format_buffer; if (!f) @@ -631,10 +651,11 @@ u8 * format_dpdk_rx_dma_trace (u8 * s, va_list * va) } -static inline u8 * format_dpdk_pkt_types (u8 * s, va_list * va) +static inline u8 * +format_dpdk_pkt_types (u8 * s, va_list * va) { u32 *pkt_types = va_arg (*va, u32 *); - uword indent __attribute__((unused)) = format_get_indent (s) + 2; + uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; if (!*pkt_types) return s; @@ -649,13 +670,12 @@ static inline u8 * format_dpdk_pkt_types (u8 * s, va_list * va) } foreach_dpdk_pkt_type - #undef _ - - return s; + return s; } -static inline u8 * format_dpdk_pkt_offload_flags (u8 * s, va_list * va) +static inline u8 * +format_dpdk_pkt_offload_flags (u8 * s, va_list * va) { u64 *ol_flags = va_arg (*va, u64 *); uword indent = format_get_indent (s) + 2; @@ -673,64 +693,68 @@ static inline u8 * format_dpdk_pkt_offload_flags (u8 * s, va_list * va) } foreach_dpdk_pkt_offload_flag - #undef _ - - return s; + return s; } -u8 * format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) +u8 * +format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) { - ethernet_vlan_header_tv_t * vlan_hdr = va_arg (*va, ethernet_vlan_header_tv_t *); + ethernet_vlan_header_tv_t *vlan_hdr = + va_arg (*va, ethernet_vlan_header_tv_t *); - if (clib_net_to_host_u16(vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) { - s = format (s, "%U 802.1q vlan ", - format_ethernet_vlan_tci, - clib_net_to_host_u16(vlan_hdr->priority_cfi_and_id)); - vlan_hdr++; + if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) + { + s = format (s, "%U 802.1q vlan ", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + vlan_hdr++; } - s = format (s, "%U", - format_ethernet_vlan_tci, - clib_net_to_host_u16(vlan_hdr->priority_cfi_and_id)); + s = format (s, "%U", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - return s; + return s; } -u8 * format_dpdk_rte_mbuf (u8 * s, va_list * va) +u8 * +format_dpdk_rte_mbuf (u8 * s, va_list * va) { - struct rte_mbuf * mb = va_arg (*va, struct rte_mbuf *); + struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); uword indent = format_get_indent (s) + 2; s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" - "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" - "\n%Upacket_type 0x%x", - mb->port, mb->nb_segs, mb->pkt_len, - format_white_space, indent, - mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, mb->buf_physaddr, - format_white_space, indent, - mb->packet_type); + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, + mb->buf_physaddr, format_white_space, indent, mb->packet_type); if (mb->ol_flags) s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_offload_flags, &mb->ol_flags); + format_dpdk_pkt_offload_flags, &mb->ol_flags); - if (mb->ol_flags & PKT_RX_VLAN_PKT) { - ethernet_vlan_header_tv_t * vlan_hdr = ((ethernet_vlan_header_tv_t *)&(eth_hdr->type)); - s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); - } + if (mb->ol_flags & PKT_RX_VLAN_PKT) + { + ethernet_vlan_header_tv_t *vlan_hdr = + ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); + s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); + } if (mb->packet_type) s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_types, &mb->packet_type); + format_dpdk_pkt_types, &mb->packet_type); return s; } #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS -static inline u8 * format_dpdk_pkt_rx_offload_flags (u8 * s, va_list * va) +static inline u8 * +format_dpdk_pkt_rx_offload_flags (u8 * s, va_list * va) { u16 *ol_flags = va_arg (*va, u16 *); uword indent = format_get_indent (s) + 2; @@ -748,15 +772,14 @@ static inline u8 * format_dpdk_pkt_rx_offload_flags (u8 * s, va_list * va) } foreach_dpdk_pkt_ext_offload_flag - #undef _ - - return s; + return s; } -u8 * format_dpdk_rx_rte_mbuf (u8 * s, va_list * va) +u8 * +format_dpdk_rx_rte_mbuf (u8 * s, va_list * va) { - struct rte_mbuf * mb = va_arg (*va, struct rte_mbuf *); + struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); ethernet_header_t *eth_hdr = va_arg (*args, ethernet_header_t *); uword indent = format_get_indent (s) + 2; @@ -765,26 +788,27 @@ u8 * format_dpdk_rx_rte_mbuf (u8 * s, va_list * va) * are only valid for the 1st mbuf segment. */ s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" - "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x" - "\n%Upacket_type 0x%x", - mb->port, mb->nb_segs, mb->pkt_len, - format_white_space, indent, - mb->buf_len, mb->data_len, mb->ol_flags, - format_white_space, indent, - mb->packet_type); + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, + format_white_space, indent, mb->packet_type); if (mb->ol_flags) s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_rx_offload_flags, &mb->ol_flags); + format_dpdk_pkt_rx_offload_flags, &mb->ol_flags); - if (mb->ol_flags & PKT_RX_VLAN_PKT) { - ethernet_vlan_header_tv_t * vlan_hdr = ((ethernet_vlan_header_tv_t *)&(eth_hdr->type)); - s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); - } + if (mb->ol_flags & PKT_RX_VLAN_PKT) + { + ethernet_vlan_header_tv_t *vlan_hdr = + ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); + s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); + } if (mb->packet_type) s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_types, &mb->packet_type); + format_dpdk_pkt_types, &mb->packet_type); return s; } @@ -793,23 +817,23 @@ u8 * format_dpdk_rx_rte_mbuf (u8 * s, va_list * va) uword unformat_socket_mem (unformat_input_t * input, va_list * va) { - uword ** r = va_arg (* va, uword **); + uword **r = va_arg (*va, uword **); int i = 0; u32 mem; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, ",")) - hash_set (*r, i, 1024); + hash_set (*r, i, 1024); else if (unformat (input, "%u,", &mem)) - hash_set (*r, i, mem); + hash_set (*r, i, mem); else if (unformat (input, "%u", &mem)) - hash_set (*r, i, mem); + hash_set (*r, i, mem); else - { - unformat_put_input (input); - goto done; - } + { + unformat_put_input (input); + goto done; + } i++; } @@ -823,7 +847,7 @@ unformat_rss_fn (unformat_input_t * input, uword * rss_fn) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (0) - ; + ; #undef _ #define _(f, s) \ else if (unformat (input, s)) \ @@ -831,12 +855,19 @@ unformat_rss_fn (unformat_input_t * input, uword * rss_fn) foreach_dpdk_rss_hf #undef _ - - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } } return 0; } + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index 239ae3bbed6..705c0371b97 100644 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -50,110 +50,115 @@ void *vlib_weakly_linked_functions[] = { static struct rte_eth_conf port_conf_template = { .rxmode = { - .split_hdr_size = 0, - .header_split = 0, /**< Header Split disabled */ - .hw_ip_checksum = 0, /**< IP checksum offload disabled */ - .hw_vlan_filter = 0, /**< VLAN filtering disabled */ - .hw_strip_crc = 0, /**< CRC stripped by hardware */ - }, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, .txmode = { - .mq_mode = ETH_MQ_TX_NONE, - }, + .mq_mode = ETH_MQ_TX_NONE, + }, }; clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) { - vlib_main_t * vm = vlib_get_main(); - vlib_buffer_main_t * bm = vm->buffer_main; + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_main_t *bm = vm->buffer_main; int rv; int j; - ASSERT(os_get_cpu_number() == 0); + ASSERT (os_get_cpu_number () == 0); - if (xd->admin_up) { - vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } + if (xd->admin_up) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); + xd->tx_q_used, &xd->port_conf); if (rv < 0) return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); + xd->device_index, rv); /* Set up one TX-queue per worker thread */ for (j = 0; j < xd->tx_q_used; j++) { - rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); /* retry with any other CPU socket */ if (rv < 0) - rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); if (rv < 0) - break; + break; } - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); for (j = 0; j < xd->rx_q_used; j++) { - rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]); + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + bm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); /* retry with any other CPU socket */ if (rv < 0) - rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]); + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + bm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); } - if (xd->admin_up) { + if (xd->admin_up) + { int rv; rv = rte_eth_dev_start (xd->device_index); if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } return 0; } -static u32 dpdk_flag_change (vnet_main_t * vnm, - vnet_hw_interface_t * hi, - u32 flags) +static u32 +dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); u32 old = 0; - if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)) + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) { old = xd->promisc; xd->promisc = flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL; - + if (xd->admin_up) { if (xd->promisc) - rte_eth_promiscuous_enable(xd->device_index); + rte_eth_promiscuous_enable (xd->device_index); else - rte_eth_promiscuous_disable(xd->device_index); + rte_eth_promiscuous_disable (xd->device_index); } } - else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)) + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) { /* * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the + * driver to dynamically change the mtu. If/when the * VIC firmware gets fixed, then this should be removed. */ if (xd->pmd == VNET_DPDK_PMD_ENIC) @@ -163,69 +168,66 @@ static u32 dpdk_flag_change (vnet_main_t * vnm, /* * Restore mtu to what has been set by CIMC in the firmware cfg. */ - rte_eth_dev_info_get(xd->device_index, &dev_info); + rte_eth_dev_info_get (xd->device_index, &dev_info); hi->max_packet_bytes = dev_info.max_rx_pktlen; - vlib_cli_output (vlib_get_main(), + vlib_cli_output (vlib_get_main (), "Cisco VIC mtu can only be changed " "using CIMC then rebooting the server!"); } else { int rv; - + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; if (xd->admin_up) rte_eth_dev_stop (xd->device_index); - rv = rte_eth_dev_configure - (xd->device_index, - xd->rx_q_used, - xd->tx_q_used, - &xd->port_conf); + rv = rte_eth_dev_configure + (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); if (rv < 0) - vlib_cli_output (vlib_get_main(), + vlib_cli_output (vlib_get_main (), "rte_eth_dev_configure[%d]: err %d", xd->device_index, rv); - rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes); + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); if (xd->admin_up) - { - int rv = rte_eth_dev_start (xd->device_index); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } + { + int rv = rte_eth_dev_start (xd->device_index); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } } } return old; } void -dpdk_device_lock_init(dpdk_device_t * xd) +dpdk_device_lock_init (dpdk_device_t * xd) { int q; - vec_validate(xd->lockp, xd->tx_q_used - 1); + vec_validate (xd->lockp, xd->tx_q_used - 1); for (q = 0; q < xd->tx_q_used; q++) { xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); + CLIB_CACHE_LINE_BYTES); memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); - } + } xd->need_txlock = 1; } void -dpdk_device_lock_free(dpdk_device_t * xd) +dpdk_device_lock_free (dpdk_device_t * xd) { int q; - for (q = 0; q < vec_len(xd->lockp); q++) - clib_mem_free((void *) xd->lockp[q]); - vec_free(xd->lockp); + for (q = 0; q < vec_len (xd->lockp); q++) + clib_mem_free ((void *) xd->lockp[q]); + vec_free (xd->lockp); xd->lockp = 0; xd->need_txlock = 0; } @@ -236,17 +238,17 @@ dpdk_lib_init (dpdk_main_t * dm) u32 nports; u32 nb_desc = 0; int i; - clib_error_t * error; - vlib_main_t * vm = vlib_get_main(); - vlib_thread_main_t * tm = vlib_get_thread_main(); - vlib_node_runtime_t * rt; - vnet_sw_interface_t * sw; - vnet_hw_interface_t * hi; - dpdk_device_t * xd; + clib_error_t *error; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_node_runtime_t *rt; + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hi; + dpdk_device_t *xd; vlib_pci_addr_t last_pci_addr; u32 last_pci_addr_port = 0; - vlib_thread_registration_t * tr; - uword * p; + vlib_thread_registration_t *tr; + uword *p; u32 next_cpu = 0; u8 af_packet_port_id = 0; @@ -256,7 +258,7 @@ dpdk_lib_init (dpdk_main_t * dm) dm->input_cpu_count = 1; rt = vlib_node_get_runtime (vm, dpdk_input_node.index); - rt->function = dpdk_input_multiarch_select(); + rt->function = dpdk_input_multiarch_select (); /* find out which cpus will be used for input */ p = hash_get_mem (tm->thread_registrations_by_name, "workers"); @@ -269,13 +271,13 @@ dpdk_lib_init (dpdk_main_t * dm) } vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); + CLIB_CACHE_LINE_BYTES); vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); + CLIB_CACHE_LINE_BYTES); - nports = rte_eth_dev_count(); - if (nports < 1) + nports = rte_eth_dev_count (); + if (nports < 1) { clib_warning ("DPDK drivers found no ports..."); } @@ -283,13 +285,14 @@ dpdk_lib_init (dpdk_main_t * dm) if (CLIB_DEBUG > 0) clib_warning ("DPDK drivers found %d ports...", nports); - /* + /* * All buffers are all allocated from the same rte_mempool. * Thus they all have the same number of data bytes. */ - dm->vlib_buffer_free_list_index = - vlib_buffer_get_or_create_free_list ( - vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, "dpdk rx"); + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "dpdk rx"); if (dm->conf->enable_tcp_udp_checksum) dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT @@ -301,21 +304,23 @@ dpdk_lib_init (dpdk_main_t * dm) u8 vlan_strip = 0; int j; struct rte_eth_dev_info dev_info; - clib_error_t * rv; + clib_error_t *rv; struct rte_eth_link l; - dpdk_device_config_t * devconf = 0; + dpdk_device_config_t *devconf = 0; vlib_pci_addr_t pci_addr; - uword * p = 0; + uword *p = 0; - rte_eth_dev_info_get(i, &dev_info); - if (dev_info.pci_dev) /* bonded interface has no pci info */ - { + rte_eth_dev_info_get (i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { pci_addr.domain = dev_info.pci_dev->addr.domain; pci_addr.bus = dev_info.pci_dev->addr.bus; pci_addr.slot = dev_info.pci_dev->addr.devid; pci_addr.function = dev_info.pci_dev->addr.function; - p = hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } + p = + hash_get (dm->conf->device_config_index_by_pci_addr, + pci_addr.as_u32); + } if (p) devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); @@ -326,15 +331,16 @@ dpdk_lib_init (dpdk_main_t * dm) vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - xd->cpu_socket = (i8) rte_eth_dev_socket_id(i); + xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); /* Handle interface naming for devices with multiple ports sharing same PCI ID */ if (dev_info.pci_dev) { - struct rte_eth_dev_info di = {0}; + struct rte_eth_dev_info di = { 0 }; rte_eth_dev_info_get (i + 1, &di); if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && - memcmp(&dev_info.pci_dev->addr, &di.pci_dev->addr, sizeof(struct rte_pci_addr)) == 0) + memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, + sizeof (struct rte_pci_addr)) == 0) { xd->interface_name_suffix = format (0, "0"); last_pci_addr.as_u32 = pci_addr.as_u32; @@ -342,7 +348,8 @@ dpdk_lib_init (dpdk_main_t * dm) } else if (pci_addr.as_u32 == last_pci_addr.as_u32) { - xd->interface_name_suffix = format (0, "%u", i - last_pci_addr_port); + xd->interface_name_suffix = + format (0, "%u", i - last_pci_addr_port); } else { @@ -352,187 +359,195 @@ dpdk_lib_init (dpdk_main_t * dm) else last_pci_addr.as_u32 = ~0; - clib_memcpy(&xd->tx_conf, &dev_info.default_txconf, - sizeof(struct rte_eth_txconf)); + clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, + sizeof (struct rte_eth_txconf)); if (dm->conf->no_multi_seg) - { - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 0; - } + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } else - { - xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 1; - } + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + } - clib_memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf)); + clib_memcpy (&xd->port_conf, &port_conf_template, + sizeof (struct rte_eth_conf)); - xd->tx_q_used = clib_min(dev_info.max_tx_queues, tm->n_vlib_mains); + xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); - if (devconf->num_tx_queues > 0 && devconf->num_tx_queues < xd->tx_q_used) - xd->tx_q_used = clib_min(xd->tx_q_used, devconf->num_tx_queues); + if (devconf->num_tx_queues > 0 + && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); if (devconf->num_rx_queues > 1 && dm->use_rss == 0) { - rt->function = dpdk_input_rss_multiarch_select(); + rt->function = dpdk_input_rss_multiarch_select (); dm->use_rss = 1; } - if (devconf->num_rx_queues > 1 && dev_info.max_rx_queues >= devconf->num_rx_queues) - { - xd->rx_q_used = devconf->num_rx_queues; - xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - if (devconf->rss_fn == 0) - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; - else - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; - } + if (devconf->num_rx_queues > 1 + && dev_info.max_rx_queues >= devconf->num_rx_queues) + { + xd->rx_q_used = devconf->num_rx_queues; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = + ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; + } else - xd->rx_q_used = 1; + xd->rx_q_used = 1; xd->dev_type = VNET_DPDK_DEV_ETH; /* workaround for drivers not setting driver_name */ if ((!dev_info.driver_name) && (dev_info.pci_dev)) - dev_info.driver_name = dev_info.pci_dev->driver->name; - ASSERT(dev_info.driver_name); + dev_info.driver_name = dev_info.pci_dev->driver->name; + ASSERT (dev_info.driver_name); - if (!xd->pmd) { + if (!xd->pmd) + { #define _(s,f) else if (dev_info.driver_name && \ !strcmp(dev_info.driver_name, s)) \ xd->pmd = VNET_DPDK_PMD_##f; - if (0) - ; - foreach_dpdk_pmd + if (0) + ; + foreach_dpdk_pmd #undef _ - else - xd->pmd = VNET_DPDK_PMD_UNKNOWN; - - - switch (xd->pmd) { - /* 1G adapters */ - case VNET_DPDK_PMD_E1000EM: - case VNET_DPDK_PMD_IGB: - case VNET_DPDK_PMD_IGBVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - break; - - /* 10G adapters */ - case VNET_DPDK_PMD_IXGBE: - case VNET_DPDK_PMD_IXGBEVF: - case VNET_DPDK_PMD_THUNDERX: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; - xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; - break; - case VNET_DPDK_PMD_DPAA2: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; - /* Cisco VIC */ - case VNET_DPDK_PMD_ENIC: - rte_eth_link_get_nowait(i, &l); - xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC; - if (l.link_speed == 40000) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; - } - else - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; - } - break; - - /* Intel Fortville */ - case VNET_DPDK_PMD_I40E: - case VNET_DPDK_PMD_I40EVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; - xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; - - switch (dev_info.pci_dev->id.device_id) { - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_SFP_XL710: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case I40E_DEV_ID_QSFP_A: - case I40E_DEV_ID_QSFP_B: - case I40E_DEV_ID_QSFP_C: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case I40E_DEV_ID_VF: - rte_eth_link_get_nowait(i, &l); - xd->port_type = l.link_speed == 10000 ? - VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_CXGBE: - switch (dev_info.pci_dev->id.device_id) { - case 0x540d: /* T580-CR */ - case 0x5410: /* T580-LP-cr */ - xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; - xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case 0x5403: /* T540-CR */ - xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; - xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - default: - xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; - xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - /* Intel Red Rock Canyon */ - case VNET_DPDK_PMD_FM10K: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; - xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; - xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; - break; - - /* virtio */ - case VNET_DPDK_PMD_VIRTIO: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; - xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; - break; - - /* vmxnet3 */ - case VNET_DPDK_PMD_VMXNET3: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - break; - - case VNET_DPDK_PMD_AF_PACKET: - xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; - xd->af_packet_port_id = af_packet_port_id++; - break; - - case VNET_DPDK_PMD_BOND: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; - break; - - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } + switch (xd->pmd) + { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait (i, &l); + xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC; + if (l.link_speed == 40000) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + } + else + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + } + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + + switch (dev_info.pci_dev->id.device_id) + { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait (i, &l); + xd->port_type = l.link_speed == 10000 ? + VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_CXGBE: + switch (dev_info.pci_dev->id.device_id) + { + case 0x540d: /* T580-CR */ + case 0x5410: /* T580-LP-cr */ + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case 0x5403: /* T540-CR */ + xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + default: + xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + /* Intel Red Rock Canyon */ + case VNET_DPDK_PMD_FM10K: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + break; + + /* virtio */ + case VNET_DPDK_PMD_VIRTIO: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; + xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; + break; + + /* vmxnet3 */ + case VNET_DPDK_PMD_VMXNET3: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + break; + + case VNET_DPDK_PMD_AF_PACKET: + xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; + xd->af_packet_port_id = af_packet_port_id++; + break; + + case VNET_DPDK_PMD_BOND: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; + break; + + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } - if (devconf->num_rx_desc) - xd->nb_rx_desc = devconf->num_rx_desc; + if (devconf->num_rx_desc) + xd->nb_rx_desc = devconf->num_rx_desc; - if (devconf->num_tx_desc) - xd->nb_tx_desc = devconf->num_tx_desc; - } + if (devconf->num_tx_desc) + xd->nb_tx_desc = devconf->num_tx_desc; + } /* * Ensure default mtu is not > the mtu read from the hardware. @@ -540,72 +555,73 @@ dpdk_lib_init (dpdk_main_t * dm) * not be available. */ if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) - { - /* - * This device does not support the platforms's max frame - * size. Use it's advertised mru instead. - */ - xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; - } + { + /* + * This device does not support the platforms's max frame + * size. Use it's advertised mru instead. + */ + xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; + } else - { - xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; - - /* - * Some platforms do not account for Ethernet FCS (4 bytes) in - * MTU calculations. To interop with them increase mru but only - * if the device's settings can support it. - */ - if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && - xd->port_conf.rxmode.hw_strip_crc) - { - /* - * Allow additional 4 bytes (for Ethernet FCS). These bytes are - * stripped by h/w and so will not consume any buffer memory. - */ - xd->port_conf.rxmode.max_rx_pkt_len += 4; - } - } + { + xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; -#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) + /* + * Some platforms do not account for Ethernet FCS (4 bytes) in + * MTU calculations. To interop with them increase mru but only + * if the device's settings can support it. + */ + if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && + xd->port_conf.rxmode.hw_strip_crc) + { + /* + * Allow additional 4 bytes (for Ethernet FCS). These bytes are + * stripped by h/w and so will not consume any buffer memory. + */ + xd->port_conf.rxmode.max_rx_pkt_len += 4; + } + } + +#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) /* * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts */ if (xd->pmd == VNET_DPDK_PMD_VMXNET3) - { - xd->port_conf.rxmode.max_rx_pkt_len = 1518; - xd->port_conf.rxmode.jumbo_frame = 0; - } + { + xd->port_conf.rxmode.max_rx_pkt_len = 1518; + xd->port_conf.rxmode.jumbo_frame = 0; + } #endif if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) - { - f64 now = vlib_time_now(vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - clib_memcpy (addr+2, &rnd, sizeof(rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } else - rte_eth_macaddr_get(i,(struct ether_addr *)addr); + rte_eth_macaddr_get (i, (struct ether_addr *) addr); if (xd->tx_q_used < tm->n_vlib_mains) - dpdk_device_lock_init(xd); + dpdk_device_lock_init (xd); xd->device_index = xd - dm->devices; - ASSERT(i == xd->device_index); + ASSERT (i == xd->device_index); xd->per_interface_next_index = ~0; /* assign interface to input thread */ - dpdk_device_and_queue_t * dq; + dpdk_device_and_queue_t *dq; int q; if (devconf->workers) { int i; q = 0; + /* *INDENT-OFF* */ clib_bitmap_foreach (i, devconf->workers, ({ int cpu = dm->input_cpu_first_index + i; unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; @@ -615,6 +631,7 @@ dpdk_lib_init (dpdk_main_t * dm) dq->device = xd->device_index; dq->queue_id = q++; })); + /* *INDENT-ON* */ } else for (q = 0; q < xd->rx_q_used; q++) @@ -626,13 +643,13 @@ dpdk_lib_init (dpdk_main_t * dm) * numa node for worker thread handling this queue * needed for taking buffers from the right mempool */ - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); /* * construct vector of (device,queue) pairs for each worker thread */ - vec_add2(dm->devices_by_cpu[cpu], dq, 1); + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); dq->device = xd->device_index; dq->queue_id = q; @@ -642,48 +659,45 @@ dpdk_lib_init (dpdk_main_t * dm) } vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); + CLIB_CACHE_LINE_BYTES); for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, - sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j< xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } - rv = dpdk_port_setup(dm, xd); + rv = dpdk_port_setup (dm, xd); if (rv) - return rv; + return rv; /* count the number of descriptors used for this device */ nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; error = ethernet_register_interface - (dm->vnet_main, - dpdk_device_class.index, - xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, - dpdk_flag_change); + (dm->vnet_main, dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); if (error) - return error; - + return error; + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); xd->vlib_sw_if_index = sw->sw_if_index; hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); /* * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the + * driver to dynamically change the mtu. If/when the * VIC firmware gets fixed, then this should be removed. */ if (xd->pmd == VNET_DPDK_PMD_ENIC) @@ -693,117 +707,115 @@ dpdk_lib_init (dpdk_main_t * dm) */ hi->max_packet_bytes = dev_info.max_rx_pktlen; if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) - vlan_strip = 1; /* remove vlan tag from VIC port by default */ + vlan_strip = 1; /* remove vlan tag from VIC port by default */ else - clib_warning("VLAN strip disabled for interface\n"); + clib_warning ("VLAN strip disabled for interface\n"); } else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) vlan_strip = 1; if (vlan_strip) - { + { int vlan_off; - vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index); + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); vlan_off |= ETH_VLAN_STRIP_OFFLOAD; - if (rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off) == 0) - clib_warning("VLAN strip enabled for interface\n"); + if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) + clib_warning ("VLAN strip enabled for interface\n"); else - clib_warning("VLAN strip cannot be supported by interface\n"); - } + clib_warning ("VLAN strip cannot be supported by interface\n"); + } -#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) +#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) /* * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts */ else if (xd->pmd == VNET_DPDK_PMD_VMXNET3) - hi->max_packet_bytes = 1518; + hi->max_packet_bytes = 1518; #endif - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - xd->port_conf.rxmode.max_rx_pkt_len - sizeof(ethernet_header_t); + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); - rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes); + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); } #ifdef RTE_LIBRTE_KNI - if (dm->conf->num_kni) { - clib_warning("Initializing KNI interfaces..."); - rte_kni_init(dm->conf->num_kni); - for (i = 0; i < dm->conf->num_kni; i++) + if (dm->conf->num_kni) { - u8 addr[6]; - int j; - - /* Create vnet interface */ - vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); - xd->dev_type = VNET_DPDK_DEV_KNI; - - xd->device_index = xd - dm->devices; - ASSERT(nports + i == xd->device_index); - xd->per_interface_next_index = ~0; - xd->kni_port_id = i; - xd->cpu_socket = -1; - hash_set (dm->dpdk_device_by_kni_port_id, i, xd - dm->devices); - xd->rx_q_used = 1; - - /* assign interface to input thread */ - dpdk_device_and_queue_t * dq; - vec_add2(dm->devices_by_cpu[dm->input_cpu_first_index], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - - vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, - sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } + clib_warning ("Initializing KNI interfaces..."); + rte_kni_init (dm->conf->num_kni); + for (i = 0; i < dm->conf->num_kni; i++) + { + u8 addr[6]; + int j; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->dev_type = VNET_DPDK_DEV_KNI; + + xd->device_index = xd - dm->devices; + ASSERT (nports + i == xd->device_index); + xd->per_interface_next_index = ~0; + xd->kni_port_id = i; + xd->cpu_socket = -1; + hash_set (dm->dpdk_device_by_kni_port_id, i, xd - dm->devices); + xd->rx_q_used = 1; + + /* assign interface to input thread */ + dpdk_device_and_queue_t *dq; + vec_add2 (dm->devices_by_cpu[dm->input_cpu_first_index], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j< xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } - /* FIXME Set up one TX-queue per worker thread */ + /* FIXME Set up one TX-queue per worker thread */ - { - f64 now = vlib_time_now(vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - clib_memcpy (addr+2, &rnd, sizeof(rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } - error = ethernet_register_interface - (dm->vnet_main, - dpdk_device_class.index, - xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, - dpdk_flag_change); + error = ethernet_register_interface + (dm->vnet_main, dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); - if (error) - return error; + if (error) + return error; - sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + } } - } #endif - if (nb_desc > dm->conf->num_mbufs) + if (nb_desc > dm->conf->num_mbufs) clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->conf->num_mbufs, nb_desc); + dm->conf->num_mbufs, nb_desc); /* init next vhost-user if index */ dm->next_vu_if_id = 0; @@ -814,13 +826,14 @@ dpdk_lib_init (dpdk_main_t * dm) static void dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) { - vlib_pci_main_t * pm = &pci_main; - clib_error_t * error; - vlib_pci_device_t * d; - pci_config_header_t * c; - u8 * pci_addr = 0; + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + vlib_pci_device_t *d; + pci_config_header_t *c; + u8 *pci_addr = 0; int num_whitelisted = vec_len (conf->dev_confs); + /* *INDENT-OFF* */ pool_foreach (d, pm->pci_devs, ({ dpdk_device_config_t * devconf = 0; c = &d->config0.header; @@ -878,15 +891,17 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) clib_error_report (error); } })); + /* *INDENT-ON* */ vec_free (pci_addr); } static clib_error_t * -dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unformat_input_t * input, u8 is_default) +dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, + unformat_input_t * input, u8 is_default) { - clib_error_t * error = 0; - uword * p; - dpdk_device_config_t * devconf; + clib_error_t *error = 0; + uword *p; + dpdk_device_config_t *devconf; unformat_input_t sub_input; if (is_default) @@ -900,11 +915,13 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma if (!p) { pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, devconf - conf->dev_confs); + hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, + devconf - conf->dev_confs); } else - return clib_error_return(0, "duplicate configuration for PCI address %U", - format_vlib_pci_addr, &pci_addr); + return clib_error_return (0, + "duplicate configuration for PCI address %U", + format_vlib_pci_addr, &pci_addr); } devconf->pci_addr.as_u32 = pci_addr.as_u32; @@ -925,16 +942,18 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma else if (unformat (input, "workers %U", unformat_bitmap_list, &devconf->workers)) ; - else if (unformat (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) - { - error = unformat_rss_fn(&sub_input, &devconf->rss_fn); - if (error) - break; - } + else + if (unformat + (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) + { + error = unformat_rss_fn (&sub_input, &devconf->rss_fn); + if (error) + break; + } else if (unformat (input, "vlan-strip-offload off")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; else if (unformat (input, "vlan-strip-offload on")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; else { error = clib_error_return (0, "unknown input `%U'", @@ -947,12 +966,15 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma return error; if (devconf->workers && devconf->num_rx_queues == 0) - devconf->num_rx_queues = clib_bitmap_count_set_bits(devconf->workers); + devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); else if (devconf->workers && - clib_bitmap_count_set_bits(devconf->workers) != devconf->num_rx_queues) - error = clib_error_return (0, "%U: number of worker threadds must be " - "equal to number of rx queues", - format_vlib_pci_addr, &pci_addr); + clib_bitmap_count_set_bits (devconf->workers) != + devconf->num_rx_queues) + error = + clib_error_return (0, + "%U: number of worker threadds must be " + "equal to number of rx queues", format_vlib_pci_addr, + &pci_addr); return error; } @@ -960,15 +982,15 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unforma static clib_error_t * dpdk_config (vlib_main_t * vm, unformat_input_t * input) { - clib_error_t * error = 0; - dpdk_main_t * dm = &dpdk_main; - dpdk_config_main_t * conf = &dpdk_config_main; - vlib_thread_main_t * tm = vlib_get_thread_main(); - dpdk_device_config_t * devconf; + clib_error_t *error = 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_config_main_t *conf = &dpdk_config_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_config_t *devconf; vlib_pci_addr_t pci_addr; unformat_input_t sub_input; - u8 * s, * tmp = 0; - u8 * rte_cmd = 0, * ethname = 0; + u8 *s, *tmp = 0; + u8 *rte_cmd = 0, *ethname = 0; u32 log_level; int ret, i; int num_whitelisted = 0; @@ -976,41 +998,45 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) u8 no_huge = 0; u8 huge_dir = 0; u8 file_prefix = 0; - u8 * socket_mem = 0; + u8 *socket_mem = 0; conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); // MATT-FIXME: inverted virtio-vhost logic to use virtio by default conf->use_virtio_vhost = 1; - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { /* Prime the pump */ if (unformat (input, "no-hugetlb")) - { - vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); - no_huge = 1; - } + { + vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); + no_huge = 1; + } else if (unformat (input, "enable-tcp-udp-checksum")) conf->enable_tcp_udp_checksum = 1; else if (unformat (input, "decimal-interface-names")) - conf->interface_name_format_decimal = 1; + conf->interface_name_format_decimal = 1; else if (unformat (input, "no-multi-seg")) - conf->no_multi_seg = 1; + conf->no_multi_seg = 1; else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, &sub_input)) { - error = dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~1, &sub_input, 1); + error = + dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, + 1); if (error) return error; } - else if (unformat (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, - unformat_vlib_cli_sub_input, &sub_input)) + else + if (unformat + (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) { error = dpdk_device_config (conf, pci_addr, &sub_input, 0); @@ -1029,19 +1055,25 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) num_whitelisted++; } else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) - ; + ; else if (unformat (input, "kni %d", &conf->num_kni)) - ; + ; else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) ; else if (unformat (input, "socket-mem %s", &socket_mem)) ; - else if (unformat (input, "vhost-user-coalesce-frames %d", &conf->vhost_coalesce_frames)) - ; - else if (unformat (input, "vhost-user-coalesce-time %f", &conf->vhost_coalesce_time)) - ; + else + if (unformat + (input, "vhost-user-coalesce-frames %d", + &conf->vhost_coalesce_frames)) + ; + else + if (unformat + (input, "vhost-user-coalesce-time %f", + &conf->vhost_coalesce_time)) + ; else if (unformat (input, "enable-vhost-user")) - conf->use_virtio_vhost = 0; + conf->use_virtio_vhost = 0; else if (unformat (input, "no-pci")) { no_pci = 1; @@ -1049,7 +1081,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_add1 (conf->eal_init_args, tmp); } else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) - ; + ; #define _(a) \ else if (unformat(input, #a)) \ @@ -1059,7 +1091,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } foreach_eal_double_hyphen_predicate_arg #undef _ - #define _(a) \ else if (unformat(input, #a " %s", &s)) \ { \ @@ -1074,7 +1105,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } foreach_eal_double_hyphen_arg #undef _ - #define _(a,b) \ else if (unformat(input, #a " %s", &s)) \ { \ @@ -1083,9 +1113,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_add1 (s, 0); \ vec_add1 (conf->eal_init_args, s); \ } - foreach_eal_single_hyphen_arg + foreach_eal_single_hyphen_arg #undef _ - #define _(a,b) \ else if (unformat(input, #a " %s", &s)) \ { \ @@ -1095,18 +1124,17 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_add1 (conf->eal_init_args, s); \ conf->a##_set_manually = 1; \ } - foreach_eal_single_hyphen_mandatory_arg + foreach_eal_single_hyphen_mandatory_arg #undef _ + else if (unformat (input, "default")) + ; - else if (unformat(input, "default")) - ; - - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } } if (!conf->uio_driver_name) @@ -1117,20 +1145,20 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) */ if (!no_huge && !huge_dir) { - u32 x, * mem_by_socket = 0; + u32 x, *mem_by_socket = 0; uword c = 0; u8 use_1g = 1; u8 use_2m = 1; u8 less_than_1g = 1; int rv; - umount(DEFAULT_HUGE_DIR); + umount (DEFAULT_HUGE_DIR); /* Process "socket-mem" parameter value */ if (vec_len (socket_mem)) { unformat_input_t in; - unformat_init_vector(&in, socket_mem); + unformat_init_vector (&in, socket_mem); while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) { if (unformat (&in, "%u,", &x)) @@ -1142,26 +1170,29 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else break; - vec_add1(mem_by_socket, x); + vec_add1 (mem_by_socket, x); if (x > 1023) less_than_1g = 0; } - /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ - unformat_free(&in); - socket_mem = 0; + /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ + unformat_free (&in); + socket_mem = 0; } else { + /* *INDENT-OFF* */ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { vec_validate(mem_by_socket, c); mem_by_socket[c] = 256; /* default per-socket mem */ } )); + /* *INDENT-ON* */ } /* check if available enough 1GB pages for each socket */ + /* *INDENT-OFF* */ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { u32 pages_avail, page_size, mem; @@ -1216,69 +1247,70 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_free(p); vec_free(path); })); + /* *INDENT-ON* */ if (mem_by_socket == 0) - { - error = clib_error_return (0, "mem_by_socket NULL"); - goto done; - } + { + error = clib_error_return (0, "mem_by_socket NULL"); + goto done; + } _vec_len (mem_by_socket) = c + 1; /* regenerate socket_mem string */ vec_foreach_index (x, mem_by_socket) socket_mem = format (socket_mem, "%s%u", - socket_mem ? "," : "", - mem_by_socket[x]); + socket_mem ? "," : "", mem_by_socket[x]); socket_mem = format (socket_mem, "%c", 0); vec_free (mem_by_socket); - rv = mkdir(VPP_RUN_DIR, 0755); + rv = mkdir (VPP_RUN_DIR, 0755); if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); - goto done; - } + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + VPP_RUN_DIR, errno); + goto done; + } - rv = mkdir(DEFAULT_HUGE_DIR, 0755); + rv = mkdir (DEFAULT_HUGE_DIR, 0755); if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - DEFAULT_HUGE_DIR, errno); - goto done; - } + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + DEFAULT_HUGE_DIR, errno); + goto done; + } if (use_1g && !(less_than_1g && use_2m)) - { - rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); - } + { + rv = + mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + } else if (use_2m) - { - rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); - } + { + rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + } else - { - return clib_error_return (0, "not enough free huge pages"); - } + { + return clib_error_return (0, "not enough free huge pages"); + } if (rv) - { - error = clib_error_return (0, "mount failed %d", errno); - goto done; - } + { + error = clib_error_return (0, "mount failed %d", errno); + goto done; + } tmp = format (0, "--huge-dir%c", 0); vec_add1 (conf->eal_init_args, tmp); tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); vec_add1 (conf->eal_init_args, tmp); if (!file_prefix) - { - tmp = format (0, "--file-prefix%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "vpp%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } + { + tmp = format (0, "--file-prefix%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "vpp%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } } vec_free (rte_cmd); @@ -1290,24 +1322,24 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) /* I'll bet that -c and -n must be the first and second args... */ if (!conf->coremask_set_manually) { - vlib_thread_registration_t * tr; - uword * coremask = 0; + vlib_thread_registration_t *tr; + uword *coremask = 0; int i; /* main thread core */ - coremask = clib_bitmap_set(coremask, tm->main_lcore, 1); + coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); for (i = 0; i < vec_len (tm->registrations); i++) - { - tr = tm->registrations[i]; - coremask = clib_bitmap_or(coremask, tr->coremask); - } + { + tr = tm->registrations[i]; + coremask = clib_bitmap_or (coremask, tr->coremask); + } vec_insert (conf->eal_init_args, 2, 1); conf->eal_init_args[1] = (u8 *) "-c"; tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); conf->eal_init_args[2] = tmp; - clib_bitmap_free(coremask); + clib_bitmap_free (coremask); } if (!conf->nchannels_set_manually) @@ -1318,13 +1350,14 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) conf->eal_init_args[4] = tmp; } - if (no_pci == 0 && geteuid() == 0) - dpdk_bind_devices_to_uio(conf); + if (no_pci == 0 && geteuid () == 0) + dpdk_bind_devices_to_uio (conf); #define _(x) \ if (devconf->x == 0 && conf->default_devconf.x > 0) \ devconf->x = conf->default_devconf.x ; + /* *INDENT-OFF* */ pool_foreach (devconf, conf->dev_confs, ({ /* default per-device config items */ @@ -1346,6 +1379,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_add1 (conf->eal_init_args, tmp); } })); + /* *INDENT-ON* */ #undef _ @@ -1363,7 +1397,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) /* NULL terminate the "argv" vector, in case of stupidity */ vec_add1 (conf->eal_init_args, 0); - _vec_len(conf->eal_init_args) -= 1; + _vec_len (conf->eal_init_args) -= 1; /* Set up DPDK eal and packet mbuf pool early. */ @@ -1374,188 +1408,197 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vm = vlib_get_main (); /* make copy of args as rte_eal_init tends to mess up with arg array */ - for (i = 1; i < vec_len(conf->eal_init_args); i++) - conf->eal_init_args_str = format(conf->eal_init_args_str, "%s ", - conf->eal_init_args[i]); + for (i = 1; i < vec_len (conf->eal_init_args); i++) + conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); - ret = rte_eal_init(vec_len(conf->eal_init_args), (char **) conf->eal_init_args); + ret = + rte_eal_init (vec_len (conf->eal_init_args), + (char **) conf->eal_init_args); /* lazy umount hugepages */ - umount2(DEFAULT_HUGE_DIR, MNT_DETACH); + umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); if (ret < 0) return clib_error_return (0, "rte_eal_init returned %d", ret); /* Dump the physical memory layout prior to creating the mbuf_pool */ - fprintf(stdout, "DPDK physical memory layout:\n"); - rte_dump_physmem_layout(stdout); + fprintf (stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout (stdout); /* main thread 1st */ - error = vlib_buffer_pool_create(vm, conf->num_mbufs, rte_socket_id()); + error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); if (error) return error; for (i = 0; i < RTE_MAX_LCORE; i++) { - error = vlib_buffer_pool_create(vm, conf->num_mbufs, - rte_lcore_to_socket_id(i)); + error = vlib_buffer_pool_create (vm, conf->num_mbufs, + rte_lcore_to_socket_id (i)); if (error) - return error; + return error; } - done: +done: return error; } VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); -void dpdk_update_link_state (dpdk_device_t * xd, f64 now) +void +dpdk_update_link_state (dpdk_device_t * xd, f64 now) { - vnet_main_t * vnm = vnet_get_main(); - struct rte_eth_link prev_link = xd->link; - u32 hw_flags = 0; - u8 hw_flags_chg = 0; + vnet_main_t *vnm = vnet_get_main (); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; - /* only update link state for PMD interfaces */ - if (xd->dev_type != VNET_DPDK_DEV_ETH) - return; + /* only update link state for PMD interfaces */ + if (xd->dev_type != VNET_DPDK_DEV_ETH) + return; - xd->time_last_link_update = now ? now : xd->time_last_link_update; - memset(&xd->link, 0, sizeof(xd->link)); - rte_eth_link_get_nowait (xd->device_index, &xd->link); + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset (&xd->link, 0, sizeof (xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); - if (LINK_STATE_ELOGS) + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + ELOG_TYPE_DECLARE (e) = { - vlib_main_t * vm = vlib_get_main(); - ELOG_TYPE_DECLARE(e) = { - .format = - "update-link-state: sw_if_index %d, admin_up %d," - "old link_state %d new link_state %d", - .format_args = "i4i1i1i1", - }; - - struct { u32 sw_if_index; u8 admin_up; - u8 old_link_state; u8 new_link_state;} *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->admin_up = xd->admin_up; - ed->old_link_state = (u8) - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); - ed->new_link_state = (u8) xd->link.link_status; - } + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; - if ((xd->admin_up == 1) && - ((xd->link.link_status != 0) ^ - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + struct { - hw_flags_chg = 1; - hw_flags |= (xd->link.link_status ? - VNET_HW_INTERFACE_FLAG_LINK_UP: 0); - } + u32 sw_if_index; + u8 admin_up; + u8 old_link_state; + u8 new_link_state; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = xd->admin_up; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } - if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) - { - hw_flags_chg = 1; - switch (xd->link.link_duplex) - { - case ETH_LINK_HALF_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; - break; - case ETH_LINK_FULL_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; - break; - default: - break; - } - } + if ((xd->admin_up == 1) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_SPEED_NUM_10M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_SPEED_NUM_100M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_SPEED_NUM_1G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_SPEED_NUM_10G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_SPEED_NUM_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning("unknown link speed %d", xd->link.link_speed); - break; - } - } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_SPEED_NUM_10M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_SPEED_NUM_100M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_SPEED_NUM_1G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_SPEED_NUM_10G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_SPEED_NUM_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } #else - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_LINK_SPEED_10: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_LINK_SPEED_100: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_LINK_SPEED_1000: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_LINK_SPEED_10000: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_LINK_SPEED_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning("unknown link speed %d", xd->link.link_speed); - break; - } - } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_LINK_SPEED_10: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_LINK_SPEED_100: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_LINK_SPEED_1000: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_LINK_SPEED_10000: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_LINK_SPEED_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } #endif - if (hw_flags_chg) - { - if (LINK_STATE_ELOGS) - { - vlib_main_t * vm = vlib_get_main(); - - ELOG_TYPE_DECLARE(e) = { - .format = "update-link-state: sw_if_index %d, new flags %d", - .format_args = "i4i4", - }; - - struct { u32 sw_if_index; u32 flags; } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->flags = hw_flags; - } - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); - } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, new flags %d",.format_args + = "i4i4",}; + + struct + { + u32 sw_if_index; + u32 flags; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } } static uword -dpdk_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, - vlib_frame_t * f) +dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { - clib_error_t * error; - vnet_main_t * vnm = vnet_get_main(); - dpdk_main_t * dm = &dpdk_main; - ethernet_main_t * em = ðernet_main; - dpdk_device_t * xd; - vlib_thread_main_t * tm = vlib_get_thread_main(); + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + ethernet_main_t *em = ðernet_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); #if DPDK_VHOST_USER void *vu_state; #endif @@ -1563,100 +1606,117 @@ dpdk_process (vlib_main_t * vm, error = dpdk_lib_init (dm); - /* + /* * Turn on the input node if we found some devices to drive * and we're not running worker threads or i/o threads */ - if (error == 0 && vec_len(dm->devices) > 0) + if (error == 0 && vec_len (dm->devices) > 0) { - if (tm->n_vlib_mains == 1) - vlib_node_set_state (vm, dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i=0; i < tm->n_vlib_mains; i++) - if (vec_len(dm->devices_by_cpu[i]) > 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 0; i < tm->n_vlib_mains; i++) + if (vec_len (dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); } if (error) clib_error_report (error); #if DPDK_VHOST_USER - dpdk_vhost_user_process_init(&vu_state); + dpdk_vhost_user_process_init (&vu_state); #endif tm->worker_thread_release = 1; f64 now = vlib_time_now (vm); vec_foreach (xd, dm->devices) - { - dpdk_update_link_state (xd, now); - } + { + dpdk_update_link_state (xd, now); + } -{ /* - * Extra set up for bond interfaces: - * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. - */ - int nports = rte_eth_dev_count(); - if (nports > 0) { - for (i = 0; i < nports; i++) { - struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get(i, &dev_info); - if (!dev_info.driver_name) + { + /* + * Extra set up for bond interfaces: + * 1. Setup MACs for bond interfaces and their slave links which was set + * in dpdk_port_setup() but needs to be done again here to take effect. + * 2. Set up info for bond interface related CLI support. + */ + int nports = rte_eth_dev_count (); + if (nports > 0) + { + for (i = 0; i < nports; i++) + { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get (i, &dev_info); + if (!dev_info.driver_name) dev_info.driver_name = dev_info.pci_dev->driver->name; - ASSERT(dev_info.driver_name); - if (strncmp(dev_info.driver_name, "rte_bond_pmd", 12) == 0) { - u8 addr[6]; - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get(i, slink, 16); - if (nlink > 0) { - vnet_hw_interface_t * bhi; - ethernet_interface_t * bei; - int rv; - - /* Get MAC of 1st slave link */ - rte_eth_macaddr_get(slink[0], (struct ether_addr *)addr); - /* Set MAC of bounded interface to that of 1st slave link */ - rv = rte_eth_bond_mac_address_set(i, (struct ether_addr *)addr); - if (rv < 0) - clib_warning("Failed to set MAC address"); - - /* Populate MAC of bonded interface in VPP hw tables */ - bhi = vnet_get_hw_interface( - vnm, dm->devices[i].vlib_hw_if_index); - bei = pool_elt_at_index(em->interfaces, bhi->hw_instance); - clib_memcpy(bhi->hw_address, addr, 6); - clib_memcpy(bei->address, addr, 6); - /* Init l3 packet size allowed on bonded interface */ - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - ETHERNET_MAX_PACKET_BYTES - sizeof(ethernet_header_t); - while (nlink >= 1) { /* for all slave links */ - int slave = slink[--nlink]; - dpdk_device_t * sdev = &dm->devices[slave]; - vnet_hw_interface_t * shi; - vnet_sw_interface_t * ssi; - /* Add MAC to all slave links except the first one */ - if (nlink) rte_eth_dev_mac_addr_add( - slave, (struct ether_addr *)addr, 0); - /* Set slaves bitmap for bonded interface */ - bhi->bond_info = clib_bitmap_set( - bhi->bond_info, sdev->vlib_hw_if_index, 1); - /* Set slave link flags on slave interface */ - shi = vnet_get_hw_interface(vnm, sdev->vlib_hw_if_index); - ssi = vnet_get_sw_interface(vnm, sdev->vlib_sw_if_index); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; - ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; + ASSERT (dev_info.driver_name); + if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) + { + u8 addr[6]; + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (i, slink, 16); + if (nlink > 0) + { + vnet_hw_interface_t *bhi; + ethernet_interface_t *bei; + int rv; + + /* Get MAC of 1st slave link */ + rte_eth_macaddr_get (slink[0], + (struct ether_addr *) addr); + /* Set MAC of bounded interface to that of 1st slave link */ + rv = + rte_eth_bond_mac_address_set (i, + (struct ether_addr *) + addr); + if (rv < 0) + clib_warning ("Failed to set MAC address"); + + /* Populate MAC of bonded interface in VPP hw tables */ + bhi = + vnet_get_hw_interface (vnm, + dm->devices[i].vlib_hw_if_index); + bei = + pool_elt_at_index (em->interfaces, bhi->hw_instance); + clib_memcpy (bhi->hw_address, addr, 6); + clib_memcpy (bei->address, addr, 6); + /* Init l3 packet size allowed on bonded interface */ + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); + while (nlink >= 1) + { /* for all slave links */ + int slave = slink[--nlink]; + dpdk_device_t *sdev = &dm->devices[slave]; + vnet_hw_interface_t *shi; + vnet_sw_interface_t *ssi; + /* Add MAC to all slave links except the first one */ + if (nlink) + rte_eth_dev_mac_addr_add (slave, + (struct ether_addr *) + addr, 0); + /* Set slaves bitmap for bonded interface */ + bhi->bond_info = + clib_bitmap_set (bhi->bond_info, + sdev->vlib_hw_if_index, 1); + /* Set slave link flags on slave interface */ + shi = + vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); + ssi = + vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); + shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; + ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; + } } } } } } -} while (1) { @@ -1664,58 +1724,62 @@ dpdk_process (vlib_main_t * vm, * check each time through the loop in case intervals are changed */ f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? - dm->link_state_poll_interval : dm->stat_poll_interval; + dm->link_state_poll_interval : dm->stat_poll_interval; vlib_process_wait_for_event_or_clock (vm, min_wait); - if (dpdk_get_admin_up_down_in_progress()) - /* skip the poll if an admin up down is in progress (on any interface) */ - continue; + if (dpdk_get_admin_up_down_in_progress ()) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; vec_foreach (xd, dm->devices) - { - f64 now = vlib_time_now (vm); - if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) - dpdk_update_counters (xd, now); - if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) - dpdk_update_link_state (xd, now); + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) + dpdk_update_link_state (xd, now); #if DPDK_VHOST_USER - if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) - if (dpdk_vhost_user_process_if(vm, xd, vu_state) != 0) - continue; + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + if (dpdk_vhost_user_process_if (vm, xd, vu_state) != 0) + continue; #endif - } + } } #if DPDK_VHOST_USER - dpdk_vhost_user_process_cleanup(vu_state); + dpdk_vhost_user_process_cleanup (vu_state); #endif - return 0; + return 0; } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_process_node,static) = { .function = dpdk_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "dpdk-process", .process_log2_n_stack_bytes = 17, }; +/* *INDENT-ON* */ -int dpdk_set_stat_poll_interval (f64 interval) +int +dpdk_set_stat_poll_interval (f64 interval) { if (interval < DPDK_MIN_STATS_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); + return (VNET_API_ERROR_INVALID_VALUE); dpdk_main.stat_poll_interval = interval; return 0; } -int dpdk_set_link_state_poll_interval (f64 interval) +int +dpdk_set_link_state_poll_interval (f64 interval) { if (interval < DPDK_MIN_LINK_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); + return (VNET_API_ERROR_INVALID_VALUE); dpdk_main.link_state_poll_interval = interval; @@ -1725,24 +1789,24 @@ int dpdk_set_link_state_poll_interval (f64 interval) clib_error_t * dpdk_init (vlib_main_t * vm) { - dpdk_main_t * dm = &dpdk_main; - vlib_node_t * ei; - clib_error_t * error = 0; - vlib_thread_main_t * tm = vlib_get_thread_main(); + dpdk_main_t *dm = &dpdk_main; + vlib_node_t *ei; + clib_error_t *error = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); /* verify that structs are cacheline aligned */ - ASSERT(offsetof(dpdk_device_t, cacheline0) == 0); - ASSERT(offsetof(dpdk_device_t, cacheline1) == CLIB_CACHE_LINE_BYTES); - ASSERT(offsetof(dpdk_worker_t, cacheline0) == 0); - ASSERT(offsetof(frame_queue_trace_t, cacheline0) == 0); + ASSERT (offsetof (dpdk_device_t, cacheline0) == 0); + ASSERT (offsetof (dpdk_device_t, cacheline1) == CLIB_CACHE_LINE_BYTES); + ASSERT (offsetof (dpdk_worker_t, cacheline0) == 0); + ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0); dm->vlib_main = vm; - dm->vnet_main = vnet_get_main(); + dm->vnet_main = vnet_get_main (); dm->conf = &dpdk_config_main; ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); + return clib_error_return (0, "ethernet-input node AWOL"); dm->ethernet_input_node_index = ei->index; @@ -1760,19 +1824,18 @@ dpdk_init (vlib_main_t * vm) /* initialize EFD (early fast discard) default settings */ dm->efd.enabled = DPDK_EFD_DISABLED; dm->efd.queue_hi_thresh = ((DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT * - DPDK_NB_RX_DESC_10GE)/100); + DPDK_NB_RX_DESC_10GE) / 100); dm->efd.consec_full_frames_hi_thresh = - DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH; + DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH; /* vhost-user coalescence frames defaults */ dm->conf->vhost_coalesce_frames = 32; dm->conf->vhost_coalesce_time = 1e-3; /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ - dm->buffer_flags_template = - (VLIB_BUFFER_TOTAL_LENGTH_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED - | IP_BUFFER_L4_CHECKSUM_CORRECT); + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; @@ -1786,3 +1849,11 @@ dpdk_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (dpdk_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index 942274b0bf5..303b44e489a 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -49,82 +49,87 @@ */ #define VMWARE_LENGTH_BUG_WORKAROUND 0 -static char * dpdk_error_strings[] = { +static char *dpdk_error_strings[] = { #define _(n,s) s, - foreach_dpdk_error + foreach_dpdk_error #undef _ }; always_inline int -dpdk_mbuf_is_ip4(struct rte_mbuf *mb) +dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) { - return RTE_ETH_IS_IPV4_HDR(mb->packet_type) != 0; + return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; } always_inline int -dpdk_mbuf_is_ip6(struct rte_mbuf *mb) +dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) { - return RTE_ETH_IS_IPV6_HDR(mb->packet_type) != 0; + return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; } always_inline int -vlib_buffer_is_mpls(vlib_buffer_t * b) +vlib_buffer_is_mpls (vlib_buffer_t * b) { ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)); + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); } always_inline void -dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb, - vlib_buffer_t *b0, - u8 * next0, u8 * error0) +dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t * xd, + struct rte_mbuf *mb, + vlib_buffer_t * b0, u8 * next0, + u8 * error0) { u8 n0; uint16_t mb_flags = mb->ol_flags; - if (PREDICT_FALSE(mb_flags & ( + if (PREDICT_FALSE (mb_flags & ( #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS - PKT_EXT_RX_PKT_ERROR | PKT_EXT_RX_BAD_FCS | + PKT_EXT_RX_PKT_ERROR | PKT_EXT_RX_BAD_FCS | #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ - PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD - ))) + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))) { - /* some error was flagged. determine the drop reason */ + /* some error was flagged. determine the drop reason */ n0 = DPDK_RX_NEXT_DROP; - *error0 = + *error0 = #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS - (mb_flags & PKT_EXT_RX_PKT_ERROR) ? DPDK_ERROR_RX_PACKET_ERROR : - (mb_flags & PKT_EXT_RX_BAD_FCS) ? DPDK_ERROR_RX_BAD_FCS : + (mb_flags & PKT_EXT_RX_PKT_ERROR) ? DPDK_ERROR_RX_PACKET_ERROR : + (mb_flags & PKT_EXT_RX_BAD_FCS) ? DPDK_ERROR_RX_BAD_FCS : #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */ - (mb_flags & PKT_RX_IP_CKSUM_BAD) ? DPDK_ERROR_IP_CHECKSUM_ERROR : - (mb_flags & PKT_RX_L4_CKSUM_BAD) ? DPDK_ERROR_L4_CHECKSUM_ERROR : - DPDK_ERROR_NONE; + (mb_flags & PKT_RX_IP_CKSUM_BAD) ? DPDK_ERROR_IP_CHECKSUM_ERROR : + (mb_flags & PKT_RX_L4_CKSUM_BAD) ? DPDK_ERROR_L4_CHECKSUM_ERROR : + DPDK_ERROR_NONE; } else { *error0 = DPDK_ERROR_NONE; - if (PREDICT_FALSE(xd->per_interface_next_index != ~0)) + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) { n0 = xd->per_interface_next_index; b0->flags |= BUFFER_HANDOFF_NEXT_VALID; - if (PREDICT_TRUE (dpdk_mbuf_is_ip4(mb))) - vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP4_INPUT; - else if (PREDICT_TRUE(dpdk_mbuf_is_ip6(mb))) - vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP6_INPUT; - else if (PREDICT_TRUE(vlib_buffer_is_mpls(b0))) - vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_MPLS_INPUT; + if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) + vnet_buffer (b0)->handoff.next_index = + HANDOFF_DISPATCH_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) + vnet_buffer (b0)->handoff.next_index = + HANDOFF_DISPATCH_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + vnet_buffer (b0)->handoff.next_index = + HANDOFF_DISPATCH_NEXT_MPLS_INPUT; else - vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT; + vnet_buffer (b0)->handoff.next_index = + HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT; } - else if (PREDICT_FALSE(xd->vlan_subifs || (mb_flags & PKT_RX_VLAN_PKT))) + else + if (PREDICT_FALSE (xd->vlan_subifs || (mb_flags & PKT_RX_VLAN_PKT))) n0 = DPDK_RX_NEXT_ETHERNET_INPUT; else { - if (PREDICT_TRUE (dpdk_mbuf_is_ip4(mb))) + if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) n0 = DPDK_RX_NEXT_IP4_INPUT; - else if (PREDICT_TRUE(dpdk_mbuf_is_ip6(mb))) + else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) n0 = DPDK_RX_NEXT_IP6_INPUT; - else if (PREDICT_TRUE(vlib_buffer_is_mpls(b0))) + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) n0 = DPDK_RX_NEXT_MPLS_INPUT; else n0 = DPDK_RX_NEXT_ETHERNET_INPUT; @@ -133,15 +138,14 @@ dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb, *next0 = n0; } -void dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, - u32 * buffers, - uword n_buffers) +void +dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers) { - vlib_main_t * vm = vlib_get_main(); - u32 * b, n_left; + vlib_main_t *vm = vlib_get_main (); + u32 *b, n_left; u8 next0; n_left = n_buffers; @@ -150,8 +154,8 @@ void dpdk_rx_trace (dpdk_main_t * dm, while (n_left >= 1) { u32 bi0; - vlib_buffer_t * b0; - dpdk_rx_dma_trace_t * t0; + vlib_buffer_t *b0; + dpdk_rx_dma_trace_t *t0; struct rte_mbuf *mb; u8 error0; @@ -159,9 +163,8 @@ void dpdk_rx_trace (dpdk_main_t * dm, n_left -= 1; b0 = vlib_get_buffer (vm, bi0); - mb = rte_mbuf_from_vlib_buffer(b0); - dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, - &next0, &error0); + mb = rte_mbuf_from_vlib_buffer (b0); + dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, &next0, &error0); vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->queue_index = queue_id; @@ -170,7 +173,8 @@ void dpdk_rx_trace (dpdk_main_t * dm, clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - clib_memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data)); + clib_memcpy (t0->buffer.pre_data, b0->data, + sizeof (t0->buffer.pre_data)); clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS @@ -189,29 +193,28 @@ void dpdk_rx_trace (dpdk_main_t * dm, * dpdk_efd_update_counters() * Update EFD (early-fast-discard) counters */ -void dpdk_efd_update_counters (dpdk_device_t *xd, - u32 n_buffers, - u16 enabled) +void +dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers, u16 enabled) { if (enabled & DPDK_EFD_MONITOR_ENABLED) { - u64 now = clib_cpu_time_now(); + u64 now = clib_cpu_time_now (); if (xd->efd_agent.last_poll_time > 0) - { - u64 elapsed_time = (now - xd->efd_agent.last_poll_time); - if (elapsed_time > xd->efd_agent.max_poll_delay) - xd->efd_agent.max_poll_delay = elapsed_time; - } + { + u64 elapsed_time = (now - xd->efd_agent.last_poll_time); + if (elapsed_time > xd->efd_agent.max_poll_delay) + xd->efd_agent.max_poll_delay = elapsed_time; + } xd->efd_agent.last_poll_time = now; } - + xd->efd_agent.total_packet_cnt += n_buffers; xd->efd_agent.last_burst_sz = n_buffers; if (n_buffers > xd->efd_agent.max_burst_sz) xd->efd_agent.max_burst_sz = n_buffers; - if (PREDICT_FALSE(n_buffers == VLIB_FRAME_SIZE)) + if (PREDICT_FALSE (n_buffers == VLIB_FRAME_SIZE)) { xd->efd_agent.full_frames_cnt++; xd->efd_agent.consec_full_frames_cnt++; @@ -226,90 +229,194 @@ void dpdk_efd_update_counters (dpdk_device_t *xd, * returns non zero DPDK error if packet meets early-fast-discard criteria, * zero otherwise */ -u32 is_efd_discardable (vlib_thread_main_t *tm, - vlib_buffer_t * b0, - struct rte_mbuf *mb) +u32 +is_efd_discardable (vlib_thread_main_t * tm, + vlib_buffer_t * b0, struct rte_mbuf *mb) { ethernet_header_t *eh = (ethernet_header_t *) b0->data; - if (eh->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) + if (eh->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)) { ip4_header_t *ipv4 = - (ip4_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + (ip4_header_t *) & (b0->data[sizeof (ethernet_header_t)]); u8 pkt_prec = (ipv4->tos >> 5); - + return (tm->efd.ip_prec_bitmap & (1 << pkt_prec) ? - DPDK_ERROR_IPV4_EFD_DROP_PKTS : DPDK_ERROR_NONE); + DPDK_ERROR_IPV4_EFD_DROP_PKTS : DPDK_ERROR_NONE); } - else if (eh->type == clib_net_to_host_u16(ETHERNET_TYPE_IP6)) + else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_IP6)) { ip6_header_t *ipv6 = - (ip6_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + (ip6_header_t *) & (b0->data[sizeof (ethernet_header_t)]); u8 pkt_tclass = - ((ipv6->ip_version_traffic_class_and_flow_label >> 20) & 0xff); - + ((ipv6->ip_version_traffic_class_and_flow_label >> 20) & 0xff); + return (tm->efd.ip_prec_bitmap & (1 << pkt_tclass) ? - DPDK_ERROR_IPV6_EFD_DROP_PKTS : DPDK_ERROR_NONE); + DPDK_ERROR_IPV6_EFD_DROP_PKTS : DPDK_ERROR_NONE); } - else if (eh->type == clib_net_to_host_u16(ETHERNET_TYPE_MPLS_UNICAST)) + else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_MPLS_UNICAST)) { mpls_unicast_header_t *mpls = - (mpls_unicast_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + (mpls_unicast_header_t *) & (b0->data[sizeof (ethernet_header_t)]); u8 pkt_exp = ((mpls->label_exp_s_ttl >> 9) & 0x07); return (tm->efd.mpls_exp_bitmap & (1 << pkt_exp) ? - DPDK_ERROR_MPLS_EFD_DROP_PKTS : DPDK_ERROR_NONE); + DPDK_ERROR_MPLS_EFD_DROP_PKTS : DPDK_ERROR_NONE); } - else if ((eh->type == clib_net_to_host_u16(ETHERNET_TYPE_VLAN)) || - (eh->type == clib_net_to_host_u16(ETHERNET_TYPE_DOT1AD))) + else if ((eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_VLAN)) || + (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD))) { ethernet_vlan_header_t *vlan = - (ethernet_vlan_header_t *)&(b0->data[sizeof(ethernet_header_t)]); + (ethernet_vlan_header_t *) & (b0->data[sizeof (ethernet_header_t)]); u8 pkt_cos = ((vlan->priority_cfi_and_id >> 13) & 0x07); return (tm->efd.vlan_cos_bitmap & (1 << pkt_cos) ? - DPDK_ERROR_VLAN_EFD_DROP_PKTS : DPDK_ERROR_NONE); + DPDK_ERROR_VLAN_EFD_DROP_PKTS : DPDK_ERROR_NONE); } return DPDK_ERROR_NONE; } +static inline u32 +dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) +{ + u32 n_buffers; + u32 n_left; + u32 n_this_chunk; + + n_left = VLIB_FRAME_SIZE; + n_buffers = 0; + + if (PREDICT_TRUE (xd->dev_type == VNET_DPDK_DEV_ETH)) + { + while (n_left) + { + n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, + xd->rx_vectors[queue_id] + + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + + /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ + if (n_this_chunk < 32) + break; + } + } +#if DPDK_VHOST_USER + else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_main_t *bm = vm->buffer_main; + unsigned socket_id = rte_socket_id (); + u32 offset = 0; + + offset = queue_id * VIRTIO_QNUM; + + struct vhost_virtqueue *vq = + xd->vu_vhost_dev.virtqueue[offset + VIRTIO_TXQ]; + + if (PREDICT_FALSE (!vq->enabled)) + return 0; + + struct rte_mbuf **pkts = xd->rx_vectors[queue_id]; + while (n_left) + { + n_this_chunk = rte_vhost_dequeue_burst (&xd->vu_vhost_dev, + offset + VIRTIO_TXQ, + bm->pktmbuf_pools + [socket_id], + pkts + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + if (n_this_chunk == 0) + break; + } + + int i; + u32 bytes = 0; + for (i = 0; i < n_buffers; i++) + { + struct rte_mbuf *buff = pkts[i]; + bytes += rte_pktmbuf_data_len (buff); + } + + f64 now = vlib_time_now (vm); + + dpdk_vu_vring *vring = NULL; + /* send pending interrupts if needed */ + if (dpdk_vhost_user_want_interrupt (xd, offset + VIRTIO_TXQ)) + { + vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]); + vring->n_since_last_int += n_buffers; + + if ((vring->n_since_last_int && (vring->int_deadline < now)) + || (vring->n_since_last_int > dm->conf->vhost_coalesce_frames)) + dpdk_vhost_user_send_interrupt (vm, xd, offset + VIRTIO_TXQ); + } + + vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]); + vring->packets += n_buffers; + vring->bytes += bytes; + + if (dpdk_vhost_user_want_interrupt (xd, offset + VIRTIO_RXQ)) + { + if (vring->n_since_last_int && (vring->int_deadline < now)) + dpdk_vhost_user_send_interrupt (vm, xd, offset + VIRTIO_RXQ); + } + + } +#endif +#ifdef RTE_LIBRTE_KNI + else if (xd->dev_type == VNET_DPDK_DEV_KNI) + { + n_buffers = + rte_kni_rx_burst (xd->kni, xd->rx_vectors[queue_id], VLIB_FRAME_SIZE); + rte_kni_handle_request (xd->kni); + } +#endif + else + { + ASSERT (0); + } + + return n_buffers; +} + /* * This function is used when there are no worker threads. - * The main thread performs IO and forwards the packets. + * The main thread performs IO and forwards the packets. */ -static inline u32 dpdk_device_input ( dpdk_main_t * dm, - dpdk_device_t * xd, - vlib_node_runtime_t * node, - u32 cpu_index, - u16 queue_id, - int use_efd) +static inline u32 +dpdk_device_input (dpdk_main_t * dm, + dpdk_device_t * xd, + vlib_node_runtime_t * node, + u32 cpu_index, u16 queue_id, int use_efd) { u32 n_buffers; u32 next_index = DPDK_RX_NEXT_ETHERNET_INPUT; - u32 n_left_to_next, * to_next; + u32 n_left_to_next, *to_next; u32 mb_index; - vlib_main_t * vm = vlib_get_main(); + vlib_main_t *vm = vlib_get_main (); uword n_rx_bytes = 0; - u32 n_trace, trace_cnt __attribute__((unused)); - vlib_buffer_free_list_t * fl; + u32 n_trace, trace_cnt __attribute__ ((unused)); + vlib_buffer_free_list_t *fl; u8 efd_discard_burst = 0; u32 buffer_flags_template; - + if (xd->admin_up == 0) return 0; - n_buffers = dpdk_rx_burst(dm, xd, queue_id); + n_buffers = dpdk_rx_burst (dm, xd, queue_id); if (n_buffers == 0) { /* check if EFD (dpdk) is enabled */ - if (PREDICT_FALSE(use_efd && dm->efd.enabled)) - { - /* reset a few stats */ - xd->efd_agent.last_poll_time = 0; - xd->efd_agent.last_burst_sz = 0; - } + if (PREDICT_FALSE (use_efd && dm->efd.enabled)) + { + /* reset a few stats */ + xd->efd_agent.last_poll_time = 0; + xd->efd_agent.last_burst_sz = 0; + } return 0; } @@ -321,57 +428,57 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); /* - * DAW-FIXME: VMXNET3 device stop/start doesn't work, + * DAW-FIXME: VMXNET3 device stop/start doesn't work, * therefore fake the stop in the dpdk driver by - * silently dropping all of the incoming pkts instead of + * silently dropping all of the incoming pkts instead of * stopping the driver / hardware. */ - if (PREDICT_FALSE(xd->admin_up != 1)) + if (PREDICT_FALSE (xd->admin_up != 1)) { for (mb_index = 0; mb_index < n_buffers; mb_index++) rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]); - + return 0; } /* Check for congestion if EFD (Early-Fast-Discard) is enabled * in any mode (e.g. dpdk, monitor, or drop_all) */ - if (PREDICT_FALSE(use_efd && dm->efd.enabled)) + if (PREDICT_FALSE (use_efd && dm->efd.enabled)) { /* update EFD counters */ - dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled); - - if (PREDICT_FALSE(dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED)) - { - /* discard all received packets */ - for (mb_index = 0; mb_index < n_buffers; mb_index++) - rte_pktmbuf_free(xd->rx_vectors[queue_id][mb_index]); - - xd->efd_agent.discard_cnt += n_buffers; - increment_efd_drop_counter(vm, - DPDK_ERROR_VLAN_EFD_DROP_PKTS, - n_buffers); - - return 0; - } - - if (PREDICT_FALSE(xd->efd_agent.consec_full_frames_cnt >= - dm->efd.consec_full_frames_hi_thresh)) - { - u32 device_queue_sz = rte_eth_rx_queue_count(xd->device_index, - queue_id); - if (device_queue_sz >= dm->efd.queue_hi_thresh) - { - /* dpdk device queue has reached the critical threshold */ - xd->efd_agent.congestion_cnt++; - - /* apply EFD to packets from the burst */ - efd_discard_burst = 1; - } - } + dpdk_efd_update_counters (xd, n_buffers, dm->efd.enabled); + + if (PREDICT_FALSE (dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED)) + { + /* discard all received packets */ + for (mb_index = 0; mb_index < n_buffers; mb_index++) + rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]); + + xd->efd_agent.discard_cnt += n_buffers; + increment_efd_drop_counter (vm, + DPDK_ERROR_VLAN_EFD_DROP_PKTS, + n_buffers); + + return 0; + } + + if (PREDICT_FALSE (xd->efd_agent.consec_full_frames_cnt >= + dm->efd.consec_full_frames_hi_thresh)) + { + u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index, + queue_id); + if (device_queue_sz >= dm->efd.queue_hi_thresh) + { + /* dpdk device queue has reached the critical threshold */ + xd->efd_agent.congestion_cnt++; + + /* apply EFD to packets from the burst */ + efd_discard_burst = 1; + } + } } - + mb_index = 0; while (n_buffers > 0) @@ -379,65 +486,63 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, u32 bi0; u8 next0, error0; u32 l3_offset0; - vlib_buffer_t * b0, * b_seg, * b_chain = 0; + vlib_buffer_t *b0, *b_seg, *b_chain = 0; u32 cntr_type; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_buffers > 0 && n_left_to_next > 0) - { + { u8 nb_seg = 1; - struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index]; struct rte_mbuf *mb_seg = mb->next; - if (PREDICT_TRUE(n_buffers > 2)) - { - struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index+2]; - vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb); - CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); - } - - ASSERT(mb); - - b0 = vlib_buffer_from_rte_mbuf(mb); - - /* check whether EFD is looking for packets to discard */ - if (PREDICT_FALSE(efd_discard_burst)) - { - vlib_thread_main_t * tm = vlib_get_thread_main(); - - if (PREDICT_TRUE(cntr_type = is_efd_discardable(tm, b0, mb))) - { - rte_pktmbuf_free(mb); - xd->efd_agent.discard_cnt++; - increment_efd_drop_counter(vm, - cntr_type, - 1); - n_buffers--; - mb_index++; - continue; - } - } - - /* Prefetch one next segment if it exists. */ - if (PREDICT_FALSE(mb->nb_segs > 1)) - { - struct rte_mbuf *pfmb = mb->next; - vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf(pfmb); - CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + if (PREDICT_TRUE (n_buffers > 2)) + { + struct rte_mbuf *pfmb = xd->rx_vectors[queue_id][mb_index + 2]; + vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf (pfmb); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); + } + + ASSERT (mb); + + b0 = vlib_buffer_from_rte_mbuf (mb); + + /* check whether EFD is looking for packets to discard */ + if (PREDICT_FALSE (efd_discard_burst)) + { + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (PREDICT_TRUE (cntr_type = is_efd_discardable (tm, b0, mb))) + { + rte_pktmbuf_free (mb); + xd->efd_agent.discard_cnt++; + increment_efd_drop_counter (vm, cntr_type, 1); + n_buffers--; + mb_index++; + continue; + } + } + + /* Prefetch one next segment if it exists. */ + if (PREDICT_FALSE (mb->nb_segs > 1)) + { + struct rte_mbuf *pfmb = mb->next; + vlib_buffer_t *bp = vlib_buffer_from_rte_mbuf (pfmb); + CLIB_PREFETCH (pfmb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (bp, CLIB_CACHE_LINE_BYTES, STORE); b_chain = b0; - } + } - vlib_buffer_init_for_free_list (b0, fl); - - bi0 = vlib_get_buffer_index (vm, b0); + vlib_buffer_init_for_free_list (b0, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next++; + n_left_to_next--; - to_next[0] = bi0; - to_next++; - n_left_to_next--; - dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, &next0, &error0); #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS @@ -446,7 +551,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, * is using them for RX flags (e.g. Cisco VIC Ethernet driver) */ - if (PREDICT_TRUE(trace_cnt == 0)) + if (PREDICT_TRUE (trace_cnt == 0)) mb->ol_flags &= PKT_EXT_RX_CLR_TX_FLAGS_MASK; else trace_cnt--; @@ -456,44 +561,44 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, l3_offset0 = ((next0 == DPDK_RX_NEXT_IP4_INPUT || next0 == DPDK_RX_NEXT_IP6_INPUT || - next0 == DPDK_RX_NEXT_MPLS_INPUT) ? + next0 == DPDK_RX_NEXT_MPLS_INPUT) ? sizeof (ethernet_header_t) : 0); - b0->current_data = l3_offset0; - /* Some drivers like fm10k receive frames with - mb->data_off > RTE_PKTMBUF_HEADROOM */ - b0->current_data += mb->data_off - RTE_PKTMBUF_HEADROOM; - b0->current_length = mb->data_len - l3_offset0; + b0->current_data = l3_offset0; + /* Some drivers like fm10k receive frames with + mb->data_off > RTE_PKTMBUF_HEADROOM */ + b0->current_data += mb->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb->data_len - l3_offset0; - b0->flags = buffer_flags_template; + b0->flags = buffer_flags_template; - if (VMWARE_LENGTH_BUG_WORKAROUND) - b0->current_length -= 4; + if (VMWARE_LENGTH_BUG_WORKAROUND) + b0->current_length -= 4; - vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; n_rx_bytes += mb->pkt_len; - /* Process subsequent segments of multi-segment packets */ + /* Process subsequent segments of multi-segment packets */ while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) { - ASSERT(mb_seg != 0); + ASSERT (mb_seg != 0); - b_seg = vlib_buffer_from_rte_mbuf(mb_seg); + b_seg = vlib_buffer_from_rte_mbuf (mb_seg); vlib_buffer_init_for_free_list (b_seg, fl); - ASSERT((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT(b_seg->current_data == 0); + ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (b_seg->current_data == 0); - /* - * The driver (e.g. virtio) may not put the packet data at the start - * of the segment, so don't assume b_seg->current_data == 0 is correct. - */ - b_seg->current_data = (mb_seg->buf_addr + mb_seg->data_off) - (void *)b_seg->data; + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = + (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; b_seg->current_length = mb_seg->data_len; - b0->total_length_not_including_first_buffer += - mb_seg->data_len; + b0->total_length_not_including_first_buffer += mb_seg->data_len; b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); @@ -501,61 +606,61 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, b_chain = b_seg; mb_seg = mb_seg->next; nb_seg++; - } - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0); - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - if (PREDICT_FALSE (n_trace > mb_index)) - vec_add1 (xd->d_trace_buffers, bi0); - n_buffers--; - mb_index++; - } + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + if (PREDICT_FALSE (n_trace > mb_index)) + vec_add1 (xd->d_trace_buffers, bi0); + n_buffers--; + mb_index++; + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } if (PREDICT_FALSE (vec_len (xd->d_trace_buffers) > 0)) { dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers, - vec_len (xd->d_trace_buffers)); - vlib_set_trace_count (vm, node, n_trace - vec_len (xd->d_trace_buffers)); + vec_len (xd->d_trace_buffers)); + vlib_set_trace_count (vm, node, + n_trace - vec_len (xd->d_trace_buffers)); } - - vlib_increment_combined_counter - (vnet_get_main()->interface_main.combined_sw_if_counters + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, - xd->vlib_sw_if_index, - mb_index, n_rx_bytes); + cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - dpdk_worker_t * dw = vec_elt_at_index(dm->workers, cpu_index); + dpdk_worker_t *dw = vec_elt_at_index (dm->workers, cpu_index); dw->aggregate_rx_packets += mb_index; return mb_index; } -static inline void poll_rate_limit(dpdk_main_t * dm) +static inline void +poll_rate_limit (dpdk_main_t * dm) { /* Limit the poll rate by sleeping for N msec between polls */ if (PREDICT_FALSE (dm->poll_sleep != 0)) - { - struct timespec ts, tsrem; + { + struct timespec ts, tsrem; - ts.tv_sec = 0; - ts.tv_nsec = 1000*1000*dm->poll_sleep; /* 1ms */ + ts.tv_sec = 0; + ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ - while (nanosleep(&ts, &tsrem) < 0) - { - ts = tsrem; - } - } + while (nanosleep (&ts, &tsrem) < 0) + { + ts = tsrem; + } + } } /** \brief Main DPDK input node @@ -595,88 +700,90 @@ static inline void poll_rate_limit(dpdk_main_t * dm) <em>Next Nodes:</em> - Static arcs to: error-drop, ethernet-input, - ip4-input-no-checksum, ip6-input, mpls-gre-input + ip4-input-no-checksum, ip6-input, mpls-gre-input - per-interface redirection, controlled by <code>xd->per_interface_next_index</code> */ static uword -dpdk_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * f) +dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; uword n_rx_packets = 0; - dpdk_device_and_queue_t * dq; - u32 cpu_index = os_get_cpu_number(); + dpdk_device_and_queue_t *dq; + u32 cpu_index = os_get_cpu_number (); /* * Poll all devices on this cpu for input/interrupts. */ + /* *INDENT-OFF* */ vec_foreach (dq, dm->devices_by_cpu[cpu_index]) { xd = vec_elt_at_index(dm->devices, dq->device); ASSERT(dq->queue_id == 0); n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0); } + /* *INDENT-ON* */ - poll_rate_limit(dm); + poll_rate_limit (dm); return n_rx_packets; } uword dpdk_input_rss (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * f) + vlib_node_runtime_t * node, vlib_frame_t * f) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; uword n_rx_packets = 0; - dpdk_device_and_queue_t * dq; - u32 cpu_index = os_get_cpu_number(); + dpdk_device_and_queue_t *dq; + u32 cpu_index = os_get_cpu_number (); /* * Poll all devices on this cpu for input/interrupts. */ + /* *INDENT-OFF* */ vec_foreach (dq, dm->devices_by_cpu[cpu_index]) { xd = vec_elt_at_index(dm->devices, dq->device); n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0); } + /* *INDENT-ON* */ - poll_rate_limit(dm); + poll_rate_limit (dm); return n_rx_packets; } uword dpdk_input_efd (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * f) + vlib_node_runtime_t * node, vlib_frame_t * f) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; uword n_rx_packets = 0; - dpdk_device_and_queue_t * dq; - u32 cpu_index = os_get_cpu_number(); + dpdk_device_and_queue_t *dq; + u32 cpu_index = os_get_cpu_number (); /* * Poll all devices on this cpu for input/interrupts. */ + /* *INDENT-OFF* */ vec_foreach (dq, dm->devices_by_cpu[cpu_index]) { xd = vec_elt_at_index(dm->devices, dq->device); n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1); } + /* *INDENT-ON* */ - poll_rate_limit(dm); + poll_rate_limit (dm); return n_rx_packets; } - +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_input_node) = { .function = dpdk_input, .type = VLIB_NODE_TYPE_INPUT, @@ -711,12 +818,14 @@ VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd) CLIB_MULTIARCH_SELECT_FN(dpdk_input); CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss); CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd); +/* *INDENT-ON* */ /* * Override the next nodes for the dpdk input nodes. * Must be invoked prior to VLIB_INIT_FUNCTION calls. */ -void dpdk_set_next_node (dpdk_rx_next_t next, char *name) +void +dpdk_set_next_node (dpdk_rx_next_t next, char *name) { vlib_node_registration_t *r = &dpdk_input_node; vlib_node_registration_t *r_handoff = &handoff_dispatch_node; @@ -742,38 +851,49 @@ void dpdk_set_next_node (dpdk_rx_next_t next, char *name) * Based on the operation type, set lower/upper bits for the given index value */ void -set_efd_bitmap (u8 *bitmap, u32 value, u32 op) +set_efd_bitmap (u8 * bitmap, u32 value, u32 op) { - int ix; - - *bitmap = 0; - for (ix = 0; ix < 8; ix++) { - if (((op == EFD_OPERATION_LESS_THAN) && (ix < value)) || - ((op == EFD_OPERATION_GREATER_OR_EQUAL) && (ix >= value))){ - (*bitmap) |= (1 << ix); - } + int ix; + + *bitmap = 0; + for (ix = 0; ix < 8; ix++) + { + if (((op == EFD_OPERATION_LESS_THAN) && (ix < value)) || + ((op == EFD_OPERATION_GREATER_OR_EQUAL) && (ix >= value))) + { + (*bitmap) |= (1 << ix); + } } } void -efd_config (u32 enabled, - u32 ip_prec, u32 ip_op, - u32 mpls_exp, u32 mpls_op, - u32 vlan_cos, u32 vlan_op) +efd_config (u32 enabled, + u32 ip_prec, u32 ip_op, + u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op) { - vlib_thread_main_t * tm = vlib_get_thread_main(); - dpdk_main_t * dm = &dpdk_main; - - if (enabled) { - tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; - dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; - } else { - tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; - dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; - } + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; - set_efd_bitmap(&tm->efd.ip_prec_bitmap, ip_prec, ip_op); - set_efd_bitmap(&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op); - set_efd_bitmap(&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op); + if (enabled) + { + tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; + dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; + } + else + { + tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; + dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; + } + set_efd_bitmap (&tm->efd.ip_prec_bitmap, ip_prec, ip_op); + set_efd_bitmap (&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op); + set_efd_bitmap (&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op); } + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/devices/dpdk/vhost_user.c b/vnet/vnet/devices/dpdk/vhost_user.c index 8335963d22b..c153e2ede78 100644 --- a/vnet/vnet/devices/dpdk/vhost_user.c +++ b/vnet/vnet/devices/dpdk/vhost_user.c @@ -41,7 +41,9 @@ #if DPDK_VHOST_USER -static const char *vhost_message_str[] __attribute__((unused)) = { +/* *INDENT-OFF* */ +static const char *vhost_message_str[] __attribute__ ((unused)) = +{ [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", @@ -56,21 +58,22 @@ static const char *vhost_message_str[] __attribute__((unused)) = { [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", - [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", - [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", - [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", - [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", - [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", }; +/* *INDENT-ON* */ -static int dpdk_vhost_user_set_vring_enable(u32 hw_if_index, - u8 idx, int enable); +static int dpdk_vhost_user_set_vring_enable (u32 hw_if_index, + u8 idx, int enable); /* - * DPDK vhost-user functions + * DPDK vhost-user functions */ -/* portions taken from dpdk +/* portions taken from dpdk * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. @@ -105,34 +108,34 @@ static int dpdk_vhost_user_set_vring_enable(u32 hw_if_index, static uword -qva_to_vva(struct virtio_net *dev, uword qemu_va) +qva_to_vva (struct virtio_net *dev, uword qemu_va) { struct virtio_memory_regions *region; uword vhost_va = 0; uint32_t regionidx = 0; /* Find the region where the address lives. */ - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { - region = &dev->mem->regions[regionidx]; - if ((qemu_va >= region->userspace_address) && - (qemu_va <= region->userspace_address + - region->memory_size)) { - vhost_va = qemu_va + region->guest_phys_address + - region->address_offset - - region->userspace_address; - break; + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) + { + region = &dev->mem->regions[regionidx]; + if ((qemu_va >= region->userspace_address) && + (qemu_va <= region->userspace_address + region->memory_size)) + { + vhost_va = qemu_va + region->guest_phys_address + + region->address_offset - region->userspace_address; + break; + } } - } return vhost_va; } static dpdk_device_t * -dpdk_vhost_user_device_from_hw_if_index(u32 hw_if_index) +dpdk_vhost_user_device_from_hw_if_index (u32 hw_if_index) { - vnet_main_t *vnm = vnet_get_main(); - dpdk_main_t * dm = &dpdk_main; - vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance); + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); if (xd->dev_type != VNET_DPDK_DEV_VHOST_USER) return 0; @@ -141,124 +144,143 @@ dpdk_vhost_user_device_from_hw_if_index(u32 hw_if_index) } static dpdk_device_t * -dpdk_vhost_user_device_from_sw_if_index(u32 sw_if_index) +dpdk_vhost_user_device_from_sw_if_index (u32 sw_if_index) { - vnet_main_t *vnm = vnet_get_main(); - vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, sw_if_index); + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index); ASSERT (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE); - return dpdk_vhost_user_device_from_hw_if_index(sw->hw_if_index); + return dpdk_vhost_user_device_from_hw_if_index (sw->hw_if_index); } -static void stop_processing_packets(u32 hw_if_index, u8 idx) +static void +stop_processing_packets (u32 hw_if_index, u8 idx) { - dpdk_device_t *xd = - dpdk_vhost_user_device_from_hw_if_index(hw_if_index); - assert(xd); + dpdk_device_t *xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index); + assert (xd); xd->vu_vhost_dev.virtqueue[idx]->enabled = 0; } -static void disable_interface(dpdk_device_t * xd) +static void +disable_interface (dpdk_device_t * xd) { u8 idx; int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM; - for (idx = 0; idx < numqs; idx++) + for (idx = 0; idx < numqs; idx++) xd->vu_vhost_dev.virtqueue[idx]->enabled = 0; xd->vu_is_running = 0; } -static inline void * map_guest_mem(dpdk_device_t * xd, uword addr) +static inline void * +map_guest_mem (dpdk_device_t * xd, uword addr) { - dpdk_vu_intf_t * vui = xd->vu_intf; - struct virtio_memory * mem = xd->vu_vhost_dev.mem; + dpdk_vu_intf_t *vui = xd->vu_intf; + struct virtio_memory *mem = xd->vu_vhost_dev.mem; int i; - for (i=0; i<mem->nregions; i++) { - if ((mem->regions[i].guest_phys_address <= addr) && - ((mem->regions[i].guest_phys_address + mem->regions[i].memory_size) > addr)) { - return (void *) ((uword)vui->region_addr[i] + addr - (uword)mem->regions[i].guest_phys_address); - } - } - DBG_SOCK("failed to map guest mem addr %lx", addr); + for (i = 0; i < mem->nregions; i++) + { + if ((mem->regions[i].guest_phys_address <= addr) && + ((mem->regions[i].guest_phys_address + + mem->regions[i].memory_size) > addr)) + { + return (void *) ((uword) vui->region_addr[i] + addr - + (uword) mem->regions[i].guest_phys_address); + } + } + DBG_SOCK ("failed to map guest mem addr %lx", addr); return 0; } static clib_error_t * -dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 *hwaddr) +dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 * hwaddr) { - dpdk_main_t * dm = &dpdk_main; - vlib_main_t * vm = vlib_get_main(); - vlib_thread_main_t * tm = vlib_get_thread_main(); - vnet_sw_interface_t * sw; - clib_error_t * error; - dpdk_device_and_queue_t * dq; + dpdk_main_t *dm = &dpdk_main; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_sw_interface_t *sw; + clib_error_t *error; + dpdk_device_and_queue_t *dq; int num_qpairs = 1; dpdk_vu_intf_t *vui = NULL; num_qpairs = dm->use_rss < 1 ? 1 : tm->n_vlib_mains; - dpdk_device_t * xd = NULL; + dpdk_device_t *xd = NULL; u8 addr[6]; int j; vlib_worker_thread_barrier_sync (vm); - int inactive_cnt = vec_len(dm->vu_inactive_interfaces_device_index); + int inactive_cnt = vec_len (dm->vu_inactive_interfaces_device_index); // if there are any inactive ifaces - if (inactive_cnt > 0) { - // take last - u32 vui_idx = dm->vu_inactive_interfaces_device_index[inactive_cnt - 1]; - if (vec_len(dm->devices) > vui_idx) { - xd = vec_elt_at_index (dm->devices, vui_idx); - if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) { - DBG_SOCK("reusing inactive vhost-user interface sw_if_index %d", xd->vlib_sw_if_index); - } else { - clib_warning("error: inactive vhost-user interface sw_if_index %d not VHOST_USER type!", - xd->vlib_sw_if_index); - // reset so new interface is created - xd = NULL; - } + if (inactive_cnt > 0) + { + // take last + u32 vui_idx = dm->vu_inactive_interfaces_device_index[inactive_cnt - 1]; + if (vec_len (dm->devices) > vui_idx) + { + xd = vec_elt_at_index (dm->devices, vui_idx); + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) + { + DBG_SOCK + ("reusing inactive vhost-user interface sw_if_index %d", + xd->vlib_sw_if_index); + } + else + { + clib_warning + ("error: inactive vhost-user interface sw_if_index %d not VHOST_USER type!", + xd->vlib_sw_if_index); + // reset so new interface is created + xd = NULL; + } + } + // "remove" from inactive list + _vec_len (dm->vu_inactive_interfaces_device_index) -= 1; } - // "remove" from inactive list - _vec_len(dm->vu_inactive_interfaces_device_index) -= 1; - } - if (xd) { + if (xd) + { // existing interface used - do not overwrite if_id if not needed - if (if_id != (u32)~0) - xd->vu_if_id = if_id; + if (if_id != (u32) ~ 0) + xd->vu_if_id = if_id; // reset virtqueues vui = xd->vu_intf; - for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++) { - memset(xd->vu_vhost_dev.virtqueue[j], 0, sizeof(struct vhost_virtqueue)); - xd->vu_vhost_dev.virtqueue[j]->kickfd = -1; - xd->vu_vhost_dev.virtqueue[j]->callfd = -1; - xd->vu_vhost_dev.virtqueue[j]->backend = -1; - vui->vrings[j].packets = 0; - vui->vrings[j].bytes = 0; - } + for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++) + { + memset (xd->vu_vhost_dev.virtqueue[j], 0, + sizeof (struct vhost_virtqueue)); + xd->vu_vhost_dev.virtqueue[j]->kickfd = -1; + xd->vu_vhost_dev.virtqueue[j]->callfd = -1; + xd->vu_vhost_dev.virtqueue[j]->backend = -1; + vui->vrings[j].packets = 0; + vui->vrings[j].bytes = 0; + } // reset lockp - dpdk_device_lock_free(xd); - dpdk_device_lock_init(xd); + dpdk_device_lock_free (xd); + dpdk_device_lock_init (xd); // reset tx vectors for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, - sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } // reset rx vector for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } - } else { + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + } + else + { // vui was not retrieved from inactive ifaces - create new vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); xd->dev_type = VNET_DPDK_DEV_VHOST_USER; @@ -266,86 +288,93 @@ dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 *hwaddr) xd->tx_q_used = num_qpairs; xd->vu_vhost_dev.virt_qp_nb = num_qpairs; - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); - if (if_id == (u32)~0) - xd->vu_if_id = dm->next_vu_if_id++; + if (if_id == (u32) ~ 0) + xd->vu_if_id = dm->next_vu_if_id++; else - xd->vu_if_id = if_id; + xd->vu_if_id = if_id; xd->device_index = xd - dm->devices; xd->per_interface_next_index = ~0; - xd->vu_intf = clib_mem_alloc (sizeof(*(xd->vu_intf))); + xd->vu_intf = clib_mem_alloc (sizeof (*(xd->vu_intf))); - xd->vu_vhost_dev.mem = clib_mem_alloc (sizeof(struct virtio_memory) + - VHOST_MEMORY_MAX_NREGIONS * - sizeof(struct virtio_memory_regions)); + xd->vu_vhost_dev.mem = clib_mem_alloc (sizeof (struct virtio_memory) + + VHOST_MEMORY_MAX_NREGIONS * + sizeof (struct + virtio_memory_regions)); /* Will be set when guest sends VHOST_USER_SET_MEM_TABLE cmd */ xd->vu_vhost_dev.mem->nregions = 0; - /* + /* * New virtqueue structure is an array of VHOST_MAX_QUEUE_PAIRS * 2 * We need to allocate numq pairs. */ vui = xd->vu_intf; - for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++) { - xd->vu_vhost_dev.virtqueue[j] = clib_mem_alloc (sizeof(struct vhost_virtqueue)); - memset(xd->vu_vhost_dev.virtqueue[j], 0, sizeof(struct vhost_virtqueue)); - xd->vu_vhost_dev.virtqueue[j]->kickfd = -1; - xd->vu_vhost_dev.virtqueue[j]->callfd = -1; - xd->vu_vhost_dev.virtqueue[j]->backend = -1; - vui->vrings[j].packets = 0; - vui->vrings[j].bytes = 0; - } - - dpdk_device_lock_init(xd); - - DBG_SOCK("tm->n_vlib_mains: %d. TX %d, RX: %d, num_qpairs: %d, Lock: %p", - tm->n_vlib_mains, xd->tx_q_used, xd->rx_q_used, num_qpairs, xd->lockp); + for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++) + { + xd->vu_vhost_dev.virtqueue[j] = + clib_mem_alloc (sizeof (struct vhost_virtqueue)); + memset (xd->vu_vhost_dev.virtqueue[j], 0, + sizeof (struct vhost_virtqueue)); + xd->vu_vhost_dev.virtqueue[j]->kickfd = -1; + xd->vu_vhost_dev.virtqueue[j]->callfd = -1; + xd->vu_vhost_dev.virtqueue[j]->backend = -1; + vui->vrings[j].packets = 0; + vui->vrings[j].bytes = 0; + } + + dpdk_device_lock_init (xd); + + DBG_SOCK + ("tm->n_vlib_mains: %d. TX %d, RX: %d, num_qpairs: %d, Lock: %p", + tm->n_vlib_mains, xd->tx_q_used, xd->rx_q_used, num_qpairs, + xd->lockp); vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); + CLIB_CACHE_LINE_BYTES); for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, - sizeof(tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } + { + vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } // reset rx vector for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE-1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } - } + } /* * Generate random MAC address for the interface */ - if (hwaddr) { - clib_memcpy(addr, hwaddr, sizeof(addr)); - } else { - f64 now = vlib_time_now(vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - clib_memcpy (addr+2, &rnd, sizeof(rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } + if (hwaddr) + { + clib_memcpy (addr, hwaddr, sizeof (addr)); + } + else + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } error = ethernet_register_interface - (dm->vnet_main, - dpdk_device_class.index, - xd->device_index, + (dm->vnet_main, dpdk_device_class.index, xd->device_index, /* ethernet address */ addr, - &xd->vlib_hw_if_index, - 0); + &xd->vlib_hw_if_index, 0); if (error) return error; @@ -355,61 +384,64 @@ dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 *hwaddr) *hw_if_index = xd->vlib_hw_if_index; - DBG_SOCK("xd->device_index: %d, dm->input_cpu_count: %d, " - "dm->input_cpu_first_index: %d\n", xd->device_index, - dm->input_cpu_count, dm->input_cpu_first_index); + DBG_SOCK ("xd->device_index: %d, dm->input_cpu_count: %d, " + "dm->input_cpu_first_index: %d\n", xd->device_index, + dm->input_cpu_count, dm->input_cpu_first_index); int q, next_cpu = 0; - for (q = 0; q < num_qpairs; q++) { - int cpu = dm->input_cpu_first_index + - (next_cpu % dm->input_cpu_count); + for (q = 0; q < num_qpairs; q++) + { + int cpu = dm->input_cpu_first_index + (next_cpu % dm->input_cpu_count); unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); dq->device = xd->device_index; dq->queue_id = q; - DBG_SOCK("CPU for %d = %d. QID: %d", *hw_if_index, cpu, dq->queue_id); + DBG_SOCK ("CPU for %d = %d. QID: %d", *hw_if_index, cpu, dq->queue_id); // start polling if it was not started yet (because of no phys ifaces) - if (tm->n_vlib_mains == 1 && dpdk_input_node.state != VLIB_NODE_STATE_POLLING) - vlib_node_set_state (vm, dpdk_input_node.index, VLIB_NODE_STATE_POLLING); + if (tm->n_vlib_mains == 1 + && dpdk_input_node.state != VLIB_NODE_STATE_POLLING) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); if (tm->n_vlib_mains > 1) - vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); + vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); next_cpu++; - } + } vlib_worker_thread_barrier_release (vm); return 0; } #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) -static long get_huge_page_size(int fd) +static long +get_huge_page_size (int fd) { struct statfs s; - fstatfs(fd, &s); + fstatfs (fd, &s); return s.f_bsize; } #endif static clib_error_t * -dpdk_vhost_user_set_protocol_features(u32 hw_if_index, u64 prot_features) +dpdk_vhost_user_set_protocol_features (u32 hw_if_index, u64 prot_features) { - dpdk_device_t * xd; - xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index); - assert(xd); + dpdk_device_t *xd; + xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index); + assert (xd); xd->vu_vhost_dev.protocol_features = prot_features; return 0; } static clib_error_t * -dpdk_vhost_user_get_features(u32 hw_if_index, u64 * features) +dpdk_vhost_user_get_features (u32 hw_if_index, u64 * features) { - *features = rte_vhost_feature_get(); + *features = rte_vhost_feature_get (); #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) #define OFFLOAD_FEATURES ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ @@ -426,33 +458,34 @@ dpdk_vhost_user_get_features(u32 hw_if_index, u64 * features) *features &= (~OFFLOAD_FEATURES); #endif - DBG_SOCK("supported features: 0x%lx", *features); + DBG_SOCK ("supported features: 0x%lx", *features); return 0; } static clib_error_t * -dpdk_vhost_user_set_features(u32 hw_if_index, u64 features) +dpdk_vhost_user_set_features (u32 hw_if_index, u64 features) { - dpdk_device_t * xd; - u16 hdr_len = sizeof(struct virtio_net_hdr); + dpdk_device_t *xd; + u16 hdr_len = sizeof (struct virtio_net_hdr); - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } xd->vu_vhost_dev.features = features; if (xd->vu_vhost_dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF)) - hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + hdr_len = sizeof (struct virtio_net_hdr_mrg_rxbuf); int numqs = VIRTIO_QNUM; u8 idx; - int prot_feature = features & - (1ULL << VHOST_USER_F_PROTOCOL_FEATURES); + int prot_feature = features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES); numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM; - for (idx = 0; idx < numqs; idx++) { + for (idx = 0; idx < numqs; idx++) + { xd->vu_vhost_dev.virtqueue[idx]->vhost_hlen = hdr_len; /* * Spec says, if F_PROTOCOL_FEATURE is not set by the @@ -460,122 +493,134 @@ dpdk_vhost_user_set_features(u32 hw_if_index, u64 features) * enabled. If slave negotiates F_PROTOCOL_FEATURE, then * slave is responsible to enable it. */ - if (! prot_feature) - dpdk_vhost_user_set_vring_enable(hw_if_index, idx, 1); - } + if (!prot_feature) + dpdk_vhost_user_set_vring_enable (hw_if_index, idx, 1); + } return 0; } static clib_error_t * -dpdk_vhost_user_set_mem_table(u32 hw_if_index, vhost_user_memory_t * vum, int fd[]) +dpdk_vhost_user_set_mem_table (u32 hw_if_index, vhost_user_memory_t * vum, + int fd[]) { - struct virtio_memory * mem; + struct virtio_memory *mem; int i; - dpdk_device_t * xd; - dpdk_vu_intf_t * vui; + dpdk_device_t *xd; + dpdk_vu_intf_t *vui; - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vui = xd->vu_intf; mem = xd->vu_vhost_dev.mem; mem->nregions = vum->nregions; - for (i=0; i < mem->nregions; i++) { - u64 mapped_size, mapped_address; - - mem->regions[i].guest_phys_address = vum->regions[i].guest_phys_addr; - mem->regions[i].guest_phys_address_end = vum->regions[i].guest_phys_addr + - vum->regions[i].memory_size; - mem->regions[i].memory_size = vum->regions[i].memory_size; - mem->regions[i].userspace_address = vum->regions[i].userspace_addr; - - mapped_size = mem->regions[i].memory_size + vum->regions[i].mmap_offset; - mapped_address = pointer_to_uword(mmap(NULL, mapped_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd[i], 0)); - - if (uword_to_pointer(mapped_address, void*) == MAP_FAILED) + for (i = 0; i < mem->nregions; i++) { - clib_warning("mmap error"); - return 0; + u64 mapped_size, mapped_address; + + mem->regions[i].guest_phys_address = vum->regions[i].guest_phys_addr; + mem->regions[i].guest_phys_address_end = + vum->regions[i].guest_phys_addr + vum->regions[i].memory_size; + mem->regions[i].memory_size = vum->regions[i].memory_size; + mem->regions[i].userspace_address = vum->regions[i].userspace_addr; + + mapped_size = mem->regions[i].memory_size + vum->regions[i].mmap_offset; + mapped_address = + pointer_to_uword (mmap + (NULL, mapped_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd[i], 0)); + + if (uword_to_pointer (mapped_address, void *) == MAP_FAILED) + { + clib_warning ("mmap error"); + return 0; + } + + mapped_address += vum->regions[i].mmap_offset; + vui->region_addr[i] = mapped_address; + vui->region_fd[i] = fd[i]; + vui->region_offset[i] = vum->regions[i].mmap_offset; + mem->regions[i].address_offset = + mapped_address - mem->regions[i].guest_phys_address; + + DBG_SOCK ("map memory region %d addr 0x%lx off 0x%lx len 0x%lx", + i, vui->region_addr[i], vui->region_offset[i], mapped_size); + + if (vum->regions[i].guest_phys_addr == 0) + { + mem->base_address = vum->regions[i].userspace_addr; + mem->mapped_address = mem->regions[i].address_offset; + } } - mapped_address += vum->regions[i].mmap_offset; - vui->region_addr[i] = mapped_address; - vui->region_fd[i] = fd[i]; - vui->region_offset[i] = vum->regions[i].mmap_offset; - mem->regions[i].address_offset = mapped_address - mem->regions[i].guest_phys_address; - - DBG_SOCK("map memory region %d addr 0x%lx off 0x%lx len 0x%lx", - i, vui->region_addr[i], vui->region_offset[i], mapped_size); - - if (vum->regions[i].guest_phys_addr == 0) { - mem->base_address = vum->regions[i].userspace_addr; - mem->mapped_address = mem->regions[i].address_offset; - } - } - - disable_interface(xd); + disable_interface (xd); return 0; } static clib_error_t * -dpdk_vhost_user_set_vring_num(u32 hw_if_index, u8 idx, u32 num) +dpdk_vhost_user_set_vring_num (u32 hw_if_index, u8 idx, u32 num) { - dpdk_device_t * xd; + dpdk_device_t *xd; struct vhost_virtqueue *vq; - DBG_SOCK("idx %u num %u", idx, num); + DBG_SOCK ("idx %u num %u", idx, num); - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vq = xd->vu_vhost_dev.virtqueue[idx]; vq->size = num; - stop_processing_packets(hw_if_index, idx); + stop_processing_packets (hw_if_index, idx); return 0; } static clib_error_t * -dpdk_vhost_user_set_vring_addr(u32 hw_if_index, u8 idx, uword desc, \ - uword used, uword avail, uword log) +dpdk_vhost_user_set_vring_addr (u32 hw_if_index, u8 idx, uword desc, + uword used, uword avail, uword log) { - dpdk_device_t * xd; + dpdk_device_t *xd; struct vhost_virtqueue *vq; - DBG_SOCK("idx %u desc 0x%lx used 0x%lx avail 0x%lx log 0x%lx", - idx, desc, used, avail, log); + DBG_SOCK ("idx %u desc 0x%lx used 0x%lx avail 0x%lx log 0x%lx", + idx, desc, used, avail, log); - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vq = xd->vu_vhost_dev.virtqueue[idx]; - vq->desc = (struct vring_desc *) qva_to_vva(&xd->vu_vhost_dev, desc); - vq->used = (struct vring_used *) qva_to_vva(&xd->vu_vhost_dev, used); - vq->avail = (struct vring_avail *) qva_to_vva(&xd->vu_vhost_dev, avail); + vq->desc = (struct vring_desc *) qva_to_vva (&xd->vu_vhost_dev, desc); + vq->used = (struct vring_used *) qva_to_vva (&xd->vu_vhost_dev, used); + vq->avail = (struct vring_avail *) qva_to_vva (&xd->vu_vhost_dev, avail); #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) vq->log_guest_addr = log; #endif - if (!(vq->desc && vq->used && vq->avail)) { - clib_warning("falied to set vring addr"); - } + if (!(vq->desc && vq->used && vq->avail)) + { + clib_warning ("falied to set vring addr"); + } - if (vq->last_used_idx != vq->used->idx) { - clib_warning("last_used_idx (%u) and vq->used->idx (%u) mismatches; " - "some packets maybe resent for Tx and dropped for Rx", - vq->last_used_idx, vq->used->idx); - vq->last_used_idx = vq->used->idx; + if (vq->last_used_idx != vq->used->idx) + { + clib_warning ("last_used_idx (%u) and vq->used->idx (%u) mismatches; " + "some packets maybe resent for Tx and dropped for Rx", + vq->last_used_idx, vq->used->idx); + vq->last_used_idx = vq->used->idx; vq->last_used_idx_res = vq->used->idx; - } + } /* * Inform the guest that there is no need to inform (kick) the @@ -585,22 +630,23 @@ dpdk_vhost_user_set_vring_addr(u32 hw_if_index, u8 idx, uword desc, \ * The below function sets a flag in used table. Therefore, * should be initialized after initializing vq->used. */ - rte_vhost_enable_guest_notification(&xd->vu_vhost_dev, idx, 0); - stop_processing_packets(hw_if_index, idx); + rte_vhost_enable_guest_notification (&xd->vu_vhost_dev, idx, 0); + stop_processing_packets (hw_if_index, idx); return 0; } static clib_error_t * -dpdk_vhost_user_get_vring_base(u32 hw_if_index, u8 idx, u32 * num) +dpdk_vhost_user_get_vring_base (u32 hw_if_index, u8 idx, u32 * num) { - dpdk_device_t * xd; + dpdk_device_t *xd; struct vhost_virtqueue *vq; - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vq = xd->vu_vhost_dev.virtqueue[idx]; *num = vq->last_used_idx; @@ -612,10 +658,10 @@ dpdk_vhost_user_get_vring_base(u32 hw_if_index, u8 idx, u32 * num) * on the descriptor specified by VHOST_USER_SET_VRING_KICK, * and stop ring upon receiving VHOST_USER_GET_VRING_BASE. */ - DBG_SOCK("Stopping vring Q %u of device %d", idx, hw_if_index); + DBG_SOCK ("Stopping vring Q %u of device %d", idx, hw_if_index); dpdk_vu_intf_t *vui = xd->vu_intf; - vui->vrings[idx].enabled = 0; /* Reset local copy */ - vui->vrings[idx].callfd = -1; /* Reset FD */ + vui->vrings[idx].enabled = 0; /* Reset local copy */ + vui->vrings[idx].callfd = -1; /* Reset FD */ vq->enabled = 0; vq->desc = NULL; vq->used = NULL; @@ -626,55 +672,59 @@ dpdk_vhost_user_get_vring_base(u32 hw_if_index, u8 idx, u32 * num) /* Check if all Qs are disabled */ int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM; - for (idx = 0; idx < numqs; idx++) { - if (xd->vu_vhost_dev.virtqueue[idx]->enabled) - break; - } + for (idx = 0; idx < numqs; idx++) + { + if (xd->vu_vhost_dev.virtqueue[idx]->enabled) + break; + } /* If all vrings are disabed then disable device */ - if (idx == numqs) { - DBG_SOCK("Device %d disabled", hw_if_index); + if (idx == numqs) + { + DBG_SOCK ("Device %d disabled", hw_if_index); xd->vu_is_running = 0; - } + } return 0; } static clib_error_t * -dpdk_vhost_user_set_vring_base(u32 hw_if_index, u8 idx, u32 num) +dpdk_vhost_user_set_vring_base (u32 hw_if_index, u8 idx, u32 num) { - dpdk_device_t * xd; + dpdk_device_t *xd; struct vhost_virtqueue *vq; - DBG_SOCK("idx %u num %u", idx, num); + DBG_SOCK ("idx %u num %u", idx, num); - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vq = xd->vu_vhost_dev.virtqueue[idx]; vq->last_used_idx = num; vq->last_used_idx_res = num; - stop_processing_packets(hw_if_index, idx); + stop_processing_packets (hw_if_index, idx); return 0; } static clib_error_t * -dpdk_vhost_user_set_vring_kick(u32 hw_if_index, u8 idx, int fd) +dpdk_vhost_user_set_vring_kick (u32 hw_if_index, u8 idx, int fd) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; dpdk_vu_vring *vring; struct vhost_virtqueue *vq0, *vq1, *vq; int index, vu_is_running = 0; - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vq = xd->vu_vhost_dev.virtqueue[idx]; vq->kickfd = fd; @@ -689,39 +739,43 @@ dpdk_vhost_user_set_vring_kick(u32 hw_if_index, u8 idx, int fd) int numqs = VIRTIO_QNUM; numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM; - for (index = 0; index < numqs; index += 2) { - vq0 = xd->vu_vhost_dev.virtqueue[index]; /* RX */ - vq1 = xd->vu_vhost_dev.virtqueue[index + 1]; /* TX */ - if (vq0->enabled && vq1->enabled) + for (index = 0; index < numqs; index += 2) { - vu_is_running = 1; - break; + vq0 = xd->vu_vhost_dev.virtqueue[index]; /* RX */ + vq1 = xd->vu_vhost_dev.virtqueue[index + 1]; /* TX */ + if (vq0->enabled && vq1->enabled) + { + vu_is_running = 1; + break; + } } - } - DBG_SOCK("SET_VRING_KICK - idx %d, running %d, fd: %d", - idx, vu_is_running, fd); + DBG_SOCK ("SET_VRING_KICK - idx %d, running %d, fd: %d", + idx, vu_is_running, fd); xd->vu_is_running = vu_is_running; - if (xd->vu_is_running && xd->admin_up) { - vnet_hw_interface_set_flags (dm->vnet_main, - xd->vlib_hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP | - ETH_LINK_FULL_DUPLEX ); - } + if (xd->vu_is_running && xd->admin_up) + { + vnet_hw_interface_set_flags (dm->vnet_main, + xd->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP | + ETH_LINK_FULL_DUPLEX); + } return 0; } static int -dpdk_vhost_user_set_vring_enable(u32 hw_if_index, u8 idx, int enable) +dpdk_vhost_user_set_vring_enable (u32 hw_if_index, u8 idx, int enable) { - dpdk_device_t * xd; + dpdk_device_t *xd; struct vhost_virtqueue *vq; dpdk_vu_intf_t *vui; - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } vui = xd->vu_intf; /* @@ -731,15 +785,16 @@ dpdk_vhost_user_set_vring_enable(u32 hw_if_index, u8 idx, int enable) * are set. If not, vq will be enabled when vring * is kicked. */ - vui->vrings[idx].enabled = enable; /* Save local copy */ + vui->vrings[idx].enabled = enable; /* Save local copy */ int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM; - while (numqs--) { - if (! vui->vrings[numqs].enabled) - break; - } + while (numqs--) + { + if (!vui->vrings[numqs].enabled) + break; + } - if (numqs == -1) /* All Qs are enabled */ + if (numqs == -1) /* All Qs are enabled */ xd->need_txlock = 0; else xd->need_txlock = 1; @@ -751,199 +806,215 @@ dpdk_vhost_user_set_vring_enable(u32 hw_if_index, u8 idx, int enable) return 0; } -static clib_error_t * dpdk_vhost_user_callfd_read_ready (unix_file_t * uf) +static clib_error_t * +dpdk_vhost_user_callfd_read_ready (unix_file_t * uf) { - __attribute__((unused)) int n; + __attribute__ ((unused)) int n; u8 buff[8]; - n = read(uf->file_descriptor, ((char*)&buff), 8); + n = read (uf->file_descriptor, ((char *) &buff), 8); return 0; } static clib_error_t * -dpdk_vhost_user_set_vring_call(u32 hw_if_index, u8 idx, int fd) +dpdk_vhost_user_set_vring_call (u32 hw_if_index, u8 idx, int fd) { - dpdk_device_t * xd; + dpdk_device_t *xd; struct vhost_virtqueue *vq; - unix_file_t template = {0}; + unix_file_t template = { 0 }; - DBG_SOCK("SET_VRING_CALL - idx %d, fd %d", idx, fd); + DBG_SOCK ("SET_VRING_CALL - idx %d, fd %d", idx, fd); - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_index))) { - clib_warning("not a vhost-user interface"); - return 0; - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index))) + { + clib_warning ("not a vhost-user interface"); + return 0; + } dpdk_vu_intf_t *vui = xd->vu_intf; /* if there is old fd, delete it */ - if (vui->vrings[idx].callfd > 0) { - unix_file_t * uf = pool_elt_at_index (unix_main.file_pool, - vui->vrings[idx].callfd_idx); - unix_file_del (&unix_main, uf); - } + if (vui->vrings[idx].callfd > 0) + { + unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + vui->vrings[idx].callfd_idx); + unix_file_del (&unix_main, uf); + } vui->vrings[idx].callfd = fd; template.read_function = dpdk_vhost_user_callfd_read_ready; template.file_descriptor = fd; vui->vrings[idx].callfd_idx = unix_file_add (&unix_main, &template); vq = xd->vu_vhost_dev.virtqueue[idx]; - vq->callfd = -1; /* We use locally saved vring->callfd; */ + vq->callfd = -1; /* We use locally saved vring->callfd; */ return 0; } u8 -dpdk_vhost_user_want_interrupt(dpdk_device_t *xd, int idx) +dpdk_vhost_user_want_interrupt (dpdk_device_t * xd, int idx) { - dpdk_vu_intf_t *vui = xd->vu_intf; - ASSERT(vui != NULL); + dpdk_vu_intf_t *vui = xd->vu_intf; + ASSERT (vui != NULL); - if (PREDICT_FALSE(vui->num_vrings <= 0)) - return 0; + if (PREDICT_FALSE (vui->num_vrings <= 0)) + return 0; - dpdk_vu_vring *vring = &(vui->vrings[idx]); - struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx]; + dpdk_vu_vring *vring = &(vui->vrings[idx]); + struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx]; - /* return if vm is interested in interrupts */ - return (vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT); + /* return if vm is interested in interrupts */ + return (vring->callfd > 0) + && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT); } void -dpdk_vhost_user_send_interrupt(vlib_main_t * vm, dpdk_device_t * xd, int idx) +dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd, int idx) { - dpdk_main_t * dm = &dpdk_main; - dpdk_vu_intf_t *vui = xd->vu_intf; - ASSERT(vui != NULL); + dpdk_main_t *dm = &dpdk_main; + dpdk_vu_intf_t *vui = xd->vu_intf; + ASSERT (vui != NULL); - if (PREDICT_FALSE(vui->num_vrings <= 0)) - return; + if (PREDICT_FALSE (vui->num_vrings <= 0)) + return; - dpdk_vu_vring *vring = &(vui->vrings[idx]); - struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx]; + dpdk_vu_vring *vring = &(vui->vrings[idx]); + struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx]; - /* if vm is interested in interrupts */ - if((vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { - eventfd_write(vring->callfd, (eventfd_t)1); - vring->n_since_last_int = 0; - vring->int_deadline = vlib_time_now(vm) + dm->conf->vhost_coalesce_time; + /* if vm is interested in interrupts */ + if ((vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) + { + eventfd_write (vring->callfd, (eventfd_t) 1); + vring->n_since_last_int = 0; + vring->int_deadline = + vlib_time_now (vm) + dm->conf->vhost_coalesce_time; } } /* - * vhost-user interface management functions + * vhost-user interface management functions */ // initialize vui with specified attributes -static void -dpdk_vhost_user_vui_init(vnet_main_t * vnm, - dpdk_device_t *xd, int sockfd, - const char * sock_filename, - u8 is_server, u64 feature_mask, - u32 * sw_if_index) +static void +dpdk_vhost_user_vui_init (vnet_main_t * vnm, + dpdk_device_t * xd, int sockfd, + const char *sock_filename, + u8 is_server, u64 feature_mask, u32 * sw_if_index) { dpdk_vu_intf_t *vui = xd->vu_intf; - memset(vui, 0, sizeof(*vui)); + memset (vui, 0, sizeof (*vui)); vui->unix_fd = sockfd; vui->num_vrings = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM; - DBG_SOCK("dpdk_vhost_user_vui_init VRINGS: %d", vui->num_vrings); + DBG_SOCK ("dpdk_vhost_user_vui_init VRINGS: %d", vui->num_vrings); vui->sock_is_server = is_server; - strncpy(vui->sock_filename, sock_filename, ARRAY_LEN(vui->sock_filename)-1); + strncpy (vui->sock_filename, sock_filename, + ARRAY_LEN (vui->sock_filename) - 1); vui->sock_errno = 0; vui->is_up = 0; vui->feature_mask = feature_mask; vui->active = 1; vui->unix_file_index = ~0; - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); if (sw_if_index) - *sw_if_index = xd->vlib_sw_if_index; + *sw_if_index = xd->vlib_sw_if_index; } // register vui and start polling on it -static void -dpdk_vhost_user_vui_register(vlib_main_t * vm, dpdk_device_t *xd) +static void +dpdk_vhost_user_vui_register (vlib_main_t * vm, dpdk_device_t * xd) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; dpdk_vu_intf_t *vui = xd->vu_intf; hash_set (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd, - xd->vlib_sw_if_index); + xd->vlib_sw_if_index); } -static void dpdk_unmap_all_mem_regions(dpdk_device_t * xd) +static void +dpdk_unmap_all_mem_regions (dpdk_device_t * xd) { int i, r; dpdk_vu_intf_t *vui = xd->vu_intf; - struct virtio_memory * mem = xd->vu_vhost_dev.mem; + struct virtio_memory *mem = xd->vu_vhost_dev.mem; - for (i=0; i<mem->nregions; i++) { - if (vui->region_addr[i] != -1) { + for (i = 0; i < mem->nregions; i++) + { + if (vui->region_addr[i] != -1) + { - long page_sz = get_huge_page_size(vui->region_fd[i]); + long page_sz = get_huge_page_size (vui->region_fd[i]); - ssize_t map_sz = RTE_ALIGN_CEIL(mem->regions[i].memory_size + - vui->region_offset[i], page_sz); + ssize_t map_sz = RTE_ALIGN_CEIL (mem->regions[i].memory_size + + vui->region_offset[i], page_sz); - r = munmap((void *)(vui->region_addr[i] - vui->region_offset[i]), map_sz); + r = + munmap ((void *) (vui->region_addr[i] - vui->region_offset[i]), + map_sz); - DBG_SOCK("unmap memory region %d addr 0x%lx off 0x%lx len 0x%lx page_sz 0x%x", - i, vui->region_addr[i], vui->region_offset[i], map_sz, page_sz); + DBG_SOCK + ("unmap memory region %d addr 0x%lx off 0x%lx len 0x%lx page_sz 0x%x", + i, vui->region_addr[i], vui->region_offset[i], map_sz, page_sz); - vui->region_addr[i]= -1; + vui->region_addr[i] = -1; - if (r == -1) { - clib_unix_warning("failed to unmap memory region"); - } - close(vui->region_fd[i]); + if (r == -1) + { + clib_unix_warning ("failed to unmap memory region"); + } + close (vui->region_fd[i]); + } } - } mem->nregions = 0; } static inline void -dpdk_vhost_user_if_disconnect(dpdk_device_t * xd) +dpdk_vhost_user_if_disconnect (dpdk_device_t * xd) { - dpdk_vu_intf_t *vui = xd->vu_intf; - vnet_main_t * vnm = vnet_get_main(); - dpdk_main_t * dm = &dpdk_main; - struct vhost_virtqueue *vq; - int q; - - xd->admin_up = 0; - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); - - if (vui->unix_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); - vui->unix_file_index = ~0; + dpdk_vu_intf_t *vui = xd->vu_intf; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + struct vhost_virtqueue *vq; + int q; + + xd->admin_up = 0; + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + + if (vui->unix_file_index != ~0) + { + unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); + vui->unix_file_index = ~0; } - hash_unset(dm->vu_sw_if_index_by_sock_fd, vui->unix_fd); - hash_unset(dm->vu_sw_if_index_by_listener_fd, vui->unix_fd); - close(vui->unix_fd); - vui->unix_fd = -1; - vui->is_up = 0; - - for (q = 0; q < vui->num_vrings; q++) { - vq = xd->vu_vhost_dev.virtqueue[q]; - vui->vrings[q].enabled = 0; /* Reset local copy */ - vui->vrings[q].callfd = -1; /* Reset FD */ - vq->enabled = 0; + hash_unset (dm->vu_sw_if_index_by_sock_fd, vui->unix_fd); + hash_unset (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd); + close (vui->unix_fd); + vui->unix_fd = -1; + vui->is_up = 0; + + for (q = 0; q < vui->num_vrings; q++) + { + vq = xd->vu_vhost_dev.virtqueue[q]; + vui->vrings[q].enabled = 0; /* Reset local copy */ + vui->vrings[q].callfd = -1; /* Reset FD */ + vq->enabled = 0; #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) - vq->log_guest_addr = 0; + vq->log_guest_addr = 0; #endif - vq->desc = NULL; - vq->used = NULL; - vq->avail = NULL; + vq->desc = NULL; + vq->used = NULL; + vq->avail = NULL; } - xd->vu_is_running = 0; + xd->vu_is_running = 0; - dpdk_unmap_all_mem_regions(xd); - DBG_SOCK("interface ifindex %d disconnected", xd->vlib_sw_if_index); + dpdk_unmap_all_mem_regions (xd); + DBG_SOCK ("interface ifindex %d disconnected", xd->vlib_sw_if_index); } -static clib_error_t * dpdk_vhost_user_socket_read (unix_file_t * uf) +static clib_error_t * +dpdk_vhost_user_socket_read (unix_file_t * uf) { int n; int fd, number_of_fds = 0; @@ -951,30 +1022,30 @@ static clib_error_t * dpdk_vhost_user_socket_read (unix_file_t * uf) vhost_user_msg_t msg; struct msghdr mh; struct iovec iov[1]; - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd; dpdk_vu_intf_t *vui; struct cmsghdr *cmsg; - uword * p; + uword *p; u8 q; - vnet_main_t * vnm = vnet_get_main(); + vnet_main_t *vnm = vnet_get_main (); p = hash_get (dm->vu_sw_if_index_by_sock_fd, uf->file_descriptor); - if (p == 0) { - DBG_SOCK ("FD %d doesn't belong to any interface", - uf->file_descriptor); + if (p == 0) + { + DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor); return 0; } else - xd = dpdk_vhost_user_device_from_sw_if_index(p[0]); + xd = dpdk_vhost_user_device_from_sw_if_index (p[0]); - ASSERT(xd != NULL); + ASSERT (xd != NULL); vui = xd->vu_intf; - char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))]; + char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))]; - memset(&mh, 0, sizeof(mh)); - memset(control, 0, sizeof(control)); + memset (&mh, 0, sizeof (mh)); + memset (control, 0, sizeof (control)); /* set the payload */ iov[0].iov_base = (void *) &msg; @@ -983,127 +1054,134 @@ static clib_error_t * dpdk_vhost_user_socket_read (unix_file_t * uf) mh.msg_iov = iov; mh.msg_iovlen = 1; mh.msg_control = control; - mh.msg_controllen = sizeof(control); + mh.msg_controllen = sizeof (control); - n = recvmsg(uf->file_descriptor, &mh, 0); + n = recvmsg (uf->file_descriptor, &mh, 0); if (n != VHOST_USER_MSG_HDR_SZ) goto close_socket; - if (mh.msg_flags & MSG_CTRUNC) { - goto close_socket; - } + if (mh.msg_flags & MSG_CTRUNC) + { + goto close_socket; + } - cmsg = CMSG_FIRSTHDR(&mh); + cmsg = CMSG_FIRSTHDR (&mh); if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SCM_RIGHTS) && - (cmsg->cmsg_len - CMSG_LEN(0) <= VHOST_MEMORY_MAX_NREGIONS * sizeof(int))) { - number_of_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); - clib_memcpy(fds, CMSG_DATA(cmsg), number_of_fds * sizeof(int)); - } + (cmsg->cmsg_len - CMSG_LEN (0) <= + VHOST_MEMORY_MAX_NREGIONS * sizeof (int))) + { + number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int); + clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int)); + } - /* version 1, no reply bit set*/ - if ((msg.flags & 7) != 1) { - DBG_SOCK("malformed message received. closing socket"); - goto close_socket; - } + /* version 1, no reply bit set */ + if ((msg.flags & 7) != 1) + { + DBG_SOCK ("malformed message received. closing socket"); + goto close_socket; + } { - int rv __attribute__((unused)); - /* $$$$ pay attention to rv */ - rv = read(uf->file_descriptor, ((char*)&msg) + n, msg.size); + int rv __attribute__ ((unused)); + /* $$$$ pay attention to rv */ + rv = read (uf->file_descriptor, ((char *) &msg) + n, msg.size); } - DBG_SOCK("VPP VHOST message %s", vhost_message_str[msg.request]); - switch (msg.request) { + DBG_SOCK ("VPP VHOST message %s", vhost_message_str[msg.request]); + switch (msg.request) + { case VHOST_USER_GET_FEATURES: - DBG_SOCK("if %d msg VHOST_USER_GET_FEATURES", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES", xd->vlib_hw_if_index); msg.flags |= VHOST_USER_REPLY_MASK; - dpdk_vhost_user_get_features(xd->vlib_hw_if_index, &msg.u64); + dpdk_vhost_user_get_features (xd->vlib_hw_if_index, &msg.u64); msg.u64 &= vui->feature_mask; - msg.size = sizeof(msg.u64); + msg.size = sizeof (msg.u64); break; case VHOST_USER_SET_FEATURES: - DBG_SOCK("if %d msg VHOST_USER_SET_FEATURES features 0x%016lx", - xd->vlib_hw_if_index, msg.u64); + DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016lx", + xd->vlib_hw_if_index, msg.u64); - dpdk_vhost_user_set_features(xd->vlib_hw_if_index, msg.u64); + dpdk_vhost_user_set_features (xd->vlib_hw_if_index, msg.u64); break; case VHOST_USER_SET_MEM_TABLE: - DBG_SOCK("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d", - xd->vlib_hw_if_index, msg.memory.nregions); + DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d", + xd->vlib_hw_if_index, msg.memory.nregions); if ((msg.memory.nregions < 1) || - (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) { + (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) + { - DBG_SOCK("number of mem regions must be between 1 and %i", - VHOST_MEMORY_MAX_NREGIONS); + DBG_SOCK ("number of mem regions must be between 1 and %i", + VHOST_MEMORY_MAX_NREGIONS); - goto close_socket; - } + goto close_socket; + } - if (msg.memory.nregions != number_of_fds) { - DBG_SOCK("each memory region must have FD"); - goto close_socket; - } + if (msg.memory.nregions != number_of_fds) + { + DBG_SOCK ("each memory region must have FD"); + goto close_socket; + } - dpdk_vhost_user_set_mem_table(xd->vlib_hw_if_index, &msg.memory, fds); + dpdk_vhost_user_set_mem_table (xd->vlib_hw_if_index, &msg.memory, fds); break; case VHOST_USER_SET_VRING_NUM: - DBG_SOCK("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d", - xd->vlib_hw_if_index, msg.state.index, msg.state.num); + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); - if ((msg.state.num > 32768) || /* maximum ring size is 32768 */ - (msg.state.num == 0) || /* it cannot be zero */ - (msg.state.num % 2)) /* must be power of 2 */ - goto close_socket; + if ((msg.state.num > 32768) || /* maximum ring size is 32768 */ + (msg.state.num == 0) || /* it cannot be zero */ + (msg.state.num % 2)) /* must be power of 2 */ + goto close_socket; - dpdk_vhost_user_set_vring_num(xd->vlib_hw_if_index, msg.state.index, msg.state.num); + dpdk_vhost_user_set_vring_num (xd->vlib_hw_if_index, msg.state.index, + msg.state.num); break; case VHOST_USER_SET_VRING_ADDR: - DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ADDR idx %d", - xd->vlib_hw_if_index, msg.state.index); - - dpdk_vhost_user_set_vring_addr(xd->vlib_hw_if_index, msg.state.index, - msg.addr.desc_user_addr, - msg.addr.used_user_addr, - msg.addr.avail_user_addr, - msg.addr.log_guest_addr); + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d", + xd->vlib_hw_if_index, msg.state.index); + + dpdk_vhost_user_set_vring_addr (xd->vlib_hw_if_index, msg.state.index, + msg.addr.desc_user_addr, + msg.addr.used_user_addr, + msg.addr.avail_user_addr, + msg.addr.log_guest_addr); break; case VHOST_USER_SET_OWNER: - DBG_SOCK("if %d msg VHOST_USER_SET_OWNER", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", xd->vlib_hw_if_index); break; case VHOST_USER_RESET_OWNER: - DBG_SOCK("if %d msg VHOST_USER_RESET_OWNER", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", xd->vlib_hw_if_index); break; case VHOST_USER_SET_VRING_CALL: q = (u8) (msg.u64 & 0xFF); - DBG_SOCK("if %d msg VHOST_USER_SET_VRING_CALL u64 %lx, idx: %d", - xd->vlib_hw_if_index, msg.u64, q); + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %lx, idx: %d", + xd->vlib_hw_if_index, msg.u64, q); if (!(msg.u64 & 0x100)) - { - if (number_of_fds != 1) - goto close_socket; - fd = fds[0]; - } else { - fd = -1; - } - dpdk_vhost_user_set_vring_call(xd->vlib_hw_if_index, q, fd); + { + if (number_of_fds != 1) + goto close_socket; + fd = fds[0]; + } + else + { + fd = -1; + } + dpdk_vhost_user_set_vring_call (xd->vlib_hw_if_index, q, fd); break; @@ -1111,229 +1189,239 @@ static clib_error_t * dpdk_vhost_user_socket_read (unix_file_t * uf) q = (u8) (msg.u64 & 0xFF); - DBG_SOCK("if %d msg VHOST_USER_SET_VRING_KICK u64 %lx, idx: %d", - xd->vlib_hw_if_index, msg.u64, q); + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %lx, idx: %d", + xd->vlib_hw_if_index, msg.u64, q); if (!(msg.u64 & 0x100)) - { - if (number_of_fds != 1) - goto close_socket; + { + if (number_of_fds != 1) + goto close_socket; - vui->vrings[q].kickfd = fds[0]; - } + vui->vrings[q].kickfd = fds[0]; + } else - vui->vrings[q].kickfd = -1; + vui->vrings[q].kickfd = -1; - dpdk_vhost_user_set_vring_kick(xd->vlib_hw_if_index, q, vui->vrings[q].kickfd); + dpdk_vhost_user_set_vring_kick (xd->vlib_hw_if_index, q, + vui->vrings[q].kickfd); break; case VHOST_USER_SET_VRING_ERR: q = (u8) (msg.u64 & 0xFF); - DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ERR u64 %lx, idx: %d", - xd->vlib_hw_if_index, msg.u64, q); + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %lx, idx: %d", + xd->vlib_hw_if_index, msg.u64, q); if (!(msg.u64 & 0x100)) - { - if (number_of_fds != 1) - goto close_socket; + { + if (number_of_fds != 1) + goto close_socket; - fd = fds[0]; - } + fd = fds[0]; + } else - fd = -1; + fd = -1; vui->vrings[q].errfd = fd; break; case VHOST_USER_SET_VRING_BASE: - DBG_SOCK("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d", - xd->vlib_hw_if_index, msg.state.index, msg.state.num); + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); - dpdk_vhost_user_set_vring_base(xd->vlib_hw_if_index, msg.state.index, msg.state.num); + dpdk_vhost_user_set_vring_base (xd->vlib_hw_if_index, msg.state.index, + msg.state.num); break; case VHOST_USER_GET_VRING_BASE: - DBG_SOCK("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", - xd->vlib_hw_if_index, msg.state.index, msg.state.num); + DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); msg.flags |= VHOST_USER_REPLY_MASK; - msg.size = sizeof(msg.state); + msg.size = sizeof (msg.state); - dpdk_vhost_user_get_vring_base(xd->vlib_hw_if_index, msg.state.index, &msg.state.num); + dpdk_vhost_user_get_vring_base (xd->vlib_hw_if_index, msg.state.index, + &msg.state.num); break; case VHOST_USER_NONE: - DBG_SOCK("if %d msg VHOST_USER_NONE", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_NONE", xd->vlib_hw_if_index); break; case VHOST_USER_SET_LOG_BASE: #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) - DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE", - xd->vlib_hw_if_index); - - if (msg.size != sizeof(msg.log)) { - DBG_SOCK("invalid msg size for VHOST_USER_SET_LOG_BASE: %u instead of %lu", - msg.size, sizeof(msg.log)); - goto close_socket; - } - - if (!(xd->vu_vhost_dev.protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) { - DBG_SOCK("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received"); - goto close_socket; - } + DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", xd->vlib_hw_if_index); + + if (msg.size != sizeof (msg.log)) + { + DBG_SOCK + ("invalid msg size for VHOST_USER_SET_LOG_BASE: %u instead of %lu", + msg.size, sizeof (msg.log)); + goto close_socket; + } + + if (! + (xd->vu_vhost_dev.protocol_features & (1 << + VHOST_USER_PROTOCOL_F_LOG_SHMFD))) + { + DBG_SOCK + ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received"); + goto close_socket; + } fd = fds[0]; /* align size to 2M page */ - long page_sz = get_huge_page_size(fd); - ssize_t map_sz = RTE_ALIGN_CEIL(msg.log.size + msg.log.offset, page_sz); + long page_sz = get_huge_page_size (fd); + ssize_t map_sz = + RTE_ALIGN_CEIL (msg.log.size + msg.log.offset, page_sz); - void *addr = mmap(0, map_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); + void *addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); - DBG_SOCK("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped %p", - map_sz, msg.log.offset, fd, addr); + DBG_SOCK ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped %p", + map_sz, msg.log.offset, fd, addr); - if (addr == MAP_FAILED) { - clib_warning("failed to map memory. errno is %d", errno); - goto close_socket; - } + if (addr == MAP_FAILED) + { + clib_warning ("failed to map memory. errno is %d", errno); + goto close_socket; + } - xd->vu_vhost_dev.log_base += pointer_to_uword(addr) + msg.log.offset; + xd->vu_vhost_dev.log_base += pointer_to_uword (addr) + msg.log.offset; xd->vu_vhost_dev.log_size = msg.log.size; msg.flags |= VHOST_USER_REPLY_MASK; - msg.size = sizeof(msg.u64); + msg.size = sizeof (msg.u64); #else - DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE Not-Implemented", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE Not-Implemented", + xd->vlib_hw_if_index); #endif break; case VHOST_USER_SET_LOG_FD: - DBG_SOCK("if %d msg VHOST_USER_SET_LOG_FD", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", xd->vlib_hw_if_index); break; case VHOST_USER_GET_PROTOCOL_FEATURES: - DBG_SOCK("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", + xd->vlib_hw_if_index); msg.flags |= VHOST_USER_REPLY_MASK; msg.u64 = VHOST_USER_PROTOCOL_FEATURES; - DBG_SOCK("VHOST_USER_PROTOCOL_FEATURES: %llx", VHOST_USER_PROTOCOL_FEATURES); - msg.size = sizeof(msg.u64); + DBG_SOCK ("VHOST_USER_PROTOCOL_FEATURES: %llx", + VHOST_USER_PROTOCOL_FEATURES); + msg.size = sizeof (msg.u64); break; case VHOST_USER_SET_PROTOCOL_FEATURES: - DBG_SOCK("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES", + xd->vlib_hw_if_index); - DBG_SOCK("VHOST_USER_SET_PROTOCOL_FEATURES: 0x%lx", - msg.u64); - dpdk_vhost_user_set_protocol_features(xd->vlib_hw_if_index, - msg.u64); + DBG_SOCK ("VHOST_USER_SET_PROTOCOL_FEATURES: 0x%lx", msg.u64); + dpdk_vhost_user_set_protocol_features (xd->vlib_hw_if_index, msg.u64); break; case VHOST_USER_SET_VRING_ENABLE: - DBG_SOCK("%d VPP VHOST_USER_SET_VRING_ENABLE IDX: %d, Enable: %d", - xd->vlib_hw_if_index, msg.state.index, msg.state.num); + DBG_SOCK ("%d VPP VHOST_USER_SET_VRING_ENABLE IDX: %d, Enable: %d", + xd->vlib_hw_if_index, msg.state.index, msg.state.num); dpdk_vhost_user_set_vring_enable - (xd->vlib_hw_if_index, msg.state.index, msg.state.num); + (xd->vlib_hw_if_index, msg.state.index, msg.state.num); break; case VHOST_USER_GET_QUEUE_NUM: - DBG_SOCK("if %d msg VHOST_USER_GET_QUEUE_NUM:", - xd->vlib_hw_if_index); + DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM:", xd->vlib_hw_if_index); msg.flags |= VHOST_USER_REPLY_MASK; msg.u64 = xd->vu_vhost_dev.virt_qp_nb; - msg.size = sizeof(msg.u64); + msg.size = sizeof (msg.u64); break; default: - DBG_SOCK("unknown vhost-user message %d received. closing socket", - msg.request); + DBG_SOCK ("unknown vhost-user message %d received. closing socket", + msg.request); goto close_socket; - } + } - /* if we have pointers to descriptor table, go up*/ + /* if we have pointers to descriptor table, go up */ if (!vui->is_up && xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_TX]->desc && - xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_RX]->desc) { + xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_RX]->desc) + { - DBG_SOCK("interface %d connected", xd->vlib_sw_if_index); + DBG_SOCK ("interface %d connected", xd->vlib_sw_if_index); - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); vui->is_up = 1; xd->admin_up = 1; - } + } /* if we need to reply */ if (msg.flags & VHOST_USER_REPLY_MASK) - { - n = send(uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + { + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) - goto close_socket; - } + goto close_socket; + } return 0; close_socket: - DBG_SOCK("error: close_socket"); - dpdk_vhost_user_if_disconnect(xd); + DBG_SOCK ("error: close_socket"); + dpdk_vhost_user_if_disconnect (xd); return 0; } -static clib_error_t * dpdk_vhost_user_socket_error (unix_file_t * uf) +static clib_error_t * +dpdk_vhost_user_socket_error (unix_file_t * uf) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd; - uword * p; + uword *p; p = hash_get (dm->vu_sw_if_index_by_sock_fd, uf->file_descriptor); - if (p == 0) { - DBG_SOCK ("FD %d doesn't belong to any interface", - uf->file_descriptor); + if (p == 0) + { + DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor); return 0; } else - xd = dpdk_vhost_user_device_from_sw_if_index(p[0]); + xd = dpdk_vhost_user_device_from_sw_if_index (p[0]); - dpdk_vhost_user_if_disconnect(xd); + dpdk_vhost_user_if_disconnect (xd); return 0; } -static clib_error_t * dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf) +static clib_error_t * +dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf) { int client_fd, client_len; struct sockaddr_un client; - unix_file_t template = {0}; - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = NULL; - dpdk_vu_intf_t * vui; - uword * p; - - p = hash_get (dm->vu_sw_if_index_by_listener_fd, - uf->file_descriptor); - if (p == 0) { - DBG_SOCK ("fd %d doesn't belong to any interface", - uf->file_descriptor); + unix_file_t template = { 0 }; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = NULL; + dpdk_vu_intf_t *vui; + uword *p; + + p = hash_get (dm->vu_sw_if_index_by_listener_fd, uf->file_descriptor); + if (p == 0) + { + DBG_SOCK ("fd %d doesn't belong to any interface", uf->file_descriptor); return 0; } - xd = dpdk_vhost_user_device_from_sw_if_index(p[0]); - ASSERT(xd != NULL); + xd = dpdk_vhost_user_device_from_sw_if_index (p[0]); + ASSERT (xd != NULL); vui = xd->vu_intf; - client_len = sizeof(client); + client_len = sizeof (client); client_fd = accept (uf->file_descriptor, - (struct sockaddr *)&client, - (socklen_t *)&client_len); + (struct sockaddr *) &client, + (socklen_t *) & client_len); if (client_fd < 0) - return clib_error_return_unix (0, "accept"); + return clib_error_return_unix (0, "accept"); template.read_function = dpdk_vhost_user_socket_read; template.error_function = dpdk_vhost_user_socket_error; @@ -1342,41 +1430,45 @@ static clib_error_t * dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf) vui->client_fd = client_fd; hash_set (dm->vu_sw_if_index_by_sock_fd, vui->client_fd, - xd->vlib_sw_if_index); + xd->vlib_sw_if_index); return 0; } // init server socket on specified sock_filename -static int dpdk_vhost_user_init_server_sock(const char * sock_filename, int *sockfd) +static int +dpdk_vhost_user_init_server_sock (const char *sock_filename, int *sockfd) { int rv = 0; - struct sockaddr_un un = {}; + struct sockaddr_un un = { }; int fd; /* create listening socket */ - fd = socket(AF_UNIX, SOCK_STREAM, 0); + fd = socket (AF_UNIX, SOCK_STREAM, 0); - if (fd < 0) { - return VNET_API_ERROR_SYSCALL_ERROR_1; - } + if (fd < 0) + { + return VNET_API_ERROR_SYSCALL_ERROR_1; + } un.sun_family = AF_UNIX; - strcpy((char *) un.sun_path, (char *) sock_filename); + strcpy ((char *) un.sun_path, (char *) sock_filename); /* remove if exists */ - unlink( (char *) sock_filename); + unlink ((char *) sock_filename); - if (bind(fd, (struct sockaddr *) &un, sizeof(un)) == -1) { - rv = VNET_API_ERROR_SYSCALL_ERROR_2; - goto error; - } + if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } - if (listen(fd, 1) == -1) { - rv = VNET_API_ERROR_SYSCALL_ERROR_3; - goto error; - } + if (listen (fd, 1) == -1) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } - unix_file_t template = {0}; + unix_file_t template = { 0 }; template.read_function = dpdk_vhost_user_socksvr_accept_ready; template.file_descriptor = fd; unix_file_add (&unix_main, &template); @@ -1384,7 +1476,7 @@ static int dpdk_vhost_user_init_server_sock(const char * sock_filename, int *soc return rv; error: - close(fd); + close (fd); return rv; } @@ -1392,73 +1484,84 @@ error: * vhost-user interface control functions used from vpe api */ -int dpdk_vhost_user_create_if(vnet_main_t * vnm, vlib_main_t * vm, - const char * sock_filename, - u8 is_server, - u32 * sw_if_index, - u64 feature_mask, - u8 renumber, u32 custom_dev_instance, - u8 *hwaddr) +int +dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, + const char *sock_filename, + u8 is_server, + u32 * sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance, u8 * hwaddr) { - dpdk_main_t * dm = &dpdk_main; + dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd; u32 hw_if_idx = ~0; int sockfd = -1; int rv = 0; // using virtio vhost user? - if (dm->conf->use_virtio_vhost) { - return vhost_user_create_if(vnm, vm, sock_filename, is_server, - sw_if_index, feature_mask, renumber, custom_dev_instance, hwaddr); - } + if (dm->conf->use_virtio_vhost) + { + return vhost_user_create_if (vnm, vm, sock_filename, is_server, + sw_if_index, feature_mask, renumber, + custom_dev_instance, hwaddr); + } - if (is_server) { - if ((rv = dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) { - return rv; + if (is_server) + { + if ((rv = + dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) + { + return rv; + } } - } - if (renumber) { + if (renumber) + { // set next vhost-user if id if custom one is higher or equal if (custom_dev_instance >= dm->next_vu_if_id) - dm->next_vu_if_id = custom_dev_instance + 1; + dm->next_vu_if_id = custom_dev_instance + 1; - dpdk_create_vhost_user_if_internal(&hw_if_idx, custom_dev_instance, hwaddr); - } else - dpdk_create_vhost_user_if_internal(&hw_if_idx, (u32)~0, hwaddr); - DBG_SOCK("dpdk vhost-user interface created hw_if_index %d", hw_if_idx); + dpdk_create_vhost_user_if_internal (&hw_if_idx, custom_dev_instance, + hwaddr); + } + else + dpdk_create_vhost_user_if_internal (&hw_if_idx, (u32) ~ 0, hwaddr); + DBG_SOCK ("dpdk vhost-user interface created hw_if_index %d", hw_if_idx); - xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_idx); - ASSERT(xd != NULL); + xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_idx); + ASSERT (xd != NULL); dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server, - feature_mask, sw_if_index); + feature_mask, sw_if_index); dpdk_vhost_user_vui_register (vm, xd); return rv; } -int dpdk_vhost_user_modify_if(vnet_main_t * vnm, vlib_main_t * vm, - const char * sock_filename, - u8 is_server, - u32 sw_if_index, - u64 feature_mask, - u8 renumber, u32 custom_dev_instance) +int +dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, + const char *sock_filename, + u8 is_server, + u32 sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; - dpdk_vu_intf_t * vui = NULL; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + dpdk_vu_intf_t *vui = NULL; u32 sw_if_idx = ~0; int sockfd = -1; int rv = 0; // using virtio vhost user? - if (dm->conf->use_virtio_vhost) { - return vhost_user_modify_if(vnm, vm, sock_filename, is_server, - sw_if_index, feature_mask, renumber, custom_dev_instance); - } + if (dm->conf->use_virtio_vhost) + { + return vhost_user_modify_if (vnm, vm, sock_filename, is_server, + sw_if_index, feature_mask, renumber, + custom_dev_instance); + } - xd = dpdk_vhost_user_device_from_sw_if_index(sw_if_index); + xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index); if (xd == NULL) return VNET_API_ERROR_INVALID_SW_IF_INDEX; @@ -1468,40 +1571,46 @@ int dpdk_vhost_user_modify_if(vnet_main_t * vnm, vlib_main_t * vm, // interface is inactive vui->active = 0; // disconnect interface sockets - dpdk_vhost_user_if_disconnect(xd); + dpdk_vhost_user_if_disconnect (xd); - if (is_server) { - if ((rv = dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) { - return rv; - } - } + if (is_server) + { + if ((rv = + dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) + { + return rv; + } + } dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server, - feature_mask, &sw_if_idx); + feature_mask, &sw_if_idx); - if (renumber) { - vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); - } + if (renumber) + { + vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); + } dpdk_vhost_user_vui_register (vm, xd); return rv; } -int dpdk_vhost_user_delete_if(vnet_main_t * vnm, vlib_main_t * vm, - u32 sw_if_index) +int +dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, + u32 sw_if_index) { - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd = NULL; - dpdk_vu_intf_t * vui; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = NULL; + dpdk_vu_intf_t *vui; int rv = 0; // using virtio vhost user? - if (dm->conf->use_virtio_vhost) { - return vhost_user_delete_if(vnm, vm, sw_if_index); - } + if (dm->conf->use_virtio_vhost) + { + return vhost_user_delete_if (vnm, vm, sw_if_index); + } - xd = dpdk_vhost_user_device_from_sw_if_index(sw_if_index); + xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index); if (xd == NULL) return VNET_API_ERROR_INVALID_SW_IF_INDEX; @@ -1511,147 +1620,172 @@ int dpdk_vhost_user_delete_if(vnet_main_t * vnm, vlib_main_t * vm, // interface is inactive vui->active = 0; // disconnect interface sockets - dpdk_vhost_user_if_disconnect(xd); + dpdk_vhost_user_if_disconnect (xd); // add to inactive interface list vec_add1 (dm->vu_inactive_interfaces_device_index, xd->device_index); ethernet_delete_interface (vnm, xd->vlib_hw_if_index); - DBG_SOCK ("deleted (deactivated) vhost-user interface sw_if_index %d", sw_if_index); + DBG_SOCK ("deleted (deactivated) vhost-user interface sw_if_index %d", + sw_if_index); return rv; } -int dpdk_vhost_user_dump_ifs(vnet_main_t * vnm, vlib_main_t * vm, vhost_user_intf_details_t **out_vuids) +int +dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_details_t ** out_vuids) { - int rv = 0; - dpdk_main_t * dm = &dpdk_main; - dpdk_device_t * xd; - dpdk_vu_intf_t * vui; - struct virtio_net * vhost_dev; - vhost_user_intf_details_t * r_vuids = NULL; - vhost_user_intf_details_t * vuid = NULL; - u32 * hw_if_indices = 0; - vnet_hw_interface_t * hi; - u8 *s = NULL; - int i; - - if (!out_vuids) - return -1; - - // using virtio vhost user? - if (dm->conf->use_virtio_vhost) { - return vhost_user_dump_ifs(vnm, vm, out_vuids); - } + int rv = 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + dpdk_vu_intf_t *vui; + struct virtio_net *vhost_dev; + vhost_user_intf_details_t *r_vuids = NULL; + vhost_user_intf_details_t *vuid = NULL; + u32 *hw_if_indices = 0; + vnet_hw_interface_t *hi; + u8 *s = NULL; + int i; + + if (!out_vuids) + return -1; - vec_foreach (xd, dm->devices) { - if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && - xd->vu_intf->active) - vec_add1(hw_if_indices, xd->vlib_hw_if_index); + // using virtio vhost user? + if (dm->conf->use_virtio_vhost) + { + return vhost_user_dump_ifs (vnm, vm, out_vuids); } - for (i = 0; i < vec_len (hw_if_indices); i++) { + vec_foreach (xd, dm->devices) + { + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && xd->vu_intf->active) + vec_add1 (hw_if_indices, xd->vlib_hw_if_index); + } + + for (i = 0; i < vec_len (hw_if_indices); i++) + { hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); - xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_indices[i]); - if (!xd) { - clib_warning("invalid vhost-user interface hw_if_index %d", hw_if_indices[i]); - continue; - } + xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i]); + if (!xd) + { + clib_warning ("invalid vhost-user interface hw_if_index %d", + hw_if_indices[i]); + continue; + } vui = xd->vu_intf; - ASSERT(vui != NULL); + ASSERT (vui != NULL); vhost_dev = &xd->vu_vhost_dev; u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ? - vhost_dev->virtqueue[0]->vhost_hlen : 0); + vhost_dev->virtqueue[0]->vhost_hlen : 0); - vec_add2(r_vuids, vuid, 1); + vec_add2 (r_vuids, vuid, 1); vuid->sw_if_index = xd->vlib_sw_if_index; vuid->virtio_net_hdr_sz = virtio_net_hdr_sz; vuid->features = vhost_dev->features; vuid->is_server = vui->sock_is_server; - vuid->num_regions = (vhost_dev->mem != NULL ? vhost_dev->mem->nregions : 0); + vuid->num_regions = + (vhost_dev->mem != NULL ? vhost_dev->mem->nregions : 0); vuid->sock_errno = vui->sock_errno; - strncpy((char *)vuid->sock_filename, (char *)vui->sock_filename, - ARRAY_LEN(vuid->sock_filename)-1); + strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename, + ARRAY_LEN (vuid->sock_filename) - 1); s = format (s, "%v%c", hi->name, 0); - strncpy((char *)vuid->if_name, (char *)s, - ARRAY_LEN(vuid->if_name)-1); - _vec_len(s) = 0; + strncpy ((char *) vuid->if_name, (char *) s, + ARRAY_LEN (vuid->if_name) - 1); + _vec_len (s) = 0; } - vec_free (s); - vec_free (hw_if_indices); + vec_free (s); + vec_free (hw_if_indices); - *out_vuids = r_vuids; + *out_vuids = r_vuids; - return rv; + return rv; } /* * Processing functions called from dpdk process fn */ -typedef struct { - struct sockaddr_un sun; - int sockfd; - unix_file_t template; - uword *event_data; +typedef struct +{ + struct sockaddr_un sun; + int sockfd; + unix_file_t template; + uword *event_data; } dpdk_vu_process_state; -void dpdk_vhost_user_process_init (void **ctx) +void +dpdk_vhost_user_process_init (void **ctx) { - dpdk_vu_process_state *state = clib_mem_alloc (sizeof(dpdk_vu_process_state)); - memset(state, 0, sizeof(*state)); - state->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - state->sun.sun_family = AF_UNIX; - state->template.read_function = dpdk_vhost_user_socket_read; - state->template.error_function = dpdk_vhost_user_socket_error; - state->event_data = 0; - *ctx = state; + dpdk_vu_process_state *state = + clib_mem_alloc (sizeof (dpdk_vu_process_state)); + memset (state, 0, sizeof (*state)); + state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + state->sun.sun_family = AF_UNIX; + state->template.read_function = dpdk_vhost_user_socket_read; + state->template.error_function = dpdk_vhost_user_socket_error; + state->event_data = 0; + *ctx = state; } -void dpdk_vhost_user_process_cleanup (void *ctx) +void +dpdk_vhost_user_process_cleanup (void *ctx) { - clib_mem_free(ctx); + clib_mem_free (ctx); } -uword dpdk_vhost_user_process_if (vlib_main_t *vm, dpdk_device_t *xd, void *ctx) +uword +dpdk_vhost_user_process_if (vlib_main_t * vm, dpdk_device_t * xd, void *ctx) { - dpdk_main_t * dm = &dpdk_main; - dpdk_vu_process_state *state = (dpdk_vu_process_state *)ctx; - dpdk_vu_intf_t *vui = xd->vu_intf; - - if (vui->sock_is_server || !vui->active) - return 0; - - if (vui->unix_fd == -1) { - /* try to connect */ - strncpy(state->sun.sun_path, (char *) vui->sock_filename, sizeof(state->sun.sun_path) - 1); - - if (connect(state->sockfd, (struct sockaddr *) &(state->sun), sizeof(struct sockaddr_un)) == 0) { - vui->sock_errno = 0; - vui->unix_fd = state->sockfd; - state->template.file_descriptor = state->sockfd; - vui->unix_file_index = unix_file_add (&unix_main, &(state->template)); - hash_set (dm->vu_sw_if_index_by_sock_fd, state->sockfd, xd->vlib_sw_if_index); - - state->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - if (state->sockfd < 0) - return -1; - } else { - vui->sock_errno = errno; - } - } else { - /* check if socket is alive */ - int error = 0; - socklen_t len = sizeof (error); - int retval = getsockopt(vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len); - - if (retval) - dpdk_vhost_user_if_disconnect(xd); - } + dpdk_main_t *dm = &dpdk_main; + dpdk_vu_process_state *state = (dpdk_vu_process_state *) ctx; + dpdk_vu_intf_t *vui = xd->vu_intf; + + if (vui->sock_is_server || !vui->active) return 0; + + if (vui->unix_fd == -1) + { + /* try to connect */ + strncpy (state->sun.sun_path, (char *) vui->sock_filename, + sizeof (state->sun.sun_path) - 1); + + if (connect + (state->sockfd, (struct sockaddr *) &(state->sun), + sizeof (struct sockaddr_un)) == 0) + { + vui->sock_errno = 0; + vui->unix_fd = state->sockfd; + state->template.file_descriptor = state->sockfd; + vui->unix_file_index = + unix_file_add (&unix_main, &(state->template)); + hash_set (dm->vu_sw_if_index_by_sock_fd, state->sockfd, + xd->vlib_sw_if_index); + + state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + if (state->sockfd < 0) + return -1; + } + else + { + vui->sock_errno = errno; + } + } + else + { + /* check if socket is alive */ + int error = 0; + socklen_t len = sizeof (error); + int retval = + getsockopt (vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len); + + if (retval) + dpdk_vhost_user_if_disconnect (xd); + } + return 0; } /* @@ -1660,110 +1794,123 @@ uword dpdk_vhost_user_process_if (vlib_main_t *vm, dpdk_device_t *xd, void *ctx) static clib_error_t * dpdk_vhost_user_connect_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { - dpdk_main_t * dm = &dpdk_main; - unformat_input_t _line_input, * line_input = &_line_input; - u8 * sock_filename = NULL; + dpdk_main_t *dm = &dpdk_main; + unformat_input_t _line_input, *line_input = &_line_input; + u8 *sock_filename = NULL; u32 sw_if_index; u8 is_server = 0; - u64 feature_mask = (u64)~0; + u64 feature_mask = (u64) ~ 0; u8 renumber = 0; u32 custom_dev_instance = ~0; u8 hwaddr[6]; u8 *hw = NULL; - if (dm->conf->use_virtio_vhost) { - return vhost_user_connect_command_fn(vm, input, cmd); - } + if (dm->conf->use_virtio_vhost) + { + return vhost_user_connect_command_fn (vm, input, cmd); + } /* Get a line of input. */ - if (! unformat_user (input, unformat_line_input, line_input)) + if (!unformat_user (input, unformat_line_input, line_input)) return 0; - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "socket %s", &sock_filename)) - ; - else if (unformat (line_input, "server")) - is_server = 1; - else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) - ; - else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr)) - hw = hwaddr; - else if (unformat (line_input, "renumber %d", &custom_dev_instance)) { - renumber = 1; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "socket %s", &sock_filename)) + ; + else if (unformat (line_input, "server")) + is_server = 1; + else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) + ; + else + if (unformat + (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr)) + hw = hwaddr; + else if (unformat (line_input, "renumber %d", &custom_dev_instance)) + { + renumber = 1; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); } - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } unformat_free (line_input); - vnet_main_t *vnm = vnet_get_main(); + vnet_main_t *vnm = vnet_get_main (); if (sock_filename == NULL) - return clib_error_return (0, "missing socket file"); + return clib_error_return (0, "missing socket file"); - dpdk_vhost_user_create_if(vnm, vm, (char *)sock_filename, - is_server, &sw_if_index, feature_mask, - renumber, custom_dev_instance, hw); + dpdk_vhost_user_create_if (vnm, vm, (char *) sock_filename, + is_server, &sw_if_index, feature_mask, + renumber, custom_dev_instance, hw); - vec_free(sock_filename); - vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); + vec_free (sock_filename); + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index); return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (dpdk_vhost_user_connect_command, static) = { .path = "create vhost-user", .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [renumber <dev_instance>]", .function = dpdk_vhost_user_connect_command_fn, }; +/* *INDENT-ON* */ static clib_error_t * dpdk_vhost_user_delete_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { - dpdk_main_t * dm = &dpdk_main; - clib_error_t * error = 0; - unformat_input_t _line_input, * line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = 0; + unformat_input_t _line_input, *line_input = &_line_input; u32 sw_if_index = ~0; - if (dm->conf->use_virtio_vhost) { - return vhost_user_delete_command_fn(vm, input, cmd); - } + if (dm->conf->use_virtio_vhost) + { + return vhost_user_delete_command_fn (vm, input, cmd); + } /* Get a line of input. */ - if (! unformat_user (input, unformat_line_input, line_input)) + if (!unformat_user (input, unformat_line_input, line_input)) return 0; - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "sw_if_index %d", &sw_if_index)) - ; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } unformat_free (line_input); - if (sw_if_index == ~0) { + if (sw_if_index == ~0) + { error = clib_error_return (0, "invalid sw_if_index", - format_unformat_error, input); + format_unformat_error, input); return error; - } + } - vnet_main_t *vnm = vnet_get_main(); + vnet_main_t *vnm = vnet_get_main (); - dpdk_vhost_user_delete_if(vnm, vm, sw_if_index); + dpdk_vhost_user_delete_if (vnm, vm, sw_if_index); return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (dpdk_vhost_user_delete_command, static) = { .path = "delete vhost-user", .short_help = "delete vhost-user sw_if_index <nn>", .function = dpdk_vhost_user_delete_command_fn, }; +/* *INDENT-ON* */ #define foreach_dpdk_vhost_feature \ _ (VIRTIO_NET_F_MRG_RXBUF) \ @@ -1772,146 +1919,188 @@ VLIB_CLI_COMMAND (dpdk_vhost_user_delete_command, static) = { static clib_error_t * show_dpdk_vhost_user_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { - clib_error_t * error = 0; - dpdk_main_t * dm = &dpdk_main; - vnet_main_t * vnm = vnet_get_main(); - dpdk_device_t * xd; - dpdk_vu_intf_t * vui; - struct virtio_net * vhost_dev; - u32 hw_if_index, * hw_if_indices = 0; - vnet_hw_interface_t * hi; + clib_error_t *error = 0; + dpdk_main_t *dm = &dpdk_main; + vnet_main_t *vnm = vnet_get_main (); + dpdk_device_t *xd; + dpdk_vu_intf_t *vui; + struct virtio_net *vhost_dev; + u32 hw_if_index, *hw_if_indices = 0; + vnet_hw_interface_t *hi; int i, j, q; int show_descr = 0; - struct virtio_memory * mem; - struct feat_struct { u8 bit; char *str;}; + struct virtio_memory *mem; + struct feat_struct + { + u8 bit; + char *str; + }; struct feat_struct *feat_entry; static struct feat_struct feat_array[] = { #define _(f) { .str = #f, .bit = f, }, - foreach_dpdk_vhost_feature + foreach_dpdk_vhost_feature #undef _ - { .str = NULL } + {.str = NULL} }; - if (dm->conf->use_virtio_vhost) { - return show_vhost_user_command_fn(vm, input, cmd); - } - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) { - vec_add1 (hw_if_indices, hw_if_index); - vlib_cli_output(vm, "add %d", hw_if_index); + if (dm->conf->use_virtio_vhost) + { + return show_vhost_user_command_fn (vm, input, cmd); } - else if (unformat (input, "descriptors") || unformat (input, "desc") ) - show_descr = 1; - else { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + vec_add1 (hw_if_indices, hw_if_index); + vlib_cli_output (vm, "add %d", hw_if_index); + } + else if (unformat (input, "descriptors") || unformat (input, "desc")) + show_descr = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } } - } - if (vec_len (hw_if_indices) == 0) { - vec_foreach (xd, dm->devices) { - if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && xd->vu_intf->active) - vec_add1(hw_if_indices, xd->vlib_hw_if_index); + if (vec_len (hw_if_indices) == 0) + { + vec_foreach (xd, dm->devices) + { + if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && xd->vu_intf->active) + vec_add1 (hw_if_indices, xd->vlib_hw_if_index); + } } - } vlib_cli_output (vm, "DPDK vhost-user interfaces"); vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n", - dm->conf->vhost_coalesce_frames, dm->conf->vhost_coalesce_time); - - for (i = 0; i < vec_len (hw_if_indices); i++) { - hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + dm->conf->vhost_coalesce_frames, + dm->conf->vhost_coalesce_time); - if (!(xd = dpdk_vhost_user_device_from_hw_if_index(hw_if_indices[i]))) { - error = clib_error_return (0, "not dpdk vhost-user interface: '%s'", - hi->name); - goto done; - } - vui = xd->vu_intf; - vhost_dev = &xd->vu_vhost_dev; - mem = vhost_dev->mem; - u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ? - vhost_dev->virtqueue[0]->vhost_hlen : 0); - - vlib_cli_output (vm, "Interface: %v (ifindex %d)", - hi->name, hw_if_indices[i]); - - vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n", - virtio_net_hdr_sz, xd->vu_vhost_dev.features); - - feat_entry = (struct feat_struct *) &feat_array; - while(feat_entry->str) { - if (xd->vu_vhost_dev.features & (1 << feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, feat_entry->bit); - feat_entry++; - } - - vlib_cli_output (vm, "\n"); - - vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n", - vui->sock_filename, vui->sock_is_server ? "server" : "client", - strerror(vui->sock_errno)); - - vlib_cli_output (vm, " Memory regions (total %d)\n", mem->nregions); + for (i = 0; i < vec_len (hw_if_indices); i++) + { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); - if (mem->nregions){ - vlib_cli_output(vm, " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n"); - vlib_cli_output(vm, " ====== ===== ================== ================== ================== ================== ==================\n"); - } - for (j = 0; j < mem->nregions; j++) { - vlib_cli_output(vm, " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", j, - vui->region_fd[j], - mem->regions[j].guest_phys_address, - mem->regions[j].memory_size, - mem->regions[j].userspace_address, - mem->regions[j].address_offset, - vui->region_addr[j]); - } - for (q = 0; q < vui->num_vrings; q++) { - struct vhost_virtqueue *vq = vhost_dev->virtqueue[q]; - const char *qtype = (q & 1) ? "TX" : "RX"; - - vlib_cli_output(vm, "\n Virtqueue %d (%s)\n", q/2, qtype); - - vlib_cli_output(vm, " qsz %d last_used_idx %d last_used_idx_res %d\n", - vq->size, vq->last_used_idx, vq->last_used_idx_res); - - if (vq->avail && vq->used) - vlib_cli_output(vm, " avail.flags %x avail.idx %d used.flags %x used.idx %d\n", - vq->avail->flags, vq->avail->idx, vq->used->flags, vq->used->idx); - - vlib_cli_output(vm, " kickfd %d callfd %d errfd %d enabled %d\n", - vq->kickfd, vq->callfd, vui->vrings[q].errfd, vq->enabled); - - if (show_descr && vq->enabled) { - vlib_cli_output(vm, "\n descriptor table:\n"); - vlib_cli_output(vm, " id addr len flags next user_addr\n"); - vlib_cli_output(vm, " ===== ================== ===== ====== ===== ==================\n"); - for(j = 0; j < vq->size; j++) { - vlib_cli_output(vm, " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", - j, - vq->desc[j].addr, - vq->desc[j].len, - vq->desc[j].flags, - vq->desc[j].next, - pointer_to_uword(map_guest_mem(xd, vq->desc[j].addr)));} - } + if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i]))) + { + error = clib_error_return (0, "not dpdk vhost-user interface: '%s'", + hi->name); + goto done; + } + vui = xd->vu_intf; + vhost_dev = &xd->vu_vhost_dev; + mem = vhost_dev->mem; + u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ? + vhost_dev->virtqueue[0]->vhost_hlen : 0); + + vlib_cli_output (vm, "Interface: %v (ifindex %d)", + hi->name, hw_if_indices[i]); + + vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n", + virtio_net_hdr_sz, xd->vu_vhost_dev.features); + + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (xd->vu_vhost_dev.features & (1 << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + + vlib_cli_output (vm, "\n"); + + vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n", + vui->sock_filename, + vui->sock_is_server ? "server" : "client", + strerror (vui->sock_errno)); + + vlib_cli_output (vm, " Memory regions (total %d)\n", mem->nregions); + + if (mem->nregions) + { + vlib_cli_output (vm, + " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n"); + vlib_cli_output (vm, + " ====== ===== ================== ================== ================== ================== ==================\n"); + } + for (j = 0; j < mem->nregions; j++) + { + vlib_cli_output (vm, + " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", + j, vui->region_fd[j], + mem->regions[j].guest_phys_address, + mem->regions[j].memory_size, + mem->regions[j].userspace_address, + mem->regions[j].address_offset, + vui->region_addr[j]); + } + for (q = 0; q < vui->num_vrings; q++) + { + struct vhost_virtqueue *vq = vhost_dev->virtqueue[q]; + const char *qtype = (q & 1) ? "TX" : "RX"; + + vlib_cli_output (vm, "\n Virtqueue %d (%s)\n", q / 2, qtype); + + vlib_cli_output (vm, + " qsz %d last_used_idx %d last_used_idx_res %d\n", + vq->size, vq->last_used_idx, + vq->last_used_idx_res); + + if (vq->avail && vq->used) + vlib_cli_output (vm, + " avail.flags %x avail.idx %d used.flags %x used.idx %d\n", + vq->avail->flags, vq->avail->idx, + vq->used->flags, vq->used->idx); + + vlib_cli_output (vm, " kickfd %d callfd %d errfd %d enabled %d\n", + vq->kickfd, vq->callfd, vui->vrings[q].errfd, + vq->enabled); + + if (show_descr && vq->enabled) + { + vlib_cli_output (vm, "\n descriptor table:\n"); + vlib_cli_output (vm, + " id addr len flags next user_addr\n"); + vlib_cli_output (vm, + " ===== ================== ===== ====== ===== ==================\n"); + for (j = 0; j < vq->size; j++) + { + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, vq->desc[j].addr, vq->desc[j].len, + vq->desc[j].flags, vq->desc[j].next, + pointer_to_uword (map_guest_mem + (xd, vq->desc[j].addr))); + } + } + } + vlib_cli_output (vm, "\n"); } - vlib_cli_output (vm, "\n"); - } done: vec_free (hw_if_indices); return error; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_vhost_user_command, static) = { .path = "show vhost-user", .short_help = "show vhost-user interface", .function = show_dpdk_vhost_user_command_fn, }; +/* *INDENT-ON* */ #endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |