From eb743fad56b32cb20ad2d2cadc4760f9c25be5e1 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 20 Mar 2017 16:34:15 +0100 Subject: vnet: add device-input thread placement infra This change adds two new debug CLI commands: - "show interface placement" to display which thread (main or worker) is responsible for processing interface rx queue vpp# show interface placement Thread 0 (vpp_main): node af-packet-input: host-vpp1 queue 0 Thread 1 (vpp_wk_0): node af-packet-input: host-virbr0 queue 0 Thread 2 (vpp_wk_1): node af-packet-input: host-vpp2 queue 0 host-lxcbr0 queue 0 - "set interface placement" to assign the thread (main or worker) which processes a specific interface rx queue vpp# set interface placement host-vpp1 queue 0 main Change-Id: Id4dd00cf2b05e10fae2125ac7cb4411b446c5e9c Signed-off-by: Damjan Marion --- src/vlib/threads.c | 14 +- src/vnet/devices/af_packet/af_packet.c | 54 +------- src/vnet/devices/af_packet/af_packet.h | 6 - src/vnet/devices/af_packet/node.c | 23 ++-- src/vnet/devices/devices.c | 240 +++++++++++++++++++++++++++++++++ src/vnet/devices/devices.h | 45 +++++++ src/vnet/interface.h | 6 + 7 files changed, 310 insertions(+), 78 deletions(-) (limited to 'src') diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 40789f59ea5..ef3a24d3857 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -685,9 +685,6 @@ start_workers (vlib_main_t * vm) clib_memcpy (rt->runtime_data, n->runtime_data, clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes)); - else if (CLIB_DEBUG > 0) - memset (rt->runtime_data, 0xfe, - VLIB_NODE_RUNTIME_DATA_SIZE); } nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = @@ -701,9 +698,6 @@ start_workers (vlib_main_t * vm) clib_memcpy (rt->runtime_data, n->runtime_data, clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes)); - else if (CLIB_DEBUG > 0) - memset (rt->runtime_data, 0xfe, - VLIB_NODE_RUNTIME_DATA_SIZE); } nm_clone->processes = vec_dup (nm->processes); @@ -1405,15 +1399,15 @@ vlib_worker_thread_fn 
(void *arg) clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); + /* Wait until the dpdk init sequence is complete */ + while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) + vlib_worker_thread_barrier_check (); + e = vlib_call_init_exit_functions (vm, vm->worker_init_function_registrations, 1 /* call_once */ ); if (e) clib_error_report (e); - /* Wait until the dpdk init sequence is complete */ - while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) - vlib_worker_thread_barrier_check (); - vlib_worker_loop (vm); } diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index e491ba473d5..5fdc59f2a20 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -67,15 +67,16 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, static clib_error_t * af_packet_fd_read_ready (unix_file_t * uf) { - vlib_main_t *vm = vlib_get_main (); af_packet_main_t *apm = &af_packet_main; + vnet_main_t *vnm = vnet_get_main (); u32 idx = uf->private_data; + af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx); apm->pending_input_bitmap = clib_bitmap_set (apm->pending_input_bitmap, idx, 1); /* Schedule the rx node */ - vlib_node_set_interrupt_pending (vm, af_packet_input_node.index); + vnet_device_input_set_interrupt_pending (vnm, apif->hw_if_index, 0); return 0; } @@ -171,31 +172,6 @@ error: return ret; } -static void -af_packet_worker_thread_enable () -{ - /* If worker threads are enabled, switch to polling mode */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - af_packet_input_node.index, - VLIB_NODE_STATE_POLLING); - })); - -} - -static void -af_packet_worker_thread_disable () -{ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - af_packet_input_node.index, - VLIB_NODE_STATE_INTERRUPT); - })); - -} - int af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, u32 * sw_if_index) @@ 
-298,6 +274,9 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); apif->sw_if_index = sw->sw_if_index; + vnet_set_device_input_node (apif->hw_if_index, af_packet_input_node.index); + vnet_device_input_assign_thread (apif->hw_if_index, 0, /* queue */ + ~0 /* any cpu */ ); vnet_hw_interface_set_flags (vnm, apif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); @@ -307,9 +286,6 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, if (sw_if_index) *sw_if_index = apif->sw_if_index; - if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 1) - af_packet_worker_thread_enable (); - return 0; error: @@ -323,7 +299,6 @@ int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) { vnet_main_t *vnm = vnet_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); af_packet_main_t *apm = &af_packet_main; af_packet_if_t *apif; uword *p; @@ -373,8 +348,6 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) ethernet_delete_interface (vnm, apif->hw_if_index); pool_put (apm->interfaces, apif); - if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 0) - af_packet_worker_thread_disable (); return 0; } @@ -384,24 +357,9 @@ af_packet_init (vlib_main_t * vm) { af_packet_main_t *apm = &af_packet_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; memset (apm, 0, sizeof (af_packet_main_t)); - apm->input_cpu_first_index = 0; - apm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - apm->input_cpu_first_index = tr->first_index; - apm->input_cpu_count = tr->count; - } - mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword)); vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1, diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index e00e5cb415f..50ec23785ae 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -51,12 +51,6 @@ typedef struct /* hash of host interface names */ mhash_t if_index_by_host_if_name; - - /* first cpu index */ - u32 input_cpu_first_index; - - /* total cpu count */ - u32 input_cpu_count; } af_packet_main_t; af_packet_main_t af_packet_main; diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index ab7fd80005f..ba337f3f70b 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -246,20 +246,18 @@ static uword af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - int i; u32 n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif; + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + vnet_device_and_queue_t *dq; - for (i = 0; i < vec_len (apm->interfaces); i++) - { - apif = vec_elt_at_index (apm->interfaces, i); - if (apif->is_admin_up && - (i % apm->input_cpu_count) == - (cpu_index - apm->input_cpu_first_index)) - n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif); - } + vec_foreach (dq, rt->devices_and_queues) + { + af_packet_if_t *apif; + apif = vec_elt_at_index (apm->interfaces, dq->dev_instance); + if (apif->is_admin_up) + n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif); + } return n_rx_packets; } @@ -271,9 +269,6 @@ VLIB_REGISTER_NODE (af_packet_input_node) = { .sibling_of = "device-input", .format_trace = 
format_af_packet_input_trace, .type = VLIB_NODE_TYPE_INPUT, - /** - * default state is INTERRUPT mode, switch to POLLING if worker threads are enabled - */ .state = VLIB_NODE_STATE_INTERRUPT, .n_errors = AF_PACKET_INPUT_N_ERROR, .error_strings = af_packet_input_error_strings, diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 38f3002b5a9..4164522099e 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -32,6 +32,7 @@ device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, VLIB_REGISTER_NODE (device_input_node) = { .function = device_input_fn, .name = "device-input", + .runtime_data_bytes = sizeof (vnet_device_input_runtime_t), .type = VLIB_NODE_TYPE_INPUT, .state = VLIB_NODE_STATE_DISABLED, .n_next_nodes = VNET_DEVICE_INPUT_N_NEXT_NODES, @@ -83,18 +84,257 @@ VNET_FEATURE_INIT (ethernet_input, static) = { }; /* *INDENT-ON* */ +static int +vnet_device_queue_sort (void *a1, void *a2) +{ + vnet_device_and_queue_t *dq1 = a1; + vnet_device_and_queue_t *dq2 = a2; + + if (dq1->dev_instance > dq2->dev_instance) + return 1; + else if (dq1->dev_instance < dq2->dev_instance) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +void +vnet_device_input_assign_thread (u32 hw_if_index, + u16 queue_id, uword cpu_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_device_main_t *vdm = &vnet_device_main; + vlib_main_t *vm; + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + + ASSERT (hw->input_node_index > 0); + + if (vdm->first_worker_cpu_index == 0) + cpu_index = 0; + + if (cpu_index != 0 && + (cpu_index < vdm->first_worker_cpu_index || + cpu_index > vdm->last_worker_cpu_index)) + { + cpu_index = vdm->next_worker_cpu_index++; + if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index) + vdm->next_worker_cpu_index = vdm->first_worker_cpu_index; + 
} + + vm = vlib_mains[cpu_index]; + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); + + vec_add2 (rt->devices_and_queues, dq, 1); + dq->hw_if_index = hw_if_index; + dq->dev_instance = hw->dev_instance; + dq->queue_id = queue_id; + + vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + vec_validate (hw->input_node_cpu_index_by_queue, queue_id); + hw->input_node_cpu_index_by_queue[queue_id] = cpu_index; +} + +static int +vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id, + uword cpu_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + uword old_cpu_index; + + if (hw->input_node_cpu_index_by_queue == 0) + return VNET_API_ERROR_INVALID_INTERFACE; + + if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1) + return VNET_API_ERROR_INVALID_INTERFACE; + + old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; + + if (old_cpu_index == cpu_index) + return 0; + + rt = + vlib_node_get_runtime_data (vlib_mains[old_cpu_index], + hw->input_node_index); + + vec_foreach (dq, rt->devices_and_queues) + if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) + { + vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues); + goto deleted; + } + + return VNET_API_ERROR_INVALID_INTERFACE; + +deleted: + vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + + return 0; +} + +static clib_error_t * +show_device_placement_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *s = 0; + vnet_main_t *vnm = vnet_get_main (); + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + vlib_node_t *pn = vlib_get_node_by_name (vm, (u8 *) "device-input"); + uword si; + int index = 0; + + /* *INDENT-OFF* */ + foreach_vlib_main (({ + clib_bitmap_foreach (si, pn->sibling_bitmap, + ({ + rt = vlib_node_get_runtime_data (this_vlib_main, si); 
+ + if (vec_len (rt->devices_and_queues)) + s = format (s, " node %U:\n", format_vlib_node_name, vm, si); + + vec_foreach (dq, rt->devices_and_queues) + { + s = format (s, " %U queue %u\n", + format_vnet_sw_if_index_name, vnm, dq->hw_if_index, + dq->queue_id); + } + })); + if (vec_len (s) > 0) + { + vlib_cli_output(vm, "Thread %u (%v):\n%v", index, + vlib_worker_threads[index].name, s); + vec_reset_length (s); + } + index++; + })); + /* *INDENT-ON* */ + + vec_free (s); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_delete_command, static) = { + .path = "show interface placement", + .short_help = "show interface placement", + .function = show_device_placement_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_device_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + vnet_device_main_t *vdm = &vnet_device_main; + u32 hw_if_index = (u32) ~ 0; + u32 queue_id = (u32) 0; + u32 cpu_index = (u32) ~ 0; + int rv; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue_id)) + ; + else if (unformat (line_input, "main", &cpu_index)) + cpu_index = 0; + else if (unformat (line_input, "worker %d", &cpu_index)) + cpu_index += vdm->first_worker_cpu_index; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu_index > vdm->last_worker_cpu_index) + return clib_error_return (0, + "please specify valid worker thread or main"); + 
+ rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index); + + if (rv) + return clib_error_return (0, "not found"); + + vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index); + + return 0; +} + +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. If the 'queue' is not provided, + * it defaults to 0. + * + * @cliexpar + * Example of how to display the interface placement: + * @cliexstart{show interface placement} + * Thread 1 (vpp_wk_0): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign an interface and queue to a worker thread: + * @cliexcmd{set interface placement GigabitEthernet0/8/0 queue 1 worker 0} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { + .path = "set interface placement", + .short_help = "set interface placement [queue ] [thread | main]", + .function = set_device_placement, +}; +/* *INDENT-ON* */ + static clib_error_t * vnet_device_init (vlib_main_t * vm) { vnet_device_main_t *vdm = &vnet_device_main; vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; + if (tr && tr->count > 0) + { + vdm->first_worker_cpu_index = tr->first_index; + vdm->next_worker_cpu_index = tr->first_index; + vdm->last_worker_cpu_index = tr->first_index + tr->count - 1; + } return 0; } VLIB_INIT_FUNCTION (vnet_device_init); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index a5cbc35e682..bbb29fe301c 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -50,12 +50,38 @@ typedef struct typedef struct { vnet_device_per_worker_data_t *workers; + uword first_worker_cpu_index; + uword last_worker_cpu_index; + uword next_worker_cpu_index; } vnet_device_main_t; +typedef struct +{ + u32 hw_if_index; + u32 dev_instance; + u16 queue_id; +} vnet_device_and_queue_t; + +typedef struct +{ + vnet_device_and_queue_t *devices_and_queues; +} vnet_device_input_runtime_t; + extern vnet_device_main_t vnet_device_main; extern vlib_node_registration_t device_input_node; extern const u32 device_input_next_node_advance[]; +static inline void +vnet_set_device_input_node (u32 hw_if_index, u32 node_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + hw->input_node_index = node_index; +} + +void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id, + uword cpu_index); + static inline u64 vnet_get_aggregate_rx_packets (void) { @@ -78,6 +104,25 @@ vnet_device_increment_rx_packets (u32 cpu_index, u64 count) pwd->aggregate_rx_packets += count; } +static_always_inline vnet_device_and_queue_t * +vnet_get_device_and_queue (vlib_main_t * vm, vlib_node_runtime_t * node) +{ + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + return rt->devices_and_queues; +} + +static_always_inline void +vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id) +{ + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + + 
ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue)); + u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; + vlib_node_set_interrupt_pending (vlib_mains[cpu_index], + hw->input_node_index); +} + #endif /* included_vnet_vnet_device_h */ /* diff --git a/src/vnet/interface.h b/src/vnet/interface.h index ef8f9118fbe..a1ea2d6184f 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -464,6 +464,12 @@ typedef struct vnet_hw_interface_t #define VNET_HW_INTERFACE_BOND_INFO_NONE ((uword *) 0) #define VNET_HW_INTERFACE_BOND_INFO_SLAVE ((uword *) ~0) + /* Input node */ + u32 input_node_index; + + /* input node cpu index by queue */ + u32 *input_node_cpu_index_by_queue; + } vnet_hw_interface_t; extern vnet_device_class_t vnet_local_interface_device_class; -- cgit 1.2.3-korg