summaryrefslogtreecommitdiffstats
path: root/docs/developer/corearchitecture/multiarch
diff options
context:
space:
mode:
Diffstat (limited to 'docs/developer/corearchitecture/multiarch')
-rw-r--r--docs/developer/corearchitecture/multiarch/arbfns.rst87
-rw-r--r--docs/developer/corearchitecture/multiarch/index.rst12
-rw-r--r--docs/developer/corearchitecture/multiarch/nodefns.rst138
3 files changed, 237 insertions, 0 deletions
diff --git a/docs/developer/corearchitecture/multiarch/arbfns.rst b/docs/developer/corearchitecture/multiarch/arbfns.rst
new file mode 100644
index 00000000000..d469bd8a140
--- /dev/null
+++ b/docs/developer/corearchitecture/multiarch/arbfns.rst
@@ -0,0 +1,87 @@
+Multi-Architecture Arbitrary Function Cookbook
+==============================================
+
+Optimizing arbitrary functions for multiple architectures is simple
+enough, and very similar to process used to produce multi-architecture
+graph node dispatch functions.
+
+As with multi-architecture graph nodes, we compile source files
+multiple times, generating multiple implementations of the original
+function, and a public selector function.
+
+Details
+-------
+
+Decorate function definitions with CLIB_MARCH_FN macros. For example:
+
+Change the original function prototype...
+
+::
+
+ u32 vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
+ u32 frame_flags)
+
+...by recasting the function name and return type as the first two
+arguments to the CLIB_MARCH_FN macro:
+
+::
+
+ CLIB_MARCH_FN (vlib_frame_alloc_to_node, u32, vlib_main_t * vm,
+ u32 to_node_index, u32 frame_flags)
+
+In the actual vpp image, several versions of vlib_frame_alloc_to_node
+will appear: vlib_frame_alloc_to_node_avx2,
+vlib_frame_alloc_to_node_avx512, and so forth.
+
+
+For each multi-architecture function, use the CLIB_MARCH_FN_SELECT
+macro to help generate the one-and-only multi-architecture selector
+function:
+
+::
+
+ #ifndef CLIB_MARCH_VARIANT
+ u32
+ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
+ u32 frame_flags)
+ {
+ return CLIB_MARCH_FN_SELECT (vlib_frame_alloc_to_node)
+ (vm, to_node_index, frame_flags);
+ }
+ #endif /* CLIB_MARCH_VARIANT */
+
+Once bound, the multi-architecture selector function is about as
+expensive as an indirect function call; which is to say: not very
+expensive.
+
+Modify CMakeLists.txt
+---------------------
+
+If the component in question already lists "MULTIARCH_SOURCES", simply
+add the indicated .c file to the list. Otherwise, add as shown
+below. Note that the added file "new_multiarch_node.c" should appear in
+*both* SOURCES and MULTIARCH_SOURCES:
+
+::
+
+ add_vpp_plugin(myplugin
+ SOURCES
+ multiarch_code.c
+ ...
+
+ MULTIARCH_SOURCES
+ multiarch_code.c
+ ...
+ )
+
+A Word to the Wise
+------------------
+
+A file which liberally mixes functions worth compiling for multiple
+architectures and functions which are not will end up full of
+#ifndef CLIB_MARCH_VARIANT conditionals. This won't do a thing to make
+the code look any better.
+
+Depending on requirements, it may make sense to move functions to
+(new) files to reduce complexity and/or improve legibility of the
+resulting code.
diff --git a/docs/developer/corearchitecture/multiarch/index.rst b/docs/developer/corearchitecture/multiarch/index.rst
new file mode 100644
index 00000000000..824a8e68438
--- /dev/null
+++ b/docs/developer/corearchitecture/multiarch/index.rst
@@ -0,0 +1,12 @@
+.. _multiarch:
+
+Multi-architecture support
+==========================
+
+This reference guide describes how to use the vpp multi-architecture support scheme
+
+.. toctree::
+ :maxdepth: 1
+
+ nodefns
+ arbfns
diff --git a/docs/developer/corearchitecture/multiarch/nodefns.rst b/docs/developer/corearchitecture/multiarch/nodefns.rst
new file mode 100644
index 00000000000..9647e64f08c
--- /dev/null
+++ b/docs/developer/corearchitecture/multiarch/nodefns.rst
@@ -0,0 +1,138 @@
+Multi-Architecture Graph Node Cookbook
+======================================
+
+In the context of graph node dispatch functions, it's easy enough to
+use the vpp multi-architecture support setup. The point of the scheme
+is simple: for performance-critical nodes, generate multiple CPU
+hardware-dependent versions of the node dispatch functions, and pick
+the best one at runtime.
+
+The vpp scheme is simple enough to use, but details matter.
+
+100,000 foot view
+-----------------
+
+We compile entire graph node dispatch function implementation files
+multiple times. These compilations give rise to multiple versions of
+the graph node dispatch functions. Per-node constructor-functions
+interrogate CPU hardware, select the node dispatch function variant to
+use, and set the vlib_node_registration_t ".function" member to the
+address of the selected variant.
+
+Details
+-------
+
+Declare the node dispatch function as shown, using the VLIB\_NODE\_FN macro. The
+name of the node function **MUST** match the name of the graph node.
+
+::
+
+ VLIB_NODE_FN (ip4_sdp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+ {
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ return ip46_sdp_inline (vm, node, frame, 1 /* is_ip4 */ ,
+ 1 /* is_trace */ );
+ else
+ return ip46_sdp_inline (vm, node, frame, 1 /* is_ip4 */ ,
+ 0 /* is_trace */ );
+ }
+
+We need to generate *precisely one copy* of the
+vlib_node_registration_t, error strings, and packet trace decode function.
+
+Simply bracket these items with "#ifndef CLIB_MARCH_VARIANT...#endif":
+
+::
+
+ #ifndef CLIB_MARCH_VARIANT
+ static u8 *
+ format_sdp_trace (u8 * s, va_list * args)
+ {
+ <snip>
+ }
+ #endif
+
+ ...
+
+ #ifndef CLIB_MARCH_VARIANT
+ static char *sdp_error_strings[] = {
+ #define _(sym,string) string,
+ foreach_sdp_error
+ #undef _
+ };
+ #endif
+
+ ...
+
+ #ifndef CLIB_MARCH_VARIANT
+ VLIB_REGISTER_NODE (ip4_sdp_node) =
+ {
+ // DO NOT set the .function structure member.
+ // The multiarch selection __attribute__((constructor)) function
+ // takes care of it at runtime
+ .name = "ip4-sdp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sdp_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(sdp_error_strings),
+ .error_strings = sdp_error_strings,
+
+ .n_next_nodes = SDP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [SDP_NEXT_DROP] = "ip4-drop",
+ },
+ };
+ #endif
+
+To belabor the point: *do not* set the ".function" member! That's the job of the multi-arch
+selection \_\_attribute\_\_((constructor)) function
+
+Always inline node dispatch functions
+-------------------------------------
+
+It's typical for a graph dispatch function to contain one or more
+calls to an inline function. See above. If your node dispatch function
+is structured that way, make *ABSOLUTELY CERTAIN* to use the
+"always_inline" macro:
+
+::
+
+ always_inline uword
+ ip46_sdp_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_ip4, int is_trace)
+ { ... }
+
+Otherwise, the compiler is highly likely NOT to build multiple
+versions of the guts of your dispatch function.
+
+It's fairly easy to spot this mistake in "perf top." If you see, for
+example, a bunch of functions with names of the form
+"xxx_node_fn_avx2" in the profile, *BUT* your brand-new node function
+shows up with a name of the form "xxx_inline.isra.1", it's quite likely
+that the inline was declared "static inline" instead of "always_inline".
+
+Modify CMakeLists.txt
+---------------------
+
+If the component in question already lists "MULTIARCH_SOURCES", simply
+add the indicated .c file to the list. Otherwise, add as shown
+below. Note that the added file "new_multiarch_node.c" should appear in
+*both* SOURCES and MULTIARCH_SOURCES:
+
+::
+
+ add_vpp_plugin(myplugin
+ SOURCES
+ new_multiarch_node.c
+ ...
+
+ MULTIARCH_SOURCES
+ new_ multiarch_node.c
+ ...
+ )