1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
|
.. BSD LICENSE
Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.. _multi_process_app:
Multi-process Sample Application
================================
This chapter describes the example applications for multi-processing that are included in the DPDK.
Example Applications
--------------------
Building the Sample Applications
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The multi-process example applications are built in the same way as other sample applications,
and as documented in the *DPDK Getting Started Guide*.
To build all the example applications:
#. Set RTE_SDK and go to the example directory:
.. code-block:: console
export RTE_SDK=/path/to/rte_sdk
cd ${RTE_SDK}/examples/multi_process
#. Set the target (a default target will be used if not specified). For example:
.. code-block:: console
export RTE_TARGET=x86_64-native-linuxapp-gcc
See the *DPDK Getting Started Guide* for possible RTE_TARGET values.
#. Build the applications:
.. code-block:: console
make
.. note::
If just a specific multi-process application needs to be built,
the final make command can be run just in that application's directory,
rather than at the top-level multi-process directory.
Basic Multi-process Example
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The examples/simple_mp folder in the DPDK release contains a basic example application to demonstrate how
two DPDK processes can work together using queues and memory pools to share information.
Running the Application
^^^^^^^^^^^^^^^^^^^^^^^
To run the application, start one copy of the simple_mp binary in one terminal,
passing at least two cores in the coremask, as follows:
.. code-block:: console
./build/simple_mp -c 3 -n 4 --proc-type=primary
For the first DPDK process run, the proc-type flag can be omitted or set to auto,
since all DPDK processes will default to being a primary instance,
meaning they have control over the hugepage shared memory regions.
The process should start successfully and display a command prompt as follows:
.. code-block:: console
$ ./build/simple_mp -c 3 -n 4 --proc-type=primary
EAL: coremask set to 3
EAL: Detected lcore 0 on socket 0
EAL: Detected lcore 1 on socket 0
EAL: Detected lcore 2 on socket 0
EAL: Detected lcore 3 on socket 0
...
EAL: Requesting 2 pages of size 1073741824
EAL: Requesting 768 pages of size 2097152
EAL: Ask a virtual area of 0x40000000 bytes
EAL: Virtual area found at 0x7ff200000000 (size = 0x40000000)
...
EAL: check igb_uio module
EAL: check module finished
EAL: Master core 0 is ready (tid=54e41820)
EAL: Core 1 is ready (tid=53b32700)
Starting core 1
simple_mp >
To run the secondary process to communicate with the primary process,
again run the same binary setting at least two cores in the coremask:
.. code-block:: console
./build/simple_mp -c C -n 4 --proc-type=secondary
When running a secondary process such as that shown above, the proc-type parameter can again be specified as auto.
However, omitting the parameter altogether will cause the process to try and start as a primary rather than secondary process.
Once the process type is specified correctly,
the process starts up, displaying largely similar status messages to the primary instance as it initializes.
Once again, you will be presented with a command prompt.
Once both processes are running, messages can be sent between them using the send command.
At any stage, either process can be terminated using the quit command.
.. code-block:: console
EAL: Master core 10 is ready (tid=b5f89820) EAL: Master core 8 is ready (tid=864a3820)
EAL: Core 11 is ready (tid=84ffe700) EAL: Core 9 is ready (tid=85995700)
Starting core 11 Starting core 9
simple_mp > send hello_secondary simple_mp > core 9: Received 'hello_secondary'
simple_mp > core 11: Received 'hello_primary' simple_mp > send hello_primary
simple_mp > quit simple_mp > quit
.. note::
If the primary instance is terminated, the secondary instance must also be shut-down and restarted after the primary.
This is necessary because the primary instance will clear and reset the shared memory regions on startup,
invalidating the secondary process's pointers.
The secondary process can be stopped and restarted without affecting the primary process.
How the Application Works
^^^^^^^^^^^^^^^^^^^^^^^^^
The core of this example application is based on using two queues and a single memory pool in shared memory.
These three objects are created at startup by the primary process,
since the secondary process cannot create objects in memory as it cannot reserve memory zones,
and the secondary process then uses lookup functions to attach to these objects as it starts up.
.. code-block:: c
if (rte_eal_process_type() == RTE_PROC_PRIMARY){
send_ring = rte_ring_create(_PRI_2_SEC, ring_size, SOCKET0, flags);
recv_ring = rte_ring_create(_SEC_2_PRI, ring_size, SOCKET0, flags);
message_pool = rte_mempool_create(_MSG_POOL, pool_size, string_size, pool_cache, priv_data_sz, NULL, NULL, NULL, NULL, SOCKET0, flags);
} else {
recv_ring = rte_ring_lookup(_PRI_2_SEC);
send_ring = rte_ring_lookup(_SEC_2_PRI);
message_pool = rte_mempool_lookup(_MSG_POOL);
}
Note, however, that the named ring structure used as send_ring in the primary process is the recv_ring in the secondary process.
Once the rings and memory pools are all available in both the primary and secondary processes,
the application simply dedicates two threads to sending and receiving messages respectively.
The receive thread simply dequeues any messages on the receive ring, prints them,
and frees the buffer space used by the messages back to the memory pool.
The send thread makes use of the command-prompt library to interactively request user input for messages to send.
Once a send command is issued by the user, a buffer is allocated from the memory pool, filled in with the message contents,
then enqueued on the appropriate rte_ring.
Symmetric Multi-process Example
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The second example of DPDK multi-process support demonstrates how a set of processes can run in parallel,
with each process performing the same set of packet- processing operations.
(Since each process is identical in functionality to the others,
we refer to this as symmetric multi-processing, to differentiate it from asymmetric multi- processing -
such as a client-server mode of operation seen in the next example,
where different processes perform different tasks, yet co-operate to form a packet-processing system.)
The following diagram shows the data-flow through the application, using two processes.
.. _figure_sym_multi_proc_app:
.. figure:: img/sym_multi_proc_app.*
Example Data Flow in a Symmetric Multi-process Application
As the diagram shows, each process reads packets from each of the network ports in use.
RSS is used to distribute incoming packets on each port to different hardware RX queues.
Each process reads a different RX queue on each port and so does not contend with any other process for that queue access.
Similarly, each process writes outgoing packets to a different TX queue on each port.
Running the Application
^^^^^^^^^^^^^^^^^^^^^^^
As with the simple_mp example, the first instance of the symmetric_mp process must be run as the primary instance,
though with a number of other application- specific parameters also provided after the EAL arguments.
These additional parameters are:
* -p <portmask>, where portmask is a hexadecimal bitmask of what ports on the system are to be used.
For example: -p 3 to use ports 0 and 1 only.
* --num-procs <N>, where N is the total number of symmetric_mp instances that will be run side-by-side to perform packet processing.
This parameter is used to configure the appropriate number of receive queues on each network port.
* --proc-id <n>, where n is a numeric value in the range 0 <= n < N (number of processes, specified above).
This identifies which symmetric_mp instance is being run, so that each process can read a unique receive queue on each network port.
The secondary symmetric_mp instances must also have these parameters specified,
and the first two must be the same as those passed to the primary instance, or errors result.
For example, to run a set of four symmetric_mp instances, running on lcores 1-4,
all performing level-2 forwarding of packets between ports 0 and 1,
the following commands can be used (assuming run as root):
.. code-block:: console
# ./build/symmetric_mp -c 2 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=0
# ./build/symmetric_mp -c 4 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=1
# ./build/symmetric_mp -c 8 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=2
# ./build/symmetric_mp -c 10 -n 4 --proc-type=auto -- -p 3 --num-procs=4 --proc-id=3
.. note::
In the above example, the process type can be explicitly specified as primary or secondary, rather than auto.
When using auto, the first process run creates all the memory structures needed for all processes -
irrespective of whether it has a proc-id of 0, 1, 2 or 3.
.. note::
For the symmetric multi-process example, since all processes work in the same manner,
once the hugepage shared memory and the network ports are initialized,
it is not necessary to restart all processes if the primary instance dies.
Instead, that process can be restarted as a secondary,
by explicitly setting the proc-type to secondary on the command line.
(All subsequent instances launched will also need this explicitly specified,
as auto-detection will detect no primary processes running and therefore attempt to re-initialize shared memory.)
How the Application Works
^^^^^^^^^^^^^^^^^^^^^^^^^
The initialization calls in both the primary and secondary instances are the same for the most part,
calling the rte_eal_init(), 1 G and 10 G driver initialization and then rte_eal_pci_probe() functions.
Thereafter, the initialization done depends on whether the process is configured as a primary or secondary instance.
In the primary instance, a memory pool is created for the packet mbufs and the network ports to be used are initialized -
the number of RX and TX queues per port being determined by the num-procs parameter passed on the command-line.
The structures for the initialized network ports are stored in shared memory and
therefore will be accessible by the secondary process as it initializes.
.. code-block:: c
if (num_ports & 1)
rte_exit(EXIT_FAILURE, "Application must use an even number of ports\n");
for(i = 0; i < num_ports; i++){
if(proc_type == RTE_PROC_PRIMARY)
if (smp_port_init(ports[i], mp, (uint16_t)num_procs) < 0)
rte_exit(EXIT_FAILURE, "Error initializing ports\n");
}
In the secondary instance, rather than initializing the network ports, the port information exported by the primary process is used,
giving the secondary process access to the hardware and software rings for each network port.
Similarly, the memory pool of mbufs is accessed by doing a lookup for it by name:
.. code-block:: c
mp = (proc_type == RTE_PROC_SECONDARY) ? rte_mempool_lookup(_SMP_MBUF_POOL) : rte_mempool_create(_SMP_MBUF_POOL, NB_MBUFS, MBUF_SIZE, ... )
Once this initialization is complete, the main loop of each process, both primary and secondary,
is exactly the same - each process reads from each port using the queue corresponding to its proc-id parameter,
and writes to the corresponding transmit queue on the output port.
Client-Server Multi-process Example
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The third example multi-process application included with the DPDK shows how one can
use a client-server type multi-process design to do packet processing.
In this example, a single server process performs the packet reception from the ports being used and
distributes these packets using round-robin ordering among a set of client processes,
which perform the actual packet processing.
In this case, the client applications just perform level-2 forwarding of packets by sending each packet out on a different network port.
The following diagram shows the data-flow through the application, using two client processes.
.. _figure_client_svr_sym_multi_proc_app:
.. figure:: img/client_svr_sym_multi_proc_app.*
Example Data Flow in a Client-Server Symmetric Multi-process Application
Running the Application
^^^^^^^^^^^^^^^^^^^^^^^
The server process must be run initially as the primary process to set up all memory structures for use by the clients.
In addition to the EAL parameters, the application- specific parameters are:
* -p <portmask >, where portmask is a hexadecimal bitmask of what ports on the system are to be used.
For example: -p 3 to use ports 0 and 1 only.
* -n <num-clients>, where the num-clients parameter is the number of client processes that will process the packets received
by the server application.
.. note::
In the server process, a single thread, the master thread, that is, the lowest numbered lcore in the coremask, performs all packet I/O.
If a coremask is specified with more than a single lcore bit set in it,
an additional lcore will be used for a thread to periodically print packet count statistics.
Since the server application stores configuration data in shared memory, including the network ports to be used,
the only application parameter needed by a client process is its client instance ID.
Therefore, to run a server application on lcore 1 (with lcore 2 printing statistics) along with two client processes running on lcores 3 and 4,
the following commands could be used:
.. code-block:: console
# ./mp_server/build/mp_server -c 6 -n 4 -- -p 3 -n 2
# ./mp_client/build/mp_client -c 8 -n 4 --proc-type=auto -- -n 0
# ./mp_client/build/mp_client -c 10 -n 4 --proc-type=auto -- -n 1
.. note::
If the server application dies and needs to be restarted, all client applications also need to be restarted,
as there is no support in the server application for it to run as a secondary process.
Any client processes that need restarting can be restarted without affecting the server process.
How the Application Works
^^^^^^^^^^^^^^^^^^^^^^^^^
The server process performs the network port and data structure initialization much as the symmetric multi-process application does when run as primary.
One additional enhancement in this sample application is that the server process stores its port configuration data in a memory zone in hugepage shared memory.
This eliminates the need for the client processes to have the portmask parameter passed into them on the command line,
as is done for the symmetric multi-process application, and therefore eliminates mismatched parameters as a potential source of errors.
In the same way that the server process is designed to be run as a primary process instance only,
the client processes are designed to be run as secondary instances only.
They have no code to attempt to create shared memory objects.
Instead, handles to all needed rings and memory pools are obtained via calls to rte_ring_lookup() and rte_mempool_lookup().
The network ports for use by the processes are obtained by loading the network port drivers and probing the PCI bus,
which will, as in the symmetric multi-process example,
automatically get access to the network ports using the settings already configured by the primary/server process.
Once all applications are initialized, the server operates by reading packets from each network port in turn and
distributing those packets to the client queues (software rings, one for each client process) in round-robin order.
On the client side, the packets are read from the rings in as big of bursts as possible, then routed out to a different network port.
The routing used is very simple. All packets received on the first NIC port are transmitted back out on the second port and vice versa.
Similarly, packets are routed between the 3rd and 4th network ports and so on.
The sending of packets is done by writing the packets directly to the network ports; they are not transferred back via the server process.
In both the server and the client processes, outgoing packets are buffered before being sent,
so as to allow the sending of multiple packets in a single burst to improve efficiency.
For example, the client process will buffer packets to send,
until either the buffer is full or until we receive no further packets from the server.
Master-slave Multi-process Example
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The fourth example of DPDK multi-process support demonstrates a master-slave model that
provide the capability of application recovery if a slave process crashes or meets unexpected conditions.
In addition, it also demonstrates the floating process,
which can run among different cores in contrast to the traditional way of binding a process/thread to a specific CPU core,
using the local cache mechanism of mempool structures.
This application performs the same functionality as the L2 Forwarding sample application,
therefore this chapter does not cover that part but describes functionality that is introduced in this multi-process example only.
Please refer to :doc:`l2_forward_real_virtual` for more information.
Unlike previous examples where all processes are started from the command line with input arguments, in this example,
only one process is spawned from the command line and that process creates other processes.
The following section describes this in more detail.
Master-slave Process Models
^^^^^^^^^^^^^^^^^^^^^^^^^^^
The process spawned from the command line is called the *master process* in this document.
A process created by the master is called a *slave process*.
The application has only one master process, but could have multiple slave processes.
Once the master process begins to run, it tries to initialize all the resources such as
memory, CPU cores, driver, ports, and so on, as the other examples do.
Thereafter, it creates slave processes, as shown in the following figure.
.. _figure_master_slave_proc:
.. figure:: img/master_slave_proc.*
Master-slave Process Workflow
The master process calls the rte_eal_mp_remote_launch() EAL function to launch an application function for each pinned thread through the pipe.
Then, it waits to check if any slave processes have exited.
If so, the process tries to re-initialize the resources that belong to that slave and launch them in the pinned thread entry again.
The following section describes the recovery procedures in more detail.
For each pinned thread in EAL, after reading any data from the pipe, it tries to call the function that the application specified.
In this master specified function, a fork() call creates a slave process that performs the L2 forwarding task.
Then, the function waits until the slave exits, is killed or crashes. Thereafter, it notifies the master of this event and returns.
Finally, the EAL pinned thread waits until the new function is launched.
After discussing the master-slave model, it is necessary to mention another issue, global and static variables.
For multiple-thread cases, all global and static variables have only one copy and they can be accessed by any thread if applicable.
So, they can be used to sync or share data among threads.
In the previous examples, each process has separate global and static variables in memory and are independent of each other.
If it is necessary to share the knowledge, some communication mechanism should be deployed, such as, memzone, ring, shared memory, and so on.
The global or static variables are not a valid approach to share data among processes.
For variables in this example, on the one hand, the slave process inherits all the knowledge of these variables after being created by the master.
On the other hand, other processes cannot know if one or more processes modifies them after slave creation since that
is the nature of a multiple process address space.
But this does not mean that these variables cannot be used to share or sync data; it depends on the use case.
The following are the possible use cases:
#. The master process starts and initializes a variable and it will never be changed after slave processes created. This case is OK.
#. After the slave processes are created, the master or slave cores need to change a variable, but other processes do not need to know the change.
This case is also OK.
#. After the slave processes are created, the master or a slave needs to change a variable.
In the meantime, one or more other process needs to be aware of the change.
In this case, global and static variables cannot be used to share knowledge. Another communication mechanism is needed.
A simple approach without lock protection can be a heap buffer allocated by rte_malloc or mem zone.
Slave Process Recovery Mechanism
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Before talking about the recovery mechanism, it is necessary to know what is needed before a new slave instance can run if a previous one exited.
When a slave process exits, the system returns all the resources allocated for this process automatically.
However, this does not include the resources that were allocated by the DPDK. All the hardware resources are shared among the processes,
which include memzone, mempool, ring, a heap buffer allocated by the rte_malloc library, and so on.
If the new instance runs and the allocated resource is not returned, either resource allocation failed or the hardware resource is lost forever.
When a slave process runs, it may have dependencies on other processes.
They could have execution sequence orders; they could share the ring to communicate; they could share the same port for reception and forwarding;
they could use lock structures to do exclusive access in some critical path.
What happens to the dependent process(es) if the peer leaves?
The consequence are varied since the dependency cases are complex.
It depends on what the processed had shared.
However, it is necessary to notify the peer(s) if one slave exited.
Then, the peer(s) will be aware of that and wait until the new instance begins to run.
Therefore, to provide the capability to resume the new slave instance if the previous one exited, it is necessary to provide several mechanisms:
#. Keep a resource list for each slave process.
Before a slave process run, the master should prepare a resource list.
After it exits, the master could either delete the allocated resources and create new ones,
or re-initialize those for use by the new instance.
#. Set up a notification mechanism for slave process exit cases. After the specific slave leaves,
the master should be notified and then help to create a new instance.
This mechanism is provided in Section `Master-slave Process Models`_.
#. Use a synchronization mechanism among dependent processes.
The master should have the capability to stop or kill slave processes that have a dependency on the one that has exited.
Then, after the new instance of exited slave process begins to run, the dependency ones could resume or run from the start.
The example sends a STOP command to slave processes dependent on the exited one, then they will exit.
Thereafter, the master creates new instances for the exited slave processes.
The following diagram describes slave process recovery.
.. _figure_slave_proc_recov:
.. figure:: img/slave_proc_recov.*
Slave Process Recovery Process Flow
Floating Process Support
^^^^^^^^^^^^^^^^^^^^^^^^
When the DPDK application runs, there is always a -c option passed in to indicate the cores that are enabled.
Then, the DPDK creates a thread for each enabled core.
By doing so, it creates a 1:1 mapping between the enabled core and each thread.
The enabled core always has an ID, therefore, each thread has a unique core ID in the DPDK execution environment.
With the ID, each thread can easily access the structures or resources exclusively belonging to it without using function parameter passing.
It can easily use the rte_lcore_id() function to get the value in every function that is called.
For threads/processes not created in that way, either pinned to a core or not, they will not own a unique ID and the
rte_lcore_id() function will not work in the correct way.
However, sometimes these threads/processes still need the unique ID mechanism to do easy access on structures or resources.
For example, the DPDK mempool library provides a local cache mechanism
(refer to :ref:`mempool_local_cache`)
for fast element allocation and freeing.
If using a non-unique ID or a fake one,
a race condition occurs if two or more threads/ processes with the same core ID try to use the local cache.
Therefore, unused core IDs from the passing of parameters with the -c option are used to organize the core ID allocation array.
Once the floating process is spawned, it tries to allocate a unique core ID from the array and release it on exit.
A natural way to spawn a floating process is to use the fork() function and allocate a unique core ID from the unused core ID array.
However, it is necessary to write new code to provide a notification mechanism for slave exit
and make sure the process recovery mechanism can work with it.
To avoid producing redundant code, the Master-Slave process model is still used to spawn floating processes,
then cancel the affinity to specific cores.
Besides that, clear the core ID assigned to the DPDK spawning a thread that has a 1:1 mapping with the core mask.
Thereafter, get a new core ID from the unused core ID allocation array.
Run the Application
^^^^^^^^^^^^^^^^^^^
This example has a command line similar to the L2 Forwarding sample application with a few differences.
To run the application, start one copy of the l2fwd_fork binary in one terminal.
Unlike the L2 Forwarding example,
this example requires at least three cores since the master process will wait and be accountable for slave process recovery.
The command is as follows:
.. code-block:: console
#./build/l2fwd_fork -c 1c -n 4 -- -p 3 -f
This example provides another -f option to specify the use of floating process.
If not specified, the example will use a pinned process to perform the L2 forwarding task.
To verify the recovery mechanism, proceed as follows: First, check the PID of the slave processes:
.. code-block:: console
#ps -fe | grep l2fwd_fork
root 5136 4843 29 11:11 pts/1 00:00:05 ./build/l2fwd_fork
root 5145 5136 98 11:11 pts/1 00:00:11 ./build/l2fwd_fork
root 5146 5136 98 11:11 pts/1 00:00:11 ./build/l2fwd_fork
Then, kill one of the slaves:
.. code-block:: console
#kill -9 5145
After 1 or 2 seconds, check whether the slave has resumed:
.. code-block:: console
#ps -fe | grep l2fwd_fork
root 5136 4843 3 11:11 pts/1 00:00:06 ./build/l2fwd_fork
root 5247 5136 99 11:14 pts/1 00:00:01 ./build/l2fwd_fork
root 5248 5136 99 11:14 pts/1 00:00:01 ./build/l2fwd_fork
It can also monitor the traffic generator statics to see whether slave processes have resumed.
Explanation
^^^^^^^^^^^
As described in previous sections,
not all global and static variables need to change to be accessible in multiple processes;
it depends on how they are used.
In this example,
the statics info on packets dropped/forwarded/received count needs to be updated by the slave process,
and the master needs to see the update and print them out.
So, it needs to allocate a heap buffer using rte_zmalloc.
In addition, if the -f option is specified,
an array is needed to store the allocated core ID for the floating process so that the master can return it
after a slave has exited accidentally.
.. code-block:: c
static int
l2fwd_malloc_shared_struct(void)
{
port_statistics = rte_zmalloc("port_stat", sizeof(struct l2fwd_port_statistics) * RTE_MAX_ETHPORTS, 0);
if (port_statistics == NULL)
return -1;
/* allocate mapping_id array */
if (float_proc) {
int i;
mapping_id = rte_malloc("mapping_id", sizeof(unsigned) * RTE_MAX_LCORE, 0);
if (mapping_id == NULL)
return -1;
for (i = 0 ;i < RTE_MAX_LCORE; i++)
mapping_id[i] = INVALID_MAPPING_ID;
}
return 0;
}
For each slave process, packets are received from one port and forwarded to another port that another slave is operating on.
If the other slave exits accidentally, the port it is operating on may not work normally,
so the first slave cannot forward packets to that port.
There is a dependency on the port in this case. So, the master should recognize the dependency.
The following is the code to detect this dependency:
.. code-block:: c
for (portid = 0; portid < nb_ports; portid++) {
/* skip ports that are not enabled */
if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
continue;
/* Find pair ports' lcores */
find_lcore = find_pair_lcore = 0;
pair_port = l2fwd_dst_ports[portid];
for (i = 0; i < RTE_MAX_LCORE; i++) {
if (!rte_lcore_is_enabled(i))
continue;
for (j = 0; j < lcore_queue_conf[i].n_rx_port;j++) {
if (lcore_queue_conf[i].rx_port_list[j] == portid) {
lcore = i;
find_lcore = 1;
break;
}
if (lcore_queue_conf[i].rx_port_list[j] == pair_port) {
pair_lcore = i;
find_pair_lcore = 1;
break;
}
}
if (find_lcore && find_pair_lcore)
break;
}
if (!find_lcore || !find_pair_lcore)
rte_exit(EXIT_FAILURE, "Not find port=%d pair\\n", portid);
printf("lcore %u and %u paired\\n", lcore, pair_lcore);
lcore_resource[lcore].pair_id = pair_lcore;
lcore_resource[pair_lcore].pair_id = lcore;
}
Before launching the slave process,
it is necessary to set up the communication channel between the master and slave so that
the master can notify the slave if its peer process with the dependency exited.
In addition, the master needs to register a callback function in the case where a specific slave exited.
.. code-block:: c
for (i = 0; i < RTE_MAX_LCORE; i++) {
if (lcore_resource[i].enabled) {
/* Create ring for master and slave communication */
ret = create_ms_ring(i);
if (ret != 0)
rte_exit(EXIT_FAILURE, "Create ring for lcore=%u failed",i);
if (flib_register_slave_exit_notify(i,slave_exit_cb) != 0)
rte_exit(EXIT_FAILURE, "Register master_trace_slave_exit failed");
}
}
After launching the slave process, the master waits and prints out the port statics periodically.
If an event indicating that a slave process exited is detected,
it sends the STOP command to the peer and waits until it has also exited.
Then, it tries to clean up the execution environment and prepare new resources.
Finally, the new slave instance is launched.
.. code-block:: c
while (1) {
sleep(1);
cur_tsc = rte_rdtsc();
diff_tsc = cur_tsc - prev_tsc;
/* if timer is enabled */
if (timer_period > 0) {
/* advance the timer */
timer_tsc += diff_tsc;
/* if timer has reached its timeout */
if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
print_stats();
/* reset the timer */
timer_tsc = 0;
}
}
prev_tsc = cur_tsc;
/* Check any slave need restart or recreate */
rte_spinlock_lock(&res_lock);
for (i = 0; i < RTE_MAX_LCORE; i++) {
struct lcore_resource_struct *res = &lcore_resource[i];
struct lcore_resource_struct *pair = &lcore_resource[res->pair_id];
/* If find slave exited, try to reset pair */
if (res->enabled && res->flags && pair->enabled) {
if (!pair->flags) {
master_sendcmd_with_ack(pair->lcore_id, CMD_STOP);
rte_spinlock_unlock(&res_lock);
sleep(1);
rte_spinlock_lock(&res_lock);
if (pair->flags)
continue;
}
if (reset_pair(res->lcore_id, pair->lcore_id) != 0)
rte_exit(EXIT_FAILURE, "failed to reset slave");
res->flags = 0;
pair->flags = 0;
}
}
rte_spinlock_unlock(&res_lock);
}
When the slave process is spawned and starts to run, it checks whether the floating process option is applied.
If so, it clears the affinity to a specific core and also sets the unique core ID to 0.
Then, it tries to allocate a new core ID.
Since the core ID has changed, the resource allocated by the master cannot work,
so it remaps the resource to the new core ID slot.
.. code-block:: c
static int
l2fwd_launch_one_lcore( attribute ((unused)) void *dummy)
{
unsigned lcore_id = rte_lcore_id();
if (float_proc) {
unsigned flcore_id;
/* Change it to floating process, also change it's lcore_id */
clear_cpu_affinity();
RTE_PER_LCORE(_lcore_id) = 0;
/* Get a lcore_id */
if (flib_assign_lcore_id() < 0 ) {
printf("flib_assign_lcore_id failed\n");
return -1;
}
flcore_id = rte_lcore_id();
/* Set mapping id, so master can return it after slave exited */
mapping_id[lcore_id] = flcore_id;
printf("Org lcore_id = %u, cur lcore_id = %u\n",lcore_id, flcore_id);
remapping_slave_resource(lcore_id, flcore_id);
}
l2fwd_main_loop();
/* return lcore_id before return */
if (float_proc) {
flib_free_lcore_id(rte_lcore_id());
mapping_id[lcore_id] = INVALID_MAPPING_ID;
}
return 0;
}
|