author      2016-06-23 10:37:04 +0300
committer   2016-06-23 10:37:04 +0300
commit      f2320939a5deec2db2948788479199931e1f9176 (patch)
tree        fc1b12908503d5b7d67cefe34e0c5fb0f908d2a6 /src
parent      1eed7e59f23d3ab9b957d9822eefe72877e291da (diff)
parent      d04442ab671f768a1b645fb887d4a9cd575c7852 (diff)
Merge branch 'master' into cpu_per_core
Conflicts:
scripts/automation/trex_control_plane/server/singleton_daemon.py
Diffstat (limited to 'src')
36 files changed, 2341 insertions(+), 515 deletions(-)
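
The most repeated change in the diff below replaces each core's TrexWatchDog pointer and integer handle with a per-core TrexMonitor object that is registered with a watchdog singleton. The following minimal sketch uses only the class and method names visible in the patch (m_monitor.create/tickle/disable, TrexWatchDog::getInstance().register_monitor); the header name, the surrounding class, and the meaning of create()'s second argument (assumed to be a timeout in seconds) are illustrative assumptions, not part of the commit.

    #include "trex_watchdog.h"   // assumed header declaring TrexWatchDog and TrexMonitor

    class CExampleCore {
    public:
        void run() {
            // each core now owns a TrexMonitor instead of a TrexWatchDog* plus handle
            m_monitor.create("example core", 1);              // second argument assumed to be a timeout, in seconds
            TrexWatchDog::getInstance().register_monitor(&m_monitor);

            while (do_work()) {
                m_monitor.tickle();                           // prove liveness to the watchdog
            }

            m_monitor.disable();                              // stop being watched before a potentially long join/exit
        }

    private:
        bool do_work();                                       // placeholder for the core's real loop body
        TrexMonitor m_monitor;
    };
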
diff --git a/src/bp_gtest.cpp b/src/bp_gtest.cpp index b36ac6e1..79ea2458 100755 --- a/src/bp_gtest.cpp +++ b/src/bp_gtest.cpp @@ -897,7 +897,7 @@ TEST_F(basic, latency3) { EXPECT_EQ_UINT32(mg.is_active()?1:0, (uint32_t)0)<< "pass"; - mg.start(8, NULL); + mg.start(8, false); mg.stop(); mg.Dump(stdout); mg.DumpShort(stdout); diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp index dc41c8f2..6b5acd42 100755 --- a/src/bp_sim.cpp +++ b/src/bp_sim.cpp @@ -722,7 +722,7 @@ std::string double_to_human_str(double num, if (etype ==KBYE_1024){ f=1024.0; } - while ((abs_num > f ) && (i< max_cnt)){ + while ((abs_num > f ) && (i < max_cnt - 1)){ abs_num/=f; div*=f; i++; @@ -3301,9 +3301,6 @@ bool CFlowGenListPerThread::Create(uint32_t thread_id, m_max_threads=max_threads; m_thread_id=thread_id; - m_watchdog = NULL; - m_watchdog_handle = -1; - m_cpu_cp_u.Create(&m_cpu_dp_u); uint32_t socket_id=rte_lcore_to_socket_id(m_core_id); @@ -4981,29 +4978,27 @@ int CErfIFStl::send_sl_node(CGenNodeStateless *node_sl) { bool is_const = false; if (m) { is_const = true; + rte_pktmbuf_refcnt_update(m,1); }else{ m=node_sl->alloc_node_with_vm(); assert(m); } - if (node_sl->is_stat_needed()) { + if (node_sl->is_stat_needed() && (node_sl->get_stat_hw_id() >= MAX_FLOW_STATS) ) { + /* latency packet. flow stat without latency handled like normal packet in simulation */ uint16_t hw_id = node_sl->get_stat_hw_id(); - if (hw_id >= MAX_FLOW_STATS) { - rte_mbuf_t *mi; - struct flow_stat_payload_header *fsp_head; - mi = node_sl->alloc_flow_stat_mbuf(m, fsp_head, is_const); - fsp_head->seq = 0x12345678; - fsp_head->hw_id = hw_id - MAX_FLOW_STATS; - fsp_head->magic = FLOW_STAT_PAYLOAD_MAGIC; - fsp_head->time_stamp = 0x8899aabbccddeeff; - fill_raw_packet(m,(CGenNode *)node_sl,dir); - rte_pktmbuf_free(mi); - } + rte_mbuf_t *mi; + struct flow_stat_payload_header *fsp_head; + mi = node_sl->alloc_flow_stat_mbuf(m, fsp_head, is_const); + fsp_head->seq = 0x12345678; + fsp_head->hw_id = hw_id - MAX_FLOW_STATS; + fsp_head->magic = FLOW_STAT_PAYLOAD_MAGIC; + fsp_head->time_stamp = 0x8899aabbccddeeff; + fill_raw_packet(mi, (CGenNode *)node_sl, dir); + rte_pktmbuf_free(mi); } else { fill_raw_packet(m,(CGenNode *)node_sl,dir); - if (! is_const) { - rte_pktmbuf_free(m); - } + rte_pktmbuf_free(m); } } /* check that we have mbuf */ diff --git a/src/bp_sim.h b/src/bp_sim.h index 136381f9..8a38beb7 100755 --- a/src/bp_sim.h +++ b/src/bp_sim.h @@ -670,6 +670,14 @@ public: return (btGetMaskBit32(m_flags1,4,4) ? true:false); } + /* split mac is enabled */ + void setWDDisable(bool wd_disable){ + btSetMaskBit32(m_flags1,6,6,wd_disable?1:0); + } + + bool getWDDisable(){ + return (btGetMaskBit32(m_flags1,6,6) ? true:false); + } @@ -3637,9 +3645,7 @@ public: } void tickle() { - if (m_watchdog) { - m_watchdog->tickle(m_watchdog_handle); - } + m_monitor.tickle(); } /* return the dual port ID this thread is attached to in 4 ports configuration @@ -3764,8 +3770,7 @@ public: CTupleGeneratorSmart m_smart_gen; - TrexWatchDog *m_watchdog; - int m_watchdog_handle; + TrexMonitor m_monitor; public: CNodeGenerator m_node_gen; diff --git a/src/common/ef/efence.cpp b/src/common/ef/efence.cpp new file mode 100644 index 00000000..1340a12a --- /dev/null +++ b/src/common/ef/efence.cpp @@ -0,0 +1,930 @@ +/* + * Electric Fence - Red-Zone memory allocator. + * Bruce Perens, 1988, 1993 + * + * This is a special version of malloc() and company for debugging software + * that is suspected of overrunning or underrunning the boundaries of a + * malloc buffer, or touching free memory. 
+ * + * It arranges for each malloc buffer to be followed (or preceded) + * in the address space by an inaccessable virtual memory page, + * and for free memory to be inaccessable. If software touches the + * inaccessable page, it will get an immediate segmentation + * fault. It is then trivial to uncover the offending code using a debugger. + * + * An advantage of this product over most malloc debuggers is that this one + * detects reading out of bounds as well as writing, and this one stops on + * the exact instruction that causes the error, rather than waiting until the + * next boundary check. + * + * There is one product that debugs malloc buffer overruns + * better than Electric Fence: "Purify" from Purify Systems, and that's only + * a small part of what Purify does. I'm not affiliated with Purify, I just + * respect a job well done. + * + * This version of malloc() should not be linked into production software, + * since it tremendously increases the time and memory overhead of malloc(). + * Each malloc buffer will consume a minimum of two virtual memory pages, + * this is 16 kilobytes on many systems. On some systems it will be necessary + * to increase the amount of swap space in order to debug large programs that + * perform lots of allocation, because of the per-buffer overhead. + */ +#include "efence.h" +#include <stdlib.h> +#include <unistd.h> +#include <memory.h> +#include <string.h> +#include <pthread.h> +#include <stdio.h> +#include <stdint.h> + + +extern C_LINKAGE void * ef_malloc(size_t size); +extern C_LINKAGE void ef_free(void * address); +extern C_LINKAGE void * ef_memalign(size_t alignment, size_t userSize); +extern C_LINKAGE void * ef_calloc(size_t nelem, size_t elsize); +extern C_LINKAGE void * ef_valloc (size_t size); +extern C_LINKAGE void * ef_realloc(void * oldBuffer, size_t newSize); +extern C_LINKAGE void ef_init(void); + + + + + +#ifdef malloc +#undef malloc +#endif + +#ifdef calloc +#undef calloc +#endif + +static const char version[] = "\n Electric Fence 2.1" + " Copyright (C) 1987-1998 Bruce Perens.\n"; + +/* + * MEMORY_CREATION_SIZE is the amount of memory to get from the operating + * system at one time. We'll break that memory down into smaller pieces for + * malloc buffers. One megabyte is probably a good value. + */ +#define MEMORY_CREATION_SIZE 10* 1024 * 1024 + +/* + * Enum Mode indicates the status of a malloc buffer. + */ +enum _Mode { + NOT_IN_USE = 0, /* Available to represent a malloc buffer. */ + FREE, /* A free buffer. */ + ALLOCATED, /* A buffer that is in use. */ + PROTECTED, /* A freed buffer that can not be allocated again. */ + INTERNAL_USE /* A buffer used internally by malloc(). */ +}; +typedef enum _Mode Mode; + +/* + * Struct Slot contains all of the information about a malloc buffer except + * for the contents of its memory. + */ +struct _Slot { + void * userAddress; + void * internalAddress; + size_t userSize; + size_t internalSize; + Mode mode; +}; +typedef struct _Slot Slot; + + /* + * EF_DISABLE_BANNER is a global variable used to control whether + * Electric Fence prints its usual startup message. If the value is + * -1, it will be set from the environment default to 0 at run time. + */ +int EF_DISABLE_BANNER = 1; + + +/* + * EF_ALIGNMENT is a global variable used to control the default alignment + * of buffers returned by malloc(), calloc(), and realloc(). It is all-caps + * so that its name matches the name of the environment variable that is used + * to set it. This gives the programmer one less name to remember. 
+ * If the value is -1, it will be set from the environment or sizeof(int) + * at run time. + */ +int EF_ALIGNMENT = 8; + +/* + * EF_PROTECT_FREE is a global variable used to control the disposition of + * memory that is released using free(). It is all-caps so that its name + * matches the name of the environment variable that is used to set it. + * If its value is greater non-zero, memory released by free is made + * inaccessable and never allocated again. Any software that touches free + * memory will then get a segmentation fault. If its value is zero, freed + * memory will be available for reallocation, but will still be inaccessable + * until it is reallocated. + * If the value is -1, it will be set from the environment or to 0 at run-time. + */ +int EF_PROTECT_FREE = -1; + +/* + * EF_PROTECT_BELOW is used to modify the behavior of the allocator. When + * its value is non-zero, the allocator will place an inaccessable page + * immediately _before_ the malloc buffer in the address space, instead + * of _after_ it. Use this to detect malloc buffer under-runs, rather than + * over-runs. It won't detect both at the same time, so you should test your + * software twice, once with this value clear, and once with it set. + * If the value is -1, it will be set from the environment or to zero at + * run-time + */ +int EF_PROTECT_BELOW = -1; + +/* + * EF_ALLOW_MALLOC_0 is set if Electric Fence is to allow malloc(0). I + * trap malloc(0) by default because it is a common source of bugs. + */ +int EF_ALLOW_MALLOC_0 = 0; + +/* + * EF_FREE_WIPES is set if Electric Fence is to wipe the memory content + * of freed blocks. This makes it easier to check if memory is freed or + * not + */ +int EF_FREE_WIPES = 1; + + +static int malloc_init =0; +/* + + * allocationList points to the array of slot structures used to manage the + * malloc arena. + */ +static Slot * allocationList = 0; + +/* + * allocationListSize is the size of the allocation list. This will always + * be a multiple of the page size. + */ +static size_t allocationListSize = 0; + +/* + * slotCount is the number of Slot structures in allocationList. + */ +static size_t slotCount = 0; + +/* + * unUsedSlots is the number of Slot structures that are currently available + * to represent new malloc buffers. When this number gets too low, we will + * create new slots. + */ +static size_t unUsedSlots = 0; + +/* + * slotsPerPage is the number of slot structures that fit in a virtual + * memory page. + */ +static size_t slotsPerPage = 0; + +/* + * internalUse is set when allocating and freeing the allocatior-internal + * data structures. + */ +static int internalUse = 0; + +/* + * noAllocationListProtection is set to tell malloc() and free() not to + * manipulate the protection of the allocation list. This is only set in + * realloc(), which does it to save on slow system calls, and in + * allocateMoreSlots(), which does it because it changes the allocation list. + */ +static int noAllocationListProtection = 0; + +/* + * bytesPerPage is set at run-time to the number of bytes per virtual-memory + * page, as returned by Page_Size(). + */ +static size_t bytesPerPage = 0; + + /* + * mutex to enable multithreaded operation + */ +static pthread_mutex_t mutex ; + + +static void lock() { + /* reentrant mutex -see init */ + pthread_mutex_lock(&mutex); +} + +static void unlock() { + pthread_mutex_unlock(&mutex); +} + + + +/* + * internalError is called for those "shouldn't happen" errors in the + * allocator. 
+ */ +static void +internalError(void) +{ + EF_Abort("Internal error in allocator."); +} + +/* + * initialize sets up the memory allocation arena and the run-time + * configuration information. + */ +static void +initialize(void) +{ + size_t size = MEMORY_CREATION_SIZE; + size_t slack; + char * string; + Slot * slot; + + if ( EF_DISABLE_BANNER == -1 ) { + if ( (string = getenv("EF_DISABLE_BANNER")) != 0 ) + EF_DISABLE_BANNER = atoi(string); + else + EF_DISABLE_BANNER = 0; + } + + if ( EF_DISABLE_BANNER == 0 ) + EF_Print(version); + + /* + * Import the user's environment specification of the default + * alignment for malloc(). We want that alignment to be under + * user control, since smaller alignment lets us catch more bugs, + * however some software will break if malloc() returns a buffer + * that is not word-aligned. + * + * I would like + * alignment to be zero so that we could catch all one-byte + * overruns, however if malloc() is asked to allocate an odd-size + * buffer and returns an address that is not word-aligned, or whose + * size is not a multiple of the word size, software breaks. + * This was the case with the Sun string-handling routines, + * which can do word fetches up to three bytes beyond the end of a + * string. I handle this problem in part by providing + * byte-reference-only versions of the string library functions, but + * there are other functions that break, too. Some in X Windows, one + * in Sam Leffler's TIFF library, and doubtless many others. + */ + if ( EF_ALIGNMENT == -1 ) { + if ( (string = getenv("EF_ALIGNMENT")) != 0 ) + EF_ALIGNMENT = (size_t)atoi(string); + else + EF_ALIGNMENT = sizeof(int); + } + + /* + * See if the user wants to protect the address space below a buffer, + * rather than that above a buffer. + */ + if ( EF_PROTECT_BELOW == -1 ) { + if ( (string = getenv("EF_PROTECT_BELOW")) != 0 ) + EF_PROTECT_BELOW = (atoi(string) != 0); + else + EF_PROTECT_BELOW = 0; + } + + /* + * See if the user wants to protect memory that has been freed until + * the program exits, rather than until it is re-allocated. + */ + if ( EF_PROTECT_FREE == -1 ) { + if ( (string = getenv("EF_PROTECT_FREE")) != 0 ) + EF_PROTECT_FREE = (atoi(string) != 0); + else + EF_PROTECT_FREE = 0; + } + + /* + * See if the user wants to allow malloc(0). + */ + if ( EF_ALLOW_MALLOC_0 == -1 ) { + if ( (string = getenv("EF_ALLOW_MALLOC_0")) != 0 ) + EF_ALLOW_MALLOC_0 = (atoi(string) != 0); + else + EF_ALLOW_MALLOC_0 = 0; + } + + /* + * See if the user wants us to wipe out freed memory. + */ + if ( EF_FREE_WIPES == -1 ) { + if ( (string = getenv("EF_FREE_WIPES")) != 0 ) + EF_FREE_WIPES = (atoi(string) != 0); + else + EF_FREE_WIPES = 0; + } + + /* + * Get the run-time configuration of the virtual memory page size. + */ + bytesPerPage = Page_Size(); + + /* + * Figure out how many Slot structures to allocate at one time. + */ + slotCount = slotsPerPage = bytesPerPage / sizeof(Slot); + allocationListSize = bytesPerPage; + + if ( allocationListSize > size ) + size = allocationListSize; + + if ( (slack = size % bytesPerPage) != 0 ) + size += bytesPerPage - slack; + + /* + * Allocate memory, and break it up into two malloc buffers. The + * first buffer will be used for Slot structures, the second will + * be marked free. 
+ */ + slot = allocationList = (Slot *)Page_Create(size); + memset((char *)allocationList, 0, allocationListSize); + + slot[0].internalSize = slot[0].userSize = allocationListSize; + slot[0].internalAddress = slot[0].userAddress = allocationList; + slot[0].mode = INTERNAL_USE; + if ( size > allocationListSize ) { + slot[1].internalAddress = slot[1].userAddress + = ((char *)slot[0].internalAddress) + slot[0].internalSize; + slot[1].internalSize + = slot[1].userSize = size - slot[0].internalSize; + slot[1].mode = FREE; + } + + /* + * Deny access to the free page, so that we will detect any software + * that treads upon free memory. + */ + Page_DenyAccess(slot[1].internalAddress, slot[1].internalSize); + + /* + * Account for the two slot structures that we've used. + */ + unUsedSlots = slotCount - 2; +} + +/* + * allocateMoreSlots is called when there are only enough slot structures + * left to support the allocation of a single malloc buffer. + */ +static void +allocateMoreSlots(void) +{ + size_t newSize = allocationListSize + bytesPerPage; + void * newAllocation; + void * oldAllocation = allocationList; + + Page_AllowAccess(allocationList, allocationListSize); + noAllocationListProtection = 1; + internalUse = 1; + + newAllocation = ef_malloc(newSize); + memcpy(newAllocation, allocationList, allocationListSize); + memset(&(((char *)newAllocation)[allocationListSize]), 0, bytesPerPage); + + allocationList = (Slot *)newAllocation; + allocationListSize = newSize; + slotCount += slotsPerPage; + unUsedSlots += slotsPerPage; + + ef_free(oldAllocation); + + /* + * Keep access to the allocation list open at this point, because + * I am returning to memalign(), which needs that access. + */ + noAllocationListProtection = 0; + internalUse = 0; +} + +/* + * This is the memory allocator. When asked to allocate a buffer, allocate + * it in such a way that the end of the buffer is followed by an inaccessable + * memory page. If software overruns that buffer, it will touch the bad page + * and get an immediate segmentation fault. It's then easy to zero in on the + * offending code with a debugger. + * + * There are a few complications. If the user asks for an odd-sized buffer, + * we would have to have that buffer start on an odd address if the byte after + * the end of the buffer was to be on the inaccessable page. Unfortunately, + * there is lots of software that asks for odd-sized buffers and then + * requires that the returned address be word-aligned, or the size of the + * buffer be a multiple of the word size. An example are the string-processing + * functions on Sun systems, which do word references to the string memory + * and may refer to memory up to three bytes beyond the end of the string. + * For this reason, I take the alignment requests to memalign() and valloc() + * seriously, and + * + * Electric Fence wastes lots of memory. I do a best-fit allocator here + * so that it won't waste even more. It's slow, but thrashing because your + * working set is too big for a system's RAM is even slower. + */ +extern C_LINKAGE void * +ef_memalign(size_t alignment, size_t userSize) +{ + register Slot * slot; + register size_t count; + Slot * fullSlot = 0; + Slot * emptySlots[2]; + size_t internalSize; + size_t slack; + char * address; + + + if ( userSize == 0 && !EF_ALLOW_MALLOC_0 ) + EF_Abort("Allocating 0 bytes, probably a bug."); + + /* + * If EF_PROTECT_BELOW is set, all addresses returned by malloc() + * and company will be page-aligned. 
+ */ + if ( !EF_PROTECT_BELOW && alignment > 1 ) { + if ( (slack = userSize % alignment) != 0 ) + userSize += alignment - slack; + } + + /* + * The internal size of the buffer is rounded up to the next page-size + * boudary, and then we add another page's worth of memory for the + * dead page. + */ + internalSize = userSize + bytesPerPage; + if ( (slack = internalSize % bytesPerPage) != 0 ) + internalSize += bytesPerPage - slack; + + /* + * These will hold the addresses of two empty Slot structures, that + * can be used to hold information for any memory I create, and any + * memory that I mark free. + */ + emptySlots[0] = 0; + emptySlots[1] = 0; + + /* + * The internal memory used by the allocator is currently + * inaccessable, so that errant programs won't scrawl on the + * allocator's arena. I'll un-protect it here so that I can make + * a new allocation. I'll re-protect it before I return. + */ + if ( !noAllocationListProtection ) + Page_AllowAccess(allocationList, allocationListSize); + + /* + * If I'm running out of empty slots, create some more before + * I don't have enough slots left to make an allocation. + */ + if ( !internalUse && unUsedSlots < 7 ) { + allocateMoreSlots(); + } + + /* + * Iterate through all of the slot structures. Attempt to find a slot + * containing free memory of the exact right size. Accept a slot with + * more memory than we want, if the exact right size is not available. + * Find two slot structures that are not in use. We will need one if + * we split a buffer into free and allocated parts, and the second if + * we have to create new memory and mark it as free. + * + */ + + for ( slot = allocationList, count = slotCount ; count > 0; count-- ) { + if ( slot->mode == FREE + && slot->internalSize >= internalSize ) { + if ( !fullSlot + ||slot->internalSize < fullSlot->internalSize){ + fullSlot = slot; + if ( slot->internalSize == internalSize + && emptySlots[0] ) + break; /* All done, */ + } + } + else if ( slot->mode == NOT_IN_USE ) { + if ( !emptySlots[0] ) + emptySlots[0] = slot; + else if ( !emptySlots[1] ) + emptySlots[1] = slot; + else if ( fullSlot + && fullSlot->internalSize == internalSize ) + break; /* All done. */ + } + slot++; + } + if ( !emptySlots[0] ) + internalError(); + + if ( !fullSlot ) { + /* + * I get here if I haven't been able to find a free buffer + * with all of the memory I need. I'll have to create more + * memory. I'll mark it all as free, and then split it into + * free and allocated portions later. + */ + size_t chunkSize = MEMORY_CREATION_SIZE; + + if ( !emptySlots[1] ) + internalError(); + + if ( chunkSize < internalSize ) + chunkSize = internalSize; + + if ( (slack = chunkSize % bytesPerPage) != 0 ) + chunkSize += bytesPerPage - slack; + + /* Use up one of the empty slots to make the full slot. */ + fullSlot = emptySlots[0]; + emptySlots[0] = emptySlots[1]; + fullSlot->internalAddress = Page_Create(chunkSize); + fullSlot->internalSize = chunkSize; + fullSlot->mode = FREE; + unUsedSlots--; + } + + /* + * If I'm allocating memory for the allocator's own data structures, + * mark it INTERNAL_USE so that no errant software will be able to + * free it. + */ + if ( internalUse ) + fullSlot->mode = INTERNAL_USE; + else + fullSlot->mode = ALLOCATED; + + /* + * If the buffer I've found is larger than I need, split it into + * an allocated buffer with the exact amount of memory I need, and + * a free buffer containing the surplus memory. 
+ */ + if ( fullSlot->internalSize > internalSize ) { + emptySlots[0]->internalSize + = fullSlot->internalSize - internalSize; + emptySlots[0]->internalAddress + = ((char *)fullSlot->internalAddress) + internalSize; + emptySlots[0]->mode = FREE; + fullSlot->internalSize = internalSize; + unUsedSlots--; + } + + if ( !EF_PROTECT_BELOW ) { + /* + * Arrange the buffer so that it is followed by an inaccessable + * memory page. A buffer overrun that touches that page will + * cause a segmentation fault. + */ + address = (char *)fullSlot->internalAddress; + + /* Set up the "live" page. */ + if ( internalSize - bytesPerPage > 0 ) + Page_AllowAccess( + fullSlot->internalAddress + ,internalSize - bytesPerPage); + + address += internalSize - bytesPerPage; + + /* Set up the "dead" page. */ + Page_DenyAccess(address, bytesPerPage); + + /* Figure out what address to give the user. */ + address -= userSize; + } + else { /* EF_PROTECT_BELOW != 0 */ + /* + * Arrange the buffer so that it is preceded by an inaccessable + * memory page. A buffer underrun that touches that page will + * cause a segmentation fault. + */ + address = (char *)fullSlot->internalAddress; + + /* Set up the "dead" page. */ + Page_DenyAccess(address, bytesPerPage); + + address += bytesPerPage; + + /* Set up the "live" page. */ + if ( internalSize - bytesPerPage > 0 ) + Page_AllowAccess(address, internalSize - bytesPerPage); + } + + fullSlot->userAddress = address; + fullSlot->userSize = userSize; + + /* + * Make the pool's internal memory inaccessable, so that the program + * being debugged can't stomp on it. + */ + if ( !internalUse ) + Page_DenyAccess(allocationList, allocationListSize); + + return address; +} + +/* + * Find the slot structure for a user address. + */ +static Slot * +slotForUserAddress(void * address) +{ + register Slot * slot = allocationList; + register size_t count = slotCount; + + for ( ; count > 0; count-- ) { + if ( slot->userAddress == address ) + return slot; + slot++; + } + + return 0; +} + +/* + * Find the slot structure for an internal address. + */ +static Slot * +slotForInternalAddress(void * address) +{ + register Slot * slot = allocationList; + register size_t count = slotCount; + + for ( ; count > 0; count-- ) { + if ( slot->internalAddress == address ) + return slot; + slot++; + } + return 0; +} + +/* + * Given the internal address of a buffer, find the buffer immediately + * before that buffer in the address space. This is used by free() to + * coalesce two free buffers into one. + */ +static Slot * +slotForInternalAddressPreviousTo(void * address) +{ + register Slot * slot = allocationList; + register size_t count = slotCount; + + for ( ; count > 0; count-- ) { + if ( ((char *)slot->internalAddress) + + slot->internalSize == address ) + return slot; + slot++; + } + return 0; +} + +extern C_LINKAGE void +ef_free(void * address) +{ + Slot * slot; + Slot * previousSlot = 0; + Slot * nextSlot = 0; + + //printf(" ::free %p \n",address); + lock(); + + if ( address == 0 ) { + unlock(); + return; + } + + if ( allocationList == 0 ) + EF_Abort("free() called before first malloc()."); + + if ( !noAllocationListProtection ) + Page_AllowAccess(allocationList, allocationListSize); + + slot = slotForUserAddress(address); + + if ( !slot ) + EF_Abort("free(%a): address not from malloc().", address); + + if ( slot->mode != ALLOCATED ) { + if ( internalUse && slot->mode == INTERNAL_USE ) + /* Do nothing. */; + else { + EF_Abort( + "free(%a): freeing free memory." 
+ ,address); + } + } + + if ( EF_PROTECT_FREE ) + slot->mode = PROTECTED; + else + slot->mode = FREE; + + if ( EF_FREE_WIPES ) + memset(slot->userAddress, 0xbd, slot->userSize); + + previousSlot = slotForInternalAddressPreviousTo(slot->internalAddress); + nextSlot = slotForInternalAddress( + ((char *)slot->internalAddress) + slot->internalSize); + + if ( previousSlot + && (previousSlot->mode == FREE || previousSlot->mode == PROTECTED) ) { + /* Coalesce previous slot with this one. */ + previousSlot->internalSize += slot->internalSize; + if ( EF_PROTECT_FREE ) + previousSlot->mode = PROTECTED; + + slot->internalAddress = slot->userAddress = 0; + slot->internalSize = slot->userSize = 0; + slot->mode = NOT_IN_USE; + slot = previousSlot; + unUsedSlots++; + } + if ( nextSlot + && (nextSlot->mode == FREE || nextSlot->mode == PROTECTED) ) { + /* Coalesce next slot with this one. */ + slot->internalSize += nextSlot->internalSize; + nextSlot->internalAddress = nextSlot->userAddress = 0; + nextSlot->internalSize = nextSlot->userSize = 0; + nextSlot->mode = NOT_IN_USE; + unUsedSlots++; + } + + slot->userAddress = slot->internalAddress; + slot->userSize = slot->internalSize; + + /* + * Free memory is _always_ set to deny access. When EF_PROTECT_FREE + * is true, free memory is never reallocated, so it remains access + * denied for the life of the process. When EF_PROTECT_FREE is false, + * the memory may be re-allocated, at which time access to it will be + * allowed again. + */ + Page_DenyAccess(slot->internalAddress, slot->internalSize); + + if ( !noAllocationListProtection ) + Page_DenyAccess(allocationList, allocationListSize); + + unlock(); +} + +extern C_LINKAGE void * +ef_realloc(void * oldBuffer, size_t newSize) +{ + void * newBuffer = ef_malloc(newSize); + + lock(); + + if ( oldBuffer ) { + size_t size; + Slot * slot; + + Page_AllowAccess(allocationList, allocationListSize); + noAllocationListProtection = 1; + + slot = slotForUserAddress(oldBuffer); + + if ( slot == 0 ) + EF_Abort( + "realloc(%a, %d): address not from malloc()." + ,oldBuffer + ,newSize); + + if ( newSize < (size = slot->userSize) ) + size = newSize; + + if ( size > 0 ) + memcpy(newBuffer, oldBuffer, size); + + ef_free(oldBuffer); + noAllocationListProtection = 0; + Page_DenyAccess(allocationList, allocationListSize); + + if ( size < newSize ) + memset(&(((char *)newBuffer)[size]), 0, newSize - size); + + /* Internal memory was re-protected in free() */ + } + unlock(); + + return newBuffer; +} + +extern C_LINKAGE void * +ef_malloc(size_t size) +{ + + if ( malloc_init == 0 ){ + ef_init(); + } + + + void *allocation; + + lock(); + allocation=ef_memalign(EF_ALIGNMENT, size); + + /* put 0xaa into the memset to find uninit issues */ + memset(allocation,0xaa,size); + #if 0 + int i; + uint8_t *p=(uint8_t *)allocation; + for (i=0; i<size; i++) { + p[i]=(rand()&0xff); + } + #endif + + unlock(); + //printf(":: alloc %p %d \n",allocation,(int)size); + return allocation; +} + +extern C_LINKAGE void * +ef_calloc(size_t nelem, size_t elsize) +{ + size_t size = nelem * elsize; + void * allocation; + + lock(); + + allocation = ef_malloc(size); + memset(allocation, 0, size); + unlock(); + + return allocation; +} + +/* + * This will catch more bugs if you remove the page alignment, but it + * will break some software. 
+ */ +extern C_LINKAGE void * +ef_valloc (size_t size) +{ + void * allocation; + + lock(); + allocation= ef_memalign(bytesPerPage, size); + unlock(); + + return allocation; +} + + +#define REPLACE_MALLOC + +#ifdef REPLACE_MALLOC + +extern C_LINKAGE void +free(void * address) +{ + ef_free(address); +} + +extern C_LINKAGE void * +realloc(void * oldBuffer, size_t newSize) +{ + return (ef_realloc(oldBuffer, newSize)); +} + +extern C_LINKAGE void * +malloc(size_t size) +{ + return (ef_malloc(size)); +} + +extern C_LINKAGE void * +calloc(size_t nelem, size_t elsize) +{ + return (ef_calloc(nelem, elsize)); +} + +/* + * This will catch more bugs if you remove the page alignment, but it + * will break some software. + */ +extern C_LINKAGE void * +valloc (size_t size) +{ + return (ef_valloc(size)); + +} +#endif + + + +extern C_LINKAGE void ef_init(void ){ + + if ( malloc_init == 0 ){ + malloc_init=1; + pthread_mutexattr_t Attr; + + pthread_mutexattr_init(&Attr); + pthread_mutexattr_settype(&Attr, PTHREAD_MUTEX_RECURSIVE); + + if ( pthread_mutex_init(&mutex, &Attr) != 0 ){ + exit(-1); + } + initialize(); + } + +} + diff --git a/src/common/ef/efence.h b/src/common/ef/efence.h new file mode 100644 index 00000000..60eb30ff --- /dev/null +++ b/src/common/ef/efence.h @@ -0,0 +1,42 @@ +#include <sys/types.h> +#include <sys/param.h> + +/* + * ef_number is the largest unsigned integer we'll need. On systems that + * support 64-bit pointers, this may be "unsigned long long". + */ +#if defined(USE_LONG_LONG) +typedef unsigned long long ef_number; +#else +typedef unsigned long ef_number; +#endif + +/* + * NBBY is the number of bits per byte. Some systems define it in + * <sys/param.h> . + */ +#ifndef NBBY +#define NBBY 8 +#endif + +/* + * This is used to declare functions with "C" linkage if we are compiling + * with C++ . + */ +#ifdef __cplusplus +#define C_LINKAGE "C" +#else +#define C_LINKAGE +#endif + +void Page_AllowAccess(void * address, size_t size); +void * Page_Create(size_t size); +void Page_Delete(void * address, size_t size); +void Page_DenyAccess(void * address, size_t size); +size_t Page_Size(void); + +void EF_Abort(const char * message, ...); +void EF_Exit(const char * message, ...); +void EF_Print(const char * message, ...); +void EF_Lock(); +void EF_UnLock(); diff --git a/src/common/ef/eftest.c b/src/common/ef/eftest.c new file mode 100644 index 00000000..372ac596 --- /dev/null +++ b/src/common/ef/eftest.c @@ -0,0 +1,219 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <setjmp.h> +#include <signal.h> +#include "efence.h" + +/* + * Electric Fence confidence tests. + * Make sure all of the various functions of Electric Fence work correctly. + */ + +#ifndef PAGE_PROTECTION_VIOLATED_SIGNAL +#define PAGE_PROTECTION_VIOLATED_SIGNAL SIGSEGV +#endif + +struct diagnostic { + int (*test)(void); + int expectedStatus; + const char * explanation; +}; + +extern int EF_PROTECT_BELOW; +extern int EF_ALIGNMENT; + +static sigjmp_buf env; + +/* + * There is still too little standardization of the arguments and return + * type of signal handler functions. + */ +static +void +segmentationFaultHandler( +int signalNumber +#if ( defined(_AIX) ) +, ... 
+#endif +) + { + signal(PAGE_PROTECTION_VIOLATED_SIGNAL, SIG_DFL); + siglongjmp(env, 1); +} + +static int +gotSegmentationFault(int (*test)(void)) +{ + if ( sigsetjmp(env,1) == 0 ) { + int status; + + signal(PAGE_PROTECTION_VIOLATED_SIGNAL + ,segmentationFaultHandler); + status = (*test)(); + signal(PAGE_PROTECTION_VIOLATED_SIGNAL, SIG_DFL); + return status; + } + else + return 1; +} + +static char * allocation; +/* c is global so that assignments to it won't be optimized out. */ +char c; + +static int +testSizes(void) +{ + /* + * If ef_number can't hold all of the bits of a void *, have the user + * add -DUSE_ LONG_LONG to the compiler flags so that ef_number will be + * declared as "unsigned long long" instead of "unsigned long". + */ + return ( sizeof(ef_number) < sizeof(void *) ); +} + +static int +allocateMemory(void) +{ + allocation = (char *)malloc(1); + + if ( allocation != 0 ) + return 0; + else + return 1; +} + +static int +freeMemory(void) +{ + free(allocation); + return 0; +} + +static int +protectBelow(void) +{ + EF_PROTECT_BELOW = 1; + return 0; +} + +static int +read0(void) +{ + c = *allocation; + + return 0; +} + +static int +write0(void) +{ + *allocation = 1; + + return 0; +} + +static int +read1(void) +{ + c = allocation[1]; + + return 0; +} + +static int +readMinus1(void) +{ + c = allocation[-1]; + return 0; +} + +static struct diagnostic diagnostics[] = { + { + testSizes, 0, + "Please add -DLONG_LONG to the compiler flags and recompile." + }, + { + allocateMemory, 0, + "Allocation 1: This test allocates a single byte of memory." + }, + { + read0, 0, + "Read valid memory 1: This test reads the allocated memory." + }, + { + write0, 0, + "Write valid memory 1: This test writes the allocated memory." + }, + { + read1, 1, + "Read overrun: This test reads beyond the end of the buffer." + }, + { + freeMemory, 0, + "Free memory: This test frees the allocated memory." + }, + { + protectBelow, 0, + "Protect below: This sets Electric Fence to protect\n" + "the lower boundary of a malloc buffer, rather than the\n" + "upper boundary." + }, + { + allocateMemory, 0, + "Allocation 2: This allocates memory with the lower boundary" + " protected." + }, + { + read0, 0, + "Read valid memory 2: This test reads the allocated memory." + }, + { + write0, 0, + "Write valid memory 2: This test writes the allocated memory." + }, + { + readMinus1, 1, + "Read underrun: This test reads before the beginning of the" + " buffer." + }, + { + 0, 0, 0 + } +}; + +static const char failedTest[] + = "Electric Fence confidence test failed.\n"; + +static const char newline = '\n'; + +int +main(int argc, char * * argv) +{ + static const struct diagnostic * diag = diagnostics; + + + EF_PROTECT_BELOW = 0; + EF_ALIGNMENT = 0; + + while ( diag->explanation != 0 ) { + int status = gotSegmentationFault(diag->test); + + if ( status != diag->expectedStatus ) { + /* + * Don't use stdio to print here, because stdio + * uses malloc() and we've just proven that malloc() + * is broken. Also, use _exit() instead of exit(), + * because _exit() doesn't flush stdio. 
+ */ + write(2, failedTest, sizeof(failedTest) - 1); + write(2, diag->explanation, strlen(diag->explanation)); + write(2, &newline, 1); + _exit(-1); + } + diag++; + } + return 0; +} diff --git a/src/common/ef/page.cpp b/src/common/ef/page.cpp new file mode 100644 index 00000000..8a5a8f1c --- /dev/null +++ b/src/common/ef/page.cpp @@ -0,0 +1,193 @@ +#include "efence.h" +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> + +/* + * Lots of systems are missing the definition of PROT_NONE. + */ +#ifndef PROT_NONE +#define PROT_NONE 0 +#endif + +/* + * 386 BSD has MAP_ANON instead of MAP_ANONYMOUS. + */ +#if ( !defined(MAP_ANONYMOUS) && defined(MAP_ANON) ) +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* + * For some reason, I can't find mprotect() in any of the headers on + * IRIX or SunOS 4.1.2 + */ +/* extern C_LINKAGE int mprotect(void * addr, size_t len, int prot); */ + + + +//#ifdef _64BIT_PLATFORM + static caddr_t startAddr = (caddr_t) 0xc00000000000; +//#else + //static caddr_t startAddr = (caddr_t) 0; +//#endif + + +#if ( !defined(sgi) && !defined(_AIX) ) +extern int sys_nerr; +/*extern char * sys_errlist[];*/ +#endif + +static const char * +stringErrorReport(void) +{ +#if ( defined(sgi) ) + return strerror(oserror()); +#elif ( defined(_AIX) ) + return strerror(errno); +#else + //if ( errno > 0 && errno < sys_nerr ) + return "Unknown error.\n"; + //return sys_errlist[errno]; + //else + //return "Unknown error.\n"; +#endif +} + +/* + * Create memory. + */ +#if defined(MAP_ANONYMOUS) +void * +Page_Create(size_t size) +{ + caddr_t allocation; + + /* + * In this version, "startAddr" is a _hint_, not a demand. + * When the memory I map here is contiguous with other + * mappings, the allocator can coalesce the memory from two + * or more mappings into one large contiguous chunk, and thus + * might be able to find a fit that would not otherwise have + * been possible. I could _force_ it to be contiguous by using + * the MMAP_FIXED flag, but I don't want to stomp on memory mappings + * generated by other software, etc. + */ + allocation = (caddr_t) mmap( + startAddr + ,size + ,PROT_READ|PROT_WRITE + ,MAP_PRIVATE|MAP_ANONYMOUS + ,-1 + ,0); + +#ifndef __hpux + /* + * Set the "address hint" for the next mmap() so that it will abut + * the mapping we just created. + * + * HP/UX 9.01 has a kernel bug that makes mmap() fail sometimes + * when given a non-zero address hint, so we'll leave the hint set + * to zero on that system. HP recently told me this is now fixed. + * Someone please tell me when it is probable to assume that most + * of those systems that were running 9.01 have been upgraded. + */ + startAddr = allocation + size; +#endif + + if ( allocation == (caddr_t)-1 ) + EF_Exit("mmap() failed: %s", stringErrorReport()); + + return (void *)allocation; +} +#else +void * +Page_Create(size_t size) +{ + static int devZeroFd = -1; + caddr_t allocation; + + if ( devZeroFd == -1 ) { + devZeroFd = open("/dev/zero", O_RDWR); + if ( devZeroFd < 0 ) + EF_Exit( + "open() on /dev/zero failed: %s" + ,stringErrorReport()); + } + + /* + * In this version, "startAddr" is a _hint_, not a demand. + * When the memory I map here is contiguous with other + * mappings, the allocator can coalesce the memory from two + * or more mappings into one large contiguous chunk, and thus + * might be able to find a fit that would not otherwise have + * been possible. 
I could _force_ it to be contiguous by using + * the MMAP_FIXED flag, but I don't want to stomp on memory mappings + * generated by other software, etc. + */ + allocation = (caddr_t) mmap( + startAddr + ,size + ,PROT_READ|PROT_WRITE + ,MAP_PRIVATE + ,devZeroFd + ,0); + + startAddr = allocation + size; + + if ( allocation == (caddr_t)-1 ) + EF_Exit("mmap() failed: %s", stringErrorReport()); + + return (void *)allocation; +} +#endif + +static void +mprotectFailed(void) +{ + EF_Exit("mprotect() failed: %s", stringErrorReport()); +} + +void +Page_AllowAccess(void * address, size_t size) +{ + if ( mprotect((caddr_t)address, size, PROT_READ|PROT_WRITE) < 0 ) + mprotectFailed(); +} + +void +Page_DenyAccess(void * address, size_t size) +{ + if ( mprotect((caddr_t)address, size, PROT_NONE) < 0 ) + mprotectFailed(); +} + +void +Page_Delete(void * address, size_t size) +{ + Page_DenyAccess(address, size); +} + +#if defined(_SC_PAGESIZE) +size_t +Page_Size(void) +{ + return (size_t)sysconf(_SC_PAGESIZE); +} +#elif defined(_SC_PAGE_SIZE) +size_t +Page_Size(void) +{ + return (size_t)sysconf(_SC_PAGE_SIZE); +} +#else +/* extern int getpagesize(); */ +size_t +Page_Size(void) +{ + return getpagesize(); +} +#endif diff --git a/src/common/ef/print.cpp b/src/common/ef/print.cpp new file mode 100644 index 00000000..c28189e5 --- /dev/null +++ b/src/common/ef/print.cpp @@ -0,0 +1,170 @@ +#include "efence.h" +#include <stdlib.h> +#include <unistd.h> +#include <stdarg.h> +#include <string.h> +#include <signal.h> + +/* + * These routines do their printing without using stdio. Stdio can't + * be used because it calls malloc(). Internal routines of a malloc() + * debugger should not re-enter malloc(), so stdio is out. + */ + +/* + * NUMBER_BUFFER_SIZE is the longest character string that could be needed + * to represent an unsigned integer, assuming we might print in base 2. + */ +#define NUMBER_BUFFER_SIZE (sizeof(ef_number) * NBBY) + +static void +printNumber(ef_number number, ef_number base) +{ + char buffer[NUMBER_BUFFER_SIZE]; + char * s = &buffer[NUMBER_BUFFER_SIZE]; + int size; + + do { + ef_number digit; + + if ( --s == buffer ) + EF_Abort("Internal error printing number."); + + digit = number % base; + + if ( digit < 10 ) + *s = '0' + digit; + else + *s = 'a' + digit - 10; + + } while ( (number /= base) > 0 ); + + size = &buffer[NUMBER_BUFFER_SIZE] - s; + + if ( size > 0 ) + write(2, s, size); +} + +static void +vprint(const char * pattern, va_list args) +{ + static const char bad_pattern[] = + "\nBad pattern specifier %%%c in EF_Print().\n"; + const char * s = pattern; + char c; + + while ( (c = *s++) != '\0' ) { + if ( c == '%' ) { + c = *s++; + switch ( c ) { + case '%': + (void) write(2, &c, 1); + break; + case 'a': + /* + * Print an address passed as a void pointer. + * The type of ef_number must be set so that + * it is large enough to contain all of the + * bits of a void pointer. + */ + printNumber( + (ef_number)va_arg(args, void *) + ,0x10); + break; + case 's': + { + const char * string; + size_t length; + + string = va_arg(args, char *); + length = strlen(string); + + (void) write(2, string, length); + } + break; + case 'd': + { + int n = va_arg(args, int); + + if ( n < 0 ) { + char c = '-'; + write(2, &c, 1); + n = -n; + } + printNumber(n, 10); + } + break; + case 'x': + printNumber(va_arg(args, u_int), 0x10); + break; + case 'c': + { /*Cast used, since char gets promoted to int in ... 
*/ + char c = (char) va_arg(args, int); + + (void) write(2, &c, 1); + } + break; + default: + { + EF_Print(bad_pattern, c); + } + + } + } + else + (void) write(2, &c, 1); + } +} + +void +EF_Abort(const char * pattern, ...) +{ + va_list args; + + va_start(args, pattern); + + EF_Print("\nElectricFence Aborting: "); + vprint(pattern, args); + EF_Print("\n"); + + va_end(args); + + /* + * I use kill(getpid(), SIGILL) instead of abort() because some + * mis-guided implementations of abort() flush stdio, which can + * cause malloc() or free() to be called. + */ + kill(getpid(), SIGILL); + /* Just in case something handles SIGILL and returns, exit here. */ + _exit(-1); +} + +void +EF_Exit(const char * pattern, ...) +{ + va_list args; + + va_start(args, pattern); + + EF_Print("\nElectricFence Exiting: "); + vprint(pattern, args); + EF_Print("\n"); + + va_end(args); + + /* + * I use _exit() because the regular exit() flushes stdio, + * which may cause malloc() or free() to be called. + */ + _exit(-1); +} + +void +EF_Print(const char * pattern, ...) +{ + va_list args; + + va_start(args, pattern); + vprint(pattern, args); + va_end(args); +} diff --git a/src/common/ef/tstheap.c b/src/common/ef/tstheap.c new file mode 100644 index 00000000..c712fed5 --- /dev/null +++ b/src/common/ef/tstheap.c @@ -0,0 +1,61 @@ +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <limits.h> +#include "efence.h" + +/* + * This is a simple program to exercise the allocator. It allocates and frees + * memory in a pseudo-random fashion. It should run silently, using up time + * and resources on your system until you stop it or until it has gone + * through TEST_DURATION (or the argument) iterations of the loop. + */ + +extern C_LINKAGE double drand48(void); /* For pre-ANSI C systems */ + +#define POOL_SIZE 1024 +#define LARGEST_BUFFER 30000 +#define TEST_DURATION 1000000 + +void * pool[POOL_SIZE]; + +#ifdef FAKE_DRAND48 +/* + * Add -DFAKE_DRAND48 to your compile flags if your system doesn't + * provide drand48(). 
+ */ + +#ifndef ULONG_MAX +#define ULONG_MAX ~(1L) +#endif + +double +drand48(void) +{ + return (random() / (double)ULONG_MAX); +} +#endif + +int +main(int argc, char * * argv) +{ + int count = 0; + int duration = TEST_DURATION; + + if ( argc >= 2 ) + duration = atoi(argv[1]); + + for ( ; count < duration; count++ ) { + void * * element = &pool[(int)(drand48() * POOL_SIZE)]; + size_t size = (size_t)(drand48() * (LARGEST_BUFFER + 1)); + + if ( *element ) { + free( *element ); + *element = 0; + } + else if ( size > 0 ) { + *element = malloc(size); + } + } + return 0; +} diff --git a/src/dpdk22/drivers/net/i40e/i40e_ethdev.c b/src/dpdk22/drivers/net/i40e/i40e_ethdev.c index ae195683..5646eb53 100644 --- a/src/dpdk22/drivers/net/i40e/i40e_ethdev.c +++ b/src/dpdk22/drivers/net/i40e/i40e_ethdev.c @@ -3868,6 +3868,30 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi) return i40e_vsi_add_mac(vsi, &filter); } +#ifdef TREX_PATCH +#define LOW_LATENCY_WORKAROUND +#ifdef LOW_LATENCY_WORKAROUND +static int +i40e_vsi_update_tc_max_bw(struct i40e_vsi *vsi, u16 credit){ + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); + int ret; + + if (!vsi->seid) { + PMD_DRV_LOG(ERR, "seid not valid"); + return -EINVAL; + } + + ret = i40e_aq_config_vsi_bw_limit(hw, vsi->seid, credit,0, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "Failed to configure TC BW"); + return ret; + } + return (0); +} +#endif +#endif + + #define I40E_3_BIT_MASK 0x7 /* * i40e_vsi_get_bw_config - Query VSI BW Information @@ -4426,6 +4450,39 @@ i40e_pf_setup(struct i40e_pf *pf) } pf->main_vsi = vsi; + +#ifdef TREX_PATCH +#ifdef LOW_LATENCY_WORKAROUND + /* + Workaround for low latency issue. + It seems RR does not work as expected both from same QSet and from different QSet + Quanta could be very high and this creates very high latency, especially with long packet size (9K) + This is a workaround limit the main (bulk) VSI to 99% of the BW and by that support low latency (suggested by Intel) + ETS with with strict priority and 127 credit does not work . 
+ */ + + if (hw->phy.link_info.link_speed == I40E_LINK_SPEED_10GB) { + i40e_vsi_update_tc_max_bw(vsi,199); + }else{ + if (hw->phy.link_info.link_speed == I40E_LINK_SPEED_40GB) { + i40e_vsi_update_tc_max_bw(vsi,799); + }else{ + PMD_DRV_LOG(ERR, "Unknown phy speed %d",hw->phy.link_info.link_speed); + } + } + + /* add for low latency a new VSI for Queue set */ + vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi, 0); + if (!vsi) { + PMD_DRV_LOG(ERR, "Setup of low latency vsi failed"); + return I40E_ERR_NOT_READY; + } + + pf->ll_vsi = vsi; + +#endif +#endif + /* Configure filter control */ memset(&settings, 0, sizeof(settings)); if (hw->func_caps.rss_table_size == ETH_RSS_RETA_SIZE_128) diff --git a/src/dpdk22/drivers/net/i40e/i40e_ethdev.h b/src/dpdk22/drivers/net/i40e/i40e_ethdev.h index 1f9792b3..53d6afdd 100644 --- a/src/dpdk22/drivers/net/i40e/i40e_ethdev.h +++ b/src/dpdk22/drivers/net/i40e/i40e_ethdev.h @@ -396,6 +396,7 @@ TAILQ_HEAD(i40e_mirror_rule_list, i40e_mirror_rule); struct i40e_pf { struct i40e_adapter *adapter; /* The adapter this PF associate to */ struct i40e_vsi *main_vsi; /* pointer to main VSI structure */ + struct i40e_vsi * ll_vsi; // TREX_PATCH uint16_t mac_seid; /* The seid of the MAC of this PF */ uint16_t main_vsi_seid; /* The seid of the main VSI */ uint16_t max_num_vsi; diff --git a/src/dpdk22/drivers/net/i40e/i40e_rxtx.c b/src/dpdk22/drivers/net/i40e/i40e_rxtx.c index 39d94eca..ee3c3c1a 100644 --- a/src/dpdk22/drivers/net/i40e/i40e_rxtx.c +++ b/src/dpdk22/drivers/net/i40e/i40e_rxtx.c @@ -1923,6 +1923,35 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +// TREX_PATCH +// Based on i40e_pf_get_vsi_by_qindex. Return low latency VSI one queue. +#define LOW_LATENCY_WORKAROUND +#ifdef LOW_LATENCY_WORKAROUND +static struct i40e_vsi* +i40e_pf_tx_get_vsi_by_qindex(struct i40e_pf *pf, uint16_t queue_idx) +{ + // For last queue index, return low latency VSI + if (queue_idx == pf->dev_data->nb_tx_queues-1) { + return pf->ll_vsi; + } + + /* the queue in MAIN VSI range */ + if (queue_idx < pf->dev_data->nb_tx_queues) + return pf->main_vsi; + + + queue_idx -= pf->main_vsi->nb_qps; + + /* queue_idx is greater than VMDQ VSIs range */ + if (queue_idx > pf->nb_cfg_vmdq_vsi * pf->vmdq_nb_qps - 1) { + PMD_INIT_LOG(ERR, "queue_idx out of range. VMDQ configured?"); + return NULL; + } + + return pf->vmdq[queue_idx / pf->vmdq_nb_qps].vsi; +} +#endif + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. 
But from driver's @@ -2334,8 +2363,14 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; - } else + } else { +// TREX_PATCH +#ifdef LOW_LATENCY_WORKAROUND + vsi = i40e_pf_tx_get_vsi_by_qindex(pf, queue_idx); +#else vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); +#endif + } if (vsi == NULL) { PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) " diff --git a/src/flow_stat.cpp b/src/flow_stat.cpp index 98b9494b..cb7a1bf9 100644 --- a/src/flow_stat.cpp +++ b/src/flow_stat.cpp @@ -227,7 +227,7 @@ CFlowStatUserIdMap::add_user_id(uint32_t user_id, uint8_t proto) { << std::endl; #endif - CFlowStatUserIdInfo *new_id = new CFlowStatUserIdInfo(proto); + CFlowStatUserIdInfo *new_id; if (proto == PAYLOAD_RULE_PROTO) { new_id = new CFlowStatUserIdInfoPayload(proto); @@ -390,7 +390,8 @@ uint16_t CFlowStatUserIdMap::unmap(uint32_t user_id) { /************** class CFlowStatHwIdMap ***************/ CFlowStatHwIdMap::CFlowStatHwIdMap() { - m_map = NULL; + m_map = NULL; // must call create in order to work with the class + m_num_free = 0; // to make coverity happy, init this here too. } CFlowStatHwIdMap::~CFlowStatHwIdMap() { @@ -466,10 +467,21 @@ CFlowStatRuleMgr::CFlowStatRuleMgr() { m_hw_id_map_payload.create(MAX_FLOW_STATS_PAYLOAD); memset(m_rx_cant_count_err, 0, sizeof(m_rx_cant_count_err)); memset(m_tx_cant_count_err, 0, sizeof(m_tx_cant_count_err)); + m_num_ports = 0; // need to call create to init } CFlowStatRuleMgr::~CFlowStatRuleMgr() { delete m_parser; +#ifdef TREX_SIM + // In simulator, nobody handles the messages to RX, so need to free them to have clean valgrind run. + if (m_ring_to_rx) { + CGenNode *msg = NULL; + while (! m_ring_to_rx->isEmpty()) { + m_ring_to_rx->Dequeue(msg); + delete msg; + } + } +#endif } void CFlowStatRuleMgr::create() { @@ -480,7 +492,7 @@ void CFlowStatRuleMgr::create() { m_api = tstateless->get_platform_api(); assert(m_api); m_api->get_interface_stat_info(0, num_counters, cap); - m_api->get_port_num(m_num_ports); + m_api->get_port_num(m_num_ports); // This initialize m_num_ports for (uint8_t port = 0; port < m_num_ports; port++) { assert(m_api->reset_hw_flow_stats(port) == 0); } @@ -537,7 +549,20 @@ void CFlowStatRuleMgr::init_stream(TrexStream * stream) { stream->m_rx_check.m_hw_id = HW_ID_INIT; } +int CFlowStatRuleMgr::verify_stream(TrexStream * stream) { + return add_stream_internal(stream, false); +} + int CFlowStatRuleMgr::add_stream(TrexStream * stream) { + return add_stream_internal(stream, true); +} + +/* + * Helper function for adding/verifying streams + * stream - stream to act on + * do_action - if false, just verify. Do not change any state, or add to database. + */ +int CFlowStatRuleMgr::add_stream_internal(TrexStream * stream, bool do_action) { #ifdef __DEBUG_FUNC_ENTRY__ std::cout << __METHOD_NAME__ << " user id:" << stream->m_rx_check.m_pg_id << std::endl; stream_dump(stream); @@ -570,7 +595,9 @@ int CFlowStatRuleMgr::add_stream(TrexStream * stream) { } // throws exception if there is error - m_user_id_map.add_stream(stream->m_rx_check.m_pg_id, l4_proto); + if (do_action) { + m_user_id_map.add_stream(stream->m_rx_check.m_pg_id, l4_proto); + } break; case TrexPlatformApi::IF_STAT_PAYLOAD: uint16_t payload_len; @@ -582,14 +609,17 @@ int CFlowStatRuleMgr::add_stream(TrexStream * stream) { + " payload bytes for payload rules. 
Packet only has " + std::to_string(payload_len) + " bytes" , TrexException::T_FLOW_STAT_PAYLOAD_TOO_SHORT); } - m_user_id_map.add_stream(stream->m_rx_check.m_pg_id, PAYLOAD_RULE_PROTO); + if (do_action) { + m_user_id_map.add_stream(stream->m_rx_check.m_pg_id, PAYLOAD_RULE_PROTO); + } break; default: throw TrexFStatEx("Wrong rule_type", TrexException::T_FLOW_STAT_BAD_RULE_TYPE); break; } - - stream->m_rx_check.m_hw_id = HW_ID_FREE; + if (do_action) { + stream->m_rx_check.m_hw_id = HW_ID_FREE; + } return 0; } diff --git a/src/flow_stat.h b/src/flow_stat.h index 8671b228..a2137198 100644 --- a/src/flow_stat.h +++ b/src/flow_stat.h @@ -450,6 +450,7 @@ class CFlowStatRuleMgr { friend std::ostream& operator<<(std::ostream& os, const CFlowStatRuleMgr& cf); void copy_state(TrexStream * from, TrexStream * to); void init_stream(TrexStream * stream); + int verify_stream(TrexStream * stream); int add_stream(TrexStream * stream); int del_stream(TrexStream * stream); int start_stream(TrexStream * stream); @@ -460,6 +461,7 @@ class CFlowStatRuleMgr { private: void create(); int compile_stream(const TrexStream * stream, CFlowStatParser *parser); + int add_stream_internal(TrexStream * stream, bool do_action); int add_hw_rule(uint16_t hw_id, uint8_t proto); void send_start_stop_msg_to_rx(bool is_start); @@ -472,7 +474,7 @@ class CFlowStatRuleMgr { const CRxCoreStateless *m_rx_core; int m_max_hw_id; // max hw id we ever used int m_max_hw_id_payload; // max hw id we ever used for payload rules - uint32_t m_num_started_streams; // How many started (transmitting) streams we have + int m_num_started_streams; // How many started (transmitting) streams we have CNodeRing *m_ring_to_rx; // handle for sending messages to Rx core CFlowStatParser *m_parser; uint16_t m_cap; // capabilities of the NIC driver we are using diff --git a/src/latency.cpp b/src/latency.cpp index acbe26d4..841913cf 100644 --- a/src/latency.cpp +++ b/src/latency.cpp @@ -338,9 +338,7 @@ bool CCPortLatency::dump_packet(rte_mbuf_t * m){ uint16_t pkt_size=rte_pktmbuf_pkt_len(m); utl_DumpBuffer(stdout,p,pkt_size,0); return (0); - - - +#if 0 if (pkt_size < ( sizeof(CRx_check_header)+14+20) ) { assert(0); } @@ -348,16 +346,8 @@ bool CCPortLatency::dump_packet(rte_mbuf_t * m){ lp->dump(stdout); - - uint16_t vlan_offset=0; - if ( unlikely( CGlobalInfo::m_options.preview.get_vlan_mode_enable() ) ){ - vlan_offset=4; - } - - (void)vlan_offset; - -// utl_DumpBuffer(stdout,p,pkt_size,0); return (0); +#endif } @@ -565,9 +555,6 @@ bool CLatencyManager::Create(CLatencyManagerCfg * cfg){ m_nat_check_manager.Create(); } - m_watchdog = NULL; - m_watchdog_handle = -1; - return (true); } @@ -718,12 +705,10 @@ void CLatencyManager::reset(){ } void CLatencyManager::tickle() { - if (m_watchdog) { - m_watchdog->tickle(m_watchdog_handle); - } + m_monitor.tickle(); } -void CLatencyManager::start(int iter, TrexWatchDog *watchdog) { +void CLatencyManager::start(int iter, bool activate_watchdog) { m_do_stop =false; m_is_active =false; int cnt=0; @@ -740,9 +725,9 @@ void CLatencyManager::start(int iter, TrexWatchDog *watchdog) { m_p_queue.push(node); bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable()?true:false; - if (watchdog) { - m_watchdog = watchdog; - m_watchdog_handle = watchdog->register_monitor("STF RX CORE", 1); + if (activate_watchdog) { + m_monitor.create("STF RX CORE", 1); + TrexWatchDog::getInstance().register_monitor(&m_monitor); } while ( !m_p_queue.empty() ) { @@ -812,8 +797,8 @@ void CLatencyManager::start(int iter, TrexWatchDog 
*watchdog) { } /* disable the monitor */ - if (m_watchdog) { - m_watchdog->disable_monitor(m_watchdog_handle); + if (activate_watchdog) { + m_monitor.disable(); } } diff --git a/src/latency.h b/src/latency.h index 2b74f737..724621f0 100644 --- a/src/latency.h +++ b/src/latency.h @@ -85,6 +85,7 @@ public: CSimplePacketParser(rte_mbuf_t * m){ m_m=m; + m_l4 = NULL; } bool Parse(); @@ -269,6 +270,7 @@ public: CLatencyManagerCfg (){ m_max_ports=0; m_cps=0.0; + memset(m_ports, 0, sizeof(m_ports)); m_client_ip.v4=0x10000000; m_server_ip.v4=0x20000000; m_dual_port_mask=0x01000000; @@ -339,7 +341,7 @@ public: bool Create(CLatencyManagerCfg * cfg); void Delete(); void reset(); - void start(int iter, TrexWatchDog *watchdog); + void start(int iter, bool activate_watchdog); void stop(); bool is_active(); void set_ip(uint32_t client_ip, @@ -403,8 +405,7 @@ private: CNatRxManager m_nat_check_manager; CCpuUtlDp m_cpu_dp_u; CCpuUtlCp m_cpu_cp_u; - TrexWatchDog *m_watchdog; - int m_watchdog_handle; + TrexMonitor m_monitor; volatile bool m_do_stop __rte_cache_aligned ; }; diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp index ffae5caa..eb65ede3 100644 --- a/src/main_dpdk.cpp +++ b/src/main_dpdk.cpp @@ -552,7 +552,8 @@ enum { OPT_HELP, OPT_VIRT_ONE_TX_RX_QUEUE, OPT_PREFIX, OPT_MAC_SPLIT, - OPT_SEND_DEBUG_PKT + OPT_SEND_DEBUG_PKT, + OPT_NO_WATCHDOG }; @@ -614,6 +615,7 @@ static CSimpleOpt::SOption parser_options[] = { OPT_MAC_SPLIT, "--mac-spread", SO_REQ_SEP }, { OPT_SEND_DEBUG_PKT, "--send-debug-pkt", SO_REQ_SEP }, { OPT_MBUF_FACTOR , "--mbuf-factor", SO_REQ_SEP }, + { OPT_NO_WATCHDOG , "--no-watchdog", SO_NONE }, SO_END_OF_OPTIONS @@ -712,18 +714,11 @@ static int usage(){ printf(" --prefix : for multi trex, each instance should have a different name \n"); printf(" --mac-spread : Spread the destination mac-order by this factor. 
e.g 2 will generate the traffic to 2 devices DEST-MAC ,DEST-MAC+1 \n"); printf(" maximum is up to 128 devices \n"); - printf(" --mbuf-factor : factor for packet memory \n"); - - printf("\n simulation mode : \n"); - printf(" Using this mode you can generate the traffic into a pcap file and learn how trex works \n"); - printf(" With this version you must be SUDO to use this mode ( I know this is not normal ) \n"); - printf(" you can use the Linux CEL version of TRex to do it without super user \n"); - printf(" \n"); - printf(" -o [capfile_name] simulate trex into pcap file \n"); - printf(" --pcap export the file in pcap mode \n"); - printf(" bp-sim-64 -d 10 -f cfg.yaml -o my.pcap --pcap # export 10 sec of what Trex will do on real-time to a file my.pcap \n"); - printf(" --vm-sim : simulate vm with driver of one input queue and one output queue \n"); + printf(" \n"); + printf(" --no-watchdog : disable watchdog \n"); + printf(" \n"); + printf(" --vm-sim : simulate vm with driver of one input queue and one output queue \n"); printf(" \n"); printf(" Examples: "); printf(" basic trex run for 10 sec and multiplier of x10 \n"); @@ -936,6 +931,10 @@ static int parse_options(int argc, char *argv[], CParserOption* po, bool first_t po->preview.set_1g_mode(true); break; + case OPT_NO_WATCHDOG : + po->preview.setWDDisable(true); + break; + case OPT_LATENCY_PREVIEW : sscanf(args.OptionArg(),"%d", &po->m_latency_prev); break; @@ -2066,7 +2065,7 @@ int CCoreEthIFStateless::send_node_flow_stat(rte_mbuf *m, CGenNodeStateless * no } tx_per_flow_t *lp_s = &lp_stats->m_tx_per_flow[hw_id]; lp_s->add_pkts(1); - lp_s->add_bytes(mi->pkt_len); + lp_s->add_bytes(mi->pkt_len + 4); // We add 4 because of ethernet CRC if (hw_id >= MAX_FLOW_STATS) { fsp_head->time_stamp = os_get_hr_tick_64(); @@ -2107,8 +2106,12 @@ int CCoreEthIFStateless::send_node(CGenNode * no) { } if (unlikely(node_sl->is_stat_needed())) { - return send_node_flow_stat(m, node_sl, lp_port, lp_stats, - (node_sl->get_cache_mbuf() || node_sl->is_cache_mbuf_array())? true:false); + if ( unlikely(node_sl->is_cache_mbuf_array()) ) { + // No support for latency + cache. If user asks for cache on latency stream, we change cache to 0. + // assert here just to make sure. + assert(1); + } + return send_node_flow_stat(m, node_sl, lp_port, lp_stats, (node_sl->get_cache_mbuf()) ? true : false); } else { send_pkt(lp_port,m,lp_stats); } @@ -2843,8 +2846,9 @@ private: uint32_t m_stats_cnt; std::mutex m_cp_lock; + TrexMonitor m_monitor; + public: - TrexWatchDog m_watchdog; TrexStateless *m_trex_stateless; }; @@ -3274,8 +3278,7 @@ bool CGlobalTRex::Create(){ TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP, global_platform_cfg_info.m_zmq_rpc_port, - &m_cp_lock, - &m_watchdog); + &m_cp_lock); cfg.m_port_count = CGlobalInfo::m_options.m_expected_portd; cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg; @@ -3848,6 +3851,7 @@ CGlobalTRex::handle_slow_path(bool &was_stopped) { if ( CGlobalInfo::m_options.preview.get_no_keyboard() ==false ) { if ( m_io_modes.handle_io_modes() ) { + printf(" CTRL -C ... 
\n"); was_stopped=true; return false; } @@ -3978,8 +3982,10 @@ int CGlobalTRex::run_in_master() { const int FASTPATH_DELAY_MS = 10; const int SLOWPATH_DELAY_MS = 500; - int handle = m_watchdog.register_monitor("master", 2); - m_watchdog.start(); + m_monitor.create("master", 2); + TrexWatchDog::getInstance().register_monitor(&m_monitor); + + TrexWatchDog::getInstance().start(); while ( true ) { @@ -4002,12 +4008,15 @@ int CGlobalTRex::run_in_master() { slow_path_counter += FASTPATH_DELAY_MS; cp_lock.lock(); - m_watchdog.tickle(handle); + m_monitor.tickle(); } /* on exit release the lock */ cp_lock.unlock(); + /* first stop the WD */ + TrexWatchDog::getInstance().stop(); + if (!is_all_cores_finished()) { /* probably CLTR-C */ try_stop_all_cores(); @@ -4015,7 +4024,6 @@ int CGlobalTRex::run_in_master() { m_mg.stop(); - m_watchdog.stop(); delay(1000); if ( was_stopped ){ @@ -4031,12 +4039,12 @@ int CGlobalTRex::run_in_rx_core(void){ if (get_is_stateless()) { m_sl_rx_running = true; - m_rx_sl.start(m_watchdog); + m_rx_sl.start(); m_sl_rx_running = false; } else { if ( CGlobalInfo::m_options.is_rx_enabled() ){ m_sl_rx_running = false; - m_mg.start(0, &m_watchdog); + m_mg.start(0, true); } } @@ -4063,9 +4071,8 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){ lpt = m_fl.m_threads_info[virt_core_id-1]; /* register a watchdog handle on current core */ - lpt->m_watchdog = &m_watchdog; - lpt->m_watchdog_handle = m_watchdog.register_monitor(ss.str(), 1); - + lpt->m_monitor.create(ss.str(), 1); + TrexWatchDog::getInstance().register_monitor(&lpt->m_monitor); if (get_is_stateless()) { lpt->start_stateless_daemon(*lp); @@ -4074,7 +4081,7 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){ } /* done - remove this from the watchdog (we might wait on join for a long time) */ - lpt->m_watchdog->disable_monitor(lpt->m_watchdog_handle); + lpt->m_monitor.disable(); m_signal[virt_core_id]=1; return (0); @@ -4768,9 +4775,9 @@ int main_test(int argc , char * argv[]){ g_trex.reset_counters(); } - /* this will give us all cores - master + tx + latency */ - g_trex.m_watchdog.mark_pending_monitor(g_trex.m_max_cores); - + /* disable WD if needed */ + bool wd_enable = (CGlobalInfo::m_options.preview.getWDDisable() ? false : true); + TrexWatchDog::getInstance().init(wd_enable); g_trex.m_sl_rx_running = false; if ( get_is_stateless() ) { @@ -5478,10 +5485,10 @@ void CTRexExtendedDriverBase40G::get_extended_stats(CPhyEthIF * _if,CPhyEthIFSta stats->ipackets = stats1.ipackets; - stats->ibytes = stats1.ibytes; + stats->ibytes = stats1.ibytes + (stats1.ipackets<<2); stats->opackets = stats1.opackets; - stats->obytes = stats1.obytes; + stats->obytes = stats1.obytes + (stats1.opackets<<2); stats->f_ipackets = 0; stats->f_ibytes = 0; diff --git a/src/pal/common/common_mbuf.cpp b/src/pal/common/common_mbuf.cpp new file mode 100644 index 00000000..eba29418 --- /dev/null +++ b/src/pal/common/common_mbuf.cpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2016-2016 Cisco Systems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include <stdio.h> +#include <mbuf.h> +#include "common_mbuf.h" + +/* Dump structure of mbuf chain, without the data */ +void +utl_rte_pktmbuf_dump(const struct rte_mbuf *m) { + while (m) { + printf("(%d %d %d)", m->pkt_len, m->data_len, +#ifdef TREX_SIM + (int)m->refcnt_reserved); +#else + (int)m->refcnt_atomic.cnt); +#endif + if (RTE_MBUF_INDIRECT(m)) { +#ifdef TREX_SIM + struct rte_mbuf *md = RTE_MBUF_FROM_BADDR(m->buf_addr); +#else + struct rte_mbuf *md = rte_mbuf_from_indirect((struct rte_mbuf *)m); +#endif + printf("(direct %d %d %d)", md->pkt_len, md->data_len, +#ifdef TREX_SIM + (int)md->refcnt_reserved); +#else + (int)md->refcnt_atomic.cnt); +#endif + } + m = m->next; + } + printf("\n"); +} diff --git a/src/pal/common/common_mbuf.h b/src/pal/common/common_mbuf.h new file mode 100644 index 00000000..c52842bd --- /dev/null +++ b/src/pal/common/common_mbuf.h @@ -0,0 +1,88 @@ +#ifndef COMMON_MBUF_H +#define COMMON_MBUF_H + +/* +Copyright (c) 2016-2016 Cisco Systems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +static inline rte_mbuf_t * utl_rte_pktmbuf_add_after2(rte_mbuf_t *m1,rte_mbuf_t *m2){ + utl_rte_pktmbuf_check(m1); + utl_rte_pktmbuf_check(m2); + + m1->next=m2; + m1->pkt_len += m2->data_len; + m1->nb_segs = m2->nb_segs + 1; + return (m1); +} + +static inline rte_mbuf_t * utl_rte_pktmbuf_add_after(rte_mbuf_t *m1,rte_mbuf_t *m2){ + + utl_rte_pktmbuf_check(m1); + utl_rte_pktmbuf_check(m2); + + rte_mbuf_refcnt_update(m2,1); + m1->next=m2; + m1->pkt_len += m2->data_len; + m1->nb_segs = m2->nb_segs + 1; + return (m1); +} + + +static inline void utl_rte_pktmbuf_add_last(rte_mbuf_t *m,rte_mbuf_t *m_last){ + + //there could be 2 cases supported + //1. one mbuf + //2. two mbug where last is indirect + + if ( m->next == NULL ) { + utl_rte_pktmbuf_add_after2(m,m_last); + }else{ + m->next->next=m_last; + m->pkt_len += m_last->data_len; + m->nb_segs = 3; + } +} + +// Create following m_buf structure: +// base -> indirect -> last +// Read only is the direct of indirect. +static inline rte_mbuf_t * utl_rte_pktmbuf_chain_with_indirect (rte_mbuf_t *base, rte_mbuf_t *indirect + , rte_mbuf_t *read_only, rte_mbuf_t *last) { + rte_pktmbuf_attach(indirect, read_only); + base->next = indirect; + indirect->next = last; + rte_pktmbuf_refcnt_update(read_only, -1); + base->nb_segs = 3; + indirect->nb_segs = 2; + last->nb_segs = 1; + return base; +} + +rte_mempool_t * utl_rte_mempool_create(const char *name, + unsigned n, + unsigned elt_size, + unsigned cache_size, + uint32_t _id , + int socket_id + ); + +rte_mempool_t * utl_rte_mempool_create_non_pkt(const char *name, + unsigned n, + unsigned elt_size, + unsigned cache_size, + uint32_t _id , + int socket_id); + +#endif diff --git a/src/pal/linux/mbuf.cpp b/src/pal/linux/mbuf.cpp index 846c776c..9f568e80 100755 --- a/src/pal/linux/mbuf.cpp +++ b/src/pal/linux/mbuf.cpp @@ -27,31 +27,12 @@ limitations under the License. 
#include "mbuf.h" #include <stdio.h> -#include <assert.h> #include <stdlib.h> #include <ctype.h> #include "sanb_atomic.h" - -#define RTE_MBUF_TO_BADDR(mb) (((struct rte_mbuf *)(mb)) + 1) -#define RTE_MBUF_FROM_BADDR(ba) (((struct rte_mbuf *)(ba)) - 1) - - void rte_pktmbuf_detach(struct rte_mbuf *m); - - -void utl_rte_check(rte_mempool_t * mp){ - assert(mp->magic == MAGIC0); - assert(mp->magic2 == MAGIC2); -} - -void utl_rte_pktmbuf_check(struct rte_mbuf *m){ - utl_rte_check(m->pool); - assert(m->magic == MAGIC0); - assert(m->magic2== MAGIC2); -} - rte_mempool_t * utl_rte_mempool_create_non_pkt(const char *name, unsigned n, unsigned elt_size, @@ -95,8 +76,9 @@ void utl_rte_mempool_delete(rte_mempool_t * & pool){ uint16_t rte_mbuf_refcnt_update(rte_mbuf_t *m, int16_t value) { utl_rte_pktmbuf_check(m); - uint32_t a=sanb_atomic_add_return_32_old(&m->refcnt_reserved, value); - return (a); + m->refcnt_reserved = (uint16_t)(m->refcnt_reserved + value); + assert(m->refcnt_reserved >= 0); + return m->refcnt_reserved; } @@ -109,7 +91,7 @@ void rte_pktmbuf_reset(struct rte_mbuf *m) m->pkt_len = 0; m->nb_segs = 1; m->in_port = 0xff; - m->refcnt_reserved=1; + m->ol_flags = 0; #if RTE_PKTMBUF_HEADROOM > 0 m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ? @@ -136,7 +118,7 @@ rte_mbuf_t *rte_pktmbuf_alloc(rte_mempool_t *mp){ m->magic = MAGIC0; m->magic2 = MAGIC2; m->pool = mp; - m->refcnt_reserved =0; + m->refcnt_reserved = 1; m->buf_len = buf_len; m->buf_addr =(char *)((char *)m+sizeof(rte_mbuf_t)+RTE_PKTMBUF_HEADROOM) ; @@ -146,28 +128,26 @@ rte_mbuf_t *rte_pktmbuf_alloc(rte_mempool_t *mp){ return (m); } - -void rte_pktmbuf_free_seg(rte_mbuf_t *m){ +void rte_pktmbuf_free_seg(rte_mbuf_t *m) { utl_rte_pktmbuf_check(m); - uint32_t old=sanb_atomic_dec2zero32(&m->refcnt_reserved); - if (old == 1) { - struct rte_mbuf *md = RTE_MBUF_FROM_BADDR(m->buf_addr); - if ( md != m ) { + if (rte_mbuf_refcnt_update(m, -1) == 0) { + /* if this is an indirect mbuf, then + * - detach mbuf + * - free attached mbuf segment + */ + + if (RTE_MBUF_INDIRECT(m)) { + struct rte_mbuf *md = RTE_MBUF_FROM_BADDR(m->buf_addr); rte_pktmbuf_detach(m); - if (rte_mbuf_refcnt_update(md, -1) == 0) { + if (rte_mbuf_refcnt_update(md, -1) == 0) free(md); - } - } - free(m); } } - - void rte_pktmbuf_free(rte_mbuf_t *m){ rte_mbuf_t *m_next; @@ -331,19 +311,6 @@ rte_pktmbuf_dump(const struct rte_mbuf *m, unsigned dump_len) } } - -rte_mbuf_t * utl_rte_pktmbuf_add_after2(rte_mbuf_t *m1,rte_mbuf_t *m2){ - utl_rte_pktmbuf_check(m1); - utl_rte_pktmbuf_check(m2); - - m1->next=m2; - m1->pkt_len += m2->data_len; - m1->nb_segs = m2->nb_segs + 1; - return (m1); -} - - - void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md) { @@ -355,6 +322,7 @@ void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md) mi->next = NULL; mi->data_len = md->data_len; mi->pkt_len = mi->data_len; + mi->ol_flags = mi->ol_flags | IND_ATTACHED_MBUF; mi->nb_segs = 1; } @@ -376,33 +344,14 @@ void rte_pktmbuf_detach(struct rte_mbuf *m) m->data_len = 0; + m->ol_flags = 0; } - - - - -rte_mbuf_t * utl_rte_pktmbuf_add_after(rte_mbuf_t *m1,rte_mbuf_t *m2){ - - utl_rte_pktmbuf_check(m1); - utl_rte_pktmbuf_check(m2); - - rte_mbuf_refcnt_update(m2,1); - m1->next=m2; - m1->pkt_len += m2->data_len; - m1->nb_segs = m2->nb_segs + 1; - return (m1); -} - - uint64_t rte_rand(void){ return ( rand() ); } - - - #ifdef ONLY_A_TEST diff --git a/src/pal/linux/mbuf.h b/src/pal/linux/mbuf.h index 174c757d..e7819148 100755 --- a/src/pal/linux/mbuf.h +++ b/src/pal/linux/mbuf.h @@ -24,10 
+24,19 @@ limitations under the License. #include <stdint.h> #include <string.h> +#include <assert.h> + +typedef struct rte_mbuf rte_mbuf_t; #define MAGIC0 0xAABBCCDD #define MAGIC2 0x11223344 +#define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ +#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF) +#define RTE_MBUF_TO_BADDR(mb) (((struct rte_mbuf *)(mb)) + 1) +#define RTE_MBUF_FROM_BADDR(ba) (((struct rte_mbuf *)(ba)) - 1) + + struct rte_mempool { uint32_t magic; uint32_t elt_size; @@ -36,9 +45,6 @@ struct rte_mempool { int size; }; - - - struct rte_mbuf { uint32_t magic; struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */ @@ -55,33 +61,16 @@ struct rte_mbuf { uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ uint32_t magic2; - uint32_t refcnt_reserved; /**< Do not use this field */ + uint16_t refcnt_reserved; + uint64_t ol_flags; /**< Offload features. */ } ; - -typedef struct rte_mbuf rte_mbuf_t; - typedef struct rte_mempool rte_mempool_t; #define RTE_PKTMBUF_HEADROOM 0 void utl_rte_mempool_delete(rte_mempool_t * &pool); -rte_mempool_t * utl_rte_mempool_create(const char *name, - unsigned n, - unsigned elt_size, - unsigned cache_size, - uint32_t _id , - int socket_id - ); - -rte_mempool_t * utl_rte_mempool_create_non_pkt(const char *name, - unsigned n, - unsigned elt_size, - unsigned cache_size, - uint32_t _id , - int socket_id); - inline unsigned rte_mempool_count(rte_mempool_t *mp){ return (10); } @@ -107,9 +96,6 @@ void rte_pktmbuf_free_seg(rte_mbuf_t *m); uint16_t rte_mbuf_refcnt_update(rte_mbuf_t *m, int16_t value); -rte_mbuf_t * utl_rte_pktmbuf_add_after(rte_mbuf_t *m1,rte_mbuf_t *m2); -rte_mbuf_t * utl_rte_pktmbuf_add_after2(rte_mbuf_t *m1,rte_mbuf_t *m2); - void rte_pktmbuf_dump(const struct rte_mbuf *m, unsigned dump_len); @@ -166,22 +152,6 @@ rte_lcore_to_socket_id(unsigned lcore_id){ uint64_t rte_rand(void); - -static inline void utl_rte_pktmbuf_add_last(rte_mbuf_t *m,rte_mbuf_t *m_last){ - - //there could be 2 cases supported - //1. one mbuf - //2. two mbug where last is indirect - - if ( m->next == NULL ) { - utl_rte_pktmbuf_add_after2(m,m_last); - }else{ - m->next->next=m_last; - m->pkt_len += m_last->data_len; - m->nb_segs = 3; - } -} - static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) { do { @@ -189,8 +159,16 @@ static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) } while ((m = m->next) != NULL); } +static inline void utl_rte_check(rte_mempool_t * mp){ + assert(mp->magic == MAGIC0); + assert(mp->magic2 == MAGIC2); +} - +static inline void utl_rte_pktmbuf_check(struct rte_mbuf *m){ + utl_rte_check(m->pool); + assert(m->magic == MAGIC0); + assert(m->magic2== MAGIC2); +} #define __rte_cache_aligned @@ -199,4 +177,7 @@ static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) #define RTE_CACHE_LINE_SIZE 64 #define SOCKET_ID_ANY 0 +// has to be after the definition of rte_mbuf and other utility functions +#include "common_mbuf.h" + #endif diff --git a/src/pal/linux_dpdk/mbuf.cpp b/src/pal/linux_dpdk/mbuf.cpp index dd78617f..2a405ab1 100755 --- a/src/pal/linux_dpdk/mbuf.cpp +++ b/src/pal/linux_dpdk/mbuf.cpp @@ -6,7 +6,7 @@ */ /* -Copyright (c) 2015-2015 Cisco Systems, Inc. +Copyright (c) 2015-2016 Cisco Systems, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -27,7 +27,7 @@ rte_mempool_t * utl_rte_mempool_create(const char *name, unsigned elt_size, unsigned cache_size, uint32_t _id, - uint32_t socket_id ){ + int socket_id ){ char buffer[100]; sprintf(buffer,"%s-%d",name,socket_id); diff --git a/src/pal/linux_dpdk/mbuf.h b/src/pal/linux_dpdk/mbuf.h index 339c0909..0d9ca8be 100755 --- a/src/pal/linux_dpdk/mbuf.h +++ b/src/pal/linux_dpdk/mbuf.h @@ -6,7 +6,7 @@ */ /* -Copyright (c) 2015-2015 Cisco Systems, Inc. +Copyright (c) 2015-2016 Cisco Systems, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,61 +27,12 @@ limitations under the License. #include <rte_random.h> typedef struct rte_mbuf rte_mbuf_t; - +inline void utl_rte_pktmbuf_check(struct rte_mbuf *m) {} typedef struct rte_mempool rte_mempool_t; -inline void utl_rte_mempool_delete(rte_mempool_t * & pool){ -} - - -rte_mempool_t * utl_rte_mempool_create(const char *name, - unsigned n, - unsigned elt_size, - unsigned cache_size, - uint32_t _id, - uint32_t socket_id ); +#include "common_mbuf.h" -rte_mempool_t * utl_rte_mempool_create_non_pkt(const char *name, - unsigned n, - unsigned elt_size, - unsigned cache_size, - uint32_t _id , - int socket_id); - - -static inline rte_mbuf_t * utl_rte_pktmbuf_add_after(rte_mbuf_t *m1,rte_mbuf_t *m2){ - - rte_mbuf_refcnt_update(m2,1); - m1->next=m2; - - m1->pkt_len += m2->data_len; - m1->nb_segs = m2->nb_segs + 1; - return (m1); -} - -static inline rte_mbuf_t * utl_rte_pktmbuf_add_after2(rte_mbuf_t *m1,rte_mbuf_t *m2){ - - m1->next=m2; - m1->pkt_len += m2->data_len; - m1->nb_segs = m2->nb_segs + 1; - return (m1); -} - -static inline void utl_rte_pktmbuf_add_last(rte_mbuf_t *m,rte_mbuf_t *m_last){ - - //there could be 2 cases supported - //1. one mbuf - //2. 
two mbug where last is indirect - - if ( m->next == NULL ) { - utl_rte_pktmbuf_add_after2(m,m_last); - }else{ - m->next->next=m_last; - m->pkt_len += m_last->data_len; - m->nb_segs = 3; - } +inline void utl_rte_mempool_delete(rte_mempool_t * & pool){ } - - #endif diff --git a/src/rpc-server/trex_rpc_req_resp_server.cpp b/src/rpc-server/trex_rpc_req_resp_server.cpp index 033f265c..e0e7635c 100644 --- a/src/rpc-server/trex_rpc_req_resp_server.cpp +++ b/src/rpc-server/trex_rpc_req_resp_server.cpp @@ -56,7 +56,8 @@ void TrexRpcServerReqRes::_rpc_thread_cb() { std::stringstream ss; int zmq_rc; - m_watchdog_handle = m_watchdog->register_monitor(m_name, 1); + m_monitor.create(m_name, 1); + TrexWatchDog::getInstance().register_monitor(&m_monitor); /* create a socket based on the configuration */ @@ -102,7 +103,7 @@ void TrexRpcServerReqRes::_rpc_thread_cb() { zmq_close(m_socket); /* done */ - m_watchdog->disable_monitor(m_watchdog_handle); + m_monitor.disable(); } bool @@ -115,7 +116,7 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) { assert(rc == 0); while (true) { - m_watchdog->tickle(m_watchdog_handle); + m_monitor.tickle(); rc = zmq_msg_recv (&zmq_msg, m_socket, 0); if (rc != -1) { @@ -200,23 +201,24 @@ void TrexRpcServerReqRes::process_request_raw(const std::string &request, std::s int index = 0; - /* expcetion safe */ - std::unique_lock<std::mutex> lock(*m_lock); - /* for every command parsed - launch it */ for (auto command : commands) { Json::Value single_response; + /* the command itself should be protected */ + std::unique_lock<std::mutex> lock(*m_lock); command->execute(single_response); + lock.unlock(); + delete command; response_json[index++] = single_response; + /* batch is like getting all the messages one by one - it should not be considered as stuck thread */ + /* need to think if this is a good thing */ + //m_monitor.tickle(); } - /* done with the lock */ - lock.unlock(); - /* write the JSON to string and sever on ZMQ */ if (response.size() == 1) { diff --git a/src/rpc-server/trex_rpc_server.cpp b/src/rpc-server/trex_rpc_server.cpp index e4ca95c3..6c323c16 100644 --- a/src/rpc-server/trex_rpc_server.cpp +++ b/src/rpc-server/trex_rpc_server.cpp @@ -36,8 +36,6 @@ TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, c m_name = name; m_lock = cfg.m_lock; - m_watchdog = cfg.m_watchdog; - m_watchdog_handle = -1; m_is_running = false; m_is_verbose = false; @@ -78,7 +76,6 @@ void TrexRpcServerInterface::start() { /* prepare for run */ _prepare(); - m_watchdog->mark_pending_monitor(); m_thread = new std::thread(&TrexRpcServerInterface::_rpc_thread_cb, this); if (!m_thread) { throw TrexRpcException("unable to create RPC thread"); diff --git a/src/rpc-server/trex_rpc_server_api.h b/src/rpc-server/trex_rpc_server_api.h index 3d9837ef..6df37b17 100644 --- a/src/rpc-server/trex_rpc_server_api.h +++ b/src/rpc-server/trex_rpc_server_api.h @@ -30,10 +30,10 @@ limitations under the License. 
#include <stdexcept> #include <trex_rpc_exception_api.h> #include <json/json.h> +#include "trex_watchdog.h" class TrexRpcServerInterface; class TrexRpcServerReqRes; -class TrexWatchDog; /** * defines a configuration of generic RPC server @@ -48,11 +48,10 @@ public: RPC_PROT_MOCK }; - TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock, TrexWatchDog *watchdog) { + TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock) { m_protocol = protocol; m_port = port; m_lock = lock; - m_watchdog = watchdog; } uint16_t get_port() const { @@ -69,7 +68,6 @@ private: public: std::mutex *m_lock; - TrexWatchDog *m_watchdog; }; /** @@ -142,8 +140,7 @@ protected: std::string m_name; std::mutex *m_lock; std::mutex m_dummy_lock; - TrexWatchDog *m_watchdog; - int m_watchdog_handle; + TrexMonitor m_monitor; }; /** diff --git a/src/sim/trex_sim_stateless.cpp b/src/sim/trex_sim_stateless.cpp index d3981e97..77bd4d70 100644 --- a/src/sim/trex_sim_stateless.cpp +++ b/src/sim/trex_sim_stateless.cpp @@ -200,7 +200,7 @@ SimStateless::prepare_control_plane() { m_publisher = new SimPublisher(); - TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL, NULL); + TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL); cfg.m_port_count = m_port_count; cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg; diff --git a/src/stateless/cp/trex_stateless_port.cpp b/src/stateless/cp/trex_stateless_port.cpp index 4dc3e449..d736d09e 100644 --- a/src/stateless/cp/trex_stateless_port.cpp +++ b/src/stateless/cp/trex_stateless_port.cpp @@ -87,6 +87,68 @@ protected: } }; +/************************************* + * Streams Feeder + * A class that holds a temporary + * clone of streams that can be + * manipulated + * + * this is a RAII object meant for + * graceful cleanup + ************************************/ +class StreamsFeeder { +public: + StreamsFeeder(TrexStatelessPort *port) { + + /* start pesimistic */ + m_success = false; + + /* fetch the original streams */ + port->get_object_list(m_in_streams); + + for (const TrexStream *in_stream : m_in_streams) { + TrexStream *out_stream = in_stream->clone(true); + + get_stateless_obj()->m_rx_flow_stat.start_stream(out_stream); + + m_out_streams.push_back(out_stream); + } + } + + void set_status(bool status) { + m_success = status; + } + + vector<TrexStream *> &get_streams() { + return m_out_streams; + } + + /** + * RAII + */ + ~StreamsFeeder() { + for (int i = 0; i < m_out_streams.size(); i++) { + TrexStream *out_stream = m_out_streams[i]; + TrexStream *in_stream = m_in_streams[i]; + + if (m_success) { + /* success path */ + get_stateless_obj()->m_rx_flow_stat.copy_state(out_stream, in_stream); + } else { + /* fail path */ + get_stateless_obj()->m_rx_flow_stat.stop_stream(out_stream); + } + delete out_stream; + } + } + +private: + vector<TrexStream *> m_in_streams; + vector<TrexStream *> m_out_streams; + bool m_success; +}; + + /*************************** * trex stateless port * @@ -193,10 +255,7 @@ TrexStatelessPort::start_traffic(const TrexPortMultiplier &mul, double duration, /* caclulate the effective factor for DP */ double factor = calculate_effective_factor(mul, force); - /* fetch all the streams from the table */ - vector<TrexStream *> streams; - get_object_list(streams); - + StreamsFeeder feeder(this); /* compiler it */ std::vector<TrexStreamsCompiledObj *> compiled_objs; @@ -204,15 +263,19 @@ TrexStatelessPort::start_traffic(const TrexPortMultiplier &mul, double duration, TrexStreamsCompiler 
compiler; bool rc = compiler.compile(m_port_id, - streams, + feeder.get_streams(), compiled_objs, get_dp_core_count(), factor, &fail_msg); + if (!rc) { + feeder.set_status(false); throw TrexException(fail_msg); } + feeder.set_status(true); + /* generate a message to all the relevant DP cores to start transmitting */ assert(m_pending_async_stop_event == TrexDpPortEvents::INVALID_ID); m_pending_async_stop_event = m_dp_events.create_event(new AsyncStopEvent()); diff --git a/src/stateless/cp/trex_streams_compiler.cpp b/src/stateless/cp/trex_streams_compiler.cpp index f2296aeb..e54c5f9c 100644 --- a/src/stateless/cp/trex_streams_compiler.cpp +++ b/src/stateless/cp/trex_streams_compiler.cpp @@ -382,7 +382,13 @@ TrexStreamsCompiler::compile(uint8_t port_id, assert(dp_core_count > 0); try { - return compile_internal(port_id,streams,objs,dp_core_count,factor,fail_msg); + return compile_internal(port_id, + streams, + objs, + dp_core_count, + factor, + fail_msg); + } catch (const TrexException &ex) { if (fail_msg) { *fail_msg = ex.what(); @@ -411,7 +417,6 @@ TrexStreamsCompiler::compile_internal(uint8_t por GraphNodeMap nodes; - /* compile checks */ pre_compile_check(streams, nodes); @@ -474,7 +479,7 @@ TrexStreamsCompiler::compile_on_single_core(uint8_t } /* compile all the streams */ - for (auto stream : streams) { + for (auto const stream : streams) { /* skip non-enabled streams */ if (!stream->m_enabled) { @@ -507,7 +512,7 @@ TrexStreamsCompiler::compile_on_all_cores(uint8_t } /* compile all the streams */ - for (auto stream : streams) { + for (auto const stream : streams) { /* skip non-enabled streams */ if (!stream->m_enabled) { @@ -527,7 +532,7 @@ TrexStreamsCompiler::compile_on_all_cores(uint8_t * */ void -TrexStreamsCompiler::compile_stream(TrexStream *stream, +TrexStreamsCompiler::compile_stream(const TrexStream *stream, double factor, uint8_t dp_core_count, std::vector<TrexStreamsCompiledObj *> &objs, @@ -543,31 +548,25 @@ TrexStreamsCompiler::compile_stream(TrexStream *stream, new_next_id = nodes.get(stream->m_next_stream_id)->m_compressed_stream_id; } - TrexStream *fixed_rx_flow_stat_stream = stream->clone(true); - - get_stateless_obj()->m_rx_flow_stat.start_stream(fixed_rx_flow_stat_stream); - // CFlowStatRuleMgr keeps state of the stream object. We duplicated the stream here (in order not - // change the packet kept in the stream). We want the state to be saved in the original stream. - get_stateless_obj()->m_rx_flow_stat.copy_state(fixed_rx_flow_stat_stream, stream); - - fixed_rx_flow_stat_stream->update_rate_factor(factor); + /* we clone because we alter the stream now */ + std::unique_ptr<TrexStream> tmp_stream(stream->clone(true)); + tmp_stream->update_rate_factor(factor); /* can this stream be split to many cores ? 
*/ if ( (dp_core_count == 1) || (!stream->is_splitable(dp_core_count)) ) { - compile_stream_on_single_core(fixed_rx_flow_stat_stream, + compile_stream_on_single_core(tmp_stream.get(), dp_core_count, objs, new_id, new_next_id); } else { - compile_stream_on_all_cores(fixed_rx_flow_stat_stream, + compile_stream_on_all_cores(tmp_stream.get(), dp_core_count, objs, new_id, new_next_id); } - delete fixed_rx_flow_stat_stream; } /** @@ -925,7 +924,7 @@ TrexStreamsGraphObj::find_max_rate() { } /* if not mark as inifite - get the last event time */ - if (m_expected_duration != -1) { + if ( (m_expected_duration != -1) && (m_rate_events.size() > 0) ) { m_expected_duration = m_rate_events.back().time; } diff --git a/src/stateless/cp/trex_streams_compiler.h b/src/stateless/cp/trex_streams_compiler.h index 0ce71b49..7e674364 100644 --- a/src/stateless/cp/trex_streams_compiler.h +++ b/src/stateless/cp/trex_streams_compiler.h @@ -141,7 +141,7 @@ private: bool all_continues); - void compile_stream(TrexStream *stream, + void compile_stream(const TrexStream *stream, double factor, uint8_t dp_core_count, std::vector<TrexStreamsCompiledObj *> &objs, @@ -244,7 +244,7 @@ public: } double get_factor_pps(double req_pps) const { - if ( (req_pps - m_fixed.m_pps) <= 0 ) { + if ( (req_pps - m_fixed.m_pps) < 0 ) { std::stringstream ss; ss << "current stream configuration enforces a minimum rate of '" << m_fixed.m_pps << "' pps"; throw TrexException(ss.str()); diff --git a/src/stateless/dp/trex_stateless_dp_core.cpp b/src/stateless/dp/trex_stateless_dp_core.cpp index fe78c5b2..58d8f21a 100644 --- a/src/stateless/dp/trex_stateless_dp_core.cpp +++ b/src/stateless/dp/trex_stateless_dp_core.cpp @@ -258,23 +258,20 @@ rte_mbuf_t * CGenNodeStateless::alloc_flow_stat_mbuf(rte_mbuf_t *m, struct flow_ fsp_head = (struct flow_stat_payload_header *)(p + rte_pktmbuf_data_len(m) - fsp_head_size); return m; } else { - // r/w --> read only. Should do something like: - // Alloc indirect,. make r/w->indirect point to read_only) -> new fsp_header - // for the mean time, just copy the entire packet. - m_ret = CGlobalInfo::pktmbuf_alloc( get_socket_id(), rte_pktmbuf_pkt_len(m) ); - assert(m_ret); - char *p_new = rte_pktmbuf_append(m_ret, rte_pktmbuf_pkt_len(m)); - rte_mbuf_t *m_free = m; - while (m != NULL) { - char *p = rte_pktmbuf_mtod(m, char*); - memcpy(p_new, p, m->data_len); - p_new += m->data_len; - m = m->next; - } - p_new = rte_pktmbuf_mtod(m_ret, char*); - fsp_head = (struct flow_stat_payload_header *)(p_new + rte_pktmbuf_data_len(m_ret) - fsp_head_size); - rte_pktmbuf_free(m_free); - return m_ret; + // We have: r/w --> read only. 
+ // Changing to: + // (original) r/w -> (new) indirect (direct is original read_only, after trimming last bytes) -> (new) latency info + rte_mbuf_t *m_read_only = m->next, *m_indirect; + + m_indirect = CGlobalInfo::pktmbuf_alloc_small(get_socket_id()); + assert(m_indirect); + // alloc mbuf just for the latency header + m_lat = CGlobalInfo::pktmbuf_alloc( get_socket_id(), fsp_head_size); + assert(m_lat); + fsp_head = (struct flow_stat_payload_header *)rte_pktmbuf_append(m_lat, fsp_head_size); + utl_rte_pktmbuf_chain_with_indirect(m, m_indirect, m_read_only, m_lat); + m_indirect->data_len = (uint16_t)(m_indirect->data_len - fsp_head_size); + return m; } } } @@ -910,6 +907,10 @@ TrexStatelessDpCore::add_stream(TrexStatelessDpPerPort * lp_port, uint8_t hw_id = stream->m_rx_check.m_hw_id; assert (hw_id < MAX_FLOW_STATS + MAX_FLOW_STATS_PAYLOAD); node->set_stat_hw_id(hw_id); + // no support for cache with flow stat payload rules + if ((TrexPlatformApi::driver_stat_cap_e)stream->m_rx_check.m_rule_type == TrexPlatformApi::IF_STAT_PAYLOAD) { + stream->m_cache_size = 0; + } } /* set socket id */ diff --git a/src/stateless/rx/trex_stateless_rx_core.cpp b/src/stateless/rx/trex_stateless_rx_core.cpp index b3555c13..a622ee7a 100644 --- a/src/stateless/rx/trex_stateless_rx_core.cpp +++ b/src/stateless/rx/trex_stateless_rx_core.cpp @@ -72,9 +72,6 @@ void CRxCoreStateless::create(const CRxSlCfg &cfg) { m_ring_to_cp = cp_rx->getRingDpToCp(0); m_state = STATE_IDLE; - m_watchdog_handle = -1; - m_watchdog = NULL; - for (int i = 0; i < m_max_ports; i++) { CLatencyManagerPerPortStl * lp = &m_ports[i]; lp->m_io = cfg.m_ports[i]; @@ -93,7 +90,7 @@ void CRxCoreStateless::handle_cp_msg(TrexStatelessCpToRxMsgBase *msg) { } void CRxCoreStateless::tickle() { - m_watchdog->tickle(m_watchdog_handle); + m_monitor.tickle(); } bool CRxCoreStateless::periodic_check_for_cp_messages() { @@ -147,14 +144,14 @@ void CRxCoreStateless::idle_state_loop() { } } -void CRxCoreStateless::start(TrexWatchDog &watchdog) { +void CRxCoreStateless::start() { int count = 0; int i = 0; bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable() ? 
true : false; /* register a watchdog handle on current core */ - m_watchdog = &watchdog; - m_watchdog_handle = watchdog.register_monitor("STL RX CORE", 1); + m_monitor.create("STL RX CORE", 1); + TrexWatchDog::getInstance().register_monitor(&m_monitor); while (true) { if (m_state == STATE_WORKING) { @@ -179,7 +176,7 @@ void CRxCoreStateless::start(TrexWatchDog &watchdog) { } rte_pause(); - m_watchdog->disable_monitor(m_watchdog_handle); + m_monitor.disable(); } void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t *m) { @@ -238,7 +235,7 @@ void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t * curr_rfc2544.set_seq(pkt_seq + 1); } lp->m_port.m_rx_pg_stat_payload[hw_id].add_pkts(1); - lp->m_port.m_rx_pg_stat_payload[hw_id].add_bytes(m->pkt_len); + lp->m_port.m_rx_pg_stat_payload[hw_id].add_bytes(m->pkt_len + 4); // +4 for ethernet CRC uint64_t d = (os_get_hr_tick_64() - fsp_head->time_stamp ); dsec_t ctime = ptime_convert_hr_dsec(d); curr_rfc2544.add_sample(ctime); @@ -246,7 +243,7 @@ void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t * } else { hw_id = get_hw_id(ip_id); lp->m_port.m_rx_pg_stat[hw_id].add_pkts(1); - lp->m_port.m_rx_pg_stat[hw_id].add_bytes(m->pkt_len); + lp->m_port.m_rx_pg_stat[hw_id].add_bytes(m->pkt_len + 4); // +4 for ethernet CRC } } } diff --git a/src/stateless/rx/trex_stateless_rx_core.h b/src/stateless/rx/trex_stateless_rx_core.h index ce1bc1ad..dfc56e4d 100644 --- a/src/stateless/rx/trex_stateless_rx_core.h +++ b/src/stateless/rx/trex_stateless_rx_core.h @@ -95,7 +95,7 @@ class CRxCoreStateless { }; public: - void start(TrexWatchDog &watchdog); + void start(); void create(const CRxSlCfg &cfg); void reset_rx_stats(uint8_t port_id); int get_rx_stats(uint8_t port_id, rx_per_flow_t *rx_stats, int min, int max, bool reset @@ -126,8 +126,7 @@ class CRxCoreStateless { private: - TrexWatchDog *m_watchdog; - int m_watchdog_handle; + TrexMonitor m_monitor; uint32_t m_max_ports; bool m_has_streams; diff --git a/src/time_histogram.cpp b/src/time_histogram.cpp index fefa59d6..b36fe164 100755 --- a/src/time_histogram.cpp +++ b/src/time_histogram.cpp @@ -60,13 +60,13 @@ bool CTimeHistogram::Add(dsec_t dt) { period_elem.inc_cnt(); period_elem.update_sum(dt); + period_elem.update_max(dt); // values smaller then certain threshold do not get into the histogram if (dt < m_min_delta) { return false; } period_elem.inc_high_cnt(); - period_elem.update_max(dt); uint32_t d_10usec = (uint32_t)(dt*100000.0); // 1 10-19 usec diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp index e78e8e6d..d099933b 100644 --- a/src/trex_watchdog.cpp +++ b/src/trex_watchdog.cpp @@ -36,9 +36,8 @@ limitations under the License. 
#include <iostream> #include <stdexcept> -#define DISABLE_WATCHDOG_ON_GDB -static TrexWatchDog::monitor_st *global_monitor; +static TrexMonitor *global_monitor; const char *get_exe_name(); @@ -114,7 +113,7 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) double now = now_sec(); - ss << "WATCHDOG: task '" << global_monitor->name << "' has not responded for more than " << (now - global_monitor->ts) << " seconds - timeout is " << global_monitor->timeout_sec << " seconds"; + ss << "WATCHDOG: task '" << global_monitor->get_name() << "' has not responded for more than " << global_monitor->get_interval(now) << " seconds - timeout is " << global_monitor->get_timeout_sec() << " seconds"; std::string backtrace = Backtrace(); ss << "\n\n*** traceback follows ***\n\n" << backtrace << "\n"; @@ -122,143 +121,72 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) throw std::runtime_error(ss.str()); } -void TrexWatchDog::mark_pending_monitor(int count) { - std::unique_lock<std::mutex> lock(m_lock); - m_pending += count; - lock.unlock(); +/************************************** + * Trex Monitor object + *************************************/ + +void TrexMonitor::create(const std::string &name, double timeout_sec) { + m_active = true; + m_tid = pthread_self(); + m_name = name; + m_timeout_sec = timeout_sec; + m_tickled = true; + m_ts = 0; } -void TrexWatchDog::block_on_pending(int max_block_time_ms) { - - int timeout_msec = max_block_time_ms; - - std::unique_lock<std::mutex> lock(m_lock); - - while (m_pending > 0) { - - lock.unlock(); - delay(1); - lock.lock(); - - timeout_msec -= 1; - if (timeout_msec == 0) { - throw TrexException("WATCHDOG: block on pending monitors timed out"); - } - } +/************************************** + * Trex watchdog + *************************************/ - /* lock will be released */ +void TrexWatchDog::init(bool enable){ + m_enable = enable; + if (m_enable) { + register_signal(); + } } /** * register a monitor - * must be called from the relevant thread - * * this function is thread safe * - * @author imarom (01-Jun-16) - * - * @param name - * @param timeout_sec - * - * @return int */ -int TrexWatchDog::register_monitor(const std::string &name, double timeout_sec) { - monitor_st monitor; - - /* cannot add monitors while active */ - assert(m_active == false); - - monitor.active = true; - monitor.tid = pthread_self(); - monitor.name = name; - monitor.timeout_sec = timeout_sec; - monitor.tickled = true; - monitor.ts = 0; +void TrexWatchDog::register_monitor(TrexMonitor *monitor) { + if (!m_enable){ + return; + } /* critical section start */ std::unique_lock<std::mutex> lock(m_lock); - /* make sure no double register */ - for (auto &m : m_monitors) { - if (m.tid == pthread_self()) { + /* sanity - not a must but why not... 
*/ + for (int i = 0; i < m_mon_count; i++) { + if ( (monitor == m_monitors[i]) || (m_monitors[i]->get_tid() == pthread_self()) ) { std::stringstream ss; ss << "WATCHDOG: double register detected\n\n" << Backtrace(); throw TrexException(ss.str()); } } - monitor.handle = m_monitors.size(); - m_monitors.push_back(monitor); + /* check capacity */ + if (m_mon_count == MAX_MONITORS) { + std::stringstream ss; + ss << "WATCHDOG: too many registered monitors\n\n" << Backtrace(); + throw TrexException(ss.str()); + } - assert(m_pending > 0); - m_pending--; + /* add monitor */ + m_monitors[m_mon_count++] = monitor; /* critical section end */ lock.unlock(); - return monitor.handle; -} - -/** - * will disable the monitor - it will no longer be watched - * - */ -void TrexWatchDog::disable_monitor(int handle) { - assert(handle < m_monitors.size()); - - m_monitors[handle].active = false; -} - -/** - * thread safe function - * - */ -void TrexWatchDog::tickle(int handle) { - - assert(handle < m_monitors.size()); - - /* not nesscary but write gets cache invalidate for nothing */ - if (m_monitors[handle].tickled) { - return; - } - - m_monitors[handle].tickled = true; -} - -void TrexWatchDog::register_signal() { - - /* do this once */ - if (g_signal_init) { - return; - } - - /* register a handler on SIG ALARM */ - struct sigaction sa; - memset (&sa, '\0', sizeof(sa)); - - sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = _callstack_signal_handler; - - int rc = sigaction(SIGALRM , &sa, NULL); - assert(rc == 0); - - g_signal_init = true; } void TrexWatchDog::start() { - block_on_pending(); - - /* no pending monitors */ - assert(m_pending == 0); - - /* under GDB - disable the watchdog */ - #ifdef DISABLE_WATCHDOG_ON_GDB - if (ptrace(PTRACE_TRACEME, 0, NULL, 0) == -1) { - printf("\n\n*** GDB detected - disabling watchdog... ***\n\n"); - return; + if (!m_enable){ + return ; } - #endif m_active = true; m_thread = new std::thread(&TrexWatchDog::_main, this); @@ -268,6 +196,11 @@ void TrexWatchDog::start() { } void TrexWatchDog::stop() { + + if (!m_enable){ + return ; + } + m_active = false; if (m_thread) { @@ -285,40 +218,42 @@ void TrexWatchDog::stop() { */ void TrexWatchDog::_main() { - /* reset all the monitors */ - for (auto &monitor : m_monitors) { - monitor.tickled = true; - } + assert(m_enable == true); /* start main loop */ while (m_active) { dsec_t now = now_sec(); - for (auto &monitor : m_monitors) { + /* to be on the safe side - read the count with a lock */ + std::unique_lock<std::mutex> lock(m_lock); + int count = m_mon_count; + lock.unlock(); + + for (int i = 0; i < count; i++) { + TrexMonitor *monitor = m_monitors[i]; /* skip non active monitors */ - if (!monitor.active) { + if (!monitor->is_active()) { continue; } /* if its own - turn it off and write down the time */ - if (monitor.tickled) { - monitor.tickled = false; - monitor.ts = now; + if (monitor->is_tickled()) { + monitor->reset(now); continue; } - /* the bit is off - check the time first */ - if ( (now - monitor.ts) > monitor.timeout_sec ) { - global_monitor = &monitor; + /* if the monitor has expired - crash */ + if (monitor->is_expired(now)) { + global_monitor = monitor; - pthread_kill(monitor.tid, SIGALRM); + pthread_kill(monitor->get_tid(), SIGALRM); /* nothing to do more... 
the other thread will terminate, but if not - we terminate */ sleep(5); - printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor.name.c_str()); - exit(1); + fprintf(stderr, "\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str()); + abort(); } } @@ -328,4 +263,25 @@ void TrexWatchDog::_main() { } } + +void TrexWatchDog::register_signal() { + /* do this once */ + if (g_signal_init) { + return; + } + + /* register a handler on SIG ALARM */ + struct sigaction sa; + memset (&sa, '\0', sizeof(sa)); + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = _callstack_signal_handler; + + int rc = sigaction(SIGALRM , &sa, NULL); + assert(rc == 0); + + g_signal_init = true; +} + bool TrexWatchDog::g_signal_init = false; + diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h index 63255180..1c948d56 100644 --- a/src/trex_watchdog.h +++ b/src/trex_watchdog.h @@ -27,68 +27,142 @@ limitations under the License. #include <thread> #include <mutex> -//#include "rte_memory.h" #include "mbuf.h" #include "os_time.h" -class TrexWatchDog { +/** + * every thread creates its own monitor from its own memory + * + * @author imarom (19-Jun-16) + */ +class TrexMonitor { + public: - TrexWatchDog() { - m_thread = NULL; - m_active = false; - m_pending = 0; - register_signal(); - } + /** + * create a monitor + * + * @author imarom (31-May-16) + * + * @param name + * @param timeout + * + * @return int + */ + void create(const std::string &name, double timeout_sec); /** - * registering a monitor happens from another thread - * this make sure that start will be able to block until - * all threads has registered + * disable the monitor - it will be ignored * - * @author imarom (01-Jun-16) */ - void mark_pending_monitor(int count = 1); - + void disable() { + m_active = false; + } /** - * blocks while monitors are pending registeration + * tickle the monitor - this should be called from the thread + * to avoid the watchdog from detecting a stuck thread * - * @author imarom (01-Jun-16) + * @author imarom (19-Jun-16) */ - void block_on_pending(int max_block_time_ms = 200); - + void tickle() { + /* to avoid useless writes - first check */ + if (!m_tickled) { + m_tickled = true; + } + } /** - * add a monitor to the watchdog - * this thread will be monitored and if timeout - * has passed without calling tick - an exception will be called - * - * @author imarom (31-May-16) - * - * @param name - * @param timeout + * called by the watchdog to reset the monitor for a new round * - * @return int */ - int register_monitor(const std::string &name, double timeout_sec); + void reset(dsec_t now) { + m_tickled = false; + m_ts = now; + } + + + /* return how much time has passed since last tickle */ + dsec_t get_interval(dsec_t now) const { + return (now - m_ts); + } + + pthread_t get_tid() const { + return m_tid; + } + + const std::string &get_name() const { + return m_name; + } + + dsec_t get_timeout_sec() const { + return m_timeout_sec; + } + + volatile bool is_active() const { + return m_active; + } + volatile bool is_tickled() const { + return m_tickled; + } + + bool is_expired(dsec_t now) const { + return ( get_interval(now) > m_timeout_sec ); + } + + +private: + + /* write fields are first */ + volatile bool m_active; + volatile bool m_tickled; + dsec_t m_ts; + + int m_handle; + double m_timeout_sec; + pthread_t m_tid; + std::string m_name; + + /* for for a full cacheline */ + uint8_t pad[15]; + +} 
__rte_cache_aligned; + + +/** + * a watchdog is a list of registered monitors + * + * @author imarom (19-Jun-16) + */ +class TrexWatchDog { +public: /** - * disable a monitor - it will no longer be watched + * singleton entry + * + * @author imarom (19-Jun-16) * + * @return TrexWatchDog& */ - void disable_monitor(int handle); + static TrexWatchDog& getInstance() { + static TrexWatchDog instance; + return instance; + } + void init(bool enable); + /** - * should be called by each thread on it's handle + * add a monitor to the watchdog + * from now on this monitor will be watched + * + * @author imarom (19-Jun-16) * - * @author imarom (31-May-16) + * @param monitor - a pointer to the object * - * @param handle */ - void tickle(int handle); + void register_monitor(TrexMonitor *monitor); /** @@ -105,37 +179,30 @@ public: void stop(); - /* should be cache aligned to avoid false sharing */ - struct monitor_st { - /* write fields are first */ - volatile bool active; - volatile bool tickled; - dsec_t ts; - - int handle; - double timeout_sec; - pthread_t tid; - std::string name; - - /* for for a full cacheline */ - uint8_t pad[15]; - }; +private: + TrexWatchDog() { + m_thread = NULL; + m_enable = false; + m_active = false; + m_mon_count = 0; + } -private: void register_signal(); void _main(); - std::vector<monitor_st> m_monitors __rte_cache_aligned; - std::mutex m_lock; + static const int MAX_MONITORS = 100; + TrexMonitor *m_monitors[MAX_MONITORS]; + volatile int m_mon_count; + std::mutex m_lock; - volatile bool m_active; - std::thread *m_thread; - volatile int m_pending; + bool m_enable; + volatile bool m_active; + std::thread *m_thread; - static bool g_signal_init; + static bool g_signal_init; }; -static_assert(sizeof(TrexWatchDog::monitor_st) >= RTE_CACHE_LINE_SIZE, "sizeof(monitor_st) != RTE_CACHE_LINE_SIZE" ); +static_assert(sizeof(TrexMonitor) == RTE_CACHE_LINE_SIZE, "sizeof(TrexMonitor) != RTE_CACHE_LINE_SIZE" ); #endif /* __TREX_WATCHDOG_H__ */ |
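
Reviewer note (not part of the commit): the watchdog rework spread across the hunks above replaces the old handle-based register_monitor/disable_monitor/tickle(handle) API with per-thread TrexMonitor objects and a TrexWatchDog singleton. The sketch below only restates, in one place, the usage pattern the patch itself shows (create, register_monitor, tickle, disable on the worker side; init, start, stop on the master side). The function names example_thread_loop and master_setup and the static monitor are illustrative; in the patch the monitor is a member of the per-thread object (for example CRxCoreStateless::m_monitor).

#include "trex_watchdog.h"

/* worker-thread side - mirrors run_in_core() and CRxCoreStateless::start() in this patch */
static void example_thread_loop(volatile bool &keep_running) {
    /* in the patch the monitor lives inside the per-thread object;
       a static is used here only to keep the sketch self-contained */
    static TrexMonitor monitor;

    monitor.create("example thread", 1);                      /* name, timeout in seconds */
    TrexWatchDog::getInstance().register_monitor(&monitor);   /* thread safe */

    while (keep_running) {
        monitor.tickle();   /* cheap: writes the flag only when it is not already set */
        /* ... one iteration of real work ... */
    }

    monitor.disable();      /* stop being watched before blocking on join/exit paths */
}

/* master side - mirrors main_test() and run_in_master() in this patch */
static void master_setup(bool no_watchdog_cli_flag) {
    TrexWatchDog::getInstance().init(!no_watchdog_cli_flag);  /* --no-watchdog maps here */
    TrexWatchDog::getInstance().start();
    /* ... run ... */
    TrexWatchDog::getInstance().stop();
}

The design point visible in trex_watchdog.h is that each TrexMonitor is padded to exactly one cache line (note the __rte_cache_aligned attribute and the static_assert), so a thread tickling its own monitor does not cause false sharing with monitors owned by other cores.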