From 8635fce7827e0ee75a2fe032e8dce139a746e1c4 Mon Sep 17 00:00:00 2001 From: Ido Barnea Date: Sun, 26 Jun 2016 17:41:29 +0300 Subject: Fx trex-216 - Crash on VM setups when running latency flows --- .../regression/stateless_tests/stl_rx_test.py | 17 ++-- src/bp_sim.cpp | 1 + src/bp_sim.h | 12 +-- src/flow_stat.cpp | 4 - src/flow_stat.h | 6 +- src/main_dpdk.cpp | 3 +- src/stateless/rx/trex_stateless_rx_core.cpp | 91 ++++++++++++---------- src/stateless/rx/trex_stateless_rx_core.h | 17 ++-- 8 files changed, 83 insertions(+), 68 deletions(-) diff --git a/scripts/automation/regression/stateless_tests/stl_rx_test.py b/scripts/automation/regression/stateless_tests/stl_rx_test.py index 23ebf081..36f17170 100644 --- a/scripts/automation/regression/stateless_tests/stl_rx_test.py +++ b/scripts/automation/regression/stateless_tests/stl_rx_test.py @@ -9,10 +9,6 @@ class STLRX_Test(CStlGeneral_Test): """Tests for RX feature""" def setUp(self): - #if CTRexScenario.setup_name in ('trex08', 'trex09'): - # self.skip('This test makes trex08 and trex09 sick. Fix those ASAP.') - if self.is_virt_nics: - self.skip('Skip this for virtual NICs for now') per_driver_params = {"rte_vmxnet3_pmd": [1, 50, 1,False], "rte_ixgbe_pmd": [30, 5000, 1,True,200,400], "rte_i40e_pmd": [80, 5000, 1,True,100,250], "rte_igb_pmd": [80, 500, 1,False], "rte_em_pmd": [1, 50, 1,False], "rte_virtio_pmd": [1, 50, 1,False]} @@ -182,6 +178,9 @@ class STLRX_Test(CStlGeneral_Test): def test_multiple_streams(self): + if self.is_virt_nics: + self.skip('Skip this for virtual NICs') + num_latency_streams = 128 num_flow_stat_streams = 127 total_pkts = int(self.total_pkts / (num_latency_streams + num_flow_stat_streams)) @@ -313,8 +312,8 @@ class STLRX_Test(CStlGeneral_Test): # check low latency when you have stream of 9K stream def test_9k_stream(self): - - #self.skip('Skip due to bug trex-215') + if self.is_virt_nics: + self.skip('Skip this for virtual NICs') if self.latency_9k_enable == False: print("SKIP") @@ -441,7 +440,9 @@ class STLRX_Test(CStlGeneral_Test): def test_fcs_stream(self): """ this test send 1 64 byte packet with latency and check that all counters are reported as 64 bytes""" - #self.skip('Skip due to bug trex-213') + + if self.is_virt_nics: + self.skip('Skip this for virtual NICs') all_ports=list(CTRexScenario.stl_ports_map['map'].keys()); for port in all_ports: @@ -452,6 +453,8 @@ class STLRX_Test(CStlGeneral_Test): # this test adds more and more latency streams and re-test with incremental def test_incremental_latency_streams (self): + if self.is_virt_nics: + self.skip('Skip this for virtual NICs') total_pkts = self.total_pkts percent = 0.5 diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp index f9e96b6b..077a3d45 100755 --- a/src/bp_sim.cpp +++ b/src/bp_sim.cpp @@ -5006,6 +5006,7 @@ int CErfIFStl::send_sl_node(CGenNodeStateless *node_sl) { fsp_head->seq = 0x12345678; fsp_head->hw_id = hw_id - MAX_FLOW_STATS; fsp_head->magic = FLOW_STAT_PAYLOAD_MAGIC; + fsp_head->flow_seq = FLOW_STAT_PAYLOAD_INITIAL_FLOW_SEQ; fsp_head->time_stamp = 0x8899aabbccddeeff; fill_raw_packet(mi, (CGenNode *)node_sl, dir); rte_pktmbuf_free(mi); diff --git a/src/bp_sim.h b/src/bp_sim.h index 56e37272..e396a710 100755 --- a/src/bp_sim.h +++ b/src/bp_sim.h @@ -259,18 +259,20 @@ class CPreviewMode ; class CLatencyPktData { public: - CLatencyPktData() {m_magic = 0xaa;} + CLatencyPktData() {m_flow_seq = FLOW_STAT_PAYLOAD_INITIAL_FLOW_SEQ;} inline uint32_t get_seq_num() {return m_seq_num;} inline void inc_seq_num() {m_seq_num++;} - inline uint32_t get_magic() {return m_magic;} + inline uint32_t get_flow_seq() {return m_flow_seq;} void reset() { m_seq_num = UINT32_MAX - 1; // catch wrap around issues early - m_magic++; + m_flow_seq++; + if (m_flow_seq == FLOW_STAT_PAYLOAD_INITIAL_FLOW_SEQ) + m_flow_seq++; } private: - uint32_t m_seq_num; // seq num to put in packet for payload rules - uint16_t m_magic; // magic to put in packet for payload rules + uint32_t m_seq_num; // seq num to put in packet for payload rules. Increased every packet. + uint16_t m_flow_seq; // Seq num of flow. Changed when we start new flow on this id. }; /* represent the virtual interface diff --git a/src/flow_stat.cpp b/src/flow_stat.cpp index cb7a1bf9..5503434f 100644 --- a/src/flow_stat.cpp +++ b/src/flow_stat.cpp @@ -784,10 +784,6 @@ int CFlowStatRuleMgr::start_stream(TrexStream * stream) { m_parser->set_ip_id(IP_ID_RESERVE_BASE + hw_id); stream->m_rx_check.m_hw_id = hw_id; } else { - struct flow_stat_payload_header *fsp_head = (struct flow_stat_payload_header *) - (stream->m_pkt.binary + stream->m_pkt.len - sizeof(struct flow_stat_payload_header)); - fsp_head->hw_id = hw_id; - fsp_head->magic = FLOW_STAT_PAYLOAD_MAGIC; m_parser->set_ip_id(FLOW_STAT_PAYLOAD_IP_ID); // for payload rules, we use the range right after ip id rules stream->m_rx_check.m_hw_id = hw_id + MAX_FLOW_STATS; diff --git a/src/flow_stat.h b/src/flow_stat.h index a2137198..25d16173 100644 --- a/src/flow_stat.h +++ b/src/flow_stat.h @@ -35,7 +35,8 @@ // Do not change this value. In i350 cards, we filter according to first byte of IP ID // In other places, we identify packets by if (ip_id > IP_ID_RESERVE_BASE) #define IP_ID_RESERVE_BASE 0xff00 -#define FLOW_STAT_PAYLOAD_MAGIC 0xABCD +#define FLOW_STAT_PAYLOAD_MAGIC 0xAB +#define FLOW_STAT_PAYLOAD_INITIAL_FLOW_SEQ 0x01 extern const uint16_t FLOW_STAT_PAYLOAD_IP_ID; typedef std::map flow_stat_map_t; @@ -44,7 +45,8 @@ typedef std::map::iterator flow_stat_map_it_t; class CRxCoreStateless; struct flow_stat_payload_header { - uint16_t magic; + uint8_t magic; + uint8_t flow_seq; uint16_t hw_id; uint32_t seq; uint64_t time_stamp; diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp index 906aa2b7..d132df51 100644 --- a/src/main_dpdk.cpp +++ b/src/main_dpdk.cpp @@ -2055,7 +2055,8 @@ int CCoreEthIFStateless::send_node_flow_stat(rte_mbuf *m, CGenNodeStateless * no mi = node_sl->alloc_flow_stat_mbuf(m, fsp_head, is_const); fsp_head->seq = lp_stats->m_lat_data[hw_id_payload].get_seq_num(); fsp_head->hw_id = hw_id_payload; - fsp_head->magic = lp_stats->m_lat_data[hw_id_payload].get_magic(); + fsp_head->flow_seq = lp_stats->m_lat_data[hw_id_payload].get_flow_seq(); + fsp_head->magic = FLOW_STAT_PAYLOAD_MAGIC; lp_stats->m_lat_data[hw_id_payload].inc_seq_num(); #ifdef ERR_CNTRS_TEST diff --git a/src/stateless/rx/trex_stateless_rx_core.cpp b/src/stateless/rx/trex_stateless_rx_core.cpp index e5831129..0bd601b6 100644 --- a/src/stateless/rx/trex_stateless_rx_core.cpp +++ b/src/stateless/rx/trex_stateless_rx_core.cpp @@ -29,15 +29,15 @@ void CRFC2544Info::create() { m_latency.Create(); - m_exp_magic = 0; - m_prev_magic = 0; + m_exp_flow_seq = 0; + m_prev_flow_seq = 0; reset(); } // after calling stop, packets still arriving will be considered error void CRFC2544Info::stop() { - m_prev_magic = m_exp_magic; - m_exp_magic = FLOW_STAT_PAYLOAD_MAGIC_NONE; + m_prev_flow_seq = m_exp_flow_seq; + m_exp_flow_seq = FLOW_STAT_PAYLOAD_INITIAL_FLOW_SEQ; } void CRFC2544Info::reset() { @@ -195,82 +195,95 @@ void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t * if (parser.get_ip_id(ip_id) == 0) { if (is_flow_stat_id(ip_id)) { uint16_t hw_id; - bool good_packet = true; + if (is_flow_stat_payload_id(ip_id)) { + bool good_packet = true; uint8_t *p = rte_pktmbuf_mtod(m, uint8_t*); struct flow_stat_payload_header *fsp_head = (struct flow_stat_payload_header *) (p + m->pkt_len - sizeof(struct flow_stat_payload_header)); hw_id = fsp_head->hw_id; - CRFC2544Info &curr_rfc2544 = m_rfc2544[hw_id]; - if (unlikely(fsp_head->magic != curr_rfc2544.get_exp_magic())) { - // bad magic. - // Might be the first packet of a new flow, packet from an old flow or just garbage. - if (fsp_head->magic == curr_rfc2544.get_prev_magic()) { - // packet from previous flow using this hw_id that arrived late - good_packet = false; - } else { - if (curr_rfc2544.no_magic()) { - // first packet we see from this flow - good_packet = true; - curr_rfc2544.set_exp_magic(fsp_head->magic); - } else { - // garbage packet + CRFC2544Info *curr_rfc2544; + + if (unlikely(fsp_head->magic != FLOW_STAT_PAYLOAD_MAGIC) || hw_id >= MAX_FLOW_STATS_PAYLOAD) { + good_packet = false; + } else { + curr_rfc2544 = &m_rfc2544[hw_id]; + + if (fsp_head->flow_seq != curr_rfc2544->get_exp_flow_seq()) { + // bad flow seq num + // Might be the first packet of a new flow, packet from an old flow, or garbage. + + if (fsp_head->flow_seq == curr_rfc2544->get_prev_flow_seq()) { + // packet from previous flow using this hw_id that arrived late good_packet = false; + } else { + if (curr_rfc2544->no_flow_seq()) { + // first packet we see from this flow + good_packet = true; + curr_rfc2544->set_exp_flow_seq(fsp_head->flow_seq); + } else { + // garbage packet + good_packet = false; + } } } } if (good_packet) { uint32_t pkt_seq = fsp_head->seq; - uint32_t exp_seq = curr_rfc2544.get_seq(); + uint32_t exp_seq = curr_rfc2544->get_seq(); if (unlikely(pkt_seq != exp_seq)) { if (pkt_seq < exp_seq) { if (exp_seq - pkt_seq > 100000) { // packet loss while we had wrap around - curr_rfc2544.inc_seq_err(pkt_seq - exp_seq); - curr_rfc2544.inc_seq_err_too_big(); - curr_rfc2544.set_seq(pkt_seq + 1); + curr_rfc2544->inc_seq_err(pkt_seq - exp_seq); + curr_rfc2544->inc_seq_err_too_big(); + curr_rfc2544->set_seq(pkt_seq + 1); } else { if (pkt_seq == (exp_seq - 1)) { - curr_rfc2544.inc_dup(); + curr_rfc2544->inc_dup(); } else { - curr_rfc2544.inc_ooo(); + curr_rfc2544->inc_ooo(); // We thought it was lost, but it was just out of order - curr_rfc2544.dec_seq_err(); + curr_rfc2544->dec_seq_err(); } - curr_rfc2544.inc_seq_err_too_low(); + curr_rfc2544->inc_seq_err_too_low(); } } else { if (unlikely (pkt_seq - exp_seq > 100000)) { // packet reorder while we had wrap around if (pkt_seq == (exp_seq - 1)) { - curr_rfc2544.inc_dup(); + curr_rfc2544->inc_dup(); } else { - curr_rfc2544.inc_ooo(); + curr_rfc2544->inc_ooo(); // We thought it was lost, but it was just out of order - curr_rfc2544.dec_seq_err(); + curr_rfc2544->dec_seq_err(); } - curr_rfc2544.inc_seq_err_too_low(); + curr_rfc2544->inc_seq_err_too_low(); } else { - // seq > curr_rfc2544.seq. Assuming lost packets - curr_rfc2544.inc_seq_err(pkt_seq - exp_seq); - curr_rfc2544.inc_seq_err_too_big(); - curr_rfc2544.set_seq(pkt_seq + 1); + // seq > curr_rfc2544->seq. Assuming lost packets + curr_rfc2544->inc_seq_err(pkt_seq - exp_seq); + curr_rfc2544->inc_seq_err_too_big(); + curr_rfc2544->set_seq(pkt_seq + 1); } } } else { - curr_rfc2544.set_seq(pkt_seq + 1); + curr_rfc2544->set_seq(pkt_seq + 1); } lp->m_port.m_rx_pg_stat_payload[hw_id].add_pkts(1); lp->m_port.m_rx_pg_stat_payload[hw_id].add_bytes(m->pkt_len + 4); // +4 for ethernet CRC uint64_t d = (os_get_hr_tick_64() - fsp_head->time_stamp ); dsec_t ctime = ptime_convert_hr_dsec(d); - curr_rfc2544.add_sample(ctime); + curr_rfc2544->add_sample(ctime); } } else { hw_id = get_hw_id(ip_id); - lp->m_port.m_rx_pg_stat[hw_id].add_pkts(1); - lp->m_port.m_rx_pg_stat[hw_id].add_bytes(m->pkt_len + 4); // +4 for ethernet CRC + if (hw_id >= MAX_FLOW_STATS) { + // increase some error counter + } else { + lp->m_port.m_rx_pg_stat[hw_id].add_pkts(1); + lp->m_port.m_rx_pg_stat[hw_id].add_bytes(m->pkt_len + 4); // +4 for ethernet CRC + } } } } diff --git a/src/stateless/rx/trex_stateless_rx_core.h b/src/stateless/rx/trex_stateless_rx_core.h index 140fedf4..209dc29f 100644 --- a/src/stateless/rx/trex_stateless_rx_core.h +++ b/src/stateless/rx/trex_stateless_rx_core.h @@ -77,14 +77,11 @@ class CRFC2544Info { inline void inc_seq_err_too_low() {m_seq_err_events_too_low++;} inline void inc_dup() {m_dup++;} inline void inc_ooo() {m_ooo++;} - inline uint16_t get_exp_magic() {return m_exp_magic;} - inline void set_exp_magic(uint16_t magic) {m_exp_magic = magic;} - inline uint16_t get_prev_magic() {return m_prev_magic;} - inline bool no_magic() {return (m_exp_magic == FLOW_STAT_PAYLOAD_MAGIC_NONE) ? true : false;} + inline uint16_t get_exp_flow_seq() {return m_exp_flow_seq;} + inline void set_exp_flow_seq(uint16_t flow_seq) {m_exp_flow_seq = flow_seq;} + inline uint16_t get_prev_flow_seq() {return m_prev_flow_seq;} + inline bool no_flow_seq() {return (m_exp_flow_seq == FLOW_STAT_PAYLOAD_INITIAL_FLOW_SEQ) ? true : false;} private: - enum payload_e { - FLOW_STAT_PAYLOAD_MAGIC_NONE = 0 - }; uint32_t m_seq; // expected next seq num CTimeHistogram m_latency; // latency info CJitter m_jitter; @@ -93,9 +90,9 @@ class CRFC2544Info { uint64_t m_seq_err_events_too_low; // How many packet seq num lower than expected events we had uint64_t m_ooo; // Packets we got with seq num lower than expected (We guess they are out of order) uint64_t m_dup; // Packets we got with same seq num - uint16_t m_exp_magic; // magic number we should see in latency header - // magic number previously used with this id. We use this to catch packets arriving late from old flow - uint16_t m_prev_magic; + uint16_t m_exp_flow_seq; // flow sequence number we should see in latency header + // flow sequence number previously used with this id. We use this to catch packets arriving late from an old flow + uint16_t m_prev_flow_seq; }; class CRxCoreStateless { -- cgit 1.2.3-korg