diff options
author | imarom <imarom@cisco.com> | 2017-01-22 16:20:45 +0200 |
---|---|---|
committer | imarom <imarom@cisco.com> | 2017-01-22 16:20:45 +0200 |
commit | 904eacd9be1230efb7ae0ab7997ec131b588ec8a (patch) | |
tree | 8e4bcd1b1a5f683efdb8f3eeb962acefc3201961 /src | |
parent | d2f1c8451e2e8ffc47b208f68f9b16697d706d60 (diff) | |
parent | b81cdb6c2d6d118c1c346e7c8dae6a5e747d867d (diff) |
Merge branch 'master' into capture
Signed-off-by: imarom <imarom@cisco.com>
Conflicts:
scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_client.py
scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_jsonrpc_client.py
scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_port.py
src/main_dpdk.cpp
Diffstat (limited to 'src')
45 files changed, 2752 insertions, 1385 deletions
diff --git a/src/bp_gtest.cpp b/src/bp_gtest.cpp index 4c04dde9..57cf2ffa 100755 --- a/src/bp_gtest.cpp +++ b/src/bp_gtest.cpp @@ -20,6 +20,7 @@ limitations under the License. */ #include "bp_sim.h" +#include <stdlib.h> #include <common/gtest.h> #include <common/basic_utils.h> #include "utl_cpuu.h" @@ -34,6 +35,7 @@ limitations under the License. #include "stateful_rx_core.h" #include "nat_check_flow_table.h" #include "utl_ipg_bucket.h" +#include "bp_gtest.h" int test_policer(){ CPolicer policer; @@ -70,9 +72,9 @@ int test_priorty_queue(void){ for (i=0; i<10; i++) { node = new CGenNode(); printf(" +%p \n",node); - node->m_flow_id = 10-i; - node->m_pkt_info = (CFlowPktInfo *)(uintptr_t)i; - node->m_time = (double)i+0.1; + node->m_flow_id = 10-i; + node->m_pkt_info = (CFlowPktInfo *)(uintptr_t)i; + node->m_time = (double)i+0.1; p_queue.push(node); } while (!p_queue.empty()) { @@ -113,143 +115,8 @@ int test_human_p(){ return (0); } - - - - - -#define EXPECT_EQ_UINT32(a,b) EXPECT_EQ((uint32_t)(a),(uint32_t)(b)) - - -class CTestBasic { - -public: - CTestBasic(){ - m_threads=1; - m_time_diff=0.001; - m_req_ports=0; - m_dump_json=false; - } - - bool init(void){ - - uint16 * ports = NULL; - CTupleBase tuple; - - CErfIF erf_vif; - - - fl.Create(); - m_saved_packet_padd_offset=0; - - fl.load_from_yaml(CGlobalInfo::m_options.cfg_file,m_threads); - fl.generate_p_thread_info(m_threads); - - CFlowGenListPerThread * lpt; - - fl.m_threads_info[0]->set_vif(&erf_vif); - - - CErfCmp cmp; - cmp.dump=1; - - bool res=true; - - - int i; - for (i=0; i<m_threads; i++) { - lpt=fl.m_threads_info[i]; - - CFlowPktInfo * pkt=lpt->m_cap_gen[0]->m_flow_info->GetPacket(0); - m_saved_packet_padd_offset =pkt->m_pkt_indication.m_packet_padding; - - char buf[100]; - char buf_ex[100]; - sprintf(buf,"%s-%d.erf",CGlobalInfo::m_options.out_file.c_str(),i); - sprintf(buf_ex,"%s-%d-ex.erf",CGlobalInfo::m_options.out_file.c_str(),i); - - if ( m_req_ports ){ - /* generate from first template m_req_ports ports 
*/ - int i; - CTupleTemplateGeneratorSmart * lpg=&lpt->m_cap_gen[0]->tuple_gen; - ports = new uint16_t[m_req_ports]; - lpg->GenerateTuple(tuple); - for (i=0 ; i<m_req_ports;i++) { - ports[i]=lpg->GenerateOneSourcePort(); - } - } - CGlobalInfo::m_options.m_run_mode = CParserOption::RUN_MODE_BATCH; - lpt->start_generate_stateful(buf,CGlobalInfo::m_options.preview); - lpt->m_node_gen.DumpHist(stdout); - - cmp.d_sec = m_time_diff; - //compare - if ( cmp.compare(std::string(buf),std::string(buf_ex)) != true ) { - res=false; - } - - } - if ( m_dump_json ){ - printf(" dump json ...........\n"); - std::string s; - fl.m_threads_info[0]->m_node_gen.dump_json(s); - printf(" %s \n",s.c_str()); - } - - if ( m_req_ports ){ - int i; - fl.m_threads_info[0]->m_smart_gen.FreePort(0, tuple.getClientId(),tuple.getClientPort()); - - for (i=0 ; i<m_req_ports;i++) { - fl.m_threads_info[0]->m_smart_gen.FreePort(0,tuple.getClientId(),ports[i]); - } - delete []ports; - } - - printf(" active %d \n", fl.m_threads_info[0]->m_smart_gen.ActiveSockets()); - EXPECT_EQ_UINT32(fl.m_threads_info[0]->m_smart_gen.ActiveSockets(),0); - fl.Delete(); - return (res); - } - - uint16_t get_padd_offset_first_packet(){ - return (m_saved_packet_padd_offset); - - } - - - -public: - int m_req_ports; - int m_threads; - double m_time_diff; - bool m_dump_json; - uint16_t m_saved_packet_padd_offset; - CFlowGenList fl; -}; - - - - -class basic : public testing::Test { - protected: - virtual void SetUp() { - } - virtual void TearDown() { - } -public: -}; - -class cpu : public testing::Test { - protected: - virtual void SetUp() { - } - virtual void TearDown() { - } -public: -}; - - +class basic : public trexTest {}; +class cpu : public trexTest {}; TEST_F(basic, limit_single_pkt) { @@ -273,7 +140,7 @@ TEST_F(basic, limit_multi_pkt) { po->out_file ="exp/limit_multi_pkt"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, imix) { @@ -390,7 +257,6 @@ TEST_F(basic, dns_ipv6) { bool res=t1.init(); 
EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; EXPECT_EQ_UINT32(t1.get_padd_offset_first_packet(),0); - po->preview.set_ipv6_mode_enable(false); } TEST_F(basic, dns_json) { @@ -562,7 +428,6 @@ TEST_F(basic, ipv6_convert) { po->out_file ="exp/imix_v6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); } TEST_F(basic, ipv6) { @@ -576,7 +441,6 @@ TEST_F(basic, ipv6) { po->out_file ="exp/ipv6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); } TEST_F(basic, ipv4_vlan) { @@ -602,7 +466,6 @@ TEST_F(basic, ipv6_vlan) { po->out_file ="exp/ipv6_vlan"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); } @@ -615,7 +478,7 @@ TEST_F(basic, test_pcap_mode1) { po->preview.setFileWrite(true); po->cfg_file ="cap2/test_pcap_mode1.yaml"; po->out_file ="exp/pcap_mode1"; - t1.m_time_diff = 0.000005; // 5 nsec + t1.m_time_diff = 0.000005; // 5 nsec bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; } @@ -629,7 +492,7 @@ TEST_F(basic, test_pcap_mode2) { po->preview.setFileWrite(true); po->cfg_file ="cap2/test_pcap_mode2.yaml"; po->out_file ="exp/pcap_mode2"; - t1.m_time_diff = 0.000005; // 5 nsec + t1.m_time_diff = 0.000005; // 5 nsec bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; } @@ -682,10 +545,10 @@ bool verify_latency_pkt(uint8_t *p, uint8_t proto, uint16_t icmp_seq, uint8_t icmp_type) { EthernetHeader *eth = (EthernetHeader *)p; IPHeader *ip = (IPHeader *)(p + 14); - uint8_t srcmac[]={0x10,0x10,0x10,0x10,0x10,0x10}; + uint8_t srcmac[]={0x10,0x10,0x10,0x10,0x10,0x10}; //uint8_t dstmac[]={0x0,0x0,0x0,0x0,0x0,0x0}; latency_header * h; - + // eth EXPECT_EQ_UINT32(eth->getNextProtocol(), 0x0800)<< "Failed ethernet next protocol check"; EXPECT_EQ_UINT32(memcmp(p, srcmac, 6), 0)<< "Failed ethernet source MAC check"; @@ -697,7 +560,7 @@ verify_latency_pkt(uint8_t *p, uint8_t proto, uint16_t icmp_seq, uint8_t icmp_ty 
EXPECT_EQ_UINT32(ip->isChecksumOK()?0:1, 0)<< "Failed IP checksum check"; EXPECT_EQ_UINT32(ip->getTimeToLive(), 0xff)<< "Failed IP ttl check"; EXPECT_EQ_UINT32(ip->getTotalLength(), 48)<< "Failed IP total length check"; - + // payload h=(latency_header *)(p+42); EXPECT_EQ_UINT32(h->magic, LATENCY_MAGIC)<< "Failed latency magic check"; @@ -838,7 +701,7 @@ public: virtual int tx_latency(rte_mbuf_t *m) { return tx(m); } - + virtual rte_mbuf_t * rx(){ //printf(" rx on port %d \n",m_port_id); rte_mbuf_t * m=0; @@ -853,7 +716,7 @@ public: return ( m ); } - virtual uint16_t rx_burst(struct rte_mbuf **rx_pkts, + virtual uint16_t rx_burst(struct rte_mbuf **rx_pkts, uint16_t nb_pkts){ //printf(" rx on port %d \n",m_port_id); rte_mbuf_t * m=rx(); @@ -935,7 +798,7 @@ TEST_F(basic, rtsp1) { po->out_file ="exp/rtsp_short1"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, rtsp2) { @@ -947,7 +810,7 @@ TEST_F(basic, rtsp2) { po->out_file ="exp/rtsp_short2"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, rtsp3) { @@ -960,7 +823,7 @@ TEST_F(basic, rtsp3) { t1.m_req_ports = 32000; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, rtsp1_ipv6) { @@ -974,8 +837,7 @@ TEST_F(basic, rtsp1_ipv6) { po->out_file ="exp/rtsp_short1_v6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} TEST_F(basic, rtsp2_ipv6) { @@ -988,8 +850,7 @@ TEST_F(basic, rtsp2_ipv6) { po->out_file ="exp/rtsp_short2_v6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} TEST_F(basic, rtsp3_ipv6) { @@ -1003,8 +864,7 @@ TEST_F(basic, rtsp3_ipv6) { t1.m_req_ports = 32000; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} TEST_F(basic, sip1) { @@ -1017,7 +877,7 @@ TEST_F(basic, sip1) { po->out_file ="exp/sip_short1"; bool res=t1.init(); EXPECT_EQ_UINT32(1, 
res?1:0)<< "pass"; -} +} TEST_F(basic, sip2) { @@ -1030,7 +890,7 @@ TEST_F(basic, sip2) { po->out_file ="exp/sip_short2"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, sip3) { @@ -1043,7 +903,7 @@ TEST_F(basic, sip3) { t1.m_req_ports = 32000; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, sip1_ipv6) { @@ -1057,8 +917,7 @@ TEST_F(basic, sip1_ipv6) { po->out_file ="exp/sip_short1_v6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} TEST_F(basic, sip2_ipv6) { @@ -1072,8 +931,7 @@ TEST_F(basic, sip2_ipv6) { po->out_file ="exp/sip_short2_v6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} TEST_F(basic, sip3_ipv6) { @@ -1087,21 +945,21 @@ TEST_F(basic, sip3_ipv6) { t1.m_req_ports = 32000; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} TEST_F(basic, dyn1) { CTestBasic t1; CParserOption * po =&CGlobalInfo::m_options; + srand(1); po->preview.setVMode(3); po->preview.setFileWrite(true); po->cfg_file ="cap2/dyn_pyld1.yaml"; po->out_file ="exp/dyn_pyld1"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, http1) { @@ -1113,7 +971,7 @@ TEST_F(basic, http1) { po->out_file ="exp/http_plugin"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; -} +} TEST_F(basic, http1_ipv6) { @@ -1126,8 +984,7 @@ TEST_F(basic, http1_ipv6) { po->out_file ="exp/http_plugin_v6"; bool res=t1.init(); EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; - po->preview.set_ipv6_mode_enable(false); -} +} @@ -1194,7 +1051,7 @@ TEST_F(cpu, cpu3) { printf(" cpu %2.0f \n",c1); int s=( c1<11 && c1>8)?1:0; EXPECT_EQ(s,1); - } + } delay(1); if ((i%10)==1) { cpu_dp.commit(); @@ -1206,35 +1063,25 @@ TEST_F(cpu, cpu3) { } #endif - -class timerwl : public testing::Test { - protected: - virtual void SetUp() { - } - virtual void TearDown() { - } -public: 
-}; - - +class timerwl : public trexTest {}; void flow_callback(CFlowTimerHandle * timer_handle); class CTestFlow { public: - CTestFlow(){ - flow_id = 0; + CTestFlow(){ + flow_id = 0; m_timer_handle.m_callback=flow_callback; - m_timer_handle.m_object = (void *)this; - m_timer_handle.m_id = 0x1234; - } + m_timer_handle.m_object = (void *)this; + m_timer_handle.m_id = 0x1234; + } - uint32_t flow_id; - CFlowTimerHandle m_timer_handle; + uint32_t flow_id; + CFlowTimerHandle m_timer_handle; public: - void OnTimeOut(){ + void OnTimeOut(){ printf(" timeout %d \n",flow_id); - } + } }; void flow_callback(CFlowTimerHandle * t){ @@ -1384,7 +1231,7 @@ TEST_F(timerwl, many_timers) { CTimerWheel my_tw; - int i; + int i; for (i=0; i<100; i++) { CTestFlow * f= new CTestFlow(); f->m_timer_handle.m_callback=many_timers_flow_callback; @@ -1393,17 +1240,17 @@ TEST_F(timerwl, many_timers) { } many_timers_flow_id=99; - double time; + double time; double ex_time=1.0; while (true) { if ( my_tw.peek_top_time(time) ){ assert(time==ex_time); ex_time+=1.0; assert(my_tw.handle()); - } - else{ - break; - } + } + else{ + break; + } } my_tw.Dump(stdout); @@ -1412,7 +1259,7 @@ TEST_F(timerwl, many_timers) { EXPECT_EQ(my_tw.m_st_alloc ,100); EXPECT_EQ(my_tw.m_st_free ,100); EXPECT_EQ(my_tw.m_st_start ,100); - + } void many_timers_stop_flow_callback(CFlowTimerHandle * t){ @@ -1455,13 +1302,12 @@ TEST_F(timerwl, many_timers_with_stop) { EXPECT_EQ(my_tw.m_st_start ,300); } - ////////////////////////////////////////////// -class rx_check : public testing::Test { +class rx_check : public trexTest { protected: virtual void SetUp() { + trexTest::SetUp(); m_rx_check.Create(); - } virtual void TearDown() { m_rx_check.Delete(); @@ -1539,100 +1385,100 @@ TEST_F(rx_check, rx_check_drop) { TEST_F(rx_check, rx_check_ooo) { - m_rx_check.Create(); - int i; + m_rx_check.Create(); + int i; - for (i=0; i<10; i++) { - CRx_check_header rxh; + for (i=0; i<10; i++) { + CRx_check_header rxh; rxh.clean(); 
rxh.m_option_type=RX_CHECK_V4_OPT_TYPE; rxh.m_option_len=RX_CHECK_V4_OPT_LEN; - rxh.m_time_stamp=0; - rxh.m_magic=RX_CHECK_MAGIC; + rxh.m_time_stamp=0; + rxh.m_magic=RX_CHECK_MAGIC; rxh.m_aging_sec=10; rxh.set_dir(0); rxh.set_both_dir(0); - /* out of order */ - if (i==4) { - rxh.m_pkt_id=5; - }else{ - if (i==5) { - rxh.m_pkt_id=4; - }else{ - rxh.m_pkt_id=i; - } - } + /* out of order */ + if (i==4) { + rxh.m_pkt_id=5; + }else{ + if (i==5) { + rxh.m_pkt_id=4; + }else{ + rxh.m_pkt_id=i; + } + } - rxh.m_flow_size=10; + rxh.m_flow_size=10; - rxh.m_flow_id=7; + rxh.m_flow_id=7; - rxh.m_flags=0; + rxh.m_flags=0; m_rx_check.handle_packet(&rxh); - } - m_rx_check.tw_drain(); + } + m_rx_check.tw_drain(); EXPECT_EQ(m_rx_check.m_stats.m_err_oo_early,1); EXPECT_EQ(m_rx_check.m_stats.m_err_oo_late,2); - m_rx_check.Dump(stdout); + m_rx_check.Dump(stdout); } TEST_F(rx_check, rx_check_ooo_1) { - int i; + int i; - for (i=0; i<10; i++) { - CRx_check_header rxh; + for (i=0; i<10; i++) { + CRx_check_header rxh; rxh.clean(); rxh.m_option_type=RX_CHECK_V4_OPT_TYPE; rxh.m_option_len=RX_CHECK_V4_OPT_LEN; - rxh.m_time_stamp=0; + rxh.m_time_stamp=0; rxh.set_dir(0); rxh.set_both_dir(0); - rxh.m_magic=RX_CHECK_MAGIC; + rxh.m_magic=RX_CHECK_MAGIC; rxh.m_aging_sec=10; - /* out of order */ - if (i==4) { - rxh.m_pkt_id=56565; - }else{ - if (i==5) { - rxh.m_pkt_id=4; - }else{ - rxh.m_pkt_id=i; - } - } - rxh.m_flow_size=10; - rxh.m_flow_id=7; - rxh.m_flags=0; - m_rx_check.handle_packet(&rxh); - } - m_rx_check.tw_drain(); + /* out of order */ + if (i==4) { + rxh.m_pkt_id=56565; + }else{ + if (i==5) { + rxh.m_pkt_id=4; + }else{ + rxh.m_pkt_id=i; + } + } + rxh.m_flow_size=10; + rxh.m_flow_id=7; + rxh.m_flags=0; + m_rx_check.handle_packet(&rxh); + } + m_rx_check.tw_drain(); EXPECT_EQ(m_rx_check.m_stats.m_err_wrong_pkt_id,1); EXPECT_EQ(m_rx_check.m_stats.m_err_oo_late,1); - m_rx_check.Dump(stdout); + m_rx_check.Dump(stdout); } // start without first packet ( not FIF */ TEST_F(rx_check, rx_check_ooo_2) { - 
int i; + int i; - for (i=0; i<10; i++) { - CRx_check_header rxh; + for (i=0; i<10; i++) { + CRx_check_header rxh; rxh.clean(); rxh.m_option_type=RX_CHECK_V4_OPT_TYPE; rxh.m_option_len=RX_CHECK_V4_OPT_LEN; - rxh.m_time_stamp=0; - rxh.m_magic=RX_CHECK_MAGIC; + rxh.m_time_stamp=0; + rxh.m_magic=RX_CHECK_MAGIC; rxh.m_aging_sec=10; - /* out of order */ + /* out of order */ rxh.set_dir(0); rxh.set_both_dir(0); @@ -1647,15 +1493,15 @@ TEST_F(rx_check, rx_check_ooo_2) { } } - rxh.m_flow_size=10; - rxh.m_flow_id=7; - rxh.m_flags=0; - m_rx_check.handle_packet(&rxh); - } - m_rx_check.tw_drain(); + rxh.m_flow_size=10; + rxh.m_flow_id=7; + rxh.m_flags=0; + m_rx_check.handle_packet(&rxh); + } + m_rx_check.tw_drain(); EXPECT_EQ(m_rx_check.m_stats.m_err_open_with_no_fif_pkt,1); EXPECT_EQ(m_rx_check.m_stats. m_err_oo_late,1); - m_rx_check.Dump(stdout); + m_rx_check.Dump(stdout); } @@ -1997,9 +1843,8 @@ TEST_F(rx_check, rx_check_normal_no_aging) { EXPECT_EQ(m_rx_check.m_stats.m_remove,0); } - /////////////////////////////////////////////////////////////// -// check the generation of template and check sample of it +// check the generation of template and check sample of it class CRxCheckCallbackBase { @@ -2035,10 +1880,10 @@ public: /** * send one packet - * + * * @param node - * - * @return + * + * @return */ virtual int send_node(CGenNode * node); @@ -2049,9 +1894,9 @@ public: /** - * flush all pending packets into the stream - * - * @return + * flush all pending packets into the stream + * + * @return */ virtual int flush_tx_queue(void){ return (0); @@ -2086,16 +1931,16 @@ int CRxCheckIF::send_node(CGenNode * node){ m_raw->time_nsec = t_c.m_time_nsec; m_raw->time_sec = t_c.m_time_sec; m_raw->setInterface(node->m_pkt_info->m_pkt_indication.m_desc.IsInitSide()); - + if (m_store_pcfg) { erf_vif.write_pkt(m_raw); } - + if ((m_callback) && (node->is_rx_check_enabled()) ) { m_callback->handle_packet(m); } - // just free it + // just free it rte_pktmbuf_free(m); return (0); } @@ 
-2148,18 +1993,18 @@ public: }; -class rx_check_system : public testing::Test { +class rx_check_system : public trexTest { protected: virtual void SetUp() { - - m_rx_check.m_callback=&m_callback; - m_callback.mg =&m_mg; + trexTest::SetUp(); + m_rx_check.m_callback = &m_callback; + m_callback.mg = &m_mg; m_mg.Create(); CParserOption * po =&CGlobalInfo::m_options; po->preview.setVMode(0); po->preview.setFileWrite(true); po->preview.set_rx_check_enable(true); - + po->m_run_mode = CParserOption::RUN_MODE_BATCH; } virtual void TearDown() { @@ -2174,7 +2019,7 @@ public: }; -// check DNS yaml with sample of 1/2 check that there is no errors +// check DNS yaml with sample of 1/2 check that there is no errors TEST_F(rx_check_system, rx_system1) { m_rxcs.lpVf=&m_rx_check; @@ -2192,7 +2037,7 @@ TEST_F(rx_check_system, rx_system1) { EXPECT_EQ(m_mg.m_stats.get_total_err(),0); } -// check DNS with rxcheck and write results out to capture file +// check DNS with rxcheck and write results out to capture file TEST_F(rx_check_system, rx_system1_dns) { m_rxcs.lpVf=&m_rx_check; @@ -2215,7 +2060,7 @@ TEST_F(rx_check_system, rx_system1_dns) { EXPECT_EQ(cmp.compare("exp/dns_rxcheck.erf","exp/dns_rxcheck-ex.erf"),true); } -// check DNS yaml with sample of 1/4 using IPv6 packets +// check DNS yaml with sample of 1/4 using IPv6 packets TEST_F(rx_check_system, rx_system1_ipv6) { m_rxcs.lpVf=&m_rx_check; @@ -2236,7 +2081,7 @@ TEST_F(rx_check_system, rx_system1_ipv6) { } // check DNS with rxcheck using IPv6 packets -// and write results out to capture file +// and write results out to capture file TEST_F(rx_check_system, rx_system1_dns_ipv6) { m_rxcs.lpVf=&m_rx_check; @@ -2278,7 +2123,7 @@ TEST_F(rx_check_system, rx_system2_plugin_one_dir) { EXPECT_EQ(m_mg.m_stats.get_total_err(),0); } -// check HTTP with rxcheck and write results out to capture file +// check HTTP with rxcheck and write results out to capture file TEST_F(rx_check_system, rx_system2_plugin) { m_rxcs.lpVf=&m_rx_check; @@ 
-2302,7 +2147,7 @@ TEST_F(rx_check_system, rx_system2_plugin) { } // check DNS with rxcheck using IPv6 packets -// and write results out to capture file +// and write results out to capture file TEST_F(rx_check_system, rx_system2_plugin_ipv6) { m_rxcs.lpVf=&m_rx_check; @@ -2429,8 +2274,8 @@ public: /* ip option packet */ printf(" rx got ip option packet ! \n"); mg->handle_packet_ipv4(option, ipv4, true); - delay(10); // delay for queue flush - mg->handle_aging(); // flush the RxRing + delay(10); // delay for queue flush + mg->handle_aging(); // flush the RxRing } } CNatRxManager * mg; @@ -2438,9 +2283,10 @@ public: -class nat_check_system : public testing::Test { +class nat_check_system : public trexTest { protected: virtual void SetUp() { + trexTest::SetUp(); m_rx_check.m_callback=&m_callback; m_callback.mg =&m_mg; m_mg.Create(); @@ -2482,10 +2328,11 @@ TEST_F(nat_check_system, nat_system1) { ////////////////////////////////////////////////////////////// -class file_flow_info : public testing::Test { +class file_flow_info : public trexTest { protected: virtual void SetUp() { + trexTest::SetUp(); assert(m_flow_info.Create()); } @@ -2688,10 +2535,11 @@ TEST_F(file_flow_info, load_cap_file_errors) { ////////////////////////////////////////////////////////////// -class time_histogram : public testing::Test { +class time_histogram : public trexTest { protected: virtual void SetUp() { + trexTest::SetUp(); m_hist.Create(); } @@ -2716,7 +2564,7 @@ TEST_F(time_histogram, test_average) { EXPECT_EQ(m_hist.get_high_count(), 2001 * (j+1) - (11 * (j+1))); EXPECT_EQ(m_hist.get_max_latency(), 2000); } - + m_hist.Dump(stdout); } @@ -2739,35 +2587,16 @@ TEST_F(time_histogram, test_json) { printf(" %s \n",json.c_str()); } - - -class gt_jitter : public testing::Test { - -protected: - virtual void SetUp() { - } - - virtual void TearDown() { - } +class gt_jitter : public trexTest { public: CJitter m_jitter; }; - -class gt_jitter_uint : public testing::Test { - -protected: - virtual 
void SetUp() { - } - - virtual void TearDown() { - } +class gt_jitter_uint : public trexTest { public: CJitterUint m_jitter; }; - - TEST_F(gt_jitter, jitter1) { int i; double a=0.000030; @@ -2796,17 +2625,7 @@ TEST_F(gt_jitter_uint, jitter2) { EXPECT_EQ((uint32_t)(m_jitter.get_jitter()), 19); } - -class gt_ring : public testing::Test { - -protected: - virtual void SetUp() { - } - - virtual void TearDown() { - } -public: -}; +class gt_ring : public trexTest {}; TEST_F(gt_ring, ring1) { @@ -2902,32 +2721,8 @@ TEST_F(gt_ring, ring3) { my_map.Delete(); } - -class gt_conf : public testing::Test { - -protected: - virtual void SetUp() { - } - - virtual void TearDown() { - } -public: -}; - - - -class ipg_calc : public testing::Test { - protected: - virtual void SetUp() { - } - virtual void TearDown() { - } -public: -}; - - - - +class gt_conf : public trexTest {}; +class ipg_calc : public trexTest {}; TEST_F(ipg_calc, test1) { @@ -2963,4 +2758,3 @@ TEST_F(ipg_calc, test3) { EXPECT_EQ(ticks,UINT32_MAX); } } - diff --git a/src/bp_gtest.h b/src/bp_gtest.h new file mode 100644 index 00000000..955b781e --- /dev/null +++ b/src/bp_gtest.h @@ -0,0 +1,138 @@ +/* +Copyright (c) 2017-2017 Cisco Systems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef _BP_GTEST_H_ +#define _BP_GTEST_H_ + +#define EXPECT_EQ_UINT32(a,b) EXPECT_EQ((uint32_t)(a),(uint32_t)(b)) + +class trexTest : public testing::Test { + protected: + virtual void SetUp() { + CGlobalInfo::m_options.reset(); + } + virtual void TearDown() { + } +public: +}; + +class CTestBasic { + +public: + CTestBasic() { + m_threads=1; + m_time_diff=0.001; + m_req_ports=0; + m_dump_json=false; + } + + bool init(void) { + uint16 * ports = NULL; + CTupleBase tuple; + CErfIF erf_vif; + + fl.Create(); + m_saved_packet_padd_offset=0; + fl.load_from_yaml(CGlobalInfo::m_options.cfg_file,m_threads); + + if (CGlobalInfo::m_options.client_cfg_file != "") { + try { + fl.load_client_config_file(CGlobalInfo::m_options.client_cfg_file); + // The simulator only test MAC address configs, so this parameter is not used + CManyIPInfo pretest_result; + fl.set_client_config_resolved_macs(pretest_result); + } catch (const std::runtime_error &e) { + std::cout << "\n*** " << e.what() << "\n\n"; + exit(-1); + } + CGlobalInfo::m_options.preview.set_client_cfg_enable(true); + } + + fl.generate_p_thread_info(m_threads); + fl.m_threads_info[0]->set_vif(&erf_vif); + CErfCmp cmp; + cmp.dump = 1; + bool res = true; + int i; + CFlowGenListPerThread * lpt; + + for (i=0; i<m_threads; i++) { + lpt=fl.m_threads_info[i]; + + CFlowPktInfo * pkt=lpt->m_cap_gen[0]->m_flow_info->GetPacket(0); + m_saved_packet_padd_offset = pkt->m_pkt_indication.m_packet_padding; + + char buf[100]; + char buf_ex[100]; + sprintf(buf,"%s-%d.erf", CGlobalInfo::m_options.out_file.c_str(), i); + sprintf(buf_ex,"%s-%d-ex.erf", CGlobalInfo::m_options.out_file.c_str(), i); + + if ( m_req_ports ) { + /* generate from first template m_req_ports ports */ + int i; + CTupleTemplateGeneratorSmart * lpg=&lpt->m_cap_gen[0]->tuple_gen; + ports = new uint16_t[m_req_ports]; + lpg->GenerateTuple(tuple); + for (i=0 ; i<m_req_ports;i++) { + ports[i]=lpg->GenerateOneSourcePort(); + } + } + CGlobalInfo::m_options.m_run_mode = 
CParserOption::RUN_MODE_BATCH; + lpt->start_generate_stateful(buf,CGlobalInfo::m_options.preview); + lpt->m_node_gen.DumpHist(stdout); + cmp.d_sec = m_time_diff; + //compare generated file to expected file + if ( cmp.compare(std::string(buf), std::string(buf_ex)) != true ) { + res=false; + } + } + + if ( m_dump_json ) { + printf(" dump json ...........\n"); + std::string s; + fl.m_threads_info[0]->m_node_gen.dump_json(s); + printf(" %s \n",s.c_str()); + } + + if ( m_req_ports ) { + int i; + fl.m_threads_info[0]->m_smart_gen.FreePort(0, tuple.getClientId(),tuple.getClientPort()); + for (i=0 ; i < m_req_ports; i++) { + fl.m_threads_info[0]->m_smart_gen.FreePort(0,tuple.getClientId(),ports[i]); + } + delete []ports; + } + + printf(" active %d \n", fl.m_threads_info[0]->m_smart_gen.ActiveSockets()); + EXPECT_EQ_UINT32(fl.m_threads_info[0]->m_smart_gen.ActiveSockets(),0); + fl.Delete(); + return (res); + } + + uint16_t get_padd_offset_first_packet() { + return (m_saved_packet_padd_offset); + } + +public: + int m_req_ports; + int m_threads; + double m_time_diff; + bool m_dump_json; + uint16_t m_saved_packet_padd_offset; + CFlowGenList fl; +}; + +#endif diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp index 938d8f65..aa80c475 100755 --- a/src/bp_sim.cpp +++ b/src/bp_sim.cpp @@ -2349,7 +2349,7 @@ enum CCapFileFlowInfo::load_cap_file_err CCapFileFlowInfo::load_cap_file(std::st pkt_indication.m_desc.SetId(_id); bool is_fif; CFlow * lpflow=flow.process(pkt_indication.m_flow_key,is_fif); - m_total_bytes += pkt_indication.m_packet->pkt_len; + m_total_bytes += (pkt_indication.m_packet->pkt_len+4); /* L2 include CRC*/ pkt_indication.m_cap_ipg = raw_packet.get_time(); pkt_indication.m_flow =lpflow; @@ -3413,7 +3413,7 @@ bool CNodeGenerator::Create(CFlowGenListPerThread * parent){ m_socket_id =0; m_realtime_his.Create(); m_last_sync_time_sec = 0; - + m_tw_level1_next_sec = 0; return(true); } @@ -3527,7 +3527,7 @@ bool CFlowGenListPerThread::Create(uint32_t thread_id, 0 , socket_id); - 
RC_HTW_t tw_res=m_tw.Create(TW_BUCKETS,TW_LEVELS); + RC_HTW_t tw_res=m_tw.Create(TW_BUCKETS,TW_BUCKETS_LEVEL1_DIV); if (tw_res != RC_HTW_OK){ CHTimerWheelErrorStr err(tw_res); printf("Timer wheel configuration error,please look into the manual for details \n"); @@ -3934,7 +3934,7 @@ inline bool CNodeGenerator::do_work_both(CGenNode * node, /* update bucket time */ thread->m_cur_time_sec = node->m_time; if ( ON_TERMINATE ) { - thread->m_tw.on_tick((void*)thread,tw_on_tick_per_thread_cb_always); + thread->m_tw.on_tick_level0((void*)thread,tw_on_tick_per_thread_cb_always); if ( thread->m_tw.is_any_events_left() ){ node->m_time += BUCKET_TIME_SEC; m_p_queue.push(node); @@ -3942,7 +3942,7 @@ inline bool CNodeGenerator::do_work_both(CGenNode * node, thread->free_node(node); } }else{ - thread->m_tw.on_tick((void*)thread,tw_on_tick_per_thread_cb); + thread->m_tw.on_tick_level0((void*)thread,tw_on_tick_per_thread_cb); node->m_time += BUCKET_TIME_SEC;; m_p_queue.push(node); } @@ -4058,6 +4058,7 @@ inline int CNodeGenerator::flush_file_realtime(dsec_t max_time, }else{ add_exit_node(thread,max_time); } + m_scheduler_offset = offset; thread->m_cpu_dp_u.start_work1(); @@ -4169,6 +4170,8 @@ void CNodeGenerator::handle_time_strech(CGenNode * &node, /* fix the time offset */ dsec_t dt = cur_time - n_time; offset += dt; + /* set new offset */ + m_scheduler_offset = offset; /* check if flow sync message was delayed too much */ if ( (cur_time - m_last_sync_time_sec) > SYNC_TIME_OUT ) { @@ -4242,6 +4245,59 @@ int CNodeGenerator::flush_file(dsec_t max_time, } +void CNodeGenerator::handle_batch_tw_level1(CGenNode *node, + CFlowGenListPerThread *thread, + bool &exit_scheduler, + bool on_terminate) { + + m_p_queue.pop(); + /* update bucket time */ + thread->m_cur_time_sec = node->m_time; + + bool stop_loop=false; + + while (!stop_loop) { + na_htw_state_num_t tw_state = thread->m_tw.on_tick_level1((void*)thread,tw_on_tick_per_thread_cb); + if ( (tw_state == TW_FIRST_FINISH) || (tw_state == 
TW_FIRST_FINISH_ANY)){ + node->m_time += BUCKET_TIME_SEC_LEVEL1; + stop_loop=true; + }else{ + switch (tw_state) { + case TW_FIRST_BATCH: + m_tw_level1_next_sec = node->m_time + BUCKET_TIME_SEC_LEVEL1; + node->m_time = now_sec()-m_scheduler_offset; /* spread if we can */ + if (m_tw_level1_next_sec+m_scheduler_offset > now_sec() ) { + stop_loop=true; + } + break; + case TW_NEXT_BATCH : + node->m_time = now_sec()-m_scheduler_offset; /* spread if we can */ + if (m_tw_level1_next_sec+m_scheduler_offset > now_sec() ) { + stop_loop=true; + } + break; + case TW_END_BATCH: + if (m_tw_level1_next_sec+m_scheduler_offset > now_sec() ) { + node->m_time = m_tw_level1_next_sec; + }else{ + node->m_time = m_tw_level1_next_sec; /* too late but we don't have anyting to do */ + } + stop_loop=true; + break; + default: + assert(0); + }; + } + } + + if ( on_terminate && + (thread->m_tw.is_any_events_left()==false) ){ + thread->free_node(node); + }else{ + m_p_queue.push(node); + } +} + void CNodeGenerator::handle_flow_pkt(CGenNode *node, CFlowGenListPerThread *thread) { @@ -4387,6 +4443,10 @@ CNodeGenerator::handle_slow_messages(uint8_t type, handle_command(node, thread, exit_scheduler); break; + case CGenNode::TW_SYNC1: + handle_batch_tw_level1(node, thread, exit_scheduler,on_terminate); + break; + default: assert(0); } @@ -4677,6 +4737,26 @@ void CFlowGenListPerThread::handle_nat_msg(CGenNodeNatInfo * msg){ } } + +void CFlowGenListPerThread::no_memory_error(){ + printf("--------\n"); + printf("\n"); + printf("\n"); + printf("ERROR, not enough flow objects, try to enlarge the number of objects in trex_cfg file or reduce the bandwidth \n"); + printf("See in the manual how to enlarge the number of objects.\n"); + printf("\n"); + printf("\n"); + printf(" Check your active flows, 'Active-flows : 6771863', If it too high reduce the multiplier \n"); + printf(" or use --active-flows directive to reduce the number of flows\n"); + printf(" If you don't have enough memory for flows you should add 
something like that in your config file \n"); + printf("\n"); + printf(" memory : \n"); + printf(" dp_flows : 4048576 \n"); + printf("--------\n"); + exit(1); +} + + bool CFlowGenListPerThread::check_msgs_from_rx() { if ( likely ( m_ring_from_rx->isEmpty() ) ) { return false; @@ -4825,6 +4905,11 @@ void CFlowGenListPerThread::start_generate_stateful(std::string erf_file_name, node->m_type = CGenNode::TW_SYNC; node->m_time = m_cur_time_sec + BUCKET_TIME_SEC ; m_node_gen.add_node(node); + + node= create_node() ; + node->m_type = CGenNode::TW_SYNC1; + node->m_time = m_cur_time_sec + BUCKET_TIME_SEC_LEVEL1 ; + m_node_gen.add_node(node); } diff --git a/src/bp_sim.h b/src/bp_sim.h index 9cdfd30a..282e7fe4 100755 --- a/src/bp_sim.h +++ b/src/bp_sim.h @@ -370,6 +370,13 @@ public: #define CONST_9k_MBUF_SIZE (MAX_PKT_ALIGN_BUF_9K + MBUF_PKT_PREFIX) +#define TW_BUCKETS (CGlobalInfo::m_options.get_tw_buckets()) +#define TW_BUCKETS_LEVEL1_DIV (16) +#define TW_LEVELS (CGlobalInfo::m_options.get_tw_levels()) +#define BUCKET_TIME_SEC (CGlobalInfo::m_options.get_tw_bucket_time_in_sec()) +#define BUCKET_TIME_SEC_LEVEL1 (CGlobalInfo::m_options.get_tw_bucket_level1_time_in_sec()) + + class CPreviewMode { public: CPreviewMode(){ @@ -642,9 +649,12 @@ typedef struct mac_align_t_ { struct CMacAddrCfg { public: - CMacAddrCfg (){ - memset(u.m_data,0,sizeof(u.m_data)); - u.m_mac.dest[3]=1; + CMacAddrCfg () { + reset(); + } + void reset () { + memset(u.m_data, 0, sizeof(u.m_data)); + u.m_mac.dest[3] = 1; u.m_mac.is_set = 0; } union { @@ -701,36 +711,49 @@ public: }; public: - CParserOption(){ - m_factor=1.0; - m_mbuf_factor=1.0; - m_duration=0.0; - m_latency_rate =0; - m_latency_mask =0xffffffff; - m_latency_prev=0; - m_wait_before_traffic=1; - m_zmq_port=4500; - m_telnet_port =4501; - m_platform_factor=1.0; - m_expected_portd = 4; /* should be at least the number of ports found in the system but could be less */ - m_vlan_port[0]=100; - m_vlan_port[1]=100; - m_rx_check_sample=0; + + void 
reset() { + preview.clean(); + m_tw_buckets = 1024; + m_tw_levels = 3; + m_active_flows = 0; + m_factor = 1.0; + m_mbuf_factor = 1.0; + m_duration = 0.0; + m_platform_factor = 1.0; + m_vlan_port[0] = 100; + m_vlan_port[1] = 100; + memset(m_src_ipv6, 0, sizeof(m_src_ipv6)); + memset(m_dst_ipv6, 0, sizeof(m_dst_ipv6)); + memset(m_ip_cfg, 0, sizeof(m_ip_cfg)); + m_latency_rate = 0; + m_latency_mask = 0xffffffff; + m_latency_prev = 0; + m_rx_check_sample = 0; m_rx_check_hops = 0; - m_io_mode=1; - m_run_flags=0; - prefix=""; - m_run_mode = RUN_MODE_INVALID; + m_wait_before_traffic = 1; + m_zmq_port = 4500; + m_telnet_port = 4501; + m_expected_portd = 4; /* should be at least the number of ports found in the system but could be less */ + m_io_mode = 1; + m_run_flags = 0; m_l_pkt_mode = 0; - m_rx_thread_enabled = false; + m_learn_mode = 0; + m_debug_pkt_proto = 0; m_arp_ref_per = 120; // in seconds - m_tw_buckets = 1024; - m_tw_levels = 3; - m_tw_bucket_time_sec = (20.0/1000000.0); - m_active_flows=0; - + m_rx_thread_enabled = false; + m_run_mode = RUN_MODE_INVALID; + cfg_file = ""; + client_cfg_file = ""; + platform_cfg_file = ""; + out_file = ""; + prefix = ""; + set_tw_bucket_time_in_usec(20.0); } + CParserOption(){ + reset(); + } CPreviewMode preview; uint16_t m_tw_buckets; @@ -761,22 +784,17 @@ public: uint16_t m_arp_ref_per; bool m_rx_thread_enabled; trex_run_mode_e m_run_mode; - - - std::string cfg_file; std::string client_cfg_file; std::string platform_cfg_file; - std::string out_file; std::string prefix; std::vector<std::string> dump_interfaces; - - CMacAddrCfg m_mac_addr[TREX_MAX_PORTS]; double m_tw_bucket_time_sec; - + double m_tw_bucket_time_sec_level1; +public: uint8_t * get_src_mac_addr(int if_index){ return (m_mac_addr[if_index].u.m_mac.src); } @@ -784,7 +802,6 @@ public: return (m_mac_addr[if_index].u.m_mac.dest); } -public: uint32_t get_expected_ports(){ return (m_expected_portd); } @@ -819,8 +836,13 @@ public: return (m_tw_bucket_time_sec); } + inline 
double get_tw_bucket_level1_time_in_sec(void){ + return (m_tw_bucket_time_sec_level1); + } + void set_tw_bucket_time_in_usec(double usec){ - m_tw_bucket_time_sec=(usec/1000000.0); + m_tw_bucket_time_sec= (usec/1000000.0); + m_tw_bucket_time_sec_level1 = (m_tw_bucket_time_sec*(double)m_tw_buckets)/((double)TW_BUCKETS_LEVEL1_DIV); } void set_tw_buckets(uint16_t buckets){ @@ -1469,7 +1491,9 @@ public: EXIT_PORT_SCHED =8, PCAP_PKT =9, GRAT_ARP =10, - TW_SYNC =11 + TW_SYNC =11, + TW_SYNC1 =12, + }; /* flags MASKS*/ @@ -2215,6 +2239,8 @@ private: void handle_flow_sync(CGenNode *node, CFlowGenListPerThread *thread, bool &exit_scheduler); void handle_pcap_pkt(CGenNode *node, CFlowGenListPerThread *thread); void handle_maintenance(CFlowGenListPerThread *thread); + void handle_batch_tw_level1(CGenNode *node, CFlowGenListPerThread *thread,bool &exit_scheduler,bool on_terminate); + public: pqueue_t m_p_queue; @@ -2226,8 +2252,10 @@ public: uint64_t m_non_active; uint64_t m_limit; CTimeHistogram m_realtime_his; + dsec_t m_scheduler_offset; dsec_t m_last_sync_time_sec; + dsec_t m_tw_level1_next_sec; }; @@ -3798,9 +3826,6 @@ private: bool server_seq_init; /* TCP seq been init for server? 
*/ }; -#define TW_BUCKETS (CGlobalInfo::m_options.get_tw_buckets()) -#define TW_LEVELS (CGlobalInfo::m_options.get_tw_levels()) -#define BUCKET_TIME_SEC (CGlobalInfo::m_options.get_tw_bucket_time_in_sec()) @@ -3957,6 +3982,8 @@ public: private: + FORCE_NO_INLINE void no_memory_error(); + bool check_msgs_from_rx(); void handle_nat_msg(CGenNodeNatInfo * msg); @@ -4016,7 +4043,7 @@ public: public: CNodeGenerator m_node_gen; - CHTimerWheel m_tw; + CNATimerWheel m_tw; public: uint32_t m_cur_template; @@ -4051,7 +4078,7 @@ private: inline CGenNode * CFlowGenListPerThread::create_node(void){ CGenNode * res; if ( unlikely (rte_mempool_sc_get(m_node_pool, (void **)&res) <0) ){ - rte_exit(EXIT_FAILURE, "cant allocate object , need more \n"); + no_memory_error(); return (0); } return (res); diff --git a/src/common/captureFile.cpp b/src/common/captureFile.cpp index b3035e8a..a25d1c42 100755 --- a/src/common/captureFile.cpp +++ b/src/common/captureFile.cpp @@ -135,7 +135,7 @@ bool CCapPktRaw::Compare(CCapPktRaw * obj,int dump,double dsec){ CPktNsecTimeStamp t2(obj->time_sec,obj->time_nsec); if ( t1.diff(t2) > dsec ){ if ( dump ){ - printf(" ERROR: diff of 1 msec in time \n"); + printf(" ERROR: diff of %lf seconds while only %lf allowed\n", t1.diff(t2), dsec); } return (false); } diff --git a/src/common/captureFile.h b/src/common/captureFile.h index d87e57b6..a552d501 100755 --- a/src/common/captureFile.h +++ b/src/common/captureFile.h @@ -25,6 +25,7 @@ limitations under the License. 
#include <stdlib.h> #include <string> #include <iostream> +#include <cmath> #ifdef WIN32 #pragma warning(disable:4786) @@ -87,7 +88,7 @@ public: } double diff(const CPktNsecTimeStamp & obj){ - return (abs(getNsec() - obj.getNsec() ) ); + return (std::abs(getNsec() - obj.getNsec() ) ); } void Dump(FILE *fd); diff --git a/src/common/ef/efence.cpp b/src/common/ef/efence.cpp index 1340a12a..b74d3da5 100644 --- a/src/common/ef/efence.cpp +++ b/src/common/ef/efence.cpp @@ -821,7 +821,6 @@ ef_malloc(size_t size) lock(); allocation=ef_memalign(EF_ALIGNMENT, size); - /* put 0xaa into the memset to find uninit issues */ memset(allocation,0xaa,size); #if 0 @@ -897,6 +896,14 @@ calloc(size_t nelem, size_t elsize) return (ef_calloc(nelem, elsize)); } +extern C_LINKAGE int +posix_memalign(void **memptr, size_t alignment, size_t size) +{ + *memptr=ef_malloc(size); + return(0); +} + + /* * This will catch more bugs if you remove the page alignment, but it * will break some software. diff --git a/src/debug.cpp b/src/debug.cpp index 5a881dd9..4abd05a0 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -290,8 +290,8 @@ struct pkt_params { }; struct pkt_params test_pkts[] = { - {"ARP", 1, IPPROTO_UDP, 255, 5, 0, ZERO}, - {"VLAN ARP", 1, IPPROTO_UDP, 255, 5, DPF_VLAN, ZERO}, + {"ARP req", 1, IPPROTO_UDP, 255, 5, 0, ZERO}, + {"VLAN ARP req", 1, IPPROTO_UDP, 255, 5, DPF_VLAN, ZERO}, {"ipv4 TCP ttl 255", 4, IPPROTO_TCP, 255, 5, 0, STF}, {"ipv4 TCP ttl 246", 4, IPPROTO_TCP, 246, 5, 0, STF}, {"ipv4 TCP ttl 245", 4, IPPROTO_TCP, 245, 5, 0, ZERO}, @@ -398,7 +398,13 @@ int CTrexDebug::verify_hw_rules(bool recv_all) { memset(pkt_per_q, 0, sizeof(pkt_per_q)); // We don't know which interfaces connected where, so sum all queue 1 and all queue 0 for (int port = 0; port < m_max_ports; port++) { - for(int queue_id = 0; queue_id <= 1; queue_id++) { + int max_q; + if (CGlobalInfo::m_options.preview.get_vm_one_queue_enable()) { + max_q = 0; + } else { + max_q = 1; + } + for(int queue_id = 0; queue_id <= 
max_q; queue_id++) { lp = &m_ports[port]; uint16_t cnt = lp->rx_burst(queue_id, rx_pkts, 32); pkt_per_q[queue_id] += cnt; diff --git a/src/dpdk/drivers/net/enic/base/vnic_dev.c b/src/dpdk/drivers/net/enic/base/vnic_dev.c index e50b90e7..dddb1dcd 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_dev.c +++ b/src/dpdk/drivers/net/enic/base/vnic_dev.c @@ -667,12 +667,8 @@ int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, (promisc ? CMD_PFILTER_PROMISCUOUS : 0) | (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0); -#define TREX_PATCH -#ifdef TREX_PATCH - err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER_ALL, &a0, &a1, wait); -#else err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); -#endif + if (err) pr_err("Can't set packet filter\n"); diff --git a/src/dpdk/drivers/net/enic/enic_clsf.c b/src/dpdk/drivers/net/enic/enic_clsf.c index 8f68faab..1610951d 100644 --- a/src/dpdk/drivers/net/enic/enic_clsf.c +++ b/src/dpdk/drivers/net/enic/enic_clsf.c @@ -145,7 +145,7 @@ copy_fltr_recv_all(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, memset(&eth_mask, 0, sizeof(eth_mask)); memset(&eth_val, 0, sizeof(eth_val)); - eth_val.ether_type = 0x0806; + eth_val.ether_type = 0xdead; eth_mask.ether_type = 0; gp->position = 0; @@ -381,14 +381,6 @@ int enic_fdir_del_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) /* The entry is present in the table */ key = enic->fdir.nodes[pos]; -#ifdef TREX_PATCH - switch (params->soft_id) { - case 100: - // remove promisc when we delete 'receive all' filter - vnic_dev_packet_filter(enic->vdev, 1, 1, 1, 0, 1); - break; - } -#endif /* Delete the filter */ vnic_dev_classifier(enic->vdev, CLSF_DEL, &key->fltr_id, NULL); @@ -498,7 +490,6 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) #ifdef TREX_PATCH switch (params->soft_id) { case 100: - vnic_dev_packet_filter(enic->vdev, 1, 1, 1, 1, 1); copy_fltr_recv_all(&fltr, &params->input, &enic->rte_dev->data->dev_conf.fdir_conf.mask); break; 
default: diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx.c b/src/dpdk/drivers/net/i40e/i40e_rxtx.c index 8fdf30c6..19b431c3 100644 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx.c +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx.c @@ -2161,10 +2161,18 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t base, bsf, tc_mapping; int use_def_burst_func = 1; +#define TREX_PATCH_LOW_LATENCY +#ifdef TREX_PATCH_LOW_LATENCY + int is_vf = 0; +#endif + if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; +#ifdef TREX_PATCH_LOW_LATENCY + is_vf = 1; +#endif } else { vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); } @@ -2272,10 +2280,11 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, ad->rx_bulk_alloc_allowed = false; } -#define TREX_PATCH_LOW_LATENCY #ifdef TREX_PATCH_LOW_LATENCY - rxq->dcb_tc =0; -#else + if (! is_vf) + rxq->dcb_tc =0; + else // The entire for below is in the else +#endif for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->enabled_tc & (1 << i))) @@ -2289,7 +2298,6 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, if (queue_idx >= base && queue_idx < (base + BIT(bsf))) rxq->dcb_tc = i; } -#endif return 0; } @@ -2385,20 +2393,25 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_rs_thresh, tx_free_thresh; uint16_t i, base, bsf, tc_mapping; +#ifdef TREX_PATCH_LOW_LATENCY + u8 low_latency = 0; + int is_vf = 1; +#endif + if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; } else { vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); - } - #ifdef TREX_PATCH_LOW_LATENCY - u8 low_latency = 0; - if (queue_idx == pf->dev_data->nb_tx_queues-1) { - low_latency = 1; - } + if (queue_idx == pf->dev_data->nb_tx_queues-1) { + low_latency = 1; + } + is_vf = 0; #endif + } + if (vsi == NULL) { PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) " @@ 
-2555,12 +2568,14 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, i40e_set_tx_function_flag(dev, txq); #ifdef TREX_PATCH_LOW_LATENCY - if (low_latency) { - txq->dcb_tc=1; - }else{ - txq->dcb_tc=0; - } -#else + if (! is_vf) { + if (low_latency) { + txq->dcb_tc=1; + }else{ + txq->dcb_tc=0; + } + } else // The entire for below is in the else +#endif for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->enabled_tc & (1 << i))) continue; @@ -2573,7 +2588,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, if (queue_idx >= base && queue_idx < (base + BIT(bsf))) txq->dcb_tc = i; } -#endif + return 0; } diff --git a/src/dpdk/drivers/net/mlx5/mlx5.c b/src/dpdk/drivers/net/mlx5/mlx5.c index 303b917b..7c072391 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.c +++ b/src/dpdk/drivers/net/mlx5/mlx5.c @@ -43,16 +43,16 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_malloc.h> #include <rte_ethdev.h> @@ -60,7 +60,7 @@ #include <rte_common.h> #include <rte_kvargs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -257,6 +257,7 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr) return ret; } + /** * Verify and store value for device argument. * @@ -297,6 +298,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) return 0; } + + /** * Parse device parameters. 
* @@ -341,6 +344,8 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs) return 0; } + + static struct eth_driver mlx5_driver; /** @@ -527,7 +532,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) /* TREX PATCH */ /* set for maximum performance default */ - priv->txq_inline =128; + priv->txq_inline =64; priv->txqs_inline =4; @@ -781,3 +786,515 @@ static struct rte_driver rte_mlx5_driver = { PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5); DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map); + + + + + + +#if 0 +/** + * Verify and store value for device argument. + * + * @param[in] key + * Key argument to verify. + * @param[in] val + * Value associated with key. + * @param opaque + * User data. + * + * @return + * 0 on success, negative errno value on failure. + */ +static int +mlx5_args_check(const char *key, const char *val, void *opaque) +{ + struct priv *priv = opaque; + unsigned long tmp; + + errno = 0; + tmp = strtoul(val, NULL, 0); + if (errno) { + WARN("%s: \"%s\" is not a valid integer", key, val); + return errno; + } + if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { + priv->cqe_comp = !!tmp; + } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { + priv->txq_inline = tmp; + } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { + priv->txqs_inline = tmp; + } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { + priv->mps = !!tmp; + } else { + WARN("%s: unknown parameter", key); + return -EINVAL; + } + return 0; +} + +/** + * Parse device parameters. + * + * @param priv + * Pointer to private structure. + * @param devargs + * Device arguments structure. + * + * @return + * 0 on success, errno value on failure. 
+ */ +static int +mlx5_args(struct priv *priv, struct rte_devargs *devargs) +{ + const char **params = (const char *[]){ + MLX5_RXQ_CQE_COMP_EN, + MLX5_TXQ_INLINE, + MLX5_TXQS_MIN_INLINE, + MLX5_TXQ_MPW_EN, + NULL, + }; + struct rte_kvargs *kvlist; + int ret = 0; + int i; + + if (devargs == NULL) + return 0; + /* Following UGLY cast is done to pass checkpatch. */ + kvlist = rte_kvargs_parse(devargs->args, params); + if (kvlist == NULL) + return 0; + /* Process parameters. */ + for (i = 0; (params[i] != NULL); ++i) { + if (rte_kvargs_count(kvlist, params[i])) { + ret = rte_kvargs_process(kvlist, params[i], + mlx5_args_check, priv); + if (ret != 0) + return ret; + } + } + rte_kvargs_free(kvlist); + return 0; +} + +static struct eth_driver mlx5_driver; + +/** + * DPDK callback to register a PCI device. + * + * This function creates an Ethernet device for each port of a given + * PCI device. + * + * @param[in] pci_drv + * PCI driver structure (mlx5_driver). + * @param[in] pci_dev + * PCI device information. + * + * @return + * 0 on success, negative errno value on failure. + */ +static int +mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +{ + struct ibv_device **list; + struct ibv_device *ibv_dev; + int err = 0; + struct ibv_context *attr_ctx = NULL; + struct ibv_device_attr device_attr; + unsigned int sriov; + unsigned int mps; + int idx; + int i; + + (void)pci_drv; + assert(pci_drv == &mlx5_driver.pci_drv); + /* Get mlx5_dev[] index. */ + idx = mlx5_dev_idx(&pci_dev->addr); + if (idx == -1) { + ERROR("this driver cannot support any more adapters"); + return -ENOMEM; + } + DEBUG("using driver device index %d", idx); + + /* Save PCI address. 
*/ + mlx5_dev[idx].pci_addr = pci_dev->addr; + list = ibv_get_device_list(&i); + if (list == NULL) { + assert(errno); + if (errno == ENOSYS) { + WARN("cannot list devices, is ib_uverbs loaded?"); + return 0; + } + return -errno; + } + assert(i >= 0); + /* + * For each listed device, check related sysfs entry against + * the provided PCI ID. + */ + while (i != 0) { + struct rte_pci_addr pci_addr; + + --i; + DEBUG("checking device \"%s\"", list[i]->name); + if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) + continue; + if ((pci_dev->addr.domain != pci_addr.domain) || + (pci_dev->addr.bus != pci_addr.bus) || + (pci_dev->addr.devid != pci_addr.devid) || + (pci_dev->addr.function != pci_addr.function)) + continue; + sriov = ((pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || + (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); + /* Multi-packet send is only supported by ConnectX-4 Lx PF. */ + mps = (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX4LX); + INFO("PCI information matches, using device \"%s\"" + " (SR-IOV: %s, MPS: %s)", + list[i]->name, + sriov ? "true" : "false", + mps ? 
"true" : "false"); + attr_ctx = ibv_open_device(list[i]); + err = errno; + break; + } + if (attr_ctx == NULL) { + ibv_free_device_list(list); + switch (err) { + case 0: + WARN("cannot access device, is mlx5_ib loaded?"); + return 0; + case EINVAL: + WARN("cannot use device, are drivers up to date?"); + return 0; + } + assert(err > 0); + return -err; + } + ibv_dev = list[i]; + + DEBUG("device opened"); + if (ibv_query_device(attr_ctx, &device_attr)) + goto error; + INFO("%u port(s) detected", device_attr.phys_port_cnt); + + for (i = 0; i < device_attr.phys_port_cnt; i++) { + uint32_t port = i + 1; /* ports are indexed from one */ + uint32_t test = (1 << i); + struct ibv_context *ctx = NULL; + struct ibv_port_attr port_attr; + struct ibv_pd *pd = NULL; + struct priv *priv = NULL; + struct rte_eth_dev *eth_dev; + struct ibv_exp_device_attr exp_device_attr; + struct ether_addr mac; + uint16_t num_vfs = 0; + + exp_device_attr.comp_mask = + IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | + IBV_EXP_DEVICE_ATTR_RX_HASH | + IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS | + IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN | + 0; + + DEBUG("using port %u (%08" PRIx32 ")", port, test); + + ctx = ibv_open_device(ibv_dev); + if (ctx == NULL) + goto port_error; + + /* Check port status. */ + err = ibv_query_port(ctx, port, &port_attr); + if (err) { + ERROR("port query failed: %s", strerror(err)); + goto port_error; + } + + if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { + ERROR("port %d is not configured in Ethernet mode", + port); + goto port_error; + } + + if (port_attr.state != IBV_PORT_ACTIVE) + DEBUG("port %d is not active: \"%s\" (%d)", + port, ibv_port_state_str(port_attr.state), + port_attr.state); + + /* Allocate protection domain. 
*/ + pd = ibv_alloc_pd(ctx); + if (pd == NULL) { + ERROR("PD allocation failure"); + err = ENOMEM; + goto port_error; + } + + mlx5_dev[idx].ports |= test; + + /* from rte_ethdev.c */ + priv = rte_zmalloc("ethdev private structure", + sizeof(*priv), + RTE_CACHE_LINE_SIZE); + if (priv == NULL) { + ERROR("priv allocation failure"); + err = ENOMEM; + goto port_error; + } + + priv->ctx = ctx; + priv->device_attr = device_attr; + priv->port = port; + priv->pd = pd; + priv->mtu = ETHER_MTU; + priv->mps = mps; /* Enable MPW by default if supported. */ + priv->cqe_comp = 1; /* Enable compression by default. */ + err = mlx5_args(priv, pci_dev->device.devargs); + if (err) { + ERROR("failed to process device arguments: %s", + strerror(err)); + goto port_error; + } + if (ibv_exp_query_device(ctx, &exp_device_attr)) { + ERROR("ibv_exp_query_device() failed"); + goto port_error; + } + + priv->hw_csum = + ((exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) && + (exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_RX_CSUM_IP_PKT)); + DEBUG("checksum offloading is %ssupported", + (priv->hw_csum ? "" : "not ")); + + priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_VXLAN_SUPPORT); + DEBUG("L2 tunnel checksum offloads are %ssupported", + (priv->hw_csum_l2tun ? "" : "not ")); + + priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; + /* Remove this check once DPDK supports larger/variable + * indirection tables. */ + if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE) + priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; + DEBUG("maximum RX indirection table size is %u", + priv->ind_table_max_size); + priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & + IBV_EXP_RECEIVE_WQ_CVLAN_STRIP); + DEBUG("VLAN stripping is %ssupported", + (priv->hw_vlan_strip ? 
"" : "not ")); + + priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_SCATTER_FCS); + DEBUG("FCS stripping configuration is %ssupported", + (priv->hw_fcs_strip ? "" : "not ")); + + priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align; + DEBUG("hardware RX end alignment padding is %ssupported", + (priv->hw_padding ? "" : "not ")); + + priv_get_num_vfs(priv, &num_vfs); + priv->sriov = (num_vfs || sriov); + if (priv->mps && !mps) { + ERROR("multi-packet send not supported on this device" + " (" MLX5_TXQ_MPW_EN ")"); + err = ENOTSUP; + goto port_error; + } + /* Allocate and register default RSS hash keys. */ + priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n, + sizeof((*priv->rss_conf)[0]), 0); + if (priv->rss_conf == NULL) { + err = ENOMEM; + goto port_error; + } + err = rss_hash_rss_conf_new_key(priv, + rss_hash_default_key, + rss_hash_default_key_len, + ETH_RSS_PROTO_MASK); + if (err) + goto port_error; + /* Configure the first MAC address by default. */ + if (priv_get_mac(priv, &mac.addr_bytes)) { + ERROR("cannot get MAC address, is mlx5_en loaded?" + " (errno: %s)", strerror(errno)); + goto port_error; + } + INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", + priv->port, + mac.addr_bytes[0], mac.addr_bytes[1], + mac.addr_bytes[2], mac.addr_bytes[3], + mac.addr_bytes[4], mac.addr_bytes[5]); + /* Register MAC address. */ + claim_zero(priv_mac_addr_add(priv, 0, + (const uint8_t (*)[ETHER_ADDR_LEN]) + mac.addr_bytes)); + /* Initialize FD filters list. */ + err = fdir_init_filters_list(priv); + if (err) + goto port_error; +#ifndef NDEBUG + { + char ifname[IF_NAMESIZE]; + + if (priv_get_ifname(priv, &ifname) == 0) + DEBUG("port %u ifname is \"%s\"", + priv->port, ifname); + else + DEBUG("port %u ifname is unknown", priv->port); + } +#endif + /* Get actual MTU if possible. 
*/ + priv_get_mtu(priv, &priv->mtu); + DEBUG("port %u MTU is %u", priv->port, priv->mtu); + + /* from rte_ethdev.c */ + { + char name[RTE_ETH_NAME_MAX_LEN]; + + snprintf(name, sizeof(name), "%s port %u", + ibv_get_device_name(ibv_dev), port); + eth_dev = rte_eth_dev_allocate(name); + } + if (eth_dev == NULL) { + ERROR("can not allocate rte ethdev"); + err = ENOMEM; + goto port_error; + } + + /* Secondary processes have to use local storage for their + * private data as well as a copy of eth_dev->data, but this + * pointer must not be modified before burst functions are + * actually called. */ + if (mlx5_is_secondary()) { + struct mlx5_secondary_data *sd = + &mlx5_secondary_data[eth_dev->data->port_id]; + sd->primary_priv = eth_dev->data->dev_private; + if (sd->primary_priv == NULL) { + ERROR("no private data for port %u", + eth_dev->data->port_id); + err = EINVAL; + goto port_error; + } + sd->shared_dev_data = eth_dev->data; + rte_spinlock_init(&sd->lock); + memcpy(sd->data.name, sd->shared_dev_data->name, + sizeof(sd->data.name)); + sd->data.dev_private = priv; + sd->data.rx_mbuf_alloc_failed = 0; + sd->data.mtu = ETHER_MTU; + sd->data.port_id = sd->shared_dev_data->port_id; + sd->data.mac_addrs = priv->mac; + eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup; + eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; + } else { + eth_dev->data->dev_private = priv; + eth_dev->data->rx_mbuf_alloc_failed = 0; + eth_dev->data->mtu = ETHER_MTU; + eth_dev->data->mac_addrs = priv->mac; + } + + eth_dev->pci_dev = pci_dev; + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->driver = &mlx5_driver; + priv->dev = eth_dev; + eth_dev->dev_ops = &mlx5_dev_ops; + + TAILQ_INIT(&eth_dev->link_intr_cbs); + + /* Bring Ethernet device up. 
*/ + DEBUG("forcing Ethernet interface up"); + priv_set_flags(priv, ~IFF_UP, IFF_UP); + mlx5_link_update_unlocked(priv->dev, 1); + continue; + +port_error: + if (priv) { + rte_free(priv->rss_conf); + rte_free(priv); + } + if (pd) + claim_zero(ibv_dealloc_pd(pd)); + if (ctx) + claim_zero(ibv_close_device(ctx)); + break; + } + + /* + * XXX if something went wrong in the loop above, there is a resource + * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as + * long as the dpdk does not provide a way to deallocate a ethdev and a + * way to enumerate the registered ethdevs to free the previous ones. + */ + + /* no port found, complain */ + if (!mlx5_dev[idx].ports) { + err = ENODEV; + goto error; + } + +error: + if (attr_ctx) + claim_zero(ibv_close_device(attr_ctx)); + if (list) + ibv_free_device_list(list); + assert(err >= 0); + return -err; +} + +static const struct rte_pci_id mlx5_pci_id_map[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) + }, + { + .vendor_id = 0 + } +}; + +static struct eth_driver mlx5_driver = { + .pci_drv = { + .driver = { + .name = MLX5_DRIVER_NAME + }, + .id_table = mlx5_pci_id_map, + .probe = mlx5_pci_probe, + .drv_flags = RTE_PCI_DRV_INTR_LSC, + }, + .dev_private_size = sizeof(struct priv) +}; + +/** + * Driver initialization routine. + */ +RTE_INIT(rte_mlx5_pmd_init); +static void +rte_mlx5_pmd_init(void) +{ + /* + * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use + * huge pages. Calling ibv_fork_init() during init allows + * applications to use fork() safely for purposes other than + * using this PMD, which is not supported in forked processes. 
+ */ + setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); + rte_eal_pci_register(&mlx5_driver.pci_drv); +} + +RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); +#endif diff --git a/src/dpdk/drivers/net/mlx5/mlx5.h b/src/dpdk/drivers/net/mlx5/mlx5.h index 68bad904..83b29e18 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.h +++ b/src/dpdk/drivers/net/mlx5/mlx5.h @@ -43,16 +43,16 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_ethdev.h> @@ -60,7 +60,7 @@ #include <rte_interrupts.h> #include <rte_errno.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5_utils.h" @@ -162,6 +162,8 @@ struct priv { unsigned int (*reta_idx)[]; /* RETA index table. */ unsigned int reta_idx_n; /* RETA index size. */ struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */ + struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */ + uint32_t link_speed_capa; /* Link speed capabilities. */ rte_spinlock_t lock; /* Lock for control functions. 
*/ struct mlx5_stats_priv m_stats; }; @@ -215,6 +217,7 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int); int mlx5_dev_configure(struct rte_eth_dev *); void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); +int mlx5_link_update_unlocked(struct rte_eth_dev *, int); int mlx5_link_update(struct rte_eth_dev *, int); int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t); int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); @@ -274,7 +277,6 @@ void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); void mlx5_stats_reset(struct rte_eth_dev *); void mlx5_stats_free(struct rte_eth_dev *dev); - /* mlx5_vlan.c */ int mlx5_vlan_filter_set(struct rte_eth_dev *, uint16_t, int); @@ -288,6 +290,7 @@ void mlx5_dev_stop(struct rte_eth_dev *); /* mlx5_fdir.c */ +void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *); int fdir_init_filters_list(struct priv *); void priv_fdir_delete_filters_list(struct priv *); void priv_fdir_disable(struct priv *); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_defs.h b/src/dpdk/drivers/net/mlx5/mlx5_defs.h index cc2a6f3e..30adfebb 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_defs.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_defs.h @@ -37,7 +37,7 @@ #include "mlx5_autoconf.h" /* Reported driver name. */ -#define MLX5_DRIVER_NAME "librte_pmd_mlx5" +#define MLX5_DRIVER_NAME "net_mlx5" /* Maximum number of simultaneous MAC addresses. */ #define MLX5_MAX_MAC_ADDRESSES 128 @@ -79,4 +79,41 @@ /* Alarm timeout. */ #define MLX5_ALARM_TIMEOUT_US 100000 + +//#ifdef TREX_PATCH_DPDK PATH for DPDK16.11 should be removed + +/** + * Mask of bits used to determine the status of RX IP checksum. 
+ * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum + * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong + * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid + * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet + * data, but the integrity of the IP header is verified. + */ +#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7)) + +#define PKT_RX_IP_CKSUM_UNKNOWN 0 +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7)) + +/** + * Mask of bits used to determine the status of RX L4 checksum. + * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum + * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong + * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid + * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet + * data, but the integrity of the L4 data is verified. + */ +#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8)) + +#define PKT_RX_L4_CKSUM_UNKNOWN 0 +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) +#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) +#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8)) + + +//#endif + + #endif /* RTE_PMD_MLX5_DEFS_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c index 130e15d5..85b81360 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c @@ -50,7 +50,7 @@ /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_atomic.h> #include <rte_ethdev.h> @@ -60,7 +60,7 @@ #include <rte_alarm.h> #include <rte_malloc.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -583,7 +583,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM) : - 0); + 0) | + (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0); if (!priv->mps) info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; if (priv->hw_csum) @@ -599,15 +600,10 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) * size if it is not fixed. * The API should be updated to solve this problem. */ info->reta_size = priv->ind_table_max_size; - info->speed_capa = - ETH_LINK_SPEED_1G | - ETH_LINK_SPEED_10G | - ETH_LINK_SPEED_20G | - ETH_LINK_SPEED_25G | - ETH_LINK_SPEED_40G | - ETH_LINK_SPEED_50G | - ETH_LINK_SPEED_56G | - ETH_LINK_SPEED_100G; + info->hash_key_size = ((*priv->rss_conf) ? + (*priv->rss_conf)[0]->rss_key_len : + 0); + info->speed_capa = priv->link_speed_capa; priv_unlock(priv); } @@ -630,7 +626,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) } /** - * DPDK callback to retrieve physical link information (unlocked version). + * Retrieve physical link information (unlocked version using legacy ioctl). * * @param dev * Pointer to Ethernet device structure. @@ -638,11 +634,11 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) * Wait for request completion (ignored). */ static int -mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) { struct priv *priv = mlx5_get_priv(dev); struct ethtool_cmd edata = { - .cmd = ETHTOOL_GSET + .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. 
*/ }; struct ifreq ifr; struct rte_eth_link dev_link; @@ -667,6 +663,19 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) dev_link.link_speed = 0; else dev_link.link_speed = link_speed; + priv->link_speed_capa = 0; + if (edata.supported & SUPPORTED_Autoneg) + priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; + if (edata.supported & (SUPPORTED_1000baseT_Full | + SUPPORTED_1000baseKX_Full)) + priv->link_speed_capa |= ETH_LINK_SPEED_1G; + if (edata.supported & SUPPORTED_10000baseKR_Full) + priv->link_speed_capa |= ETH_LINK_SPEED_10G; + if (edata.supported & (SUPPORTED_40000baseKR4_Full | + SUPPORTED_40000baseCR4_Full | + SUPPORTED_40000baseSR4_Full | + SUPPORTED_40000baseLR4_Full)) + priv->link_speed_capa |= ETH_LINK_SPEED_40G; dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & @@ -681,6 +690,123 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) } /** + * Retrieve physical link information (unlocked version using new ioctl from + * Linux 4.5). + * + * @param dev + * Pointer to Ethernet device structure. + * @param wait_to_complete + * Wait for request completion (ignored). 
+ */ +static int +mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) +{ +#ifdef ETHTOOL_GLINKSETTINGS + struct priv *priv = mlx5_get_priv(dev); + struct ethtool_link_settings edata = { + .cmd = ETHTOOL_GLINKSETTINGS, + }; + struct ifreq ifr; + struct rte_eth_link dev_link; + uint64_t sc; + + (void)wait_to_complete; + if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { + WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); + return -1; + } + memset(&dev_link, 0, sizeof(dev_link)); + dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && + (ifr.ifr_flags & IFF_RUNNING)); + ifr.ifr_data = (void *)&edata; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { + DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", + strerror(errno)); + return -1; + } + dev_link.link_speed = edata.speed; + sc = edata.link_mode_masks[0] | + ((uint64_t)edata.link_mode_masks[1] << 32); + priv->link_speed_capa = 0; + /* Link speeds available in kernel v4.5. */ + if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) + priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; + if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_1G; + if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT | + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT | + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_10G; + if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT | + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_20G; + if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT | + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT | + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT | + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_40G; + if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT | + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT | + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_56G; + /* 
Link speeds available in kernel v4.6. */ +#ifdef HAVE_ETHTOOL_LINK_MODE_25G + if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_25G; +#endif +#ifdef HAVE_ETHTOOL_LINK_MODE_50G + if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_50G; +#endif +#ifdef HAVE_ETHTOOL_LINK_MODE_100G + if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_100G; +#endif + dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? + ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); + dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & + ETH_LINK_SPEED_FIXED); + if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { + /* Link status changed. */ + dev->data->dev_link = dev_link; + return 0; + } +#else + (void)dev; + (void)wait_to_complete; +#endif + /* Link status is still the same. */ + return -1; +} + +/** + * DPDK callback to retrieve physical link information (unlocked version). + * + * @param dev + * Pointer to Ethernet device structure. + * @param wait_to_complete + * Wait for request completion (ignored). + */ +int +mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) +{ + int ret; + + ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); + if (ret < 0) + ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); + return ret; +} + +/** * DPDK callback to retrieve physical link information. 
* * @param dev @@ -807,7 +933,7 @@ recover: if (rehash) ret = rxq_rehash(dev, rxq_ctrl); else - ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n, + ret = rxq_ctrl_setup(dev, rxq_ctrl, 1 << rxq->elts_n, rxq_ctrl->socket, NULL, rxq->mp); if (!ret) continue; @@ -1067,8 +1193,8 @@ mlx5_dev_link_status_handler(void *arg) assert(priv->pending_alarm == 1); ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + //if (ret) + // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1090,8 +1216,8 @@ mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) priv_lock(priv); ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + //if (ret) + // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1308,12 +1434,14 @@ mlx5_secondary_data_setup(struct priv *priv) continue; primary_txq_ctrl = container_of(primary_txq, struct txq_ctrl, txq); - txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl), 0, + txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) + + (1 << primary_txq->elts_n) * + sizeof(struct rte_mbuf *), 0, primary_txq_ctrl->socket); if (txq_ctrl != NULL) { if (txq_ctrl_setup(priv->dev, - primary_txq_ctrl, - primary_txq->elts_n, + txq_ctrl, + 1 << primary_txq->elts_n, primary_txq_ctrl->socket, NULL) == 0) { txq_ctrl->txq.stats.idx = @@ -1397,10 +1525,6 @@ priv_select_tx_function(struct priv *priv) } else if ((priv->sriov == 0) && priv->mps) { priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw; DEBUG("selected MPW TX function"); - } else if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) { - priv->dev->tx_pkt_burst = mlx5_tx_burst_inline; - DEBUG("selected inline TX function (%u >= %u queues)", - priv->txqs_n, priv->txqs_inline); } } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c index 
4ba3bb9f..f03e95ef 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c @@ -42,23 +42,23 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs_exp.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -72,7 +72,6 @@ struct fdir_flow_desc { uint8_t tos; uint8_t ip_id; uint8_t proto; - uint8_t mac[6]; uint16_t vlan_tag; enum hash_rxq_type type; @@ -81,6 +80,7 @@ struct fdir_flow_desc { struct mlx5_fdir_filter { LIST_ENTRY(mlx5_fdir_filter) next; uint16_t queue; /* Queue assigned to if FDIR match. */ + enum rte_eth_fdir_behavior behavior; struct fdir_flow_desc desc; struct ibv_exp_flow *flow; }; @@ -107,8 +107,8 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, /* Set VLAN ID. */ desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci; - /* Set MAC address. */ #ifndef TREX_PATCH + /* Set MAC address. */ if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { rte_memcpy(desc->mac, fdir_filter->input.flow.mac_vlan_flow.mac_addr. 
@@ -119,12 +119,13 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, } #else if (fdir_filter->input.flow.ip4_flow.ip_id == 2) { - desc->type = HASH_RXQ_ETH; + desc->type = HASH_RXQ_ETH; desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - return; + return; } #endif + /* Set mode */ switch (fdir_filter->input.flow_type) { case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: @@ -155,7 +156,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: desc->src_port = fdir_filter->input.flow.udp4_flow.src_port; desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port; - case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip; desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip; @@ -178,14 +178,12 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, desc->tos = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits; /* TTL is mapped to TOS - TREX_PATCH */ desc->ip_id = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label; desc->proto = fdir_filter->input.flow.ipv6_flow.proto; - break; default: break; } } - /** * Check if two flow descriptors overlap according to configured mask. 
* @@ -220,7 +218,6 @@ priv_fdir_overlap(const struct priv *priv, ((desc1->dst_port & mask->dst_port_mask) != (desc2->dst_port & mask->dst_port_mask))) return 0; - if ( (desc1->tos != desc2->tos) || (desc1->ip_id != desc2->ip_id) || (desc1->proto != desc2->proto) ) @@ -233,9 +230,8 @@ priv_fdir_overlap(const struct priv *priv, if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) != (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) || ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) != - (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip))) + (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip))) return 0; - break; case HASH_RXQ_IPV6: case HASH_RXQ_UDPV6: @@ -337,7 +333,7 @@ priv_fdir_flow_add(struct priv *priv, spec_eth->mask.ether_type = 0x0000; goto create_flow; } -#endif +#endif switch (desc->type) { case HASH_RXQ_IPV4: @@ -372,8 +368,8 @@ priv_fdir_flow_add(struct priv *priv, spec_ipv4->mask.tos = 0x0; spec_ipv4->val.tos = 0x0; } - // spec_ipv4->val.tos = desc->tos & spec_ipv4->mask.tos;// & mask->ipv4_mask.tos; #endif + /* Update priority */ attr->priority = 1; @@ -470,6 +466,145 @@ create_flow: } /** + * Destroy a flow director queue. + * + * @param fdir_queue + * Flow director queue to be destroyed. + */ +void +priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue) +{ + struct mlx5_fdir_filter *fdir_filter; + + /* Disable filter flows still applying to this queue. 
*/ + LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) { + unsigned int idx = fdir_filter->queue; + struct rxq_ctrl *rxq_ctrl = + container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq); + + assert(idx < priv->rxqs_n); + if (fdir_queue == rxq_ctrl->fdir_queue && + fdir_filter->flow != NULL) { + claim_zero(ibv_exp_destroy_flow(fdir_filter->flow)); + fdir_filter->flow = NULL; + } + } + assert(fdir_queue->qp); + claim_zero(ibv_destroy_qp(fdir_queue->qp)); + assert(fdir_queue->ind_table); + claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table)); + if (fdir_queue->wq) + claim_zero(ibv_exp_destroy_wq(fdir_queue->wq)); + if (fdir_queue->cq) + claim_zero(ibv_destroy_cq(fdir_queue->cq)); +#ifndef NDEBUG + memset(fdir_queue, 0x2a, sizeof(*fdir_queue)); +#endif + rte_free(fdir_queue); +} + +/** + * Create a flow director queue. + * + * @param priv + * Private structure. + * @param wq + * Work queue to route matched packets to, NULL if one needs to + * be created. + * + * @return + * Related flow director queue on success, NULL otherwise. 
+ */ +static struct fdir_queue * +priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq, + unsigned int socket) +{ + struct fdir_queue *fdir_queue; + + fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue), + 0, socket); + if (!fdir_queue) { + ERROR("cannot allocate flow director queue"); + return NULL; + } + assert(priv->pd); + assert(priv->ctx); + if (!wq) { + fdir_queue->cq = ibv_exp_create_cq( + priv->ctx, 1, NULL, NULL, 0, + &(struct ibv_exp_cq_init_attr){ + .comp_mask = 0, + }); + if (!fdir_queue->cq) { + ERROR("cannot create flow director CQ"); + goto error; + } + fdir_queue->wq = ibv_exp_create_wq( + priv->ctx, + &(struct ibv_exp_wq_init_attr){ + .wq_type = IBV_EXP_WQT_RQ, + .max_recv_wr = 1, + .max_recv_sge = 1, + .pd = priv->pd, + .cq = fdir_queue->cq, + }); + if (!fdir_queue->wq) { + ERROR("cannot create flow director WQ"); + goto error; + } + wq = fdir_queue->wq; + } + fdir_queue->ind_table = ibv_exp_create_rwq_ind_table( + priv->ctx, + &(struct ibv_exp_rwq_ind_table_init_attr){ + .pd = priv->pd, + .log_ind_tbl_size = 0, + .ind_tbl = &wq, + .comp_mask = 0, + }); + if (!fdir_queue->ind_table) { + ERROR("cannot create flow director indirection table"); + goto error; + } + fdir_queue->qp = ibv_exp_create_qp( + priv->ctx, + &(struct ibv_exp_qp_init_attr){ + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = + IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_PORT | + IBV_EXP_QP_INIT_ATTR_RX_HASH, + .pd = priv->pd, + .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){ + .rx_hash_function = + IBV_EXP_RX_HASH_FUNC_TOEPLITZ, + .rx_hash_key_len = rss_hash_default_key_len, + .rx_hash_key = rss_hash_default_key, + .rx_hash_fields_mask = 0, + .rwq_ind_tbl = fdir_queue->ind_table, + }, + .port_num = priv->port, + }); + if (!fdir_queue->qp) { + ERROR("cannot create flow director hash RX QP"); + goto error; + } + return fdir_queue; +error: + assert(fdir_queue); + assert(!fdir_queue->qp); + if (fdir_queue->ind_table) + claim_zero(ibv_exp_destroy_rwq_ind_table 
+ (fdir_queue->ind_table)); + if (fdir_queue->wq) + claim_zero(ibv_exp_destroy_wq(fdir_queue->wq)); + if (fdir_queue->cq) + claim_zero(ibv_destroy_cq(fdir_queue->cq)); + rte_free(fdir_queue); + return NULL; +} + +/** * Get flow director queue for a specific RX queue, create it in case * it does not exist. * @@ -486,74 +621,42 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx) { struct rxq_ctrl *rxq_ctrl = container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq); - struct fdir_queue *fdir_queue = &rxq_ctrl->fdir_queue; - struct ibv_exp_rwq_ind_table *ind_table = NULL; - struct ibv_qp *qp = NULL; - struct ibv_exp_rwq_ind_table_init_attr ind_init_attr; - struct ibv_exp_rx_hash_conf hash_conf; - struct ibv_exp_qp_init_attr qp_init_attr; - int err = 0; - - /* Return immediately if it has already been created. */ - if (fdir_queue->qp != NULL) - return fdir_queue; + struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue; - ind_init_attr = (struct ibv_exp_rwq_ind_table_init_attr){ - .pd = priv->pd, - .log_ind_tbl_size = 0, - .ind_tbl = &rxq_ctrl->wq, - .comp_mask = 0, - }; - - errno = 0; - ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, - &ind_init_attr); - if (ind_table == NULL) { - /* Not clear whether errno is set. */ - err = (errno ? errno : EINVAL); - ERROR("RX indirection table creation failed with error %d: %s", - err, strerror(err)); - goto error; - } - - /* Create fdir_queue qp. */ - hash_conf = (struct ibv_exp_rx_hash_conf){ - .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ, - .rx_hash_key_len = rss_hash_default_key_len, - .rx_hash_key = rss_hash_default_key, - .rx_hash_fields_mask = 0, - .rwq_ind_tbl = ind_table, - }; - qp_init_attr = (struct ibv_exp_qp_init_attr){ - .max_inl_recv = 0, /* Currently not supported. 
*/ - .qp_type = IBV_QPT_RAW_PACKET, - .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | - IBV_EXP_QP_INIT_ATTR_RX_HASH), - .pd = priv->pd, - .rx_hash_conf = &hash_conf, - .port_num = priv->port, - }; - - qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr); - if (qp == NULL) { - err = (errno ? errno : EINVAL); - ERROR("hash RX QP creation failure: %s", strerror(err)); - goto error; + assert(rxq_ctrl->wq); + if (fdir_queue == NULL) { + fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq, + rxq_ctrl->socket); + rxq_ctrl->fdir_queue = fdir_queue; } - - fdir_queue->ind_table = ind_table; - fdir_queue->qp = qp; - return fdir_queue; +} -error: - if (qp != NULL) - claim_zero(ibv_destroy_qp(qp)); - - if (ind_table != NULL) - claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); +/** + * Get or flow director drop queue. Create it if it does not exist. + * + * @param priv + * Private structure. + * + * @return + * Flow director drop queue on success, NULL otherwise. + */ +static struct fdir_queue * +priv_get_fdir_drop_queue(struct priv *priv) +{ + struct fdir_queue *fdir_queue = priv->fdir_drop_queue; - return NULL; + if (fdir_queue == NULL) { + unsigned int socket = SOCKET_ID_ANY; + + /* Select a known NUMA socket if possible. */ + if (priv->rxqs_n && (*priv->rxqs)[0]) + socket = container_of((*priv->rxqs)[0], + struct rxq_ctrl, rxq)->socket; + fdir_queue = priv_fdir_queue_create(priv, NULL, socket); + priv->fdir_drop_queue = fdir_queue; + } + return fdir_queue; } /** @@ -578,7 +681,11 @@ priv_fdir_filter_enable(struct priv *priv, return 0; /* Get fdir_queue for specific queue. 
*/ - fdir_queue = priv_get_fdir_queue(priv, mlx5_fdir_filter->queue); + if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT) + fdir_queue = priv_get_fdir_drop_queue(priv); + else + fdir_queue = priv_get_fdir_queue(priv, + mlx5_fdir_filter->queue); if (fdir_queue == NULL) { ERROR("failed to create flow director rxq for queue %d", @@ -671,7 +778,6 @@ priv_fdir_disable(struct priv *priv) { unsigned int i; struct mlx5_fdir_filter *mlx5_fdir_filter; - struct fdir_queue *fdir_queue; /* Run on every flow director filter and destroy flow handle. */ LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) { @@ -688,23 +794,19 @@ priv_fdir_disable(struct priv *priv) } } - /* Run on every RX queue to destroy related flow director QP and - * indirection table. */ + /* Destroy flow director context in each RX queue. */ for (i = 0; (i != priv->rxqs_n); i++) { struct rxq_ctrl *rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq); - fdir_queue = &rxq_ctrl->fdir_queue; - if (fdir_queue->qp != NULL) { - claim_zero(ibv_destroy_qp(fdir_queue->qp)); - fdir_queue->qp = NULL; - } - - if (fdir_queue->ind_table != NULL) { - claim_zero(ibv_exp_destroy_rwq_ind_table - (fdir_queue->ind_table)); - fdir_queue->ind_table = NULL; - } + if (!rxq_ctrl->fdir_queue) + continue; + priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue); + rxq_ctrl->fdir_queue = NULL; + } + if (priv->fdir_drop_queue) { + priv_fdir_queue_destroy(priv, priv->fdir_drop_queue); + priv->fdir_drop_queue = NULL; } } @@ -795,7 +897,7 @@ priv_fdir_filter_add(struct priv *priv, #ifndef TREX_PATCH ERROR("filter already exists"); #endif - return EEXIST; + return EEXIST; } /* Create new flow director filter. */ @@ -808,8 +910,9 @@ priv_fdir_filter_add(struct priv *priv, return err; } - /* Set queue. */ + /* Set action parameters. */ mlx5_fdir_filter->queue = fdir_filter->action.rx_queue; + mlx5_fdir_filter->behavior = fdir_filter->action.behavior; /* Convert to mlx5 filter descriptor. 
*/ fdir_filter_to_flow_desc(fdir_filter, @@ -923,7 +1026,7 @@ priv_fdir_filter_delete(struct priv *priv, ERROR("%p: flow director delete failed, cannot find filter", (void *)priv); #endif - return ENOENT; + return ENOENT; } /** @@ -1029,7 +1132,7 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = -EINVAL; + int ret = EINVAL; struct priv *priv = dev->data->dev_private; switch (filter_type) { @@ -1044,5 +1147,5 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, break; } - return ret; + return -ret; } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_mac.c b/src/dpdk/drivers/net/mlx5/mlx5_mac.c index f6b27bb8..4fcfd3b8 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_mac.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_mac.c @@ -44,22 +44,22 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_mr.c b/src/dpdk/drivers/net/mlx5/mlx5_mr.c index 67dfefa8..0a363846 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_mr.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_mr.c @@ -34,20 +34,20 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mempool.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_prm.h b/src/dpdk/drivers/net/mlx5/mlx5_prm.h index 5db219b3..8426adb3 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_prm.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_prm.h @@ -37,13 +37,15 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/mlx5_hw.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif +#include "mlx5_autoconf.h" + /* Get CQE owner bit. */ #define MLX5_CQE_OWNER(op_own) ((op_own) & MLX5_CQE_OWNER_MASK) @@ -65,12 +67,44 @@ /* Maximum number of packets a multi-packet WQE can handle. */ #define MLX5_MPW_DSEG_MAX 5 -/* Room for inline data in regular work queue element. */ -#define MLX5_WQE64_INL_DATA 12 +/* WQE DWORD size */ +#define MLX5_WQE_DWORD_SIZE 16 + +/* WQE size */ +#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE) + +/* Compute the number of DS. */ +#define MLX5_WQE_DS(n) \ + (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE) /* Room for inline data in multi-packet WQE. */ #define MLX5_MWQE64_INL_DATA 28 +//#ifndef HAVE_VERBS_MLX5_OPCODE_TSO +//#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ +//#endif + +/* IPv4 packet. */ +#define MLX5_CQE_RX_IPV4_PACKET (1u << 2) + +/* IPv6 packet. 
*/ +#define MLX5_CQE_RX_IPV6_PACKET (1u << 3) + +/* Outer IPv4 packet. */ +#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7) + +/* Outer IPv6 packet. */ +#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8) + +/* Tunnel packet bit in the CQE. */ +#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4) + +/* Outer IP checksum OK. */ +#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5) + +/* Outer UDP header and checksum OK. */ +#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6) + /* Subset of struct mlx5_wqe_eth_seg. */ struct mlx5_wqe_eth_seg_small { uint32_t rsvd0; @@ -79,59 +113,26 @@ struct mlx5_wqe_eth_seg_small { uint16_t mss; uint32_t rsvd2; uint16_t inline_hdr_sz; + uint8_t inline_hdr[2]; }; -/* Regular WQE. */ -struct mlx5_wqe_regular { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; - struct mlx5_wqe_eth_seg eseg; - struct mlx5_wqe_data_seg dseg; -} __rte_aligned(64); - -/* Inline WQE. */ -struct mlx5_wqe_inl { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; - struct mlx5_wqe_eth_seg eseg; +struct mlx5_wqe_inl_small { uint32_t byte_cnt; - uint8_t data[MLX5_WQE64_INL_DATA]; -} __rte_aligned(64); + uint8_t raw; +}; -/* Multi-packet WQE. */ -struct mlx5_wqe_mpw { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; +/* Small common part of the WQE. */ +struct mlx5_wqe { + uint32_t ctrl[4]; struct mlx5_wqe_eth_seg_small eseg; - struct mlx5_wqe_data_seg dseg[2]; -} __rte_aligned(64); +}; -/* Multi-packet WQE with inline. */ -struct mlx5_wqe_mpw_inl { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; - struct mlx5_wqe_eth_seg_small eseg; - uint32_t byte_cnt; - uint8_t data[MLX5_MWQE64_INL_DATA]; +/* WQE. */ +struct mlx5_wqe64 { + struct mlx5_wqe hdr; + uint8_t raw[32]; } __rte_aligned(64); -/* Union of all WQE types. 
*/ -union mlx5_wqe { - struct mlx5_wqe_regular wqe; - struct mlx5_wqe_inl inl; - struct mlx5_wqe_mpw mpw; - struct mlx5_wqe_mpw_inl mpw_inl; - uint8_t data[64]; -}; - /* MPW session status. */ enum mlx5_mpw_state { MLX5_MPW_STATE_OPENED, @@ -145,7 +146,7 @@ struct mlx5_mpw { unsigned int pkts_n; unsigned int len; unsigned int total_len; - volatile union mlx5_wqe *wqe; + volatile struct mlx5_wqe *wqe; union { volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX]; volatile uint8_t *raw; @@ -157,7 +158,21 @@ struct mlx5_cqe { #if (RTE_CACHE_LINE_SIZE == 128) uint8_t padding[64]; #endif - struct mlx5_cqe64 cqe64; + uint8_t pkt_info; + uint8_t rsvd0[11]; + uint32_t rx_hash_res; + uint8_t rx_hash_type; + uint8_t rsvd1[11]; + uint8_t hds_ip_ext; + uint8_t l4_hdr_type_etc; + uint16_t vlan_info; + uint8_t rsvd2[12]; + uint32_t byte_cnt; + uint64_t timestamp; + uint8_t rsvd3[4]; + uint16_t wqe_counter; + uint8_t rsvd4; + uint8_t op_own; }; #endif /* RTE_PMD_MLX5_PRM_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rss.c b/src/dpdk/drivers/net/mlx5/mlx5_rss.c index 639e935b..0bed74ee 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rss.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rss.c @@ -40,21 +40,21 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_malloc.h> #include <rte_ethdev.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c b/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c index 8b585554..173e6e84 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c @@ -38,20 +38,20 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ethdev.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c index 6be01d39..c5746fa0 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c @@ -40,25 +40,25 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #include <infiniband/arch.h> #include <infiniband/mlx5_hw.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -723,7 +723,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl) if (rxq_ctrl->rxq.elts == NULL) return; - for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) { + for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) { if ((*rxq_ctrl->rxq.elts)[i] != NULL) rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); (*rxq_ctrl->rxq.elts)[i] = NULL; @@ -745,6 +745,8 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl) DEBUG("cleaning up %p", (void *)rxq_ctrl); rxq_free_elts(rxq_ctrl); + if (rxq_ctrl->fdir_queue != NULL) + priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue); if (rxq_ctrl->if_wq != NULL) { assert(rxq_ctrl->priv != NULL); assert(rxq_ctrl->priv->ctx != NULL); @@ -805,7 +807,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl) int rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl) { - unsigned int elts_n = rxq_ctrl->rxq.elts_n; + unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; unsigned int i; struct ibv_exp_wq_attr mod; int err; @@ -868,7 +870,7 @@ rxq_setup(struct rxq_ctrl *tmpl) struct ibv_cq *ibcq = tmpl->cq; struct mlx5_cq *cq = to_mxxx(cq, cq); struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq); - struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] = + struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] = rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket); if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) { @@ -879,7 +881,7 @@ rxq_setup(struct rxq_ctrl *tmpl) if (elts == NULL) return ENOMEM; tmpl->rxq.rq_db = rwq->rq.db; - tmpl->rxq.cqe_n = ibcq->cqe + 1; + tmpl->rxq.cqe_n = log2above(ibcq->cqe); tmpl->rxq.cq_ci = 0; tmpl->rxq.rq_ci = 0; tmpl->rxq.cq_db = cq->dbrec; @@ -922,8 +924,9 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct 
rxq_ctrl *rxq_ctrl, .priv = priv, .socket = socket, .rxq = { - .elts_n = desc, + .elts_n = log2above(desc), .mp = mp, + .rss_hash = priv->rxqs_n > 1, }, }; struct ibv_exp_wq_attr mod; @@ -943,6 +946,11 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, (void)conf; /* Thresholds configuration (ignored). */ /* Enable scattered packets support for this queue if necessary. */ assert(mb_len >= RTE_PKTMBUF_HEADROOM); + /* If smaller than MRU, multi-segment support must be enabled. */ + if (mb_len < (priv->mtu > dev->data->dev_conf.rxmode.max_rx_pkt_len ? + dev->data->dev_conf.rxmode.max_rx_pkt_len : + priv->mtu)) + dev->data->dev_conf.rxmode.jumbo_frame = 1; if ((dev->data->dev_conf.rxmode.jumbo_frame) && (dev->data->dev_conf.rxmode.max_rx_pkt_len > (mb_len - RTE_PKTMBUF_HEADROOM))) { @@ -1146,7 +1154,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, } /* Reuse buffers from original queue if possible. */ if (rxq_ctrl->rxq.elts_n) { - assert(rxq_ctrl->rxq.elts_n == desc); + assert(1 << rxq_ctrl->rxq.elts_n == desc); assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts); ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts); } else @@ -1259,7 +1267,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, (void *)dev, (void *)rxq_ctrl); (*priv->rxqs)[idx] = &rxq_ctrl->rxq; /* Update receive callback. */ - dev->rx_pkt_burst = mlx5_rx_burst; + priv_select_rx_function(priv); } priv_unlock(priv); return -ret; diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c index c0bcfd03..b56c0a11 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c @@ -39,18 +39,18 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #include <infiniband/mlx5_hw.h> #include <infiniband/arch.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_mempool.h> @@ -59,7 +59,7 @@ #include <rte_branch_prediction.h> #include <rte_ether.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -69,6 +69,8 @@ #include "mlx5_defs.h" #include "mlx5_prm.h" +//#define MLX5_OPCODE_TSO 0xe + #ifndef NDEBUG /** @@ -81,10 +83,10 @@ * 0 the first time. */ static inline int -check_cqe64_seen(volatile struct mlx5_cqe64 *cqe) +check_cqe_seen(volatile struct mlx5_cqe *cqe) { static const uint8_t magic[] = "seen"; - volatile uint8_t (*buf)[sizeof(cqe->rsvd40)] = &cqe->rsvd40; + volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3; int ret = 1; unsigned int i; @@ -99,9 +101,9 @@ check_cqe64_seen(volatile struct mlx5_cqe64 *cqe) #endif /* NDEBUG */ static inline int -check_cqe64(volatile struct mlx5_cqe64 *cqe, - unsigned int cqes_n, const uint16_t ci) - __attribute__((always_inline)); +check_cqe(volatile struct mlx5_cqe *cqe, + unsigned int cqes_n, const uint16_t ci) + __attribute__((always_inline)); /** * Check whether CQE is valid. @@ -117,8 +119,8 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, * 0 on success, 1 on failure. 
*/ static inline int -check_cqe64(volatile struct mlx5_cqe64 *cqe, - unsigned int cqes_n, const uint16_t ci) +check_cqe(volatile struct mlx5_cqe *cqe, + unsigned int cqes_n, const uint16_t ci) { uint16_t idx = ci & cqes_n; uint8_t op_own = cqe->op_own; @@ -136,14 +138,14 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) || (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR)) return 0; - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected CQE error %u (0x%02x)" " syndrome 0x%02x", op_code, op_code, syndrome); return 1; } else if ((op_code != MLX5_CQE_RESP_SEND) && (op_code != MLX5_CQE_REQ)) { - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected CQE opcode %u (0x%02x)", op_code, op_code); return 1; @@ -152,6 +154,9 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, return 0; } +static inline void +txq_complete(struct txq *txq) __attribute__((always_inline)); + /** * Manage TX completions. * @@ -160,34 +165,34 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, * @param txq * Pointer to TX queue structure. 
*/ -static void +static inline void txq_complete(struct txq *txq) { - const unsigned int elts_n = txq->elts_n; - const unsigned int cqe_n = txq->cqe_n; + const unsigned int elts_n = 1 << txq->elts_n; + const unsigned int cqe_n = 1 << txq->cqe_n; const unsigned int cqe_cnt = cqe_n - 1; uint16_t elts_free = txq->elts_tail; uint16_t elts_tail; uint16_t cq_ci = txq->cq_ci; - volatile struct mlx5_cqe64 *cqe = NULL; - volatile union mlx5_wqe *wqe; + volatile struct mlx5_cqe *cqe = NULL; + volatile struct mlx5_wqe *wqe; do { - volatile struct mlx5_cqe64 *tmp; + volatile struct mlx5_cqe *tmp; - tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64; - if (check_cqe64(tmp, cqe_n, cq_ci)) + tmp = &(*txq->cqes)[cq_ci & cqe_cnt]; + if (check_cqe(tmp, cqe_n, cq_ci)) break; cqe = tmp; #ifndef NDEBUG if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) { - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected compressed CQE, TX stopped"); return; } if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) || (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) { - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected error CQE, TX stopped"); return; } @@ -196,9 +201,10 @@ txq_complete(struct txq *txq) } while (1); if (unlikely(cqe == NULL)) return; - wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)]; - elts_tail = wqe->wqe.ctrl.data[3]; - assert(elts_tail < txq->wqe_n); + wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & + ((1 << txq->wqe_n) - 1)].hdr; + elts_tail = wqe->ctrl[3]; + assert(elts_tail < (1 << txq->wqe_n)); /* Free buffers. */ while (elts_free != elts_tail) { struct rte_mbuf *elt = (*txq->elts)[elts_free]; @@ -284,235 +290,6 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp) } /** - * Write a regular WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. 
- */ -static inline void -mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length, uint32_t lkey) -{ - wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4); - wqe->wqe.ctrl.data[2] = 0; - wqe->wqe.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE); - /* Copy the first 16 bytes into inline header. */ - rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start, - (uint8_t *)(uintptr_t)addr, - MLX5_ETH_INLINE_HEADER_SIZE); - addr += MLX5_ETH_INLINE_HEADER_SIZE; - length -= MLX5_ETH_INLINE_HEADER_SIZE; - /* Store remaining data in data segment. */ - wqe->wqe.dseg.byte_count = htonl(length); - wqe->wqe.dseg.lkey = lkey; - wqe->wqe.dseg.addr = htonll(addr); - /* Increment consumer index. */ - ++txq->wqe_ci; -} - -/** - * Write a regular WQE with VLAN. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. - * @param vlan_tci - * VLAN field to insert in packet. - */ -static inline void -mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length, uint32_t lkey, - uint16_t vlan_tci) -{ - uint32_t vlan = htonl(0x81000000 | vlan_tci); - - wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4); - wqe->wqe.ctrl.data[2] = 0; - wqe->wqe.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE); - /* - * Copy 12 bytes of source & destination MAC address. - * Copy 4 bytes of VLAN. - * Copy 2 bytes of Ether type. 
- */ - rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start, - (uint8_t *)(uintptr_t)addr, 12); - rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12), - &vlan, sizeof(vlan)); - rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16), - (uint8_t *)((uintptr_t)addr + 12), 2); - addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - /* Store remaining data in data segment. */ - wqe->wqe.dseg.byte_count = htonl(length); - wqe->wqe.dseg.lkey = lkey; - wqe->wqe.dseg.addr = htonll(addr); - /* Increment consumer index. */ - ++txq->wqe_ci; -} - -/** - * Write a inline WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. - */ -static inline void -mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length) -{ - uint32_t size; - uint16_t wqe_cnt = txq->wqe_n - 1; - uint16_t wqe_ci = txq->wqe_ci + 1; - - /* Copy the first 16 bytes into inline header. */ - rte_memcpy((void *)(uintptr_t)wqe->inl.eseg.inline_hdr_start, - (void *)(uintptr_t)addr, - MLX5_ETH_INLINE_HEADER_SIZE); - addr += MLX5_ETH_INLINE_HEADER_SIZE; - length -= MLX5_ETH_INLINE_HEADER_SIZE; - size = 3 + ((4 + length + 15) / 16); - wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG); - rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0], - (void *)addr, MLX5_WQE64_INL_DATA); - addr += MLX5_WQE64_INL_DATA; - length -= MLX5_WQE64_INL_DATA; - while (length) { - volatile union mlx5_wqe *wqe_next = - &(*txq->wqes)[wqe_ci & wqe_cnt]; - uint32_t copy_bytes = (length > sizeof(*wqe)) ? 
- sizeof(*wqe) : - length; - - rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0], - (uint8_t *)addr); - addr += copy_bytes; - length -= copy_bytes; - ++wqe_ci; - } - assert(size < 64); - wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size); - wqe->inl.ctrl.data[2] = 0; - wqe->inl.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE); - /* Increment consumer index. */ - txq->wqe_ci = wqe_ci; -} - -/** - * Write a inline WQE with VLAN. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. - * @param vlan_tci - * VLAN field to insert in packet. - */ -static inline void -mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length, uint16_t vlan_tci) -{ - uint32_t size; - uint32_t wqe_cnt = txq->wqe_n - 1; - uint16_t wqe_ci = txq->wqe_ci + 1; - uint32_t vlan = htonl(0x81000000 | vlan_tci); - - /* - * Copy 12 bytes of source & destination MAC address. - * Copy 4 bytes of VLAN. - * Copy 2 bytes of Ether type. 
- */ - rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start, - (uint8_t *)addr, 12); - rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start + 12, - &vlan, sizeof(vlan)); - rte_memcpy((uint8_t *)((uintptr_t)wqe->inl.eseg.inline_hdr_start + 16), - (uint8_t *)(addr + 12), 2); - addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - size = (sizeof(wqe->inl.ctrl.ctrl) + - sizeof(wqe->inl.eseg) + - sizeof(wqe->inl.byte_cnt) + - length + 15) / 16; - wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG); - rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0], - (void *)addr, MLX5_WQE64_INL_DATA); - addr += MLX5_WQE64_INL_DATA; - length -= MLX5_WQE64_INL_DATA; - while (length) { - volatile union mlx5_wqe *wqe_next = - &(*txq->wqes)[wqe_ci & wqe_cnt]; - uint32_t copy_bytes = (length > sizeof(*wqe)) ? - sizeof(*wqe) : - length; - - rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0], - (uint8_t *)addr); - addr += copy_bytes; - length -= copy_bytes; - ++wqe_ci; - } - assert(size < 64); - wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size); - wqe->inl.ctrl.data[2] = 0; - wqe->inl.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE); - /* Increment consumer index. */ - txq->wqe_ci = wqe_ci; -} - -/** * Ring TX queue doorbell. * * @param txq @@ -532,8 +309,8 @@ mlx5_tx_dbrec(struct txq *txq) *txq->qp_db = htonl(txq->wqe_ci); /* Ensure ordering between DB record and BF copy. 
*/ rte_wmb(); - rte_mov16(dst, (uint8_t *)data); - txq->bf_offset ^= txq->bf_buf_size; + memcpy(dst, (uint8_t *)data, 16); + txq->bf_offset ^= (1 << txq->bf_buf_size); } /** @@ -547,9 +324,9 @@ mlx5_tx_dbrec(struct txq *txq) static inline void tx_prefetch_cqe(struct txq *txq, uint16_t ci) { - volatile struct mlx5_cqe64 *cqe; + volatile struct mlx5_cqe *cqe; - cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64; + cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)]; rte_prefetch0(cqe); } @@ -564,9 +341,9 @@ tx_prefetch_cqe(struct txq *txq, uint16_t ci) static inline void tx_prefetch_wqe(struct txq *txq, uint16_t ci) { - volatile union mlx5_wqe *wqe; + volatile struct mlx5_wqe64 *wqe; - wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)]; + wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)]; rte_prefetch0(wqe); } @@ -588,12 +365,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; unsigned int comp; - volatile union mlx5_wqe *wqe = NULL; + volatile struct mlx5_wqe *wqe = NULL; + unsigned int segs_n = 0; + struct rte_mbuf *buf = NULL; + uint8_t *raw; if (unlikely(!pkts_n)) return 0; @@ -607,15 +387,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (max > elts_n) max -= elts_n; do { - struct rte_mbuf *buf = *(pkts++); - unsigned int elts_head_next; - uintptr_t addr; + volatile struct mlx5_wqe_data_seg *dseg = NULL; uint32_t length; - uint32_t lkey; - unsigned int segs_n = buf->nb_segs; - volatile struct mlx5_wqe_data_seg *dseg; - unsigned int ds = sizeof(*wqe) / 16; + unsigned int ds = 0; + uintptr_t addr; +#ifdef MLX5_PMD_SOFT_COUNTERS + uint32_t total_length = 0; +#endif + /* first_seg */ + buf = *(pkts++); + segs_n = buf->nb_segs; /* * Make sure there is enough room to store this packet and * that one 
ring entry remains unused. @@ -624,235 +406,180 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (max < segs_n + 1) break; max -= segs_n; - --pkts_n; - elts_head_next = (elts_head + 1) & (elts_n - 1); - wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)]; - dseg = &wqe->wqe.dseg; - rte_prefetch0(wqe); - if (pkts_n) + --segs_n; + if (!segs_n) + --pkts_n; + wqe = &(*txq->wqes)[txq->wqe_ci & + ((1 << txq->wqe_n) - 1)].hdr; + tx_prefetch_wqe(txq, txq->wqe_ci + 1); + if (pkts_n > 1) rte_prefetch0(*pkts); - /* Retrieve buffer information. */ addr = rte_pktmbuf_mtod(buf, uintptr_t); length = DATA_LEN(buf); +#ifdef MLX5_PMD_SOFT_COUNTERS + total_length = length; +#endif + assert(length >= MLX5_WQE_DWORD_SIZE); /* Update element. */ (*txq->elts)[elts_head] = buf; + elts_head = (elts_head + 1) & (elts_n - 1); /* Prefetch next buffer data. */ - if (pkts_n) - rte_prefetch0(rte_pktmbuf_mtod(*pkts, - volatile void *)); - /* Retrieve Memory Region key for this memory pool. */ - lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - if (buf->ol_flags & PKT_TX_VLAN_PKT) - mlx5_wqe_write_vlan(txq, wqe, addr, length, lkey, - buf->vlan_tci); - else - mlx5_wqe_write(txq, wqe, addr, length, lkey); + if (pkts_n > 1) { + volatile void *pkt_addr; + + pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *); + rte_prefetch0(pkt_addr); + } /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->wqe.eseg.cs_flags = + wqe->eseg.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; } else { - wqe->wqe.eseg.cs_flags = 0; + wqe->eseg.cs_flags = 0; + } + raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0]; + /* Start the know and common part of the WQE structure. */ + wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); + wqe->ctrl[2] = 0; + wqe->ctrl[3] = 0; + wqe->eseg.rsvd0 = 0; + wqe->eseg.rsvd1 = 0; + wqe->eseg.mss = 0; + wqe->eseg.rsvd2 = 0; + /* Start by copying the Ethernet Header. 
*/ + memcpy((uint8_t *)raw, ((uint8_t *)addr), 16); + length -= MLX5_WQE_DWORD_SIZE; + addr += MLX5_WQE_DWORD_SIZE; + /* Replace the Ethernet type by the VLAN if necessary. */ + if (buf->ol_flags & PKT_TX_VLAN_PKT) { + uint32_t vlan = htonl(0x81000000 | buf->vlan_tci); + + memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - + sizeof(vlan)), + &vlan, sizeof(vlan)); + addr -= sizeof(vlan); + length += sizeof(vlan); } - while (--segs_n) { + /* Inline if enough room. */ + if (txq->max_inline != 0) { + uintptr_t end = + (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]; + uint16_t max_inline = + txq->max_inline * RTE_CACHE_LINE_SIZE; + uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE; + uint16_t room; + + raw += MLX5_WQE_DWORD_SIZE; + room = end - (uintptr_t)raw; + if (room > max_inline) { + uintptr_t addr_end = (addr + max_inline) & + ~(RTE_CACHE_LINE_SIZE - 1); + uint16_t copy_b = ((addr_end - addr) > length) ? + length : + (addr_end - addr); + + rte_memcpy((void *)raw, (void *)addr, copy_b); + addr += copy_b; + length -= copy_b; + pkt_inline_sz += copy_b; + /* Sanity check. */ + assert(addr <= addr_end); + } + /* Store the inlined packet size in the WQE. */ + wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz); + /* + * 2 DWORDs consumed by the WQE header + 1 DSEG + + * the size of the inline part of the packet. + */ + ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); + if (length > 0) { + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + if ((uintptr_t)dseg >= end) + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)&(*txq->wqes)[0]); + goto use_dseg; + } else if (!segs_n) { + goto next_pkt; + } else { + goto next_seg; + } + } else { /* - * Spill on next WQE when the current one does not have - * enough room left. Size of WQE must a be a multiple - * of data segment size. + * No inline has been done in the packet, only the + * Ethernet Header as been stored. 
*/ - assert(!(sizeof(*wqe) % sizeof(*dseg))); - if (!(ds % (sizeof(*wqe) / 16))) - dseg = (volatile void *) - &(*txq->wqes)[txq->wqe_ci++ & - (txq->wqe_n - 1)]; - else - ++dseg; + wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE); + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); + ds = 3; +use_dseg: + /* Add the remaining packet as a simple ds. */ + *dseg = (struct mlx5_wqe_data_seg) { + .addr = htonll(addr), + .byte_count = htonl(length), + .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + }; ++ds; - buf = buf->next; - assert(buf); - /* Store segment information. */ - dseg->byte_count = htonl(DATA_LEN(buf)); - dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); - (*txq->elts)[elts_head_next] = buf; - elts_head_next = (elts_head_next + 1) & (elts_n - 1); -#ifdef MLX5_PMD_SOFT_COUNTERS - length += DATA_LEN(buf); -#endif - ++j; + if (!segs_n) + goto next_pkt; } - /* Update DS field in WQE. */ - wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0); - wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f); - elts_head = elts_head_next; -#ifdef MLX5_PMD_SOFT_COUNTERS - /* Increment sent bytes counter. */ - txq->stats.obytes += length; -#endif - elts_head = elts_head_next; - ++i; - } while (pkts_n); - /* Take a shortcut if nothing must be sent. */ - if (unlikely(i == 0)) - return 0; - /* Check whether completion threshold has been reached. */ - comp = txq->elts_comp + i + j; - if (comp >= MLX5_TX_COMP_THRESH) { - /* Request completion on last WQE. */ - wqe->wqe.ctrl.data[2] = htonl(8); - /* Save elts_head in unused "immediate" field of WQE. */ - wqe->wqe.ctrl.data[3] = elts_head; - txq->elts_comp = 0; - } else { - txq->elts_comp = comp; - } -#ifdef MLX5_PMD_SOFT_COUNTERS - /* Increment sent packets counter. */ - txq->stats.opackets += i; -#endif - /* Ring QP doorbell. */ - mlx5_tx_dbrec(txq); - txq->elts_head = elts_head; - return i; -} - -/** - * DPDK callback for TX with inline support. 
- * - * @param dpdk_txq - * Generic pointer to TX queue structure. - * @param[in] pkts - * Packets to transmit. - * @param pkts_n - * Number of packets in array. - * - * @return - * Number of packets successfully transmitted (<= pkts_n). - */ -uint16_t -mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) -{ - struct txq *txq = (struct txq *)dpdk_txq; - uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; - unsigned int i = 0; - unsigned int j = 0; - unsigned int max; - unsigned int comp; - volatile union mlx5_wqe *wqe = NULL; - unsigned int max_inline = txq->max_inline; - - if (unlikely(!pkts_n)) - return 0; - /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_cqe(txq, txq->cq_ci + 1); - rte_prefetch0(*pkts); - /* Start processing. */ - txq_complete(txq); - max = (elts_n - (elts_head - txq->elts_tail)); - if (max > elts_n) - max -= elts_n; - do { - struct rte_mbuf *buf = *(pkts++); - unsigned int elts_head_next; - uintptr_t addr; - uint32_t length; - uint32_t lkey; - unsigned int segs_n = buf->nb_segs; - volatile struct mlx5_wqe_data_seg *dseg; - unsigned int ds = sizeof(*wqe) / 16; - +next_seg: + assert(buf); + assert(ds); + assert(wqe); /* - * Make sure there is enough room to store this packet and - * that one ring entry remains unused. + * Spill on next WQE when the current one does not have + * enough room left. Size of WQE must a be a multiple + * of data segment size. 
*/ - assert(segs_n); - if (max < segs_n + 1) - break; - max -= segs_n; - --pkts_n; - elts_head_next = (elts_head + 1) & (elts_n - 1); - wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)]; - dseg = &wqe->wqe.dseg; - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); - if (pkts_n) - rte_prefetch0(*pkts); - /* Should we enable HW CKSUM offload */ - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->inl.eseg.cs_flags = - MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } else { - wqe->inl.eseg.cs_flags = 0; + assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); + if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { + unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) & + ((1 << txq->wqe_n) - 1); + + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)&(*txq->wqes)[n]); + tx_prefetch_wqe(txq, n + 1); + } else if (!dseg) { + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + } else { + ++dseg; } - /* Retrieve buffer information. */ - addr = rte_pktmbuf_mtod(buf, uintptr_t); + ++ds; + buf = buf->next; + assert(buf); length = DATA_LEN(buf); - /* Update element. */ - (*txq->elts)[elts_head] = buf; - /* Prefetch next buffer data. */ - if (pkts_n) - rte_prefetch0(rte_pktmbuf_mtod(*pkts, - volatile void *)); - if ((length <= max_inline) && (segs_n == 1)) { - if (buf->ol_flags & PKT_TX_VLAN_PKT) - mlx5_wqe_write_inline_vlan(txq, wqe, - addr, length, - buf->vlan_tci); - else - mlx5_wqe_write_inline(txq, wqe, addr, length); - goto skip_segs; - } else { - /* Retrieve Memory Region key for this memory pool. */ - lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - if (buf->ol_flags & PKT_TX_VLAN_PKT) - mlx5_wqe_write_vlan(txq, wqe, addr, length, - lkey, buf->vlan_tci); - else - mlx5_wqe_write(txq, wqe, addr, length, lkey); - } - while (--segs_n) { - /* - * Spill on next WQE when the current one does not have - * enough room left. Size of WQE must a be a multiple - * of data segment size. 
- */ - assert(!(sizeof(*wqe) % sizeof(*dseg))); - if (!(ds % (sizeof(*wqe) / 16))) - dseg = (volatile void *) - &(*txq->wqes)[txq->wqe_ci++ & - (txq->wqe_n - 1)]; - else - ++dseg; - ++ds; - buf = buf->next; - assert(buf); - /* Store segment information. */ - dseg->byte_count = htonl(DATA_LEN(buf)); - dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); - (*txq->elts)[elts_head_next] = buf; - elts_head_next = (elts_head_next + 1) & (elts_n - 1); #ifdef MLX5_PMD_SOFT_COUNTERS - length += DATA_LEN(buf); + total_length += length; #endif - ++j; - } - /* Update DS field in WQE. */ - wqe->inl.ctrl.data[1] &= htonl(0xffffffc0); - wqe->inl.ctrl.data[1] |= htonl(ds & 0x3f); -skip_segs: - elts_head = elts_head_next; + /* Store segment information. */ + *dseg = (struct mlx5_wqe_data_seg) { + .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), + .byte_count = htonl(length), + .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + }; + (*txq->elts)[elts_head] = buf; + elts_head = (elts_head + 1) & (elts_n - 1); + ++j; + --segs_n; + if (segs_n) + goto next_seg; + else + --pkts_n; +next_pkt: + ++i; + wqe->ctrl[1] = htonl(txq->qp_num_8s | ds); + txq->wqe_ci += (ds + 3) / 4; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ - txq->stats.obytes += length; + txq->stats.obytes += total_length; #endif - ++i; } while (pkts_n); /* Take a shortcut if nothing must be sent. */ if (unlikely(i == 0)) @@ -861,9 +588,9 @@ skip_segs: comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { /* Request completion on last WQE. */ - wqe->inl.ctrl.data[2] = htonl(8); + wqe->ctrl[2] = htonl(8); /* Save elts_head in unused "immediate" field of WQE. 
*/ - wqe->inl.ctrl.data[3] = elts_head; + wqe->ctrl[3] = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -891,28 +618,29 @@ skip_segs: static inline void mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) { - uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1); + uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = (volatile struct mlx5_wqe_data_seg (*)[]) - (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)]; + (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)]; mpw->state = MLX5_MPW_STATE_OPENED; mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = &(*txq->wqes)[idx]; - mpw->wqe->mpw.eseg.mss = htons(length); - mpw->wqe->mpw.eseg.inline_hdr_sz = 0; - mpw->wqe->mpw.eseg.rsvd0 = 0; - mpw->wqe->mpw.eseg.rsvd1 = 0; - mpw->wqe->mpw.eseg.rsvd2 = 0; - mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) | - (txq->wqe_ci << 8) | - MLX5_OPCODE_TSO); - mpw->wqe->mpw.ctrl.data[2] = 0; - mpw->wqe->mpw.ctrl.data[3] = 0; - mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0]; - mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1]; + mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe->eseg.mss = htons(length); + mpw->wqe->eseg.inline_hdr_sz = 0; + mpw->wqe->eseg.rsvd0 = 0; + mpw->wqe->eseg.rsvd1 = 0; + mpw->wqe->eseg.rsvd2 = 0; + mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | + (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); + mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[3] = 0; + mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *) + (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); + mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *) + (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE)); mpw->data.dseg[2] = &(*dseg)[0]; mpw->data.dseg[3] = &(*dseg)[1]; mpw->data.dseg[4] = &(*dseg)[2]; @@ -935,7 +663,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw) * Store size in multiple of 16 bytes. Control and Ethernet segments * count as 2. 
*/ - mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num)); + mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num)); mpw->state = MLX5_MPW_STATE_CLOSED; if (num < 3) ++txq->wqe_ci; @@ -963,7 +691,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; @@ -1013,11 +741,11 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if ((mpw.state == MLX5_MPW_STATE_OPENED) && ((mpw.len != length) || (segs_n != 1) || - (mpw.wqe->mpw.eseg.cs_flags != cs_flags))) + (mpw.wqe->eseg.cs_flags != cs_flags))) mlx5_mpw_close(txq, &mpw); if (mpw.state == MLX5_MPW_STATE_CLOSED) { mlx5_mpw_new(txq, &mpw, length); - mpw.wqe->mpw.eseg.cs_flags = cs_flags; + mpw.wqe->eseg.cs_flags = cs_flags; } /* Multi-segment packets must be alone in their MPW. */ assert((segs_n == 1) || (mpw.pkts_n == 0)); @@ -1063,12 +791,12 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* "j" includes both packets and segments. */ comp = txq->elts_comp + j; if (comp >= MLX5_TX_COMP_THRESH) { - volatile union mlx5_wqe *wqe = mpw.wqe; + volatile struct mlx5_wqe *wqe = mpw.wqe; /* Request completion on last WQE. */ - wqe->mpw.ctrl.data[2] = htonl(8); + wqe->ctrl[2] = htonl(8); /* Save elts_head in unused "immediate" field of WQE. 
*/ - wqe->mpw.ctrl.data[3] = elts_head; + wqe->ctrl[3] = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -1098,25 +826,28 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) static inline void mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) { - uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1); + uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); + struct mlx5_wqe_inl_small *inl; mpw->state = MLX5_MPW_INL_STATE_OPENED; mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = &(*txq->wqes)[idx]; - mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) | - (txq->wqe_ci << 8) | - MLX5_OPCODE_TSO); - mpw->wqe->mpw_inl.ctrl.data[2] = 0; - mpw->wqe->mpw_inl.ctrl.data[3] = 0; - mpw->wqe->mpw_inl.eseg.mss = htons(length); - mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0; - mpw->wqe->mpw_inl.eseg.cs_flags = 0; - mpw->wqe->mpw_inl.eseg.rsvd0 = 0; - mpw->wqe->mpw_inl.eseg.rsvd1 = 0; - mpw->wqe->mpw_inl.eseg.rsvd2 = 0; - mpw->data.raw = &mpw->wqe->mpw_inl.data[0]; + mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | + (txq->wqe_ci << 8) | + MLX5_OPCODE_TSO); + mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[3] = 0; + mpw->wqe->eseg.mss = htons(length); + mpw->wqe->eseg.inline_hdr_sz = 0; + mpw->wqe->eseg.cs_flags = 0; + mpw->wqe->eseg.rsvd0 = 0; + mpw->wqe->eseg.rsvd1 = 0; + mpw->wqe->eseg.rsvd2 = 0; + inl = (struct mlx5_wqe_inl_small *) + (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE); + mpw->data.raw = (uint8_t *)&inl->raw; } /** @@ -1131,17 +862,18 @@ static inline void mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw) { unsigned int size; + struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *) + (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); - size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len; + size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len; /* * Store size in multiple of 
16 bytes. Control and Ethernet segments * count as 2. */ - mpw->wqe->mpw_inl.ctrl.data[1] = - htonl(txq->qp_num_8s | ((size + 15) / 16)); + mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size)); mpw->state = MLX5_MPW_STATE_CLOSED; - mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG); - txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe); + inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG); + txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; } /** @@ -1163,12 +895,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; unsigned int comp; - unsigned int inline_room = txq->max_inline; + unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, }; @@ -1214,31 +946,33 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, if (mpw.state == MLX5_MPW_STATE_OPENED) { if ((mpw.len != length) || (segs_n != 1) || - (mpw.wqe->mpw.eseg.cs_flags != cs_flags)) + (mpw.wqe->eseg.cs_flags != cs_flags)) mlx5_mpw_close(txq, &mpw); } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { if ((mpw.len != length) || (segs_n != 1) || (length > inline_room) || - (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) { + (mpw.wqe->eseg.cs_flags != cs_flags)) { mlx5_mpw_inline_close(txq, &mpw); - inline_room = txq->max_inline; + inline_room = + txq->max_inline * RTE_CACHE_LINE_SIZE; } } if (mpw.state == MLX5_MPW_STATE_CLOSED) { if ((segs_n != 1) || (length > inline_room)) { mlx5_mpw_new(txq, &mpw, length); - mpw.wqe->mpw.eseg.cs_flags = cs_flags; + mpw.wqe->eseg.cs_flags = cs_flags; } else { mlx5_mpw_inline_new(txq, &mpw, length); - mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags; + mpw.wqe->eseg.cs_flags = cs_flags; } } /* Multi-segment 
packets must be alone in their MPW. */ assert((segs_n == 1) || (mpw.pkts_n == 0)); if (mpw.state == MLX5_MPW_STATE_OPENED) { - assert(inline_room == txq->max_inline); + assert(inline_room == + txq->max_inline * RTE_CACHE_LINE_SIZE); #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) length = 0; #endif @@ -1277,7 +1011,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, addr = rte_pktmbuf_mtod(buf, uintptr_t); (*txq->elts)[elts_head] = buf; /* Maximum number of bytes before wrapping. */ - max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] - + max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] - (uintptr_t)mpw.data.raw); if (length > max) { rte_memcpy((void *)(uintptr_t)mpw.data.raw, @@ -1296,14 +1030,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, mpw.data.raw += length; } if ((uintptr_t)mpw.data.raw == - (uintptr_t)&(*txq->wqes)[txq->wqe_n]) + (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]) mpw.data.raw = (volatile void *)&(*txq->wqes)[0]; ++mpw.pkts_n; ++j; if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { mlx5_mpw_inline_close(txq, &mpw); - inline_room = txq->max_inline; + inline_room = + txq->max_inline * RTE_CACHE_LINE_SIZE; } else { inline_room -= length; } @@ -1323,12 +1058,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, /* "j" includes both packets and segments. */ comp = txq->elts_comp + j; if (comp >= MLX5_TX_COMP_THRESH) { - volatile union mlx5_wqe *wqe = mpw.wqe; + volatile struct mlx5_wqe *wqe = mpw.wqe; /* Request completion on last WQE. */ - wqe->mpw_inl.ctrl.data[2] = htonl(8); + wqe->ctrl[2] = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->mpw_inl.ctrl.data[3] = elts_head; + wqe->ctrl[3] = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -1359,25 +1094,24 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, * Packet type for struct rte_mbuf. 
*/ static inline uint32_t -rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe) +rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) { uint32_t pkt_type; uint8_t flags = cqe->l4_hdr_type_etc; - uint8_t info = cqe->rsvd0[0]; - if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET) + if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) pkt_type = TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, + MLX5_CQE_RX_OUTER_IPV4_PACKET, RTE_PTYPE_L3_IPV4) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, + MLX5_CQE_RX_OUTER_IPV6_PACKET, RTE_PTYPE_L3_IPV6) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV4_PACKET, + MLX5_CQE_RX_IPV4_PACKET, RTE_PTYPE_INNER_L3_IPV4) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV6_PACKET, + MLX5_CQE_RX_IPV6_PACKET, RTE_PTYPE_INNER_L3_IPV6); else pkt_type = @@ -1399,14 +1133,16 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe) * Pointer to RX queue. * @param cqe * CQE to process. + * @param[out] rss_hash + * Packet RSS Hash result. * * @return * Packet size in bytes (0 if there is none), -1 in case of completion * with error. 
*/ static inline int -mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, - uint16_t cqe_cnt) +mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, + uint16_t cqe_cnt, uint32_t *rss_hash) { struct rxq_zip *zip = &rxq->zip; uint16_t cqe_n = cqe_cnt + 1; @@ -1416,9 +1152,10 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, if (zip->ai) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) - (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].cqe64); + (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt]); len = ntohl((*mc)[zip->ai & 7].byte_cnt); + *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result); if ((++zip->ai & 7) == 0) { /* * Increment consumer index to skip the number of @@ -1433,7 +1170,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, uint16_t end = zip->cq_ci; while (idx != end) { - (*rxq->cqes)[idx & cqe_cnt].cqe64.op_own = + (*rxq->cqes)[idx & cqe_cnt].op_own = MLX5_CQE_INVALIDATE; ++idx; } @@ -1445,7 +1182,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, int ret; int8_t op_own; - ret = check_cqe64(cqe, cqe_n, rxq->cq_ci); + ret = check_cqe(cqe, cqe_n, rxq->cq_ci); if (unlikely(ret == 1)) return 0; ++rxq->cq_ci; @@ -1454,7 +1191,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & - cqe_cnt].cqe64); + cqe_cnt]); /* Fix endianness. */ zip->cqe_cnt = ntohl(cqe->byte_cnt); @@ -1473,9 +1210,11 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; /* Get packet size to return. */ len = ntohl((*mc)[0].byte_cnt); + *rss_hash = ntohl((*mc)[0].rx_hash_result); zip->ai = 1; } else { len = ntohl(cqe->byte_cnt); + *rss_hash = ntohl(cqe->rx_hash_res); } /* Error while receiving packet. 
*/ if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR)) @@ -1496,38 +1235,32 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, * Offload flags (ol_flags) for struct rte_mbuf. */ static inline uint32_t -rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe) +rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) { uint32_t ol_flags = 0; uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK; uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK; - uint8_t info = cqe->rsvd0[0]; if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) || (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6)) - ol_flags |= - (!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) * - PKT_RX_IP_CKSUM_BAD); + ol_flags |= TRANSPOSE(cqe->hds_ip_ext, + MLX5_CQE_L3_OK, + PKT_RX_IP_CKSUM_GOOD); if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) || (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) || (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) || (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP)) + ol_flags |= TRANSPOSE(cqe->hds_ip_ext, + MLX5_CQE_L4_OK, + PKT_RX_L4_CKSUM_GOOD); + if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) ol_flags |= - (!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) * - PKT_RX_L4_CKSUM_BAD); - /* - * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place - * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional - * (its value is 0). 
- */ - if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) - ol_flags |= - TRANSPOSE(~cqe->l4_hdr_type_etc, - IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK, - PKT_RX_IP_CKSUM_BAD) | - TRANSPOSE(~cqe->l4_hdr_type_etc, - IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK, - PKT_RX_L4_CKSUM_BAD); + TRANSPOSE(cqe->l4_hdr_type_etc, + MLX5_CQE_RX_OUTER_IP_CSUM_OK, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(cqe->l4_hdr_type_etc, + MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK, + PKT_RX_L4_CKSUM_GOOD); return ol_flags; } @@ -1548,21 +1281,22 @@ uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct rxq *rxq = dpdk_rxq; - const unsigned int wqe_cnt = rxq->elts_n - 1; - const unsigned int cqe_cnt = rxq->cqe_n - 1; + const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; + const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; const unsigned int sges_n = rxq->sges_n; struct rte_mbuf *pkt = NULL; struct rte_mbuf *seg = NULL; - volatile struct mlx5_cqe64 *cqe = - &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; + volatile struct mlx5_cqe *cqe = + &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; unsigned int i = 0; unsigned int rq_ci = rxq->rq_ci << sges_n; - int len; + int len; /* keep its value across iterations. */ while (pkts_n) { unsigned int idx = rq_ci & wqe_cnt; volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx]; struct rte_mbuf *rep = (*rxq->elts)[idx]; + uint32_t rss_hash_res = 0; if (pkt) NEXT(seg) = rep; @@ -1572,6 +1306,14 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch0(wqe); rep = rte_mbuf_raw_alloc(rxq->mp); if (unlikely(rep == NULL)) { + ++rxq->stats.rx_nombuf; + if (!pkt) { + /* + * no buffers before we even started, + * bail out silently. 
+ */ + break; + } while (pkt != seg) { assert(pkt != (*rxq->elts)[idx]); seg = NEXT(pkt); @@ -1579,13 +1321,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) __rte_mbuf_raw_free(pkt); pkt = seg; } - ++rxq->stats.rx_nombuf; break; } if (!pkt) { - cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; - len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt); - if (len == 0) { + cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; + len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, + &rss_hash_res); + if (!len) { rte_mbuf_refcnt_set(rep, 0); __rte_mbuf_raw_free(rep); break; @@ -1602,12 +1344,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Update packet information. */ pkt->packet_type = 0; pkt->ol_flags = 0; + if (rxq->rss_hash) { + pkt->hash.rss = rss_hash_res; + pkt->ol_flags = PKT_RX_RSS_HASH; + } if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | rxq->crc_present) { if (rxq->csum) { pkt->packet_type = rxq_cq_to_pkt_type(cqe); - pkt->ol_flags = + pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); } if (cqe->l4_hdr_type_etc & diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h index d87dd19b..f45e3f51 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h @@ -40,22 +40,23 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #include <infiniband/mlx5_hw.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_mempool.h> +#include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5_utils.h" @@ -87,6 +88,8 @@ struct mlx5_txq_stats { struct fdir_queue { struct ibv_qp *qp; /* Associated RX QP. */ struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */ + struct ibv_exp_wq *wq; /* Work queue. */ + struct ibv_cq *cq; /* Completion queue. */ }; struct priv; @@ -107,16 +110,18 @@ struct rxq { unsigned int vlan_strip:1; /* Enable VLAN stripping. */ unsigned int crc_present:1; /* CRC must be subtracted. */ unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */ + unsigned int cqe_n:4; /* Log 2 of CQ elements. */ + unsigned int elts_n:4; /* Log 2 of Mbufs. */ + unsigned int port_id:8; + unsigned int rss_hash:1; /* RSS hash result is enabled. */ + unsigned int :9; /* Remaining bits. */ + volatile uint32_t *rq_db; + volatile uint32_t *cq_db; uint16_t rq_ci; uint16_t cq_ci; - uint16_t elts_n; - uint16_t cqe_n; /* Number of CQ elements. */ - uint16_t port_id; volatile struct mlx5_wqe_data_seg(*wqes)[]; volatile struct mlx5_cqe(*cqes)[]; struct rxq_zip zip; /* Compressed context. */ - volatile uint32_t *rq_db; - volatile uint32_t *cq_db; struct rte_mbuf *(*elts)[]; struct rte_mempool *mp; struct mlx5_rxq_stats stats; @@ -128,7 +133,7 @@ struct rxq_ctrl { struct ibv_cq *cq; /* Completion Queue. */ struct ibv_exp_wq *wq; /* Work Queue. */ struct ibv_exp_res_domain *rd; /* Resource Domain. */ - struct fdir_queue fdir_queue; /* Flow director queue. */ + struct fdir_queue *fdir_queue; /* Flow director queue. */ struct ibv_mr *mr; /* Memory Region (for mp). */ struct ibv_exp_wq_family *if_wq; /* WQ burst interface. */ struct ibv_exp_cq_family_v1 *if_cq; /* CQ interface. 
*/ @@ -235,22 +240,30 @@ struct hash_rxq { [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS]; }; +/** C extension macro for environments lacking C11 features. */ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define RTE_STD_C11 __extension__ +#else +#define RTE_STD_C11 +#endif + /* TX queue descriptor. */ +RTE_STD_C11 struct txq { uint16_t elts_head; /* Current index in (*elts)[]. */ uint16_t elts_tail; /* First element awaiting completion. */ uint16_t elts_comp; /* Counter since last completion request. */ - uint16_t elts_n; /* (*elts)[] length. */ uint16_t cq_ci; /* Consumer index for completion queue. */ - uint16_t cqe_n; /* Number of CQ elements. */ uint16_t wqe_ci; /* Consumer index for work queue. */ - uint16_t wqe_n; /* Number of WQ elements. */ + uint16_t elts_n:4; /* (*elts)[] length (in log2). */ + uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ + uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */ + uint16_t bf_buf_size:4; /* Log2 Blueflame size. */ uint16_t bf_offset; /* Blueflame offset. */ - uint16_t bf_buf_size; /* Blueflame size. */ - uint16_t max_inline; /* Maximum size to inline in a WQE. */ + uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ - volatile union mlx5_wqe (*wqes)[]; /* Work queue. */ + volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */ volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ volatile void *bf_reg; /* Blueflame register. 
*/ @@ -312,7 +325,6 @@ uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t); /* mlx5_rxtx.c */ uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_tx_burst_inline(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_stats.c b/src/dpdk/drivers/net/mlx5/mlx5_stats.c index 788ef939..c6087d4e 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_stats.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_stats.c @@ -33,31 +33,20 @@ /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ethdev.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" #include "mlx5_rxtx.h" #include "mlx5_defs.h" - #include <linux/ethtool.h> #include <linux/sockios.h> -/** - * DPDK callback to get device statistics. - * - * @param dev - * Pointer to Ethernet device structure. - * @param[out] stats - * Stats structure output buffer. - */ - - static void mlx5_stats_read_hw(struct rte_eth_dev *dev, struct rte_eth_stats *stats){ @@ -271,7 +260,8 @@ mlx5_stats_diff(struct rte_eth_stats *a, MLX5_DIFF(rx_nombuf); } - + + void mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { diff --git a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c index e9b9a293..d4dccd88 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c @@ -33,14 +33,14 @@ /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_ethdev.h> #include <rte_interrupts.h> #include <rte_alarm.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_txq.c b/src/dpdk/drivers/net/mlx5/mlx5_txq.c index 6fe61c4a..053665d5 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_txq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_txq.c @@ -40,23 +40,23 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5_utils.h" @@ -81,8 +81,8 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n) for (i = 0; (i != elts_n); ++i) (*txq_ctrl->txq.elts)[i] = NULL; - for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) { - volatile union mlx5_wqe *wqe = &(*txq_ctrl->txq.wqes)[i]; + for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) { + volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i]; memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe)); } @@ -101,7 +101,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n) static void txq_free_elts(struct txq_ctrl *txq_ctrl) { - unsigned int elts_n = txq_ctrl->txq.elts_n; + unsigned int elts_n = 1 << txq_ctrl->txq.elts_n; unsigned int elts_head = 
txq_ctrl->txq.elts_head; unsigned int elts_tail = txq_ctrl->txq.elts_tail; struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts; @@ -212,22 +212,22 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl) "it should be set to %u", RTE_CACHE_LINE_SIZE); return EINVAL; } - tmpl->txq.cqe_n = ibcq->cqe + 1; + tmpl->txq.cqe_n = log2above(ibcq->cqe); tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8; tmpl->txq.wqes = - (volatile union mlx5_wqe (*)[]) + (volatile struct mlx5_wqe64 (*)[]) (uintptr_t)qp->gen_data.sqstart; - tmpl->txq.wqe_n = qp->sq.wqe_cnt; + tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt); tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR]; tmpl->txq.bf_reg = qp->gen_data.bf->reg; tmpl->txq.bf_offset = qp->gen_data.bf->offset; - tmpl->txq.bf_buf_size = qp->gen_data.bf->buf_size; + tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size); tmpl->txq.cq_db = cq->dbrec; tmpl->txq.cqes = (volatile struct mlx5_cqe (*)[]) (uintptr_t)cq->active_buf->buf; tmpl->txq.elts = - (struct rte_mbuf *(*)[tmpl->txq.elts_n]) + (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n]) ((uintptr_t)txq_ctrl + sizeof(*txq_ctrl)); return 0; } @@ -277,7 +277,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl, } (void)conf; /* Thresholds configuration (ignored). */ assert(desc > MLX5_TX_COMP_THRESH); - tmpl.txq.elts_n = desc; + tmpl.txq.elts_n = log2above(desc); /* MRs will be registered in mp2mr[] later. 
*/ attr.rd = (struct ibv_exp_res_domain_init_attr){ .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL | @@ -338,9 +338,12 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl, .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_RES_DOMAIN), }; - if (priv->txq_inline && priv->txqs_n >= priv->txqs_inline) { - tmpl.txq.max_inline = priv->txq_inline; - attr.init.cap.max_inline_data = tmpl.txq.max_inline; + if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) { + tmpl.txq.max_inline = + ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) / + RTE_CACHE_LINE_SIZE); + attr.init.cap.max_inline_data = + tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE; } tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init); if (tmpl.qp == NULL) { diff --git a/src/dpdk/drivers/net/mlx5/mlx5_vlan.c b/src/dpdk/drivers/net/mlx5/mlx5_vlan.c index 4719e697..1b0fa40a 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_vlan.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_vlan.c @@ -38,12 +38,12 @@ /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5_utils.h" @@ -87,7 +87,8 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) --priv->vlan_filter_n; memmove(&priv->vlan_filter[i], &priv->vlan_filter[i + 1], - priv->vlan_filter_n - i); + sizeof(priv->vlan_filter[i]) * + (priv->vlan_filter_n - i)); priv->vlan_filter[priv->vlan_filter_n] = 0; } else { assert(i == priv->vlan_filter_n); diff --git a/src/gtest/bp_timer_gtest.cpp b/src/gtest/bp_timer_gtest.cpp index 07f0e214..70e5e192 100644 --- a/src/gtest/bp_timer_gtest.cpp +++ b/src/gtest/bp_timer_gtest.cpp @@ -23,7 +23,7 @@ limitations under the License. 
#include <common/basic_utils.h> #include "h_timer.h" #include <common/utl_gcc_diag.h> - +#include <cmath> class gt_r_timer : public testing::Test { @@ -216,9 +216,9 @@ TEST_F(gt_r_timer, timer7) { int i; for (i=0; i<150; i++) { - printf(" tick %d :",i); + //printf(" tick %d :",i); timer.on_tick((void *)&timer,my_test_on_tick_cb7); - printf(" \n"); + //printf(" \n"); } EXPECT_EQ( timer.Delete(),RC_HTW_OK); @@ -655,4 +655,460 @@ TEST_F(gt_r_timer, timer18) { +///////////////////////////////////////////////////////////////// +/* test for NA class */ + +class CNATimerWheelTest1Cfg { +public: + uint32_t m_wheel_size; + uint32_t m_level1_div; + uint32_t m_start_tick; + uint32_t m_restart_tick; + uint32_t m_total_ticks; + int m_verbose; + bool m_dont_assert; +}; + + +class CNATimerWheelTest1 : public CHTimerWheelBase { + +public: + bool Create(CNATimerWheelTest1Cfg & cfg); + void Delete(); + void start_test(); + virtual void on_tick(CMyTestObject *lpobj); + +private: + CNATimerWheelTest1Cfg m_cfg; + CNATimerWheel m_timer; + CMyTestObject m_event; + uint32_t m_ticks; + uint32_t m_total_ticks; + uint32_t m_expected_total_ticks; + uint32_t m_div_err; + + uint32_t m_expect_tick; + double m_max_err; +}; + +void my_test_on_tick_cb18(void *userdata,CHTimerObj *tmr){ + CHTimerWheelBase * lp=(CHTimerWheelBase *)userdata; + UNSAFE_CONTAINER_OF_PUSH + CMyTestObject *lpobj=(CMyTestObject *)((uint8_t*)tmr-offsetof (CMyTestObject,m_timer)); + UNSAFE_CONTAINER_OF_POP + lp->on_tick(lpobj); +} + + +void CNATimerWheelTest1::on_tick(CMyTestObject *lpobj){ + assert(lpobj->m_id==17); + m_total_ticks++; + if (m_cfg.m_verbose) { + printf(" [event(%d)-%d]",lpobj->m_timer.m_wheel,lpobj->m_id); + } + if (!m_cfg.m_dont_assert){ + uint32_t expect_min=m_expect_tick; + if (expect_min>m_div_err) { + expect_min-=m_div_err*2; + } + double pre=std::abs(100.0-100.0*(double)m_ticks/(double)m_expect_tick); + if (pre>m_max_err){ + m_max_err=pre; + } + if (pre>(200.0/(double)m_div_err)) { + printf(" 
=====>tick:%d expect [%d -%d] %f \n",m_ticks,expect_min,m_expect_tick+(m_div_err*2),pre); + } + } + m_timer.timer_start(&lpobj->m_timer,m_cfg.m_restart_tick); + m_expect_tick+=m_cfg.m_restart_tick; +} + + +void CNATimerWheelTest1::start_test(){ + + if (m_cfg.m_verbose) { + printf(" test start %d,restart: %d \n",m_cfg.m_start_tick,m_cfg.m_restart_tick); + } + int i; + m_expected_total_ticks=0; + uint32_t cnt=m_cfg.m_start_tick; + for (i=0; i<m_cfg.m_total_ticks; i++) { + if (i==cnt) { + m_expected_total_ticks++; + cnt+=m_cfg.m_restart_tick; + } + } + + m_div_err =m_cfg.m_wheel_size/m_cfg.m_level1_div; + m_total_ticks=0; + m_event.m_id=17; + m_timer.timer_start(&m_event.m_timer,m_cfg.m_start_tick); + + m_ticks=0; + m_expect_tick= m_cfg.m_start_tick; + + for (i=0; i<m_cfg.m_total_ticks; i++) { + if (m_cfg.m_verbose) { + printf(" tick %d :",i); + } + m_ticks=i; + m_timer.on_tick_level0((void *)this,my_test_on_tick_cb18); + /* level 2 */ + if ((i>=m_div_err) && (i%m_div_err==0)) { + int cnt_rerty=0; + while (true){ + if (m_cfg.m_verbose>1) { + printf("\n level1 - try %d \n",cnt_rerty); + } + + na_htw_state_num_t state; + state = m_timer.on_tick_level1((void *)this,my_test_on_tick_cb18); + if (m_cfg.m_verbose>1) { + printf("\n state - %lu \n",(ulong)state); + } + + if ( state !=TW_NEXT_BATCH){ + break; + } + cnt_rerty++; + } + if (m_cfg.m_verbose>1) { + printf("\n level1 - stop %d \n",cnt_rerty); + } + } + if (m_cfg.m_verbose) { + printf(" \n"); + } + } + if (m_cfg.m_verbose) { + printf(" %d == %d \n",m_expected_total_ticks,m_total_ticks); + } + if (!m_cfg.m_dont_assert){ + //assert( (m_expected_total_ticks==m_total_ticks) || ((m_expected_total_ticks+1) ==m_total_ticks) ); + } +} + + +bool CNATimerWheelTest1::Create(CNATimerWheelTest1Cfg & cfg){ + m_cfg = cfg; + m_max_err=0.0; + assert(m_timer.Create(m_cfg.m_wheel_size,m_cfg.m_level1_div)==RC_HTW_OK); + m_ticks=0; + return (true); +} + +void CNATimerWheelTest1::Delete(){ + //printf (" %f \n",m_max_err); + 
assert(m_timer.Delete()==RC_HTW_OK); +} + + +TEST_F(gt_r_timer, timer20) { + + CNATimerWheelTest1 test; + + CNATimerWheelTest1Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_start_tick = 2, + .m_restart_tick = 2, + .m_total_ticks = 1024, + .m_verbose=0 + }; + test.Create(cfg); + test.start_test(); + test.Delete(); +} + +TEST_F(gt_r_timer, timer21) { + + CNATimerWheelTest1 test; + + CNATimerWheelTest1Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_start_tick = 2, + .m_restart_tick = 34, + .m_total_ticks = 100, + .m_verbose=0 + }; + test.Create(cfg); + test.start_test(); + test.Delete(); +} + + +TEST_F(gt_r_timer, timer22) { + + CNATimerWheelTest1 test; + + CNATimerWheelTest1Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_start_tick = 2, + .m_restart_tick = 55, + .m_total_ticks = 1000, + .m_verbose=0, + .m_dont_assert =0 + }; + test.Create(cfg); + test.start_test(); + test.Delete(); +} + +TEST_F(gt_r_timer, timer23) { + + int i,j; + + for (i=0; i<100; i++) { + for (j=1; j<100; j++) { + CNATimerWheelTest1 test; + CNATimerWheelTest1Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_start_tick = (uint32_t)i, + .m_restart_tick = (uint32_t)j, + .m_total_ticks = 1000, + .m_verbose=0, + .m_dont_assert =0 + }; + + cfg.m_total_ticks= (uint32_t)(i*2+j*10); + test.Create(cfg); + test.start_test(); + test.Delete(); + } + } +} + + + +#if 0 +// too long, skip for now +TEST_F(gt_r_timer, timer24) { + + int i,j; + + for (i=0; i<2048; i++) { + printf(" %d \n",i); + for (j=1024; j<2048; j=j+7) { + CNATimerWheelTest1 test; + CNATimerWheelTest1Cfg cfg ={ + .m_wheel_size = 1024, + .m_level1_div = 32, + .m_start_tick = (uint32_t)i, + .m_restart_tick = (uint32_t)j, + .m_total_ticks = 3000, + .m_verbose=0, + .m_dont_assert =0 + }; + + cfg.m_total_ticks= (uint32_t)(i*2+j*10); + test.Create(cfg); + test.start_test(); + test.Delete(); + } + } +} +#endif + +/* very long flow, need to restart it */ +TEST_F(gt_r_timer, timer25) { + + + CNATimerWheelTest1 
test; + + CNATimerWheelTest1Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_start_tick = 2, + .m_restart_tick = 512, + .m_total_ticks = 1000, + .m_verbose=0, + .m_dont_assert =0 + }; + + test.Create(cfg); + test.start_test(); + test.Delete(); +} + + + +//////////////////////////////////////////////////////// + +class CNATimerWheelTest2Cfg { +public: + uint32_t m_wheel_size; + uint32_t m_level1_div; + uint32_t m_number_of_con_event; + uint32_t m_total_ticks; + bool m_random; + bool m_burst; + int m_verbose; + bool m_dont_check; +}; + +class CNATimerWheelTest2 : public CHTimerWheelBase { + +public: + bool Create(CNATimerWheelTest2Cfg & cfg); + void Delete(); + void start_test(); + virtual void on_tick(CMyTestObject *lpobj); + +private: + CNATimerWheelTest2Cfg m_cfg; + CNATimerWheel m_timer; + uint32_t m_ticks; + uint32_t m_div_err; +}; + +bool CNATimerWheelTest2::Create(CNATimerWheelTest2Cfg & cfg){ + m_cfg = cfg; + assert(m_timer.Create(m_cfg.m_wheel_size,m_cfg.m_level1_div)==RC_HTW_OK); + m_ticks=0; + return (true); +} + +void CNATimerWheelTest2::Delete(){ + assert(m_timer.Delete()==RC_HTW_OK); +} + + +void CNATimerWheelTest2::start_test(){ + + CMyTestObject * m_events = new CMyTestObject[m_cfg.m_number_of_con_event]; + int i; + for (i=0; i<m_cfg.m_number_of_con_event; i++) { + CMyTestObject * lp=&m_events[i]; + lp->m_id=i+1; + if (m_cfg.m_random) { + lp->m_d_tick = ((rand() % m_cfg.m_number_of_con_event)+1); + if (m_cfg.m_verbose) { + printf(" flow %d : %d \n",i,lp->m_d_tick); + } + }else{ + if (m_cfg.m_burst){ + lp->m_d_tick = m_cfg.m_wheel_size*2; /* all in the same bucket */ + }else{ + lp->m_d_tick=i+1; + } + } + lp->m_t_tick=lp->m_d_tick; + m_timer.timer_start(&lp->m_timer,lp->m_d_tick); + } + + m_div_err =m_cfg.m_wheel_size/m_cfg.m_level1_div; + + for (i=0; i<m_cfg.m_total_ticks; i++) { + if (m_cfg.m_verbose) { + printf(" tick %d :",i); + } + m_ticks=i; + m_timer.on_tick_level0((void *)this,my_test_on_tick_cb18); + + if ((i>=m_div_err) && 
(i%m_div_err==0)) { + int cnt_rerty=0; + while (true){ + if (m_cfg.m_verbose>1) { + printf("\n level1 - try %d \n",cnt_rerty); + } + + na_htw_state_num_t state; + state = m_timer.on_tick_level1((void *)this,my_test_on_tick_cb18); + if (m_cfg.m_verbose>1) { + printf("\n state - %lu \n",(ulong)state); + } + + if ( state !=TW_NEXT_BATCH){ + break; + } + + cnt_rerty++; + } + if (m_cfg.m_verbose>1) { + printf("\n level1 - stop %d \n",cnt_rerty); + } + } + + + if (m_cfg.m_verbose) { + printf(" \n"); + } + } + delete []m_events; +} + + +void CNATimerWheelTest2::on_tick(CMyTestObject *lp){ + + if (!m_cfg.m_random && !m_cfg.m_burst) { + assert(lp->m_id==lp->m_d_tick); + } + if (m_cfg.m_verbose) { + printf(" [event %d ]",lp->m_id); + } + m_timer.timer_start(&lp->m_timer,lp->m_d_tick); + if (!m_cfg.m_dont_check){ + double pre=std::abs(100.0-100.0*(double)m_ticks/(double)lp->m_t_tick); + if (pre>(200.0/(double)m_div_err)) { + printf(" =====>tick:%d %f \n",m_ticks,pre); + assert(0); + } + } + lp->m_t_tick+=lp->m_d_tick; +} + + +TEST_F(gt_r_timer, timer30) { + + CNATimerWheelTest2 test; + CNATimerWheelTest2Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_number_of_con_event = 100, + .m_total_ticks =1000, + .m_random=false, + .m_burst=false, + .m_verbose =false + }; + test.Create(cfg); + test.start_test(); + test.Delete(); +} + +TEST_F(gt_r_timer, timer31) { + + CNATimerWheelTest2 test; + CNATimerWheelTest2Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_number_of_con_event = 500, + .m_total_ticks =5000, + .m_random=true, + .m_burst=false, + .m_verbose =false + }; + test.Create(cfg); + test.start_test(); + test.Delete(); +} + +TEST_F(gt_r_timer, timer32) { + + CNATimerWheelTest2 test; + CNATimerWheelTest2Cfg cfg ={ + .m_wheel_size = 32, + .m_level1_div = 4, + .m_number_of_con_event = 500, + .m_total_ticks =100, + .m_random=false, + .m_burst=true, + .m_verbose =0 + }; + test.Create(cfg); + test.start_test(); + test.Delete(); +} diff --git 
a/src/gtest/client_cfg_test.cpp b/src/gtest/client_cfg_test.cpp index 4e93f3c5..851da2d1 100644 --- a/src/gtest/client_cfg_test.cpp +++ b/src/gtest/client_cfg_test.cpp @@ -1,6 +1,6 @@ /* Ido Barnea - + Cisco Systems, Inc. */ @@ -23,6 +23,16 @@ limitations under the License. #include "../bp_sim.h" #include <common/gtest.h> #include <common/basic_utils.h> +#include "bp_gtest.h" + +class client_cfg : public testing::Test { + protected: + virtual void SetUp() { + } + virtual void TearDown() { + } + public: +}; class basic_client_cfg : public testing::Test { protected: @@ -33,6 +43,7 @@ class basic_client_cfg : public testing::Test { public: }; +// testing IP resolution relevant classes TEST_F(basic_client_cfg, test1) { uint32_t ip_start = 0x10010101; uint32_t ip_end = 0x100101ff; @@ -58,9 +69,9 @@ TEST_F(basic_client_cfg, test1) { tg_yam_info.m_client_pool.push_back(s_pool); CGlobalInfo::m_options.m_expected_portd = 4; - printf("Expected ports %d\n", CGlobalInfo::m_options.m_expected_portd); - - std::string tmp_file_name = "client_cfg_gtest_GENERATED.yaml"; + printf("Expected ports %d\n", CGlobalInfo::m_options.m_expected_portd); + + std::string tmp_file_name = "generated/client_cfg_gtest_GENERATED.yaml"; FILE *fd = fopen(tmp_file_name.c_str(), "w"); if (fd == NULL) { @@ -72,7 +83,7 @@ TEST_F(basic_client_cfg, test1) { cfg_ext.m_responder.set_next_hop(next_hop_resp); cfg_ext.m_initiator.set_vlan(vlan_init); cfg_ext.m_responder.set_vlan(vlan_resp); - + cfg_ent.set_params(ip_start, ip_end, test_count); cfg_ent.set_cfg(cfg_ext); @@ -95,7 +106,7 @@ TEST_F(basic_client_cfg, test1) { test_db.load_yaml_file(tmp_file_name); test_db.set_tuple_gen_info(&tg_yam_info); test_db.get_entry_list(ent_list); - + // We expect ports for first two groups to be found. 
// This group addresses should not appear in the list, since @@ -114,13 +125,13 @@ TEST_F(basic_client_cfg, test1) { case 2: assert(port == i); assert(vlan == vlan_init); - assert(dst_ip == next_hop_init); + assert(dst_ip == next_hop_init); break; case 1: case 3: assert(port == i); assert(vlan == vlan_resp); - assert(dst_ip == next_hop_resp); + assert(dst_ip == next_hop_resp); break; default: fprintf(stderr, "Test failed. Too many entries returned\n"); @@ -141,12 +152,12 @@ TEST_F(basic_client_cfg, test1) { COneIPv4Info ip0_2(next_hop_init + 1, vlan_init, mac1, 0); COneIPv4Info ip1_1(next_hop_resp, vlan_resp, mac2, 1); COneIPv4Info ip1_2(next_hop_resp + 1, vlan_resp, mac3, 1); - + many_ip.insert(ip0_1); many_ip.insert(ip0_2); many_ip.insert(ip1_1); many_ip.insert(ip1_2); - + test_db.set_resolved_macs(many_ip); ClientCfgBase cfg0; @@ -167,7 +178,7 @@ TEST_F(basic_client_cfg, test1) { ent0->assign(cfg0); assert (!memcmp(cfg0.m_responder.get_dst_mac_addr() , mac3.GetConstBuffer(), ETHER_ADDR_LEN)); - + assert(ent1 != NULL); ent1->assign(cfg0); assert (!memcmp(cfg0.m_initiator.get_dst_mac_addr() @@ -181,6 +192,24 @@ TEST_F(basic_client_cfg, test1) { ent1->assign(cfg0); assert (!memcmp(cfg0.m_responder.get_dst_mac_addr() , mac3.GetConstBuffer(), ETHER_ADDR_LEN)); - + } +// simulation testing of MAC based client config +// basic_* tests are checked for memory leaks with valgrind. 
When running yaml file load/parse, test suite name +// should not start with basic_ +TEST_F(client_cfg, test2) { + CTestBasic t1; + CParserOption * po =&CGlobalInfo::m_options; + + po->reset(); + po->preview.setVMode(3); + po->preview.setFileWrite(true); + po->cfg_file = "cap2/dns.yaml"; + po->out_file = "exp/client_cfg_dns"; + po->client_cfg_file = "cap2/cluster_example.yaml"; + + bool res = t1.init(); + + EXPECT_EQ_UINT32(1, res?1:0)<< "pass"; +} diff --git a/src/h_timer.cpp b/src/h_timer.cpp index b3d86d46..4e52c3d2 100644 --- a/src/h_timer.cpp +++ b/src/h_timer.cpp @@ -266,6 +266,168 @@ RC_HTW_t CHTimerWheel::Delete(){ return(RC_HTW_OK); } +//////////////////////////////////////////////////////// + + + +void CNATimerWheel::detach_all(void *userdata,htw_on_tick_cb_t cb){ + #ifndef _DEBUG + if (m_total_events==0) { + return; + } + #endif + int i; + uint32_t res=0; + for (i=0;i<HNA_TIMER_LEVELS; i++) { + CHTimerOneWheel * lp=&m_timer_w[i]; + res=lp->detach_all(userdata,cb); + assert(m_total_events>=res); + m_total_events -=res; + } + assert(m_total_events==0); +} + + +void CNATimerWheel::on_tick_level0(void *userdata,htw_on_tick_cb_t cb){ + + CHTimerOneWheel * lp=&m_timer_w[0]; + CHTimerObj * event; + + while ( true ) { + event = lp->pop_event(); + if (!event) { + break; + } + m_total_events--; + cb(userdata,event); + } + lp->timer_tick(); + m_ticks[0]++; +} + +/* almost always we will have burst here */ +na_htw_state_num_t CNATimerWheel::on_tick_level1(void *userdata,htw_on_tick_cb_t cb){ + + CHTimerOneWheel * lp=&m_timer_w[1]; + CHTimerObj * event; + uint32_t cnt=0; + + while ( true ) { + event = lp->pop_event(); + if (!event) { + break; + } + if (event->m_ticks_left==0) { + m_total_events--; + cb(userdata,event); + }else{ + timer_start_rest(event,event->m_ticks_left); + } + cnt++; + if (cnt>HNA_MAX_LEVEL1_EVENTS) { + /* need another batch */ + na_htw_state_num_t old_state; + old_state=m_state; + m_state=TW_NEXT_BATCH; + if (old_state ==TW_FIRST_FINISH){ + 
return(TW_FIRST_BATCH); + }else{ + return(TW_NEXT_BATCH); + } + } + } + lp->timer_tick(); + m_ticks[1]++; + if (m_state==TW_FIRST_FINISH) { + if (cnt>0) { + return (TW_FIRST_FINISH_ANY); + }else{ + return (TW_FIRST_FINISH); + } + }else{ + assert(m_state==TW_NEXT_BATCH); + m_state=TW_FIRST_FINISH; + return(TW_END_BATCH); + } +} + + + +RC_HTW_t CNATimerWheel::timer_stop (CHTimerObj *tmr){ + if ( tmr->is_running() ) { + assert(tmr->m_wheel<HNA_TIMER_LEVELS); + m_timer_w[tmr->m_wheel].timer_stop(tmr); + m_total_events--; + } + return (RC_HTW_OK); +} + + + +RC_HTW_t CNATimerWheel::timer_start_rest(CHTimerObj *tmr, + htw_ticks_t ticks){ + + htw_ticks_t nticks = (ticks+m_wheel_level1_err)>>m_wheel_level1_shift; + if (nticks<m_wheel_size) { + if (nticks<2) { + nticks=2; /* not on the same bucket*/ + } + tmr->m_ticks_left=0; + tmr->m_wheel=1; + m_timer_w[1].timer_start(tmr,nticks-1); + }else{ + tmr->m_ticks_left = ticks - ((m_wheel_size-1)<<m_wheel_level1_shift); + tmr->m_wheel=1; + m_timer_w[1].timer_start(tmr,m_wheel_size-1); + } + return (RC_HTW_OK); +} + + +void CNATimerWheel::reset(){ + m_wheel_shift=0; + m_total_events=0; + m_wheel_size=0; + m_wheel_mask=0; + m_wheel_level1_shift=0; + m_wheel_level1_err=0; + m_state=TW_FIRST_FINISH; + int i; + for (i=0; i<HNA_TIMER_LEVELS; i++) { + m_ticks[i]=0; + } + +} + + +RC_HTW_t CNATimerWheel::Create(uint32_t wheel_size, + uint8_t level1_div){ + RC_HTW_t res; + int i; + for (i=0; i<HNA_TIMER_LEVELS; i++) { + res = m_timer_w[i].Create(wheel_size); + if ( res !=RC_HTW_OK ){ + return (res); + } + m_ticks[i]=0; + } + m_wheel_shift = utl_log2_shift(wheel_size); + m_wheel_mask = utl_mask_log2(wheel_size); + m_wheel_size = wheel_size; + m_wheel_level1_shift = m_wheel_shift - utl_log2_shift((uint32_t)level1_div); + m_wheel_level1_err = ((1<<(m_wheel_level1_shift))-1); + assert(m_wheel_shift>utl_log2_shift((uint32_t)level1_div)); + + return(RC_HTW_OK); +} + +RC_HTW_t CNATimerWheel::Delete(){ + int i; + for (i=0; i<HNA_TIMER_LEVELS; i++) 
{ + m_timer_w[i].Delete(); + } + return(RC_HTW_OK); +} diff --git a/src/h_timer.h b/src/h_timer.h index 22343533..17ff44be 100644 --- a/src/h_timer.h +++ b/src/h_timer.h @@ -344,4 +344,78 @@ private: } ; + + +#define HNA_TIMER_LEVELS (2) +#define HNA_MAX_LEVEL1_EVENTS (64) /* small bursts */ + +typedef enum { + TW_FIRST_FINISH =17, + TW_FIRST_FINISH_ANY =18, + TW_FIRST_BATCH =19, + TW_NEXT_BATCH =20, + TW_END_BATCH =21 +} NA_HTW_STATE_t; + +typedef uint8_t na_htw_state_num_t; + + +/* two levels 0,1. level 1 would be less accurate */ +class CNATimerWheel { + +public: + CNATimerWheel(){ + reset(); + } + + RC_HTW_t Create(uint32_t wheel_size,uint8_t level1_div); + + RC_HTW_t Delete(); + + + inline RC_HTW_t timer_start(CHTimerObj *tmr, + htw_ticks_t ticks){ + m_total_events++; + if (likely(ticks<m_wheel_size)) { + tmr->m_ticks_left=0; + tmr->m_wheel=0; + return (m_timer_w[0].timer_start(tmr,ticks)); + } + return ( timer_start_rest(tmr, ticks)); + } + + RC_HTW_t timer_stop (CHTimerObj *tmr); + + void on_tick_level0(void *userdata,htw_on_tick_cb_t cb); + + na_htw_state_num_t on_tick_level1(void *userdata,htw_on_tick_cb_t cb); + + bool is_any_events_left(){ + return(m_total_events>0?true:false); + } + + /* iterate all, detach and call the callback */ + void detach_all(void *userdata,htw_on_tick_cb_t cb); + + +private: + void reset(void); + + RC_HTW_t timer_start_rest(CHTimerObj *tmr, + htw_ticks_t ticks); + +private: + htw_ticks_t m_ticks[HNA_TIMER_LEVELS]; + uint32_t m_wheel_size; //e.g. 
256 + uint32_t m_wheel_mask; //e.g 256-1 + uint32_t m_wheel_shift; // e.g 8 + uint32_t m_wheel_level1_shift; //e.g 16 + uint32_t m_wheel_level1_err; //e.g 16 + + uint64_t m_total_events; + CHTimerOneWheel m_timer_w[HNA_TIMER_LEVELS]; + na_htw_state_num_t m_state; +} ; + + #endif diff --git a/src/main.cpp b/src/main.cpp index 15bd99dd..dc9e89f4 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -38,7 +38,7 @@ using namespace std; enum { OPT_HELP, OPT_CFG, OPT_NODE_DUMP, OP_STATS, OPT_FILE_OUT, OPT_UT, OPT_PCAP, OPT_IPV6, OPT_CLIENT_CFG_FILE, OPT_SL, OPT_DP_CORE_COUNT, OPT_DP_CORE_INDEX, OPT_LIMIT, - OPT_DRY_RUN}; + OPT_DRY_RUN, OPT_DURATION}; @@ -69,8 +69,10 @@ static CSimpleOpt::SOption parser_options[] = { OP_STATS, "-s", SO_NONE }, { OPT_CFG, "-f", SO_REQ_SEP }, { OPT_CLIENT_CFG_FILE, "--client_cfg", SO_REQ_SEP }, + { OPT_CLIENT_CFG_FILE, "--client-cfg", SO_REQ_SEP }, { OPT_FILE_OUT , "-o", SO_REQ_SEP }, { OPT_NODE_DUMP , "-v", SO_REQ_SEP }, + { OPT_DURATION, "-d", SO_REQ_SEP }, { OPT_PCAP, "--pcap", SO_NONE }, { OPT_IPV6, "--ipv6", SO_NONE }, { OPT_SL, "--sl", SO_NONE }, @@ -95,7 +97,8 @@ static int usage(){ printf(" Usage: bp_sim [OPTION] -f cfg.yaml -o outfile.erf \n"); printf(" \n"); printf(" \n"); - printf(" options \n"); + printf(" options:\n"); + printf(" -d [s] duration time of simulated traffic in seconds\n"); printf(" -v [1-3] verbose mode \n"); printf(" 1 show only stats \n"); printf(" 2 run preview do not write to file \n"); @@ -194,12 +197,16 @@ static int parse_options(int argc, params["limit"] = atoi(args.OptionArg()); break; + case OPT_DURATION: + sscanf(args.OptionArg(),"%f", &po->m_duration); + break; + case OPT_DRY_RUN: params["dry"] = 1; break; default: - usage(); + printf("Error: option %s is defined, but not handled.\n\n", args.OptionText()); return -1; break; } // End of switch diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp index 36fe1804..1d315fa7 100644 --- a/src/main_dpdk.cpp +++ b/src/main_dpdk.cpp @@ -122,7 +122,7 @@ static inline int 
get_is_rx_thread_enabled() { struct port_cfg_t; -#define MAX_DPDK_ARGS 40 +#define MAX_DPDK_ARGS 50 static CPlatformYamlInfo global_platform_cfg_info; static int global_dpdk_args_num ; static char * global_dpdk_args[MAX_DPDK_ARGS]; @@ -288,6 +288,27 @@ public: virtual int set_rcv_all(CPhyEthIF * _if, bool set_on) {return 0;} }; +class CTRexExtendedDriverVf : public CTRexExtendedDriverBase1GVm { + +public: + CTRexExtendedDriverVf(){ + /* we are working in mode in which we have we have 1 queue for rx and one queue for tx*/ + CGlobalInfo::m_options.preview.set_vm_one_queue_enable(true); + } + virtual void get_extended_stats(CPhyEthIF * _if, CPhyEthIFStats *stats) { + uint64_t prev_ipackets = stats->ipackets; + uint64_t prev_opackets = stats->opackets; + + CTRexExtendedDriverBase1GVm::get_extended_stats(_if, stats); + // Since this driver report byte counts without Ethernet FCS (4 bytes), we need to fix the reported numbers + stats->ibytes += (stats->ipackets - prev_ipackets) * 4; + stats->obytes += (stats->opackets - prev_opackets) * 4; + } + static CTRexExtendedDriverBase * create(){ + return ( new CTRexExtendedDriverVf() ); + } +}; + class CTRexExtendedDriverBaseE1000 : public CTRexExtendedDriverBase1GVm { CTRexExtendedDriverBaseE1000() { // E1000 driver is only relevant in VM in our case @@ -569,16 +590,14 @@ private: register_driver(std::string("rte_igb_pmd"),CTRexExtendedDriverBase1G::create); register_driver(std::string("rte_i40e_pmd"),CTRexExtendedDriverBase40G::create); register_driver(std::string("rte_enic_pmd"),CTRexExtendedDriverBaseVIC::create); - register_driver(std::string("librte_pmd_mlx5"),CTRexExtendedDriverBaseMlnx5G::create); - + register_driver(std::string("net_mlx5"),CTRexExtendedDriverBaseMlnx5G::create); /* virtual devices */ register_driver(std::string("rte_em_pmd"),CTRexExtendedDriverBaseE1000::create); register_driver(std::string("rte_vmxnet3_pmd"),CTRexExtendedDriverBase1GVm::create); 
register_driver(std::string("rte_virtio_pmd"),CTRexExtendedDriverBase1GVm::create); - - - + register_driver(std::string("rte_ixgbevf_pmd"),CTRexExtendedDriverVf::create); + register_driver(std::string("rte_i40evf_pmd"),CTRexExtendedDriverVf::create); m_driver_was_set=false; m_drv=0; @@ -693,6 +712,7 @@ enum { OPT_HELP, OPT_CLOSE, OPT_ARP_REF_PER, OPT_NO_OFED_CHECK, + OPT_NO_SCAPY_SERVER, OPT_ACTIVE_FLOW }; @@ -749,11 +769,11 @@ static CSimpleOpt::SOption parser_options[] = { OPT_NO_WATCHDOG, "--no-watchdog", SO_NONE }, { OPT_ALLOW_COREDUMP, "--allow-coredump", SO_NONE }, { OPT_CHECKSUM_OFFLOAD, "--checksum-offload", SO_NONE }, - { OPT_ACTIVE_FLOW, "--active-flows", SO_REQ_SEP }, + { OPT_ACTIVE_FLOW, "--active-flows", SO_REQ_SEP }, { OPT_CLOSE, "--close-at-end", SO_NONE }, { OPT_ARP_REF_PER, "--arp-refresh-period", SO_REQ_SEP }, { OPT_NO_OFED_CHECK, "--no-ofed-check", SO_NONE }, - + { OPT_NO_SCAPY_SERVER, "--no-scapy-server", SO_NONE }, SO_END_OF_OPTIONS }; @@ -806,6 +826,7 @@ static int usage(){ printf(" --no-flow-control-change : By default TRex disables flow-control. If this option is given, it does not touch it \n"); printf(" --no-key : Daemon mode, don't get input from keyboard \n"); printf(" --no-ofed-check : Disable the check of OFED version \n"); + printf(" --no-scapy-server : Disable Scapy server implicit start at stateless \n"); printf(" --no-watchdog : Disable watchdog \n"); printf(" -p : Send all flow packets from the same interface (choosed randomly between client ad server ports) without changing their src/dst IP \n"); printf(" -pm : Platform factor. 
If you have splitter in the setup, you can multiply the total results by this factor \n"); @@ -1090,10 +1111,11 @@ static int parse_options(int argc, char *argv[], CParserOption* po, bool first_t break; case OPT_NO_OFED_CHECK: break; + case OPT_NO_SCAPY_SERVER: + break; default: printf("Error: option %s is not handled.\n\n", args.OptionText()); - usage(); return -1; break; } // End of switch @@ -1799,7 +1821,15 @@ int DpdkTRexPortAttr::add_mac(char * mac){ for (int i=0; i<6;i++) { mac_addr.addr_bytes[i] =mac[i]; } - return rte_eth_dev_mac_addr_add(m_port_id, &mac_addr,0); + + if ( ! get_vm_one_queue_enable() ) { + if ( rte_eth_dev_mac_addr_add(m_port_id, &mac_addr,0) != 0) { + printf("Failed setting MAC for port %d \n", m_port_id); + exit(-1); + } + } + + return 0; } int DpdkTRexPortAttr::set_promiscuous(bool enable){ @@ -2169,8 +2199,18 @@ int CCoreEthIF::send_burst(CCorePerPort * lp_port, uint16_t len, CVirtualIFPerSideStats * lp_stats){ - //assert(m_ring_to_rx->Enqueue((CGenNode *)0x0) == 0); - +#ifdef DEBUG_SEND_BURST + if (CGlobalInfo::m_options.preview.getVMode() > 10) { + fprintf(stdout, "send_burst port:%d queue:%d len:%d\n", lp_port->m_port->get_rte_port_id() + , lp_port->m_tx_queue_id, len); + for (int i = 0; i < lp_port->m_len; i++) { + fprintf(stdout, "packet %d:\n", i); + rte_mbuf_t *m = lp_port->m_table[i]; + utl_DumpBuffer(stdout, rte_pktmbuf_mtod(m, uint8_t*), rte_pktmbuf_pkt_len(m), 0); + } + } +#endif + uint16_t ret = lp_port->m_port->tx_burst(lp_port->m_tx_queue_id,lp_port->m_table,len); #ifdef DELAY_IF_NEEDED while ( unlikely( ret<len ) ){ @@ -3742,7 +3782,7 @@ int CGlobalTRex::ixgbe_start(void){ /* wait for ports to be stable */ get_ex_drv()->wait_for_stable_link(); - if ( !is_all_links_are_up(true) /*&& !get_is_stateless()*/ ){ // disable start with link down for now + if ( !is_all_links_are_up() /*&& !get_is_stateless()*/ ){ // disable start with link down for now /* temporary solution for trex-192 issue, solve the case for X710/XL710, will 
work for both Statless and Stateful */ if ( get_ex_drv()->drop_packets_incase_of_linkdown() ){ @@ -3929,21 +3969,22 @@ int CGlobalTRex::ixgbe_prob_init(void){ m_max_ports = rte_eth_dev_count(); if (m_max_ports == 0) - rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + rte_exit(EXIT_FAILURE, "Error: Could not find supported ethernet ports. You are probably trying to use unsupported NIC \n"); printf(" Number of ports found: %d \n",m_max_ports); if ( m_max_ports %2 !=0 ) { - rte_exit(EXIT_FAILURE, " Number of ports %d should be even, mask the one port in the configuration file \n, ", + rte_exit(EXIT_FAILURE, " Number of ports in config file is %d. It should be even. Please use --limit-ports, or change 'port_limit:' in the config file\n", m_max_ports); } if ( CGlobalInfo::m_options.get_expected_ports() > TREX_MAX_PORTS ) { - rte_exit(EXIT_FAILURE, " Maximum ports supported are %d, use the configuration file to set the expected number of ports \n",TREX_MAX_PORTS); + rte_exit(EXIT_FAILURE, " Maximum number of ports supported is %d. You are trying to use %d. Please use --limit-ports, or change 'port_limit:' in the config file\n" + ,TREX_MAX_PORTS, CGlobalInfo::m_options.get_expected_ports()); } if ( CGlobalInfo::m_options.get_expected_ports() > m_max_ports ){ - rte_exit(EXIT_FAILURE, " There are %d ports you expected more %d,use the configuration file to set the expected number of ports \n", + rte_exit(EXIT_FAILURE, " There are %d ports available. You are trying to use %d. Please use --limit-ports, or change 'port_limit:' in the config file\n", m_max_ports, CGlobalInfo::m_options.get_expected_ports()); } @@ -5219,70 +5260,6 @@ int CPhyEthIF::get_flow_stats_payload(rx_per_flow_t *rx_stats, tx_per_flow_t *tx return 0; } -// If needed, send packets to rx core for processing. 
-// This is relevant only in VM case, where we receive packets to the working DP core (only 1 DP core in this case) -bool CCoreEthIF::process_rx_pkt(pkt_dir_t dir, rte_mbuf_t * m) { - CFlowStatParser parser; - uint32_t ip_id; - - if (parser.parse(rte_pktmbuf_mtod(m, uint8_t*), rte_pktmbuf_pkt_len(m)) != 0) { - return false; - } - bool send=false; - - // e1000 on ESXI hands us the packet with the ethernet FCS - if (parser.get_pkt_size() < rte_pktmbuf_pkt_len(m)) { - rte_pktmbuf_trim(m, rte_pktmbuf_pkt_len(m) - parser.get_pkt_size()); - } - - if ( get_is_stateless() ) { - // In stateless RX, we only care about flow stat packets - if ((parser.get_ip_id(ip_id) == 0) && ((ip_id & 0xff00) == IP_ID_RESERVE_BASE)) { - send = true; - } - } else { - CLatencyPktMode *c_l_pkt_mode = g_trex.m_mg.c_l_pkt_mode; - bool is_lateancy_pkt = c_l_pkt_mode->IsLatencyPkt((IPHeader *)parser.get_l4()) & - CCPortLatency::IsLatencyPkt(parser.get_l4() + c_l_pkt_mode->l4_header_len()); - - if (is_lateancy_pkt) { - send = true; - } else { - if ( get_is_rx_filter_enable() ) { - uint8_t max_ttl = 0xff - get_rx_check_hops(); - uint8_t pkt_ttl = parser.get_ttl(); - if ( (pkt_ttl==max_ttl) || (pkt_ttl==(max_ttl-1) ) ) { - send=true; - } - } - } - } - - - if (send) { - CGenNodeLatencyPktInfo * node=(CGenNodeLatencyPktInfo * )CGlobalInfo::create_node(); - if ( node ) { - node->m_msg_type = CGenNodeMsgBase::LATENCY_PKT; - node->m_dir = dir; - node->m_latency_offset = 0xdead; - node->m_pkt = m; - if ( m_ring_to_rx->Enqueue((CGenNode*)node)==0 ){ - }else{ - CGlobalInfo::free_node((CGenNode *)node); - send=false; - } - -#ifdef LATENCY_QUEUE_TRACE_ - printf("rx to cp --\n"); - rte_pktmbuf_dump(stdout,m, rte_pktmbuf_pkt_len(m)); -#endif - }else{ - send=false; - } - } - return (send); -} - TrexStateless * get_stateless_obj() { return g_trex.m_trex_stateless; } @@ -6678,8 +6655,8 @@ void CTRexExtendedDriverBase40G::get_extended_stats(CPhyEthIF * _if,CPhyEthIFSta stats->ipackets += stats1.ipackets - 
prev_stats->ipackets; stats->ibytes += stats1.ibytes - prev_stats->ibytes; stats->opackets += stats1.opackets - prev_stats->opackets; - stats->obytes += stats1.obytes - prev_stats->obytes - + (stats1.opackets << 2) - (prev_stats->opackets << 2); + // Since this driver report obytes count without Ethernet FCS (4 bytes), we need to fix the reported numbers + stats->obytes += stats1.obytes - prev_stats->obytes + (stats1.opackets - prev_stats->opackets) * 4; stats->f_ipackets += 0; stats->f_ibytes += 0; stats->ierrors += stats1.imissed + stats1.ierrors + stats1.rx_nombuf @@ -6834,7 +6811,7 @@ void CTRexExtendedDriverBaseMlnx5G::add_del_rules(enum rte_filter_op op, uint8_t ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR, op, (void*)&filter); if ( ret != 0 ) { - if (((op == RTE_ETH_FILTER_ADD) && (ret == EEXIST)) || ((op == RTE_ETH_FILTER_DELETE) && (ret == ENOENT))) + if (((op == RTE_ETH_FILTER_ADD) && (ret == -EEXIST)) || ((op == RTE_ETH_FILTER_DELETE) && (ret == -ENOENT))) return; rte_exit(EXIT_FAILURE, "rte_eth_dev_filter_ctrl: err=%d, port=%u\n", diff --git a/src/pkt_gen.cpp b/src/pkt_gen.cpp index 656b1b06..9f6a3d34 100644 --- a/src/pkt_gen.cpp +++ b/src/pkt_gen.cpp @@ -123,6 +123,9 @@ char *CTestPktGen::create_test_pkt(uint16_t l3_type, uint16_t l4_proto, uint8_t pkt_size += sizeof(struct CRx_check_header); } break; + case EthernetHeader::Protocol::ARP: + pkt_size += sizeof(ArpHdr); + break; } switch (l4_proto) { @@ -192,9 +195,13 @@ char *CTestPktGen::create_test_pkt(uint16_t l3_type, uint16_t l4_proto, uint8_t ipv6->setPayloadLen(pkt_size - 14 - sizeof(ipv6_header)); ipv6->setFlowLabel(ip_id); break; + case EthernetHeader::Protocol::ARP: + uint16_t vlan = (flags & DPF_VLAN) ? 
200 : 0; + create_arp_req((uint8_t *)p_start, 0x01020304, 0x05060708, src_mac, vlan, 0); + return p_start; + break; } - struct TCPHeader *tcp = (TCPHeader *)p; struct ICMPHeader *icmp= (ICMPHeader *)p; switch (l4_proto) { diff --git a/src/platform_cfg.cpp b/src/platform_cfg.cpp index 575c4c72..92b4e7e9 100755 --- a/src/platform_cfg.cpp +++ b/src/platform_cfg.cpp @@ -344,6 +344,12 @@ void operator >> (const YAML::Node& node, CPlatformYamlInfo & plat_info) { /* must have interfaces */ const YAML::Node& interfaces = node["interfaces"]; + if ( interfaces.size() > TREX_MAX_PORTS ) { + printf("ERROR: Maximal number of interfaces is: %d, you have specified: %d.\n", + TREX_MAX_PORTS, (int) interfaces.size()); + exit(-1); + } + for(unsigned i=0;i<interfaces.size();i++) { std::string fi; const YAML::Node & node = interfaces; diff --git a/src/platform_cfg.h b/src/platform_cfg.h index c839bd96..b921c9c7 100755 --- a/src/platform_cfg.h +++ b/src/platform_cfg.h @@ -28,6 +28,7 @@ limitations under the License. 
#include <vector> #include <string> #include "tw_cfg.h" +#include "trex_defs.h" #define CONST_NB_MBUF_2_10G (16380/2) diff --git a/src/rpc-server/commands/trex_rpc_cmd_general.cpp b/src/rpc-server/commands/trex_rpc_cmd_general.cpp index 55249fc8..6f0ab09a 100644 --- a/src/rpc-server/commands/trex_rpc_cmd_general.cpp +++ b/src/rpc-server/commands/trex_rpc_cmd_general.cpp @@ -629,12 +629,23 @@ TrexRpcCmdPushRemote::_run(const Json::Value ¶ms, Json::Value &result) { } + /* IO might take time, increase timeout of WD */ + TrexMonitor * cur_monitor = TrexWatchDog::getInstance().get_current_monitor(); + if (cur_monitor != NULL) { + cur_monitor->io_begin(); + } + try { port->push_remote(pcap_filename, ipg_usec, min_ipg_sec, speedup, count, duration, is_dual); } catch (const TrexException &ex) { generate_execute_err(result, ex.what()); } + /* revert timeout of WD */ + if (cur_monitor != NULL) { + cur_monitor->io_end(); + } + result["result"] = Json::objectValue; return (TREX_RPC_CMD_OK); diff --git a/src/sim/trex_sim_stateful.cpp b/src/sim/trex_sim_stateful.cpp index 7546644d..3980dc24 100644 --- a/src/sim/trex_sim_stateful.cpp +++ b/src/sim/trex_sim_stateful.cpp @@ -169,6 +169,9 @@ int load_list_of_cap_files(CParserOption * op){ if (op->client_cfg_file != "") { try { fl.load_client_config_file(op->client_cfg_file); + // The simulator only test MAC address configs, so this parameter is not used + CManyIPInfo pretest_result; + fl.set_client_config_resolved_macs(pretest_result); } catch (const std::runtime_error &e) { std::cout << "\n*** " << e.what() << "\n\n"; exit(-1); diff --git a/src/stateless/dp/trex_stateless_dp_core.cpp b/src/stateless/dp/trex_stateless_dp_core.cpp index ed130c29..0a317170 100644 --- a/src/stateless/dp/trex_stateless_dp_core.cpp +++ b/src/stateless/dp/trex_stateless_dp_core.cpp @@ -1281,6 +1281,12 @@ bool CGenNodePCAP::create(uint8_t port_id, m_dir = dir; m_min_ipg_sec = min_ipg_sec; + /* increase timeout of WD due to io */ + TrexMonitor * cur_monitor 
= TrexWatchDog::getInstance().get_current_monitor(); + if (cur_monitor != NULL) { + cur_monitor->io_begin(); + } + /* mark this node as slow path */ set_slow_path(true); @@ -1344,6 +1350,12 @@ void CGenNodePCAP::destroy() { m_reader = NULL; } + /* end of io, return normal timeout of WD */ + TrexMonitor * cur_monitor = TrexWatchDog::getInstance().get_current_monitor(); + if (cur_monitor != NULL) { + cur_monitor->io_end(); + } + m_state = PCAP_INVALID; } diff --git a/src/stateless/rx/trex_stateless_rx_port_mngr.cpp b/src/stateless/rx/trex_stateless_rx_port_mngr.cpp index d2e0b4e8..ede86062 100644 --- a/src/stateless/rx/trex_stateless_rx_port_mngr.cpp +++ b/src/stateless/rx/trex_stateless_rx_port_mngr.cpp @@ -265,6 +265,9 @@ public: m_icmp = NULL; m_vlan_tag = 0; + if (m_size_left < 14) + return; + /* ethernet */ m_ether = (EthernetHeader *)parse_bytes(14); diff --git a/src/trex_client_config.h b/src/trex_client_config.h index 257d354f..6423c390 100644 --- a/src/trex_client_config.h +++ b/src/trex_client_config.h @@ -202,7 +202,7 @@ public: ClientCfgDirBase m_responder; }; -class ClientCfgExt : public ClientCfgBase { +class ClientCfgExt { public: virtual void dump (FILE *fd) const { fprintf(fd, " initiator:\n"); @@ -281,7 +281,8 @@ public: * @param info */ void assign(ClientCfgBase &info) { - info = m_cfg; + info.m_initiator = m_cfg.m_initiator; + info.m_responder = m_cfg.m_responder; info.update(m_iterator, &m_cfg); /* advance for the next assign */ diff --git a/src/trex_defs.h b/src/trex_defs.h index 8a4bf664..60a60df9 100644 --- a/src/trex_defs.h +++ b/src/trex_defs.h @@ -21,7 +21,7 @@ limitations under the License. 
#ifndef __TREX_DEFS_H__ #define __TREX_DEFS_H__ -#define TREX_MAX_PORTS 12 +#define TREX_MAX_PORTS 16 // maximum number of IP ID type flow stats we support #define MAX_FLOW_STATS 127 diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp index d2b6b803..8a06746b 100644 --- a/src/trex_watchdog.cpp +++ b/src/trex_watchdog.cpp @@ -155,16 +155,14 @@ void TrexWatchDog::init(bool enable){ * */ TrexMonitor * TrexWatchDog::get_current_monitor() { - TrexMonitor * cur_monitor = NULL; for (int i = 0; i < m_mon_count; i++) { if ( m_monitors[i]->get_tid() == pthread_self() ) { - cur_monitor = m_monitors[i]; - break; + return m_monitors[i]; } } - return cur_monitor; + return NULL; } diff --git a/src/utl_yaml.cpp b/src/utl_yaml.cpp index a4fd6404..df605964 100755 --- a/src/utl_yaml.cpp +++ b/src/utl_yaml.cpp @@ -313,6 +313,7 @@ YAMLParserWrapper::parse_mac_addr(const YAML::Node &node, const std::string &nam } assert(0); + return(0); } uint64_t |