diff options
Diffstat (limited to 'scripts/dpdk_setup_ports.py')
-rwxr-xr-x | scripts/dpdk_setup_ports.py | 274 |
1 files changed, 221 insertions, 53 deletions
diff --git a/scripts/dpdk_setup_ports.py b/scripts/dpdk_setup_ports.py index f85dae5d..ce6d2b2f 100755 --- a/scripts/dpdk_setup_ports.py +++ b/scripts/dpdk_setup_ports.py @@ -14,6 +14,7 @@ import traceback from collections import defaultdict, OrderedDict from distutils.util import strtobool import getpass +import subprocess class ConfigCreator(object): mandatory_interface_fields = ['Slot_str', 'Device_str', 'NUMA'] @@ -41,51 +42,52 @@ class ConfigCreator(object): cores[core] = cores[core][:1] include_lcores = [int(x) for x in include_lcores] exclude_lcores = [int(x) for x in exclude_lcores] + self.has_zero_lcore = False + self.lcores_per_numa = {} + total_lcores = 0 for numa, cores in self.cpu_topology.items(): + self.lcores_per_numa[numa] = {'main': [], 'siblings': [], 'all': []} for core, lcores in cores.items(): - for lcore in copy.copy(lcores): + total_lcores += len(lcores) + for lcore in list(lcores): if include_lcores and lcore not in include_lcores: cores[core].remove(lcore) if exclude_lcores and lcore in exclude_lcores: cores[core].remove(lcore) if 0 in lcores: self.has_zero_lcore = True - cores[core].remove(0) - zero_lcore_numa = numa - zero_lcore_core = core - zero_lcore_siblings = cores[core] - if self.has_zero_lcore: - del self.cpu_topology[zero_lcore_numa][zero_lcore_core] - self.cpu_topology[zero_lcore_numa][zero_lcore_core] = zero_lcore_siblings + lcores.remove(0) + self.lcores_per_numa[numa]['siblings'].extend(lcores) + else: + self.lcores_per_numa[numa]['main'].extend(lcores[:1]) + self.lcores_per_numa[numa]['siblings'].extend(lcores[1:]) + self.lcores_per_numa[numa]['all'].extend(lcores) + for interface in self.interfaces: for mandatory_interface_field in ConfigCreator.mandatory_interface_fields: if mandatory_interface_field not in interface: raise DpdkSetup("Expected '%s' field in interface dictionary, got: %s" % (mandatory_interface_field, interface)) + Device_str = self._verify_devices_same_type(self.interfaces) if '40Gb' in Device_str: self.speed = 40 else: self.speed = 10 - lcores_per_numa = OrderedDict() - system_lcores = int(self.has_zero_lcore) - for numa, core in self.cpu_topology.items(): - for lcores in core.values(): - if numa not in lcores_per_numa: - lcores_per_numa[numa] = [] - lcores_per_numa[numa].extend(lcores) - system_lcores += len(lcores) - minimum_required_lcores = len(self.interfaces) / 2 + 2 - if system_lcores < minimum_required_lcores: + + minimum_required_lcores = len(self.interfaces) // 2 + 2 + if total_lcores < minimum_required_lcores: raise DpdkSetup('Your system should have at least %s cores for %s interfaces, and it has: %s.' % - (minimum_required_lcores, len(self.interfaces), system_lcores + (0 if self.has_zero_lcore else 1))) + (minimum_required_lcores, len(self.interfaces), total_lcores)) interfaces_per_numa = defaultdict(int) + for i in range(0, len(self.interfaces), 2): - if self.interfaces[i]['NUMA'] != self.interfaces[i+1]['NUMA'] and not ignore_numa: + numa = self.interfaces[i]['NUMA'] + if numa != self.interfaces[i+1]['NUMA'] and not ignore_numa: raise DpdkSetup('NUMA of each pair of interfaces should be the same. Got NUMA %s for client interface %s, NUMA %s for server interface %s' % - (self.interfaces[i]['NUMA'], self.interfaces[i]['Slot_str'], self.interfaces[i+1]['NUMA'], self.interfaces[i+1]['Slot_str'])) - interfaces_per_numa[self.interfaces[i]['NUMA']] += 2 - self.lcores_per_numa = lcores_per_numa + (numa, self.interfaces[i]['Slot_str'], self.interfaces[i+1]['NUMA'], self.interfaces[i+1]['Slot_str'])) + interfaces_per_numa[numa] += 2 + self.interfaces_per_numa = interfaces_per_numa self.prefix = prefix self.zmq_pub_port = zmq_pub_port @@ -153,16 +155,20 @@ class ConfigCreator(object): config_str += ' '*8 + 'src_mac: %s\n' % self.verify_mac(interface['src_mac']) if index % 2: config_str += '\n' # dual if barrier + if not self.ignore_numa: config_str += ' platform:\n' - if len(self.interfaces_per_numa.keys()) == 1 and -1 in self.interfaces_per_numa: # VM, use any cores, 1 core per dual_if - lcores_pool = sorted([lcore for lcores in self.lcores_per_numa.values() for lcore in lcores]) - config_str += ' '*6 + 'master_thread_id: %s\n' % (0 if self.has_zero_lcore else lcores_pool.pop()) + if len(self.interfaces_per_numa.keys()) == 1 and -1 in self.interfaces_per_numa: # VM, use any cores + lcores_pool = sorted([lcore for lcores in self.lcores_per_numa.values() for lcore in lcores['all']]) + config_str += ' '*6 + 'master_thread_id: %s\n' % (0 if self.has_zero_lcore else lcores_pool.pop(0)) config_str += ' '*6 + 'latency_thread_id: %s\n' % lcores_pool.pop(0) - lcores_per_dual_if = int(len(lcores_pool) / len(self.interfaces)) + lcores_per_dual_if = int(len(lcores_pool) * 2 / len(self.interfaces)) config_str += ' '*6 + 'dual_if:\n' for i in range(0, len(self.interfaces), 2): - lcores_for_this_dual_if = [str(lcores_pool.pop(0)) for _ in range(lcores_per_dual_if)] + lcores_for_this_dual_if = list(map(str, sorted(lcores_pool[:lcores_per_dual_if]))) + lcores_pool = lcores_pool[lcores_per_dual_if:] + if not lcores_for_this_dual_if: + raise DpdkSetup('lcores_for_this_dual_if is empty (internal bug, please report with details of setup)') config_str += ' '*8 + '- socket: 0\n' config_str += ' '*10 + 'threads: [%s]\n\n' % ','.join(lcores_for_this_dual_if) else: @@ -170,26 +176,46 @@ class ConfigCreator(object): lcores_per_dual_if = 99 extra_lcores = 1 if self.has_zero_lcore else 2 # worst case 3 iterations, to ensure master and "rx" have cores left - while (lcores_per_dual_if * sum(self.interfaces_per_numa.values()) / 2) + extra_lcores > sum([len(lcores) for lcores in self.lcores_per_numa.values()]): + while (lcores_per_dual_if * sum(self.interfaces_per_numa.values()) / 2) + extra_lcores > sum([len(lcores['all']) for lcores in self.lcores_per_numa.values()]): lcores_per_dual_if -= 1 - for numa, cores in self.lcores_per_numa.items(): + for numa, lcores_dict in self.lcores_per_numa.items(): if not self.interfaces_per_numa[numa]: continue - lcores_per_dual_if = min(lcores_per_dual_if, int(2 * len(cores) / self.interfaces_per_numa[numa])) + lcores_per_dual_if = min(lcores_per_dual_if, int(2 * len(lcores_dict['all']) / self.interfaces_per_numa[numa])) lcores_pool = copy.deepcopy(self.lcores_per_numa) # first, allocate lcores for dual_if section dual_if_section = ' '*6 + 'dual_if:\n' for i in range(0, len(self.interfaces), 2): numa = self.interfaces[i]['NUMA'] dual_if_section += ' '*8 + '- socket: %s\n' % numa - lcores_for_this_dual_if = [str(lcores_pool[numa].pop(0)) for _ in range(lcores_per_dual_if)] + lcores_for_this_dual_if = lcores_pool[numa]['all'][:lcores_per_dual_if] + lcores_pool[numa]['all'] = lcores_pool[numa]['all'][lcores_per_dual_if:] + for lcore in lcores_for_this_dual_if: + if lcore in lcores_pool[numa]['main']: + lcores_pool[numa]['main'].remove(lcore) + elif lcore in lcores_pool[numa]['siblings']: + lcores_pool[numa]['siblings'].remove(lcore) + else: + raise DpdkSetup('lcore not in main nor in siblings list (internal bug, please report with details of setup)') if not lcores_for_this_dual_if: raise DpdkSetup('Not enough cores at NUMA %s. This NUMA has %s processing units and %s interfaces.' % (numa, len(self.lcores_per_numa[numa]), self.interfaces_per_numa[numa])) - dual_if_section += ' '*10 + 'threads: [%s]\n\n' % ','.join(lcores_for_this_dual_if) + dual_if_section += ' '*10 + 'threads: [%s]\n\n' % ','.join(list(map(str, sorted(lcores_for_this_dual_if)))) + # take the cores left to master and rx - lcores_pool_left = [lcore for lcores in lcores_pool.values() for lcore in lcores] - config_str += ' '*6 + 'master_thread_id: %s\n' % (0 if self.has_zero_lcore else lcores_pool_left.pop(0)) - config_str += ' '*6 + 'latency_thread_id: %s\n' % lcores_pool_left.pop(0) + mains_left = [lcore for lcores in lcores_pool.values() for lcore in lcores['main']] + siblings_left = [lcore for lcores in lcores_pool.values() for lcore in lcores['siblings']] + if mains_left: + rx_core = mains_left.pop(0) + else: + rx_core = siblings_left.pop(0) + if self.has_zero_lcore: + master_core = 0 + elif mains_left: + master_core = mains_left.pop(0) + else: + master_core = siblings_left.pop(0) + config_str += ' '*6 + 'master_thread_id: %s\n' % master_core + config_str += ' '*6 + 'latency_thread_id: %s\n' % rx_core # add the dual_if section config_str += dual_if_section @@ -227,6 +253,7 @@ class CIfMap: self.m_cfg_file =cfg_file; self.m_cfg_dict={}; self.m_devices={}; + self.m_is_mellanox_mode=False; def dump_error (self,err): s="""%s @@ -264,6 +291,94 @@ Other network devices s= self.dump_error (err) raise DpdkSetup(s) + def set_only_mellanox_nics(self): + self.m_is_mellanox_mode=True; + + def get_only_mellanox_nics(self): + return self.m_is_mellanox_mode + + + def read_pci (self,pci_id,reg_id): + out=subprocess.check_output(['setpci', '-s',pci_id, '%s.w' %(reg_id)]) + out=out.decode(errors='replace'); + return (out.strip()); + + def write_pci (self,pci_id,reg_id,val): + out=subprocess.check_output(['setpci','-s',pci_id, '%s.w=%s' %(reg_id,val)]) + out=out.decode(errors='replace'); + return (out.strip()); + + def tune_mlx5_device (self,pci_id): + # set PCIe Read to 1024 and not 512 ... need to add it to startup s + val=self.read_pci (pci_id,68) + if val[0]!='3': + val='3'+val[1:] + self.write_pci (pci_id,68,val) + assert(self.read_pci (pci_id,68)==val); + + def get_mtu_mlx5 (self,dev_id): + if len(dev_id)>0: + out=subprocess.check_output(['ifconfig', dev_id]) + out=out.decode(errors='replace'); + obj=re.search(r'MTU:(\d+)',out,flags=re.MULTILINE|re.DOTALL); + if obj: + return int(obj.group(1)); + else: + obj=re.search(r'mtu (\d+)',out,flags=re.MULTILINE|re.DOTALL); + if obj: + return int(obj.group(1)); + else: + return -1 + + def set_mtu_mlx5 (self,dev_id,new_mtu): + if len(dev_id)>0: + out=subprocess.check_output(['ifconfig', dev_id,'mtu',str(new_mtu)]) + out=out.decode(errors='replace'); + + + def set_max_mtu_mlx5_device(self,dev_id): + mtu=9*1024+22 + dev_mtu=self.get_mtu_mlx5 (dev_id); + if (dev_mtu>0) and (dev_mtu!=mtu): + self.set_mtu_mlx5(dev_id,mtu); + if self.get_mtu_mlx5(dev_id) != mtu: + print("Could not set MTU to %d" % mtu) + exit(-1); + + + def disable_flow_control_mlx5_device (self,dev_id): + + if len(dev_id)>0: + my_stderr = open("/dev/null","wb") + cmd ='ethtool -A '+dev_id + ' rx off tx off ' + subprocess.call(cmd, stdout=my_stderr,stderr=my_stderr, shell=True) + my_stderr.close(); + + def check_ofe_version (self): + ofed_info='/usr/bin/ofed_info' + ofed_ver= '-3.4-' + ofed_ver_show= '3.4-1' + + + if not os.path.isfile(ofed_info): + print("OFED %s is not installed on this setup" % ofed_info) + exit(-1); + + try: + out = subprocess.check_output([ofed_info]) + except Exception as e: + print("OFED %s can't run " % (ofed_info)) + exit(-1); + + lines=out.splitlines(); + + if len(lines)>1: + if not (ofed_ver in str(lines[0])): + print("installed OFED version is '%s' should be at least '%s' and up" % (lines[0],ofed_ver_show)) + exit(-1); + + + def load_config_file (self): fcfg=self.m_cfg_file @@ -299,15 +414,19 @@ Other network devices self.raise_error ('Error: port_limit should not be higher than number of interfaces in config file: %s\n' % fcfg) - def do_bind_one (self,key): - cmd='%s dpdk_nic_bind.py --bind=igb_uio %s ' % (sys.executable, key) + def do_bind_one (self,key,mellanox): + if mellanox: + drv="mlx5_core" + else: + drv="igb_uio" + + cmd='%s dpdk_nic_bind.py --bind=%s %s ' % (sys.executable, drv,key) print(cmd) res=os.system(cmd); if res!=0: raise DpdkSetup('') - def pci_name_to_full_name (self,pci_name): c='[0-9A-Fa-f]'; sp='[:]' @@ -330,7 +449,7 @@ Other network devices dpdk_nic_bind.get_nic_details() self.m_devices= dpdk_nic_bind.devices - def do_run (self): + def do_run (self,only_check_all_mlx=False): self.run_dpdk_lspci () if map_driver.dump_interfaces is None or (map_driver.dump_interfaces == [] and map_driver.parent_cfg): self.load_config_file() @@ -343,27 +462,74 @@ Other network devices if_list.append(dev['Slot']) if_list = list(map(self.pci_name_to_full_name, if_list)) + + + # check how many mellanox cards we have + Mellanox_cnt=0; for key in if_list: if key not in self.m_devices: err=" %s does not exist " %key; raise DpdkSetup(err) + if 'Vendor_str' not in self.m_devices[key]: + err=" %s does not have Vendor_str " %key; + raise DpdkSetup(err) - if 'Driver_str' in self.m_devices[key]: - if self.m_devices[key]['Driver_str'] not in dpdk_nic_bind.dpdk_drivers : - self.do_bind_one (key) + if self.m_devices[key]['Vendor_str'].find("Mellanox")>-1 : + Mellanox_cnt=Mellanox_cnt+1 + + + if not map_driver.dump_interfaces : + if ((Mellanox_cnt>0) and (Mellanox_cnt!= len(if_list))): + err=" All driver should be from one vendor. you have at least one driver from Mellanox but not all "; + raise DpdkSetup(err) + + + if not map_driver.dump_interfaces : + if Mellanox_cnt>0 : + self.set_only_mellanox_nics() + + if self.get_only_mellanox_nics(): + self.check_ofe_version () + for key in if_list: + pci_id=self.m_devices[key]['Slot_str'] + self.tune_mlx5_device (pci_id) + if 'Interface' in self.m_devices[key]: + dev_id=self.m_devices[key]['Interface'] + self.disable_flow_control_mlx5_device (dev_id) + self.set_max_mtu_mlx5_device(dev_id) + + + if only_check_all_mlx: + if Mellanox_cnt >0: + exit(1); else: - self.do_bind_one (key) + exit(0); - if if_list and map_driver.args.parent and dpdk_nic_bind.get_igb_uio_usage(): - pid = dpdk_nic_bind.get_pid_using_pci(if_list) - if pid: - cmdline = dpdk_nic_bind.read_pid_cmdline(pid) - print('Some or all of given interfaces are in use by following process:\npid: %s, cmd: %s' % (pid, cmdline)) - if not dpdk_nic_bind.confirm('Ignore and proceed (y/N):'): - sys.exit(1) + for key in if_list: + if key not in self.m_devices: + err=" %s does not exist " %key; + raise DpdkSetup(err) + + if 'Driver_str' in self.m_devices[key]: + if self.m_devices[key]['Driver_str'] not in (dpdk_nic_bind.dpdk_drivers+dpdk_nic_bind.dpdk_and_kernel) : + self.do_bind_one (key,(Mellanox_cnt>0)) + pass; else: - print('WARNING: Some other program is using DPDK driver.\nIf it is TRex and you did not configure it for dual run, current command will fail.') + self.do_bind_one (key,(Mellanox_cnt>0)) + pass; + + if (Mellanox_cnt==0): + # We are not in Mellanox case, we can do this check only in case of Intel (another process is running) + if if_list and map_driver.args.parent and (dpdk_nic_bind.get_igb_uio_usage()): + pid = dpdk_nic_bind.get_pid_using_pci(if_list) + if pid: + cmdline = dpdk_nic_bind.read_pid_cmdline(pid) + print('Some or all of given interfaces are in use by following process:\npid: %s, cmd: %s' % (pid, cmdline)) + if not dpdk_nic_bind.confirm('Ignore and proceed (y/N):'): + sys.exit(1) + else: + print('WARNING: Some other program is using DPDK driver.\nIf it is TRex and you did not configure it for dual run, current command will fail.') def do_return_to_linux(self): if not self.m_devices: @@ -684,7 +850,7 @@ To return to Linux the DPDK bound interfaces (for ifconfig etc.) sudo ./dpdk_set_ports.py -l To create TRex config file using interactive mode - sudo ./dpdk_set_ports.py -l + sudo ./dpdk_set_ports.py -i To create a default config file (example1) sudo ./dpdk_setup_ports.py -c 02:00.0 02:00.1 -o /etc/trex_cfg.yaml @@ -829,6 +995,8 @@ def main (): print(e) exit(-1) + + if __name__ == '__main__': main() |