#!/router/bin/python-2.7.4
import h_avc
from trex_control_plane.client.trex_client import CTRexClient
import ConfigParser
import threading
import time,signal
import argparse
import sys
import os
import subprocess
from time import sleep
import signal
import textwrap
import getpass
import random
import datetime
from datetime import timedelta
import traceback
import math
import re
import termios
import errno
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders
from email.mime.image import MIMEImage
from distutils.version import StrictVersion
class TrexRunException(Exception):
def __init__ (self, reason, cmd = None, std_log = None, err_log = None):
self.reason = reason
self.std_log = std_log
self.err_log = err_log
# generate the error message
self.message = "\nSummary of error:\n\n %s\n" % (reason)
if std_log:
self.message += "\nConsole Log:\n\n %s\n" % (self.std_log)
if err_log:
self.message += "\nStd Error Log:\n\n %s\n" % (self.err_log)
def __str__(self):
return self.message
############################# utility functions start #################################
def verify_glibc_version ():
x = subprocess.check_output("/usr/bin/ldd --version", shell=True)
m = re.match("ldd \(GNU libc\) (.*)", x)
if not m:
raise Exception("Cannot determine LDD version")
current_version = m.group(1)
if StrictVersion(current_version) < StrictVersion("2.5"):
raise Exception("GNU ldd version required for graph plotting is at least 2.5, system is %s - please run simple 'find'" % current_version)
def get_median(numericValues):
theValues = sorted(numericValues)
if len(theValues) % 2 == 1:
return theValues[(len(theValues)+1)/2-1]
else:
lower = theValues[len(theValues)/2-1]
upper = theValues[len(theValues)/2]
return (float(lower + upper)) / 2
def list_to_clusters(l, n):
for i in xrange(0, len(l), n):
yield l[i:i+n]
def cpu_histo_to_str (cpu_histo):
s = "\nCPU Samplings:\n\n"
period = 0
clusters = list(list_to_clusters(cpu_histo, 10))
for cluster in clusters:
period += 10
line = "%3s Seconds: [" % period
cluster += (10 - len(cluster)) * [None]
for x in cluster:
if (x != None):
line += "%5.1f%%, " % x
else:
line += " "
line = line[:-2] # trim the comma and space
line += " " # return the space
line += "]\n"
s += line
return s
# Terminal Manager Class
class TermMng:
def __enter__(self):
self.fd = sys.stdin.fileno()
self.old = termios.tcgetattr(self.fd)
# copy new and remove echo
new = self.old[:]
new[3] &= ~termios.ECHO
self.tcsetattr_flags = termios.TCSAFLUSH
if hasattr(termios, 'TCSASOFT'):
self.tcsetattr_flags |= termios.TCSASOFT
termios.tcsetattr(self.fd, self.tcsetattr_flags, new)
def __exit__ (self ,type, value, traceback):
termios.tcsetattr(self.fd, self.tcsetattr_flags, self.old)
############################# utility functions stop #################################
def send_mail(send_from, send_to, subject, html_text, txt_attachments=[], images=[], server="localhost"):
assert isinstance(send_to, list)
assert isinstance(txt_attachments, list)
assert isinstance(images, list)
# create a multi part message
msg = MIMEMultipart()
msg['From'] = send_from
msg['To'] = COMMASPACE.join(send_to)
msg['Date'] = formatdate(localtime=True)
msg['Subject'] = subject
msg['Cc'] = "imarom@cisco.com"
# add all images to the text as embbeded images
for image in images:
html_text += '
'.format(image)
fp = open(image, 'rb')
image_object = MIMEImage(fp.read())
fp.close()
image_object.add_header('Content-ID', image)
msg.attach(image_object)
# attach the main report as embedded HTML
msg.attach( MIMEText(html_text, 'html') )
# attach regualr txt files
for f in txt_attachments:
part = MIMEBase('application', "octet-stream")
part.set_payload( open(f,"rb").read() )
Encoders.encode_base64(part)
part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(f))
msg.attach(part)
smtp = smtplib.SMTP(server)
smtp.sendmail(send_from, send_to, msg.as_string())
smtp.close()
# convert HTML to image - returning a image file as a string
def html2image (html_filename, image_filename):
cmd = "./phantom/phantomjs ./phantom/rasterize.js {0} {1}".format(html_filename, image_filename)
subprocess.call(cmd, shell=True)
assert os.path.exists(image_filename)
return (image_filename)
# convert results of run to a string
def run_results_to_str (results, cond_type):
output = ""
output += "M: {0:<12.6f}\n".format(results['m'])
output += "BW: {0:<12,.2f} [Mbps]\n".format(results['tx'])
output += "PPS: {0:<12,} [pkts]\n".format(int(results['total-pps']))
output += "CPU: {0:.4f} %\n".format(results['cpu_util'])
output += "Maximum Latency: {0:<12,} [usec]\n".format(int(results['maximum-latency']))
output += "Average Latency: {0:<12,} [usec]\n".format(int(results['average-latency']))
output += "Pkt Drop: {0:<12,} [pkts]\n".format(int(results['total-pkt-drop']))
output += "Condition: {0:<12} ({1})\n".format("Passed" if check_condition(cond_type, results) else "Failed", cond_type_to_str(cond_type))
return (output)
############################# classes #################################
class ErrorHandler(object):
def __init__ (self, exception, traceback):
if isinstance(exception, TrexRunException):
logger.log("\n*** Script Terminated Due To Trex Failure")
logger.log("\n********************** TRex Error - Report **************************\n")
logger.log(str(exception))
logger.flush()
elif isinstance(exception, IOError):
logger.log("\n*** Script Terminated Due To IO Error")
logger.log("\nEither Router address or the Trex config is bad or some file is missing - check traceback below")
logger.log("\n********************** IO Error - Report **************************\n")
logger.log(str(exception))
logger.log(str(traceback))
logger.flush()
else:
logger.log("\n*** Script Terminated Due To Fatal Error")
logger.log("\n********************** Internal Error - Report **************************\n")
logger.log(str(exception) + "\n")
logger.log(str(traceback))
logger.flush()
# call the handler
g_kill_cause = "error"
os.kill(os.getpid(), signal.SIGUSR1)
# simple HTML table
class HTMLTable:
def __init__ (self):
self.table_rows = []
def add_row (self, param, value):
self.table_rows.append([param, value])
def generate_table(self):
txt = '
'
txt += "Parameter | Results |
"
for row in self.table_rows:
txt += "{0} | {1} |
".format(row[0], row[1])
txt += "
"
return txt
# process results and dispatch it
class JobReporter:
def __init__ (self, job_summary):
self.job_summary = job_summary
pass
def __plot_results_to_str (self, plot_results):
output = "\nPlotted Points: \n\n"
for p in plot_results:
output += "BW : {0:8.2f}, ".format(p['tx'])
output += "PPS : {0:8,} ".format(int(p['total-pps']))
output += "CPU : {0:8.2f} %, ".format(p['cpu_util'])
output += "Max Latency : {0:10,}, ".format(int(p['maximum-latency']))
output += "Avg Latency : {0:10,}, ".format(int(p['average-latency']))
output += "Pkt Drop : {0:12,}, \n".format(int(p['total-pkt-drop']))
return (output + "\n")
def __summary_to_string (self):
output = ""
output += "\n-== Job Completed Successfully ==-\n\n"
output += "Job Report:\n\n"
output += "Job Name: {0}\n".format(self.job_summary['job_name'])
output += "YAML file: {0}\n".format(self.job_summary['yaml'])
output += "Job Type: {0}\n".format(self.job_summary['job_type_str'])
output += "Condition: {0}\n".format(self.job_summary['cond_name'])
output += "Job Dir: {0}\n".format(self.job_summary['job_dir'])
output += "Job Log: {0}\n".format(self.job_summary['log_filename'])
output += "Email Report: {0}\n".format(self.job_summary['email'])
output += "Job Total Time: {0}\n\n".format(self.job_summary['total_run_time'])
if (self.job_summary.get('find_results') != None):
find_results = self.job_summary['find_results']
output += ("Maximum BW Point Details:\n\n")
output += run_results_to_str(find_results, self.job_summary['cond_type'])
if (self.job_summary.get('plot_results') != None):
plot_results = self.job_summary['plot_results']
output += self.__plot_results_to_str(plot_results)
return output
# simple print to screen of the job summary
def print_summary (self):
summary = self.__summary_to_string()
logger.log(summary)
def __generate_graph_report (self, plot_results):
graph_data = str( [ [x['tx'], x['cpu_util']/100, x['maximum-latency'], x['average-latency']] for x in plot_results ] )
table_data = str( [ [x['tx'], x['total-pps'], x['cpu_util']/100, x['norm_cpu'], x['maximum-latency'], x['average-latency'], x['total-pkt-drop']] for x in plot_results ] )
with open ("graph_template.html", "r") as myfile:
data = myfile.read()
data = data.replace("!@#$template_fill_head!@#$", self.job_summary['yaml'])
data = data.replace("!@#$template_fill_graph!@#$", graph_data[1:(len(graph_data) - 1)])
data = data.replace("!@#$template_fill_table!@#$", table_data[1:(len(table_data) - 1)])
# generate HTML report
graph_filename = self.job_summary['graph_filename']
text_file = open(graph_filename, "w")
text_file.write(str(data))
text_file.close()
return graph_filename
def __generate_body_report (self):
job_setup_table = HTMLTable()
job_setup_table.add_row("User Name", self.job_summary['user'])
job_setup_table.add_row("Job Name", self.job_summary['job_name'])
job_setup_table.add_row("Job Type", self.job_summary['job_type_str'])
job_setup_table.add_row("Test Condition", self.job_summary['cond_name'])
job_setup_table.add_row("YAML File", self.job_summary['yaml'])
job_setup_table.add_row("Job Total Time", "{0}".format(self.job_summary['total_run_time']))
job_summary_table = HTMLTable()
find_results = self.job_summary['find_results']
if find_results != None:
job_summary_table.add_row("Maximum Bandwidth", "{0:,.2f} [Mbps]".format(find_results['tx']))
job_summary_table.add_row("Maximum PPS", "{0:,} [pkts]".format(int(find_results['total-pps'])))
job_summary_table.add_row("CPU Util.", "{0:.2f}%".format(find_results['cpu_util']))
job_summary_table.add_row("Maximum Latency", "{0:,} [usec]".format(int(find_results['maximum-latency'])))
job_summary_table.add_row("Average Latency", "{0:,} [usec]".format(int(find_results['average-latency'])))
job_summary_table.add_row("Total Pkt Drop", "{0:,} [pkts]".format(int(find_results['total-pkt-drop'])))
with open ("report_template.html", "r") as myfile:
data = myfile.read()
data = data.replace("!@#$template_fill_job_setup_table!@#$", job_setup_table.generate_table())
data = data.replace("!@#$template_fill_job_summary_table!@#$", job_summary_table.generate_table())
return data
# create an email report and send to the user
def send_email_report (self):
images = []
logger.log("\nCreating E-Mail Report...\n")
# generate main report
report_str = self.__generate_body_report()
# generate graph report (if exists)
plot_results = self.job_summary['plot_results']
if plot_results:
logger.log("Generating Plot Results HTML ...\n")
graph_filename = self.__generate_graph_report(plot_results)
logger.log("Converting HTML to image ...\n")
images.append(html2image(graph_filename, graph_filename + ".png"))
else:
graph_filename = None
# create email
from_addr = 'TrexReporter@cisco.com'
to_addr = []
to_addr.append(self.job_summary['email'])
to_addr.append('imarom@cisco.com')
attachments = []
attachments.append(self.job_summary['log_filename'])
logger.log("Attaching log {0}...".format(self.job_summary['log_filename']))
if graph_filename:
attachments.append(graph_filename)
logger.log("Attaching plotting report {0}...".format(graph_filename))
logger.flush()
send_mail(from_addr, to_addr, "TRex Performance Report", report_str, attachments, images)
logger.log("\nE-mail sent successfully to: " + self.job_summary['email'])
# dummy logger in case logger creation failed
class DummyLogger(object):
def __init__(self):
pass
def log(self, text, force = False, newline = True):
text_out = (text + "\n") if newline else text
sys.stdout.write(text_out)
def console(self, text, force = False, newline = True):
self.log(text, force, newline)
def flush (self):
pass
# logger object
class MyLogger(object):
def __init__(self, log_filename):
# Store the original stdout and stderr
sys.stdout.flush()
sys.stderr.flush()
self.stdout_fd = os.dup(sys.stdout.fileno())
self.devnull = os.open('/dev/null', os.O_WRONLY)
self.log_file = open(log_filename, 'w')
self.silenced = False
self.pending_log_file_prints = 0
self.active = True
def shutdown (self):
self.active = False
def reactive (self):
self.active = True
# silence all prints from stdout
def silence(self):
os.dup2(self.devnull, sys.stdout.fileno())
self.silenced = True
# restore stdout status
def restore(self):
sys.stdout.flush()
sys.stderr.flush()
# Restore normal stdout
os.dup2(self.stdout_fd, sys.stdout.fileno())
self.silenced = False
#print a message to the log (both stdout / log file)
def log(self, text, force = False, newline = True):
if not self.active:
return
self.log_file.write((text + "\n") if newline else text)
self.pending_log_file_prints += 1
if (self.pending_log_file_prints >= 10):
self.log_file.flush()
self.pending_log_file_prints = 0
self.console(text, force, newline)
# print a message to the console alone
def console(self, text, force = False, newline = True):
if not self.active:
return
_text = (text + "\n") if newline else text
# if we are silenced and not forced - go home
if self.silenced and not force:
return
if self.silenced:
os.write(self.stdout_fd, _text)
else:
sys.stdout.write(_text)
sys.stdout.flush()
# flush
def flush(self):
sys.stdout.flush()
self.log_file.flush()
def __del__(self):
os.close(self.devnull)
if self.log_file:
self.log_file.flush()
self.log_file.close()
# simple progress bar
class ProgressBar(threading.Thread):
def __init__(self, time_sec, router):
super(ProgressBar, self).__init__()
self.active = True
self.time_sec = time_sec + 15
self.router = router
def run (self):
global g_stop
col = 40
delta_for_sec = float(col) / self.time_sec
accu = 0.0
for i in range(self.time_sec):
if (self.active == False):
# print 100% - done
bar = "\r[" + ('#' * col) + "] {0:.2f} %".format(100)
logger.console(bar, force = True, newline = False)
break
if (g_stop == True):
break
sleep(1)
accu += delta_for_sec
bar = "\r[" + ('#' * int(accu)) + (' ' * (col - int(accu))) + "] {0:.2f} %".format( (accu/col) * 100 )
bar += " / Router CPU: {0:.2f} %".format(self.router.get_last_cpu_util())
logger.console(bar, force = True, newline = False)
logger.console("\r\n", force = True, newline = False)
logger.flush()
def stop (self):
self.active = False
self.join()
# global vars
g_stop = False
logger = DummyLogger()
# cleanup list is a list of callables to be run when cntrl+c is caught
cleanup_list = []
################ threads ########################
# sampler
class Sample_Thread (threading.Thread):
def __init__(self, threadID, router):
threading.Thread.__init__(self)
self.threadID = threadID
self.router = router
self.stop = False
def run(self):
self.router.clear_sampling_stats()
try:
while (self.stop==False) and (g_stop==False):
self.router.sample_stats()
time.sleep(1);
except Exception as e:
ErrorHandler(e, traceback.format_exc())
def do_stop(self):
self.stop = True
def general_cleanup_on_error ():
global g_stop
global cleanup_list
# mark all the threads to finish
g_stop = True;
# shutdown and flush the logger
logger.shutdown()
if logger:
logger.flush()
# execute the registered callables
for c in cleanup_list:
c()
# dummy wait for threads to finish (TODO: make this more smart)
time.sleep(2)
exit(-1)
# just a dummy for preventing chain calls
def signal_handler_dummy (sig_id, frame):
pass
def error_signal_handler (sig_id, frame):
# make sure no chain of calls
signal.signal(signal.SIGUSR1, signal_handler_dummy)
signal.signal(signal.SIGINT, signal_handler_dummy)
general_cleanup_on_error()
def int_signal_handler(sig_id, frame):
# make sure no chain of calls
signal.signal(signal.SIGINT, signal_handler_dummy)
signal.signal(signal.SIGUSR1, signal_handler_dummy)
logger.log("\n\nCaught Cntrl+C... Cleaning up!\n\n")
general_cleanup_on_error()
# Trex with sampling
class CTRexWithRouter:
def __init__(self, trex, trex_params):
self.trex = trex;
self.trex_params = trex_params
if self.trex_params['router_type'] == "ASR":
self.router = h_avc.ASR1k(self.trex_params['router_interface'], self.trex_params['router_port'], self.trex_params['router_password'])
elif self.trex_params['router_type'] == "ISR":
self.router = h_avc.ISR(self.trex_params['router_interface'], self.trex_params['router_port'], self.trex_params['router_password'])
else:
raise Exception("unknown router type in config file")
self.router.connect()
def get_router (self):
return self.router
def run(self, m, duration):
self.sample_thread = Sample_Thread(1, self.router)
self.sample_thread.start();
# launch trex
try:
# trex_res = self.trex.run(m, duration);
self.trex.start_trex(c = self.trex_params['trex_cores'],
m = m,
d = duration,
f = self.trex_params['trex_yaml_file'],
nc = True,
l = self.trex_params['trex_latency'],
limit_ports = self.trex_params['trex_limit_ports'])
self.trex.sample_to_run_finish(20) # collect trex-sample every 20 seconds.
except Exception:
self.sample_thread.do_stop() # signal to stop
self.sample_thread.join() # wait for it to realy stop
raise
self.sample_thread.do_stop() # signal to stop
self.sample_thread.join() # wait for it to realy stop
self.res = self.trex.get_result_obj()
results = {}
results['status'] = True
results['trex_results'] = self.res
results['avc_results'] = self.router.get_stats()
return (results)
#return(trex_res.get_status() == STATUS_OK);
# sanity checks to see run really went well
def sanity_test_run (trex_r, avc_r):
pass
#if (sum(avc_r['cpu_histo']) == 0):
#raise h_trex.TrexRunException("CPU utilization from router is zero, check connectivity")
def _trex_run (job_summary, m, duration):
trex_thread = job_summary['trex_thread']
p = ProgressBar(duration, trex_thread.get_router())
p.start()
try:
results = trex_thread.run(m, duration)
except Exception,e:
p.stop()
raise
p.stop()
if (results == None):
raise Exception("Failed to run Trex")
# fetch values
trex_r = results['trex_results']
avc_r = results['avc_results']
sanity_test_run(trex_r, avc_r)
res_dict = {}
res_dict['m'] = m
total_tx_bps = trex_r.get_last_value("trex-global.data.m_tx_bps")
res_dict['tx'] = total_tx_bps / (1000 * 1000) # EVENTUALLY CONTAINS IN MBPS (EXTRACTED IN BPS)
res_dict['cpu_util'] = avc_r['cpu_util']
if int(res_dict['cpu_util']) == 0:
res_dict['norm_cpu']=1;
else:
res_dict['norm_cpu'] = (res_dict['tx'] / res_dict['cpu_util']) * 100
res_dict['maximum-latency'] = max ( trex_r.get_max_latency().values() ) #trex_r.res['maximum-latency']
res_dict['average-latency'] = trex_r.get_avg_latency()['all'] #trex_r.res['average-latency']
logger.log(cpu_histo_to_str(avc_r['cpu_histo']))
res_dict['total-pkt-drop'] = trex_r.get_total_drops()
res_dict['expected-bps'] = trex_r.get_expected_tx_rate()['m_tx_expected_bps']
res_dict['total-pps'] = get_median( trex_r.get_value_list("trex-global.data.m_tx_pps") )#trex_r.res['total-pps']
res_dict['m_total_pkt'] = trex_r.get_last_value("trex-global.data.m_total_tx_pkts")
res_dict['latency_condition'] = job_summary['trex_params']['trex_latency_condition']
return res_dict
def trex_run (job_summary, m, duration):
res = _trex_run (job_summary, m, duration)
return res
def m_to_mbps (job_summary, m):
return (m * job_summary['base_m_unit'])
# find the correct range of M
def find_m_range (job_summary):
trex = job_summary['trex']
trex_config = job_summary['trex_params']
# if not provided - guess the correct range of bandwidth
if not job_summary['m_range']:
m_range = [0.0, 0.0]
# 1 Mbps -> 1 Gbps
LOW_TX = 1.0 * 1000 * 1000
MAX_TX = 1.0 * 1000 * 1000 * 1000
# for 10g go to 10g
if trex_config['trex_machine_type'] == "10G":
MAX_TX *= 10
# dual injection can potentially reach X2 speed
if trex_config['trex_is_dual'] == True:
MAX_TX *= 2
else:
m_range = job_summary['m_range']
LOW_TX = m_range[0] * 1000 * 1000
MAX_TX = m_range[1] * 1000 * 1000
logger.log("\nSystem Settings - Min: {0:,} Mbps / Max: {1:,} Mbps".format(LOW_TX / (1000 * 1000), MAX_TX / (1000 * 1000)))
logger.log("\nTrying to get system minimum M and maximum M...")
res_dict = trex_run(job_summary, 1, 30)
# figure out low / high M
m_range[0] = (LOW_TX / res_dict['expected-bps']) * 1
m_range[1] = (MAX_TX / res_dict['expected-bps']) * 1
# return both the m_range and the base m unit for future calculation
results = {}
results['m_range'] = m_range
results['base_m_unit'] = res_dict['expected-bps'] /(1000 * 1000)
return (results)
# calculate points between m_range[0] and m_range[1]
def calculate_plot_points (job_summary, m_range, plot_count):
cond_type = job_summary['cond_type']
delta_m = (m_range[1] - m_range[0]) / plot_count
m_current = m_range[0]
m_end = m_range[1]
logger.log("\nStarting Plot Graph Task ...\n")
logger.log("Plotting Range Is From: {0:.2f} [Mbps] To: {1:.2f} [Mbps] Over {2} Points".format(m_to_mbps(job_summary, m_range[0]),
m_to_mbps(job_summary, m_range[1]),
plot_count))
logger.log("Delta Between Points is {0:.2f} [Mbps]".format(m_to_mbps(job_summary, delta_m)))
plot_points = []
duration = 180
iter = 1
trex = job_summary['trex']
while (iter <= plot_count):
logger.log("\nPlotting Point [{0}/{1}]:\n".format(iter, plot_count))
logger.log("Estimated BW ~= {0:,.2f} [Mbps]\n".format(m_to_mbps(job_summary, m_current)))
logger.log("M = {0:.6f}".format(m_current))
logger.log("Duration = {0} seconds\n".format(duration))
res_dict = trex_run(job_summary, m_current, duration)
print_trex_results(res_dict, cond_type)
plot_points.append(dict(res_dict))
m_current += delta_m
iter = iter + 1
# last point - make sure its the maximum point
if (iter == plot_count):
m_current = m_range[1]
#print "waiting for system to stabilize ..."
#time.sleep(30);
return plot_points
def cond_type_to_str (cond_type):
return "Max Latency" if cond_type=='latency' else "Pkt Drop"
# success condition (latency or drop)
def check_condition (cond_type, res_dict):
if cond_type == 'latency':
if res_dict['maximum-latency'] < res_dict['latency_condition']:
return True
else:
return False
# drop condition is a bit more complex - it should create high latency in addition to 0.2% drop
elif cond_type == 'drop':
if (res_dict['maximum-latency'] > (res_dict['latency_condition']+2000) ) and (res_dict['total-pkt-drop'] > (0.002 * res_dict['m_total_pkt'])):
return False
else:
return True
assert(0)
def print_trex_results (res_dict, cond_type):
logger.log("\nRun Results:\n")
output = run_results_to_str(res_dict, cond_type)
logger.log(output)
######################## describe a find job ########################
class FindJob:
# init a job object with min / max
def __init__ (self, min, max, job_summary):
self.min = float(min)
self.max = float(max)
self.job_summary = job_summary
self.cond_type = job_summary['cond_type']
self.success_points = []
self.iter_num = 1
self.found = False
self.iter_duration = 300
def _distance (self):
return ( (self.max - self.min) / min(self.max, self.min) )
def time_to_end (self):
time_in_sec = (self.iters_to_end() * self.iter_duration)
return timedelta(seconds = time_in_sec)
def iters_to_end (self):
# find 2% point
ma = self.max
mi = self.min
iter = 0
while True:
dist = (ma - mi) / min(ma , mi)
if dist < 0.02:
break
if random.choice(["up", "down"]) == "down":
ma = (ma + mi) / 2
else:
mi = (ma + mi) / 2
iter += 1
return (iter)
def _cur (self):
return ( (self.min + self.max) / 2 )
def _add_success_point (self, res_dict):
self.success_points.append(res_dict.copy())
def _is_found (self):
return (self.found)
def _next_iter_duration (self):
return (self.iter_duration)
# execute iteration
def _execute (self):
# reset the found var before running
self.found = False
# run and print results
res_dict = trex_run(self.job_summary, self._cur(), self.iter_duration)
self.iter_num += 1
cur = self._cur()
if (self._distance() < 0.02):
if (check_condition(self.cond_type, res_dict)):
# distance < 2% and success - we are done
self.found = True
else:
# lower to 90% of current and retry
self.min = cur * 0.9
self.max = cur
else:
# success
if (check_condition(self.cond_type, res_dict)):
self.min = cur
else:
self.max = cur
if (check_condition(self.cond_type, res_dict)):
self._add_success_point(res_dict)
return res_dict
# find the max M before
def find_max_m (self):
res_dict = {}
while not self._is_found():
logger.log("\n-> Starting Find Iteration #{0}\n".format(self.iter_num))
logger.log("Estimated BW ~= {0:,.2f} [Mbps]".format(m_to_mbps(self.job_summary, self._cur())))
logger.log("M = {0:.6f}".format(self._cur()))
logger.log("Duration = {0} seconds".format(self._next_iter_duration()))
logger.log("Current BW Range = {0:,.2f} [Mbps] / {1:,.2f} [Mbps]".format(m_to_mbps(self.job_summary, self.min), m_to_mbps(self.job_summary, self.max)))
logger.log("Est. Iterations Left = {0} Iterations".format(self.iters_to_end()))
logger.log("Est. Time Left = {0}\n".format(self.time_to_end()))
res_dict = self._execute()
print_trex_results(res_dict, self.cond_type)
find_results = res_dict.copy()
find_results['max_m'] = self._cur()
return (find_results)
######################## describe a plot job ########################
class PlotJob:
def __init__(self, findjob):
self.job_summary = findjob.job_summary
self.plot_points = list(findjob.success_points)
self.plot_points.sort(key = lambda item:item['tx'])
def plot (self, duration = 300):
return self.plot_points
# add points if needed
#iter = 0
#for point in self.success_points:
#iter += 1
#logger.log("\nPlotting Point [{0}/{1}]:\n".format(iter, self.plot_count))
#logger.log("Estimated BW ~= {0:,.2f} [Mbps]\n".format(m_to_mbps(self.job_summary, point['m'])))
#logger.log("M = {0:.6f}".format(point['m']))
#logger.log("Duration = {0} seconds\n".format(duration))
#res_dict = trex_run(self.job_summary, point['m'], duration)
#print_trex_results(res_dict, self.job_summary['cond_type'])
#self.plot_points.append(dict(res_dict))
#self.plot_points = list(self.success_points)
#print self.plot_points
#self.plot_points.sort(key = lambda item:item['m'])
#print self.plot_points
#return self.plot_points
def generate_job_id ():
return (str(int(random.getrandbits(32))))
def print_header ():
logger.log("--== TRex Performance Tool v1.0 (2014) ==--")
# print startup summary
def log_startup_summary (job_summary):
trex = job_summary['trex']
trex_config = job_summary['trex_params']
logger.log("\nWork Request Details:\n")
logger.log("Setup Details:\n")
logger.log("TRex Config File: {0}".format(job_summary['config_file']))
logger.log("Machine Name: {0}".format(trex_config['trex_name']))
logger.log("TRex Type: {0}".format(trex_config['trex_machine_type']))
logger.log("TRex Dual Int. Tx: {0}".format(trex_config['trex_is_dual']))
logger.log("Router Interface: {0}".format(trex_config['router_interface']))
logger.log("\nJob Details:\n")
logger.log("Job Name: {0}".format(job_summary['job_name']))
logger.log("YAML file: {0}".format(job_summary['yaml']))
logger.log("Job Type: {0}".format(job_summary['job_type_str']))
logger.log("Condition Type: {0}".format(job_summary['cond_name']))
logger.log("Job Log: {0}".format(job_summary['log_filename']))
logger.log("Email Report: {0}".format(job_summary['email']))
# logger.log("\nTrex Command Used:\n{0}".format(trex.build_cmd(1, 10)))
def load_trex_config_params (filename, yaml_file):
config = {}
parser = ConfigParser.ConfigParser()
try:
parser.read(filename)
config['trex_name'] = parser.get("trex", "machine_name")
config['trex_port'] = parser.get("trex", "machine_port")
config['trex_hisory_size'] = parser.getint("trex", "history_size")
config['trex_latency_condition'] = parser.getint("trex", "latency_condition")
config['trex_yaml_file'] = yaml_file
# support legacy data
config['trex_latency'] = parser.getint("trex", "latency")
config['trex_limit_ports'] = parser.getint("trex", "limit_ports")
config['trex_cores'] = parser.getint("trex", "cores")
config['trex_machine_type'] = parser.get("trex", "machine_type")
config['trex_is_dual'] = parser.getboolean("trex", "is_dual")
# optional Trex parameters
if parser.has_option("trex", "config_file"):
config['trex_config_file'] = parser.get("trex", "config_file")
else:
config['trex_config_file'] = None
if parser.has_option("trex", "misc_params"):
config['trex_misc_params'] = parser.get("trex", "misc_params")
else:
config['trex_misc_params'] = None
# router section
if parser.has_option("router", "port"):
config['router_port'] = parser.get("router", "port")
else:
# simple telnet port
config['router_port'] = 23
config['router_interface'] = parser.get("router", "interface")
config['router_password'] = parser.get("router", "password")
config['router_type'] = parser.get("router", "type")
except Exception as inst:
raise TrexRunException("\nBad configuration file: '{0}'\n\n{1}".format(filename, inst))
return config
def prepare_for_run (job_summary):
global logger
# generate unique id
job_summary['job_id'] = generate_job_id()
job_summary['job_dir'] = "trex_job_{0}".format(job_summary['job_id'])
job_summary['start_time'] = datetime.datetime.now()
if not job_summary['email']:
job_summary['user'] = getpass.getuser()
job_summary['email'] = "{0}@cisco.com".format(job_summary['user'])
# create dir for reports
try:
job_summary['job_dir'] = os.path.abspath( os.path.join(os.getcwd(), 'logs', job_summary['job_dir']) )
print job_summary['job_dir']
os.makedirs( job_summary['job_dir'] )
except OSError as err:
if err.errno == errno.EACCES:
# fall back. try creating the dir name at /tmp path
job_summary['job_dir'] = os.path.join("/tmp/", "trex_job_{0}".format(job_summary['job_id']) )
os.makedirs(job_summary['job_dir'])
job_summary['log_filename'] = os.path.join(job_summary['job_dir'], "trex_log_{0}.txt".format(job_summary['job_id']))
job_summary['graph_filename'] = os.path.join(job_summary['job_dir'], "trex_graph_{0}.html".format(job_summary['job_id']))
# init logger
logger = MyLogger(job_summary['log_filename'])
# mark those as not populated yet
job_summary['find_results'] = None
job_summary['plot_results'] = None
# create trex client instance
trex_params = load_trex_config_params(job_summary['config_file'],job_summary['yaml'])
trex = CTRexClient(trex_host = trex_params['trex_name'],
max_history_size = trex_params['trex_hisory_size'],
trex_daemon_port = trex_params['trex_port'])
job_summary['trex'] = trex
job_summary['trex_params'] = trex_params
# create trex task thread
job_summary['trex_thread'] = CTRexWithRouter(trex, trex_params);
# in case of an error we need to call the remote cleanup
cleanup_list.append(trex.stop_trex)
# signal handler
signal.signal(signal.SIGINT, int_signal_handler)
signal.signal(signal.SIGUSR1, error_signal_handler)
def after_run (job_summary):
job_summary['total_run_time'] = datetime.datetime.now() - job_summary['start_time']
reporter = JobReporter(job_summary)
reporter.print_summary()
reporter.send_email_report()
def launch (job_summary):
prepare_for_run(job_summary)
print_header()
log_startup_summary(job_summary)
# find the correct M range if not provided
range_results = find_m_range(job_summary)
job_summary['base_m_unit'] = range_results['base_m_unit']
if job_summary['m_range']:
m_range = job_summary['m_range']
else:
m_range = range_results['m_range']
logger.log("\nJob Bandwidth Working Range:\n")
logger.log("Min M = {0:.6f} / {1:,.2f} [Mbps] \nMax M = {2:.6f} / {3:,.2f} [Mbps]".format(m_range[0], m_to_mbps(job_summary, m_range[0]), m_range[1], m_to_mbps(job_summary, m_range[1])))
# job time
findjob = FindJob(m_range[0], m_range[1], job_summary)
job_summary['find_results'] = findjob.find_max_m()
if job_summary['job_type'] == "all":
# plot points to graph
plotjob = PlotJob(findjob)
job_summary['plot_results'] = plotjob.plot()
after_run(job_summary)
# populate the fields for run
def populate_fields (job_summary, args):
job_summary['config_file'] = args.config_file
job_summary['job_type'] = args.job
job_summary['cond_type'] = args.cond_type
job_summary['yaml'] = args.yaml
if args.n:
job_summary['job_name'] = args.n
else:
job_summary['job_name'] = "Nameless"
# did the user provided an email
if args.e:
job_summary['email'] = args.e
else:
job_summary['email'] = None
# did the user provide a range ?
if args.m:
job_summary['m_range'] = args.m
else:
job_summary['m_range'] = None
# some pretty shows
job_summary['cond_name'] = 'Drop Pkt' if (args.cond_type == 'drop') else 'High Latency'
if args.job == "find":
job_summary['job_type_str'] = "Find Max BW"
elif args.job == "plot":
job_summary['job_type_str'] = "Plot Graph"
else:
job_summary['job_type_str'] = "Find Max BW & Plot Graph"
if args.job != "find":
verify_glibc_version()
# verify file exists for argparse
def is_valid_file (parser, err_msg, filename):
if not os.path.exists(filename):
parser.error("{0}: '{1}'".format(err_msg, filename))
else:
return (filename) # return an open file handle
def entry ():
job_summary = {}
parser = argparse.ArgumentParser()
parser.add_argument("-n", help="Job Name",
type = str)
parser.add_argument("-m", help="M Range [default: auto calcuation]",
nargs = 2,
type = float)
parser.add_argument("-e", help="E-Mail for report [default: whoami@cisco.com]",
type = str)
parser.add_argument("-c", "--cfg", dest = "config_file", required = True,
help = "Configuration File For Trex/Router Pair",
type = lambda x: is_valid_file(parser, "config file does not exists",x))
parser.add_argument("job", help = "Job type",
type = str,
choices = ['find', 'plot', 'all'])
parser.add_argument("cond_type", help="type of failure condition",
type = str,
choices = ['latency','drop'])
parser.add_argument("-f", "--yaml", dest = "yaml", required = True,
help="YAML file to use", type = str)
args = parser.parse_args()
with TermMng():
try:
populate_fields(job_summary, args)
launch(job_summary)
except Exception as e:
ErrorHandler(e, traceback.format_exc())
logger.log("\nReport bugs to imarom@cisco.com\n")
g_stop = True
def dummy_test ():
job_summary = {}
find_results = {}
job_summary['config_file'] = 'config/trex01-1g.cfg'
job_summary['yaml'] = 'dummy.yaml'
job_summary['email'] = 'imarom@cisco.com'
job_summary['job_name'] = 'test'
job_summary['job_type_str'] = 'test'
prepare_for_run(job_summary)
time.sleep(2)
job_summary['yaml'] = 'dummy.yaml'
job_summary['job_type'] = 'find'
job_summary['cond_name'] = 'Drop'
job_summary['cond_type'] = 'drop'
job_summary['job_id']= 94817231
find_results['tx'] = 210.23
find_results['m'] = 1.292812
find_results['total-pps'] = 1000
find_results['cpu_util'] = 74.0
find_results['maximum-latency'] = 4892
find_results['average-latency'] = 201
find_results['total-pkt-drop'] = 0
findjob = FindJob(1,1,job_summary)
plotjob = PlotJob(findjob)
job_summary['plot_results'] = plotjob.plot()
job_summary['find_results'] = find_results
job_summary['plot_results'] = [{'cpu_util': 2.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 999980.0, 'average-latency': 85.0, 'tx': 0.00207*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 221.0},
{'cpu_util': 8.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 48500000.0, 'average-latency': 87.0, 'tx': 0.05005*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 279.0},
{'cpu_util': 14.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 95990000.0, 'average-latency': 92.0, 'tx': 0.09806*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 273.0},
{'cpu_util': 20.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 143490000.0, 'average-latency': 95.0, 'tx': 0.14613*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 271.0},
{'cpu_util': 25.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 190980000.0, 'average-latency': 97.0, 'tx': 0.1933*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 302.0},
{'cpu_util': 31.0,'norm_cpu': 1.0, 'total-pps': 1000,'expected-bps': 238480000.0, 'average-latency': 98.0, 'tx': 0.24213*1000, 'total-pkt-drop': 1.0, 'maximum-latency': 292.0},
{'cpu_util': 37.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 285970000.0, 'average-latency': 99.0, 'tx': 0.29011*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 344.0},
{'cpu_util': 43.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 333470000.0, 'average-latency': 100.0, 'tx': 0.3382*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 351.0},
{'cpu_util': 48.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 380970000.0, 'average-latency': 100.0, 'tx': 0.38595*1000, 'total-pkt-drop': 0.0, 'maximum-latency': 342.0},
{'cpu_util': 54.0,'norm_cpu': 1.0, 'total-pps': 1000, 'expected-bps': 428460000.0, 'average-latency': 19852.0, 'tx': 0.43438*1000, 'total-pkt-drop': 1826229.0, 'maximum-latency': 25344.0}]
after_run(job_summary)
if __name__ == "__main__":
entry ()