#!/scratch/Anaconda2.4.0/bin/python
"""
Analyze and plot TRex regression results.

The raw data is expected to be a dictionary of the following (JSON-like) shape:

    {'setup_name':  {'test1_name': [QUERY1, QUERY2, QUERY3],
                     'test2_name': [QUERY1, QUERY2, QUERY3]},
     'setup_name2': {'test1_name': [QUERY1, QUERY2, QUERY3],
                     'test2_name': [QUERY1, QUERY2, QUERY3]}}

Every QUERY is an indexable record. Only the positions named by ``TestQuery``
are read by this module:

    index 1 - timestamp string in ``TestQuery.QUERY_TIMEFORMAT``
    index 2 - MPPS result (anything ``float()`` accepts)
    index 3 - build id

Index 0 is never read here (presumably the test name - confirm against the
producer of the data). Illustrative record:

    ["syn attack - 64 bytes, single CPU", "2016-12-26 01:39:00", "9.631898", "54289"]

To support another record layout, change the indexes/format in ``TestQuery``.
"""
import pandas as pd
import numpy as np
import matplotlib

matplotlib.use('Agg')  # must be selected before pyplot is imported
from matplotlib import pyplot as plt
from matplotlib import dates as matdates
from matplotlib import lines as matlines
import os
import time
from datetime import datetime
from functools import reduce  # builtin on Python 2 only; this import works on 2 and 3

# Column label of the MPPS value in the "latest results" table. The backslash is
# part of the label that ends up in the generated CSV files, so it is kept as is.
_MPPS_COLUMN = 'MPPS\\Core (Norm)'

# Format in which result timestamps are stored in the per-test data frames.
_RESULT_TIMEFORMAT = '%d-%m-%Y-%H:%M'
# Alternative (year first) format accepted when parsing timestamps for plotting.
_ALT_TIMEFORMAT = '%Y-%m-%d-%H:%M'


def _parse_time_stamp(time_stamp):
    """Parse a 'dd-mm-YYYY-HH:MM' or 'YYYY-mm-dd-HH:MM' string into a datetime."""
    try:
        return datetime.strptime(time_stamp, _RESULT_TIMEFORMAT)
    except ValueError:
        # Only a format mismatch triggers the fallback; if the second format does
        # not match either, its ValueError propagates to the caller.
        return datetime.strptime(time_stamp, _ALT_TIMEFORMAT)


class TestQuery(object):
    """Positions and formats of the fields read from a single raw query record."""
    QUERY_TIMEFORMAT = "%Y-%m-%d %H:%M:%S"  # date format in the query
    QUERY_TIMESTAMP = 1
    QUERY_MPPS_RESULT = 2
    QUERY_BUILD_ID = 3


class Test(object):
    """Results and statistics of one test on one setup."""

    def __init__(self, name, setup_name, end_date):
        self.name = name
        self.setup_name = setup_name
        self.end_date = end_date
        self.stats = []  # tuple
        self.results_df = []  # dataFrame
        self.latest_result = []  # float
        self.latest_result_date = ''  # string

    def analyze_all_test_data(self, raw_test_data):
        """Build the results table and the summary statistics of this test.

        Sets ``results_df`` (columns 'Build Id', 'Setup', <test name>,
        '<test name> Date'), ``stats``, ``latest_result`` and
        ``latest_result_date``. The "latest" result is the last record of
        ``raw_test_data``; the records are not re-sorted here.

        :param raw_test_data: iterable of query records laid out as described by
            ``TestQuery``. It must contain at least one record, otherwise taking
            the latest result fails.
        """
        date_column = self.name + ' Date'
        test_results = []
        test_dates = []
        test_build_ids = []
        for query in raw_test_data:
            time_of_query = time.strptime(query[TestQuery.QUERY_TIMESTAMP], TestQuery.QUERY_TIMEFORMAT)
            test_dates.append(time.strftime(_RESULT_TIMEFORMAT, time_of_query))
            test_results.append(float(query[TestQuery.QUERY_MPPS_RESULT]))
            test_build_ids.append(query[TestQuery.QUERY_BUILD_ID])
        # The column order is pinned explicitly and the results stay numeric:
        # statistics on a stringified column are either wrong or a TypeError.
        test_results_df = pd.DataFrame(
            {'Build Id': test_build_ids,
             'Setup': [self.setup_name] * len(test_results),
             self.name: test_results,
             date_column: test_dates},
            columns=['Build Id', 'Setup', self.name, date_column])
        results = test_results_df[self.name]
        stats_avg = float(results.mean())
        stats_min = float(results.min())
        stats_max = float(results.max())
        if stats_avg:
            stats_error = float(((stats_max - stats_min) / stats_avg) * 100)
        else:
            stats_error = float('nan')  # the relative spread is undefined for a zero average
        # stats = (avg_mpps, min, max, std, error, no of test_results), error = ((max-min)/avg)*100
        self.stats = (stats_avg, stats_min, stats_max, float(results.std()), stats_error, len(test_results))
        self.latest_result = float(results.iloc[-1])
        self.latest_result_date = str(test_results_df[date_column].iloc[-1])
        self.results_df = test_results_df


class Setup(object):
    """All tests of one setup, together with their aggregated tables and plots."""

    def __init__(self, name, end_date, raw_setup_data):
        self.name = name
        self.end_date = end_date  # string of date ('dd-mm-YYYY' or 'YYYY-mm-dd')
        self.tests = []  # list of test objects
        self.all_tests_data_table = pd.DataFrame()  # dataframe
        self.setup_trend_stats = pd.DataFrame()  # dataframe
        self.latest_test_results = pd.DataFrame()  # dataframe
        self.raw_setup_data = raw_setup_data  # dictionary
        self.test_names = list(raw_setup_data.keys())  # list of names

    def analyze_all_tests(self):
        """Create and analyze a ``Test`` for every test name of the raw data.

        Tests are appended to ``self.tests``; calling this twice duplicates them.
        """
        for test_name in self.test_names:
            t = Test(test_name, self.name, self.end_date)
            t.analyze_all_test_data(self.raw_setup_data[test_name])
            self.tests.append(t)

    def analyze_latest_test_results(self):
        """Build the name | MPPS | date table of the latest result of each test.

        The table is indexed 1..N in the order of ``self.tests``.
        """
        self.latest_test_results = pd.DataFrame(
            {'Test Name': [test.name for test in self.tests],
             _MPPS_COLUMN: [test.latest_result for test in self.tests],
             'Date': [test.latest_result_date for test in self.tests]},
            columns=['Test Name', _MPPS_COLUMN, 'Date'],
            index=range(1, len(self.tests) + 1))

    def analyze_all_tests_stats(self):
        """Build the per-test statistics table, indexed by test name."""
        test_names = [test.name for test in self.tests]
        all_test_stats = [test.stats for test in self.tests]
        self.setup_trend_stats = pd.DataFrame(all_test_stats, index=test_names,
                                              columns=['Avg MPPS/Core (Norm)', 'Min', 'Max', 'Std', 'Error (%)',
                                                       'Total Results'])
        self.setup_trend_stats.index.name = 'Test Name'

    def analyze_all_tests_trend(self):
        """Outer-merge the result tables of all tests into one table.

        ``pd.merge`` joins on the columns the tables share, i.e. 'Build Id' and
        'Setup'. With no tests the table is left empty.
        """
        all_tests_trend_data = [test.results_df for test in self.tests]
        if not all_tests_trend_data:
            self.all_tests_data_table = pd.DataFrame()
            return
        self.all_tests_data_table = reduce(lambda x, y: pd.merge(x, y, how='outer'), all_tests_trend_data)

    def plot_trend_graph_all_tests(self, save_path='', file_name='_trend_graph.png'):
        """Plot the result history of every test on one graph.

        :param save_path: directory for the outputs; when empty nothing is saved.
        :param file_name: suffix appended to the setup name for the image file.

        When ``save_path`` is given, the graph is saved and, if the trend
        statistics were computed, '<setup name>_trend_stats.csv' is written too.
        """
        start_date = ''  # stays empty when the setup has no tests
        for test in self.tests:
            test_data = test.results_df[test.name].tolist()
            test_time_stamps = test.results_df[test.name + ' Date'].tolist()
            start_date = test_time_stamps[0]  # the label uses the last plotted test's first timestamp
            # Repeat the last result at the end of the period so that every line
            # reaches the right edge of the graph.
            test_time_stamps.append(self.end_date + '-23:59')
            test_data.append(test_data[-1])
            float_test_time_stamps = [matdates.date2num(_parse_time_stamp(ts)) for ts in test_time_stamps]
            plt.plot_date(x=float_test_time_stamps, y=test_data, label=test.name, fmt='.-', xdate=True)
            plt.legend(fontsize='small', loc='best')
        plt.ylabel('MPPS/Core (Norm)')
        plt.title('Setup: ' + self.name)
        # Booleans, not 'off': current matplotlib treats any non-empty string as
        # True, which would leave the ticks visible.
        plt.tick_params(
            axis='x',
            which='both',
            bottom=False,
            top=False,
            labelbottom=False)
        # start_date looks like 'dd-mm-YYYY-HH:MM'; [:-6] drops the '-HH:MM' part
        plt.xlabel('Time Period: ' + start_date[:-6] + ' - ' + self.end_date)
        if save_path:
            plt.savefig(os.path.join(save_path, self.name + file_name))
            if not self.setup_trend_stats.empty:
                (self.setup_trend_stats.round(2)).to_csv(os.path.join(save_path, self.name +
                                                                      '_trend_stats.csv'))
            # NOTE(review): nesting reconstructed from a whitespace-collapsed patch;
            # confirm the figures are meant to be closed only after saving.
            plt.close('all')

    def plot_latest_test_results_bar_chart(self, save_path='', img_file_name='_latest_test_runs.png',
                                           stats_file_name='_latest_test_runs_stats.csv'):
        """Plot the latest result of every test as a bar chart.

        :param save_path: directory for the outputs; when empty nothing is saved.
        :param img_file_name: suffix appended to the setup name for the image file.
        :param stats_file_name: suffix appended to the setup name for the CSV file.
        """
        colors_for_bars = ['b', 'g', 'r', 'c', 'm', 'y']
        # plot only the mpps data; DataFrame.plot opens its own figure
        self.latest_test_results[[_MPPS_COLUMN]].plot(kind='bar', legend=False, color=colors_for_bars)
        plt.xticks(rotation='horizontal')
        plt.xlabel('Index of Tests')
        plt.ylabel('MPPS/Core (Norm)')
        plt.title("Test Runs for Setup: " + self.name)
        if save_path:
            plt.savefig(os.path.join(save_path, self.name + img_file_name))
            (self.latest_test_results.round(2)).to_csv(
                os.path.join(save_path, self.name + stats_file_name))
            # NOTE(review): nesting reconstructed from a whitespace-collapsed patch - confirm.
            plt.close('all')

    def analyze_all_setup_data(self):
        """Run every analysis step; the tests must be analyzed first."""
        self.analyze_all_tests()
        self.analyze_latest_test_results()
        self.analyze_all_tests_stats()
        self.analyze_all_tests_trend()

    def plot_all(self, save_path=''):
        """Produce both graphs of this setup (see the two plot methods)."""
        self.plot_latest_test_results_bar_chart(save_path)
        self.plot_trend_graph_all_tests(save_path)


def latest_runs_comparison_bar_chart(setup_name1, setup_name2, setup1_latest_result, setup2_latest_result,
                                     save_path=''
                                     ):
    """Plot the latest results of two setups side by side.

    :param setup_name1: label of the first setup in the chart and the CSV.
    :param setup_name2: label of the second setup in the chart and the CSV.
    :param setup1_latest_result: latest-results table of the first setup; its
        columns are read by position as test name, MPPS.
    :param setup2_latest_result: latest-results table of the second setup; its
        columns are read by position as test name, MPPS, date.
    :param save_path: directory for '_comparison.png' and
        '_comparison_stats_table.csv'; when empty nothing is saved.

    Only tests present in both tables are compared (inner merge on the name).
    """
    # .iloc selects by position regardless of the labels; .copy() keeps the
    # renaming below from touching the callers' tables.
    s1_res = setup1_latest_result.iloc[:, [0, 1]].copy()
    s2_res = setup2_latest_result.iloc[:, [0, 1, 2]].copy()
    s1_res.columns = ['Test Name', setup_name1]
    s2_res.columns = ['Test Name', setup_name2, 'Date']
    compare_dframe = pd.merge(s1_res, s2_res, on='Test Name')
    compare_dframe.plot(kind='bar')
    plt.legend(fontsize='small', loc='best')
    plt.xticks(rotation='horizontal')
    plt.xlabel('Index of Tests')
    plt.ylabel('MPPS/Core (Norm)')
    plt.title("Comparison between " + setup_name1 + " and " + setup_name2)
    if save_path:
        plt.savefig(os.path.join(save_path, "_comparison.png"))
        compare_dframe = compare_dframe.round(2)
        compare_dframe.to_csv(os.path.join(save_path, '_comparison_stats_table.csv'))


def create_all_data(ga_data, end_date, save_path='', detailed_test_stats=''):
    """Analyze and plot every setup found in ``ga_data``.

    :param ga_data: raw data, see the module docstring.
    :param end_date: end of the reporting period, 'dd-mm-YYYY' or 'YYYY-mm-dd'.
    :param save_path: directory for all generated files; when empty the graphs
        are drawn but nothing is saved.
    :param detailed_test_stats: when truthy, the merged tables of all setups are
        written to '_detailed_table.csv' under ``save_path``.

    WARNING: an existing '_detailed_table.csv' is deleted before the new one is
    written.

    The 'trex07' vs 'trex08' comparison chart is drawn only when both setups
    are present in ``ga_data``.
    """
    all_setups = {}
    all_setups_data = []
    for setup_name in ga_data:
        s = Setup(setup_name, end_date, ga_data[setup_name])
        s.analyze_all_setup_data()
        s.plot_all(save_path)
        all_setups_data.append(s.all_tests_data_table)
        all_setups[setup_name] = s

    if detailed_test_stats:
        detailed_table_path = os.path.join(save_path, '_detailed_table.csv')
        if os.path.exists(detailed_table_path):
            os.remove(detailed_table_path)
        # pd.concat rejects an empty list, so fall back to an empty table
        if all_setups_data:
            all_setups_data_dframe = pd.concat(all_setups_data)
        else:
            all_setups_data_dframe = pd.DataFrame()
        all_setups_data_dframe.to_csv(detailed_table_path)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed patch; the
    # comparison is assumed to be independent of detailed_test_stats - confirm.
    if 'trex07' in all_setups and 'trex08' in all_setups:
        latest_runs_comparison_bar_chart('Mellanox ConnectX-4',
                                         'Intel XL710', all_setups['trex07'].latest_test_results,
                                         all_setups['trex08'].latest_test_results,
                                         save_path=save_path)