summaryrefslogtreecommitdiffstats
path: root/doc/TRexDataAnalysisV2.py
diff options
context:
space:
mode:
Diffstat (limited to 'doc/TRexDataAnalysisV2.py')
-rwxr-xr-xdoc/TRexDataAnalysisV2.py470
1 files changed, 235 insertions, 235 deletions
diff --git a/doc/TRexDataAnalysisV2.py b/doc/TRexDataAnalysisV2.py
index 19143199..0696f864 100755
--- a/doc/TRexDataAnalysisV2.py
+++ b/doc/TRexDataAnalysisV2.py
@@ -1,235 +1,235 @@
-#!/scratch/Anaconda2.4.0/bin/python
-import pandas as pd
-import numpy as np
-import matplotlib
-
-matplotlib.use('Agg')
-from matplotlib import pyplot as plt
-from matplotlib import dates as matdates
-from matplotlib import lines as matlines
-import os
-import time
-from datetime import datetime
-
-"""
-This Module is structured to work with a raw data at the following JSON format:
-
- {'setup_name': {'test1_name':[QUERY1,QUERY2,QUERY3],
- 'test2_name':[QUERY1,QUERY2,QUERY3]
- }
- 'setup_name2': {'test1_name':[QUERY1,QUERY2,QUERY3],
- 'test2_name':[QUERY1,QUERY2,QUERY3]
- }
- }
-
- The Query structure is set (currently) to this:
-
- (test_name,state, date,hour,minute,mpps_result,mpps_min,mpps_max,build_id) example:
-
- ["syn attack - 64 bytes, single CPU", "stl", "20161226", "01", "39", "9.631898", "9.5", "11.5", "54289"]
-
- it can be changed to support other formats of queries, simply change the query class to support your desired structure
- the query class specify the indexes of the data within the query tuple
-
-"""
-
-
-class TestQuery(object):
- query_dateformat = "%Y%m%d" # date format in the query
- QUERY_DATE = 2
- QUERY_HOUR = 3
- QUERY_MINUTE = 4
- QUERY_MPPS_RESULT = 5
- QUERY_TEST_MIN = 6
- QUERY_TEST_MAX = 7
- QUERY_BUILD_ID = 8
-
-
-class Test:
- def __init__(self, name, setup_name, end_date):
- self.name = name
- self.setup_name = setup_name
- self.end_date = end_date
- self.stats = [] # tuple
- self.results_df = [] # dataFrame
- self.latest_result = [] # float
- self.latest_result_date = '' # string
-
- def analyze_all_test_data(self, raw_test_data):
- test_results = []
- test_dates = []
- test_build_ids = []
- test_mins = set()
- test_maxs = set()
- for query in raw_test_data:
- date_formatted = time.strftime("%d-%m-%Y",
- time.strptime(query[int(TestQuery.QUERY_DATE)], TestQuery.query_dateformat))
- time_of_res = date_formatted + '-' + query[int(TestQuery.QUERY_HOUR)] + ':' + query[
- int(TestQuery.QUERY_MINUTE)]
- test_dates.append(time_of_res)
- test_results.append(float(query[int(TestQuery.QUERY_MPPS_RESULT)]))
- test_build_ids.append(query[int(TestQuery.QUERY_BUILD_ID)])
- test_mins.add(float(query[int(TestQuery.QUERY_TEST_MIN)]))
- test_maxs.add(float(query[int(TestQuery.QUERY_TEST_MAX)]))
- test_results_df = pd.DataFrame({self.name: test_results, self.name + ' Date': test_dates,
- "Setup": ([self.setup_name] * len(test_results)), "Build Id": test_build_ids},
- dtype='str')
- stats = tuple(
- [float(test_results_df[self.name].mean()), min(test_mins), max(test_maxs)]) # stats = (avg_mpps,min,max)
- self.latest_result = float(test_results_df[self.name].iloc[-1])
- self.latest_result_date = str(test_results_df[test_results_df.columns[3]].iloc[-1])
- self.results_df = test_results_df
- self.stats = stats
-
-
-class Setup:
- def __init__(self, name, start_date, end_date, raw_setup_data):
- self.name = name
- self.start_date = start_date # string of date
- self.end_date = end_date # string of date
- self.tests = [] # list of test objects
- self.all_tests_data_table = pd.DataFrame() # dataframe
- self.setup_trend_stats = pd.DataFrame() # dataframe
- self.latest_test_results = pd.DataFrame() # dataframe
- self.raw_setup_data = raw_setup_data # dictionary
- self.test_names = raw_setup_data.keys() # list of names
-
- def analyze_all_tests(self):
- for test_name in self.test_names:
- t = Test(test_name, self.name, self.end_date)
- t.analyze_all_test_data(self.raw_setup_data[test_name])
- self.tests.append(t)
-
- def analyze_latest_test_results(self):
- test_names = []
- test_dates = []
- test_latest_results = []
- for test in self.tests:
- test_names.append(test.name)
- test_dates.append(test.latest_result_date)
- test_latest_results.append(test.latest_result)
- self.latest_test_results = pd.DataFrame(
- {'Date': test_dates, 'Test Name': test_names, 'MPPS\Core (Norm)': test_latest_results},
- index=range(1, len(test_latest_results) + 1))
- self.latest_test_results = self.latest_test_results[[2, 1, 0]] # re-order columns to name|MPPS|date
-
- def analyze_all_tests_stats(self):
- test_names = []
- all_test_stats = []
- for test in self.tests:
- test_names.append(test.name)
- all_test_stats.append(test.stats)
- self.setup_trend_stats = pd.DataFrame(all_test_stats, index=test_names,
- columns=['Avg MPPS/Core (Norm)', 'Golden Min', 'Golden Max'])
- self.setup_trend_stats.index.name = 'Test Name'
-
- def analyze_all_tests_trend(self):
- all_tests_trend_data = []
- for test in self.tests:
- all_tests_trend_data.append(test.results_df)
- self.all_tests_data_table = reduce(lambda x, y: pd.merge(x, y, how='outer'), all_tests_trend_data)
-
- def plot_trend_graph_all_tests(self, save_path='', file_name='_trend_graph.png'):
- time_format1 = '%d-%m-%Y-%H:%M'
- time_format2 = '%Y-%m-%d-%H:%M'
- for test in self.tests:
- test_data = test.results_df[test.results_df.columns[2]].tolist()
- test_time_stamps = test.results_df[test.results_df.columns[3]].tolist()
- test_time_stamps.append(self.end_date + '-23:59')
- test_data.append(test_data[-1])
- float_test_time_stamps = []
- for ts in test_time_stamps:
- try:
- float_test_time_stamps.append(matdates.date2num(datetime.strptime(ts, time_format1)))
- except:
- float_test_time_stamps.append(matdates.date2num(datetime.strptime(ts, time_format2)))
- plt.plot_date(x=float_test_time_stamps, y=test_data, label=test.name, fmt='-', xdate=True)
- plt.legend(fontsize='small', loc='best')
- plt.ylabel('MPPS/Core (Norm)')
- plt.title('Setup: ' + self.name)
- plt.tick_params(
- axis='x',
- which='both',
- bottom='off',
- top='off',
- labelbottom='off')
- plt.xlabel('Time Period: ' + self.start_date + ' - ' + self.end_date)
- if save_path:
- plt.savefig(os.path.join(save_path, self.name + file_name))
- if not self.setup_trend_stats.empty:
- (self.setup_trend_stats.round(2)).to_csv(os.path.join(save_path, self.name +
- '_trend_stats.csv'))
- plt.close('all')
-
- def plot_latest_test_results_bar_chart(self, save_path='', img_file_name='_latest_test_runs.png',
- stats_file_name='_latest_test_runs_stats.csv'):
- plt.figure()
- colors_for_bars = ['b', 'g', 'r', 'c', 'm', 'y']
- self.latest_test_results[[1]].plot(kind='bar', legend=False,
- color=colors_for_bars) # plot only mpps data, which is in column 1
- plt.xticks(rotation='horizontal')
- plt.xlabel('Index of Tests')
- plt.ylabel('MPPS/Core (Norm)')
- plt.title("Test Runs for Setup: " + self.name)
- if save_path:
- plt.savefig(os.path.join(save_path, self.name + img_file_name))
- (self.latest_test_results.round(2)).to_csv(
- os.path.join(save_path, self.name + stats_file_name))
- plt.close('all')
-
- def analyze_all_setup_data(self):
- self.analyze_all_tests()
- self.analyze_latest_test_results()
- self.analyze_all_tests_stats()
- self.analyze_all_tests_trend()
-
- def plot_all(self, save_path=''):
- self.plot_latest_test_results_bar_chart(save_path)
- self.plot_trend_graph_all_tests(save_path)
-
-
-def latest_runs_comparison_bar_chart(setup_name1, setup_name2, setup1_latest_result, setup2_latest_result,
- save_path=''
- ):
- s1_res = setup1_latest_result[[0, 1]] # column0 is test name, column1 is MPPS\Core
- s2_res = setup2_latest_result[[0, 1, 2]] # column0 is test name, column1 is MPPS\Core, column2 is Date
- s1_res.columns = ['Test Name', setup_name1]
- s2_res.columns = ['Test Name', setup_name2, 'Date']
- compare_dframe = pd.merge(s1_res, s2_res, on='Test Name')
- compare_dframe.plot(kind='bar')
- plt.legend(fontsize='small', loc='best')
- plt.xticks(rotation='horizontal')
- plt.xlabel('Index of Tests')
- plt.ylabel('MPPS/Core (Norm)')
- plt.title("Comparison between " + setup_name1 + " and " + setup_name2)
- if save_path:
- plt.savefig(os.path.join(save_path, "_comparison.png"))
- compare_dframe = compare_dframe.round(2)
- compare_dframe.to_csv(os.path.join(save_path, '_comparison_stats_table.csv'))
-
- # WARNING: if the file _all_stats.csv already exists, this script deletes it, to prevent overflowing of data
-
-
-def create_all_data(ga_data, start_date, end_date, save_path='', detailed_test_stats=''):
- all_setups = {}
- all_setups_data = []
- setup_names = ga_data.keys()
- for setup_name in setup_names:
- s = Setup(setup_name, start_date, end_date, ga_data[setup_name])
- s.analyze_all_setup_data()
- s.plot_all(save_path)
- all_setups_data.append(s.all_tests_data_table)
- all_setups[setup_name] = s
-
- if detailed_test_stats:
- if os.path.exists(os.path.join(save_path, '_detailed_table.csv')):
- os.remove(os.path.join(save_path, '_detailed_table.csv'))
- all_setups_data_dframe = pd.DataFrame().append(all_setups_data)
- all_setups_data_dframe.to_csv(os.path.join(save_path, '_detailed_table.csv'))
-
- trex07setup = all_setups['trex07']
- trex08setup = all_setups['trex08']
- latest_runs_comparison_bar_chart('Mellanox ConnectX-4',
- 'Intel XL710', trex07setup.latest_test_results,
- trex08setup.latest_test_results,
- save_path=save_path)
+#!/scratch/Anaconda2.4.0/bin/python
+import pandas as pd
+import numpy as np
+import matplotlib
+
+matplotlib.use('Agg')
+from matplotlib import pyplot as plt
+from matplotlib import dates as matdates
+from matplotlib import lines as matlines
+import os
+import time
+from datetime import datetime
+
+"""
+This Module is structured to work with a raw data at the following JSON format:
+
+ {'setup_name': {'test1_name':[QUERY1,QUERY2,QUERY3],
+ 'test2_name':[QUERY1,QUERY2,QUERY3]
+ }
+ 'setup_name2': {'test1_name':[QUERY1,QUERY2,QUERY3],
+ 'test2_name':[QUERY1,QUERY2,QUERY3]
+ }
+ }
+
+ The Query structure is set (currently) to this:
+
+ (test_name,state, date,hour,minute,mpps_result,mpps_min,mpps_max,build_id) example:
+
+ ["syn attack - 64 bytes, single CPU", "stl", "20161226", "01", "39", "9.631898", "9.5", "11.5", "54289"]
+
+ it can be changed to support other formats of queries, simply change the query class to support your desired structure
+ the query class specify the indexes of the data within the query tuple
+
+"""
+
+
+class TestQuery(object):
+ QUERY_TIMEFORMAT = "%Y-%m-%d %H:%M:%S" # date format in the query
+ QUERY_TIMESTAMP = 1
+ QUERY_MPPS_RESULT = 2
+ QUERY_BUILD_ID = 3
+
+
+class Test:
+ def __init__(self, name, setup_name, end_date):
+ self.name = name
+ self.setup_name = setup_name
+ self.end_date = end_date
+ self.stats = [] # tuple
+ self.results_df = [] # dataFrame
+ self.latest_result = [] # float
+ self.latest_result_date = '' # string
+
+ def analyze_all_test_data(self, raw_test_data):
+ test_results = []
+ test_dates = []
+ test_build_ids = []
+ for query in raw_test_data:
+ # date_formatted = time.strftime("%d-%m-%Y",
+ # time.strptime(query[int(TestQuery.QUERY_DATE)], TestQuery.query_dateformat))
+ # time_of_res = date_formatted + '-' + query[int(TestQuery.QUERY_HOUR)] + ':' + query[
+ # int(TestQuery.QUERY_MINUTE)]
+ time_of_query = time.strptime(query[TestQuery.QUERY_TIMESTAMP], TestQuery.QUERY_TIMEFORMAT)
+ time_formatted = time.strftime("%d-%m-%Y-%H:%M", time_of_query)
+ test_dates.append(time_formatted)
+ test_results.append(float(query[int(TestQuery.QUERY_MPPS_RESULT)]))
+ test_build_ids.append(query[int(TestQuery.QUERY_BUILD_ID)])
+ test_results_df = pd.DataFrame({self.name: test_results, self.name + ' Date': test_dates,
+ "Setup": ([self.setup_name] * len(test_results)), "Build Id": test_build_ids},
+ dtype='str')
+ stats_avg = float(test_results_df[self.name].mean())
+ stats_min = float(test_results_df[self.name].min())
+ stats_max = float(test_results_df[self.name].max())
+ stats = tuple(
+ [stats_avg, stats_min, stats_max,
+ float(test_results_df[self.name].std()),
+ float(((stats_max - stats_min) / stats_avg) * 100),
+ len(test_results)]) # stats = (avg_mpps,min,max,std,error, no of test_results) error = ((max-min)/avg)*100
+ self.latest_result = float(test_results_df[self.name].iloc[-1])
+ self.latest_result_date = str(test_results_df[test_results_df.columns[3]].iloc[-1])
+ self.results_df = test_results_df
+ self.stats = stats
+
+
+class Setup:
+ def __init__(self, name, end_date, raw_setup_data):
+ self.name = name
+ self.end_date = end_date # string of date
+ self.tests = [] # list of test objects
+ self.all_tests_data_table = pd.DataFrame() # dataframe
+ self.setup_trend_stats = pd.DataFrame() # dataframe
+ self.latest_test_results = pd.DataFrame() # dataframe
+ self.raw_setup_data = raw_setup_data # dictionary
+ self.test_names = raw_setup_data.keys() # list of names
+
+ def analyze_all_tests(self):
+ for test_name in self.test_names:
+ t = Test(test_name, self.name, self.end_date)
+ t.analyze_all_test_data(self.raw_setup_data[test_name])
+ self.tests.append(t)
+
+ def analyze_latest_test_results(self):
+ test_names = []
+ test_dates = []
+ test_latest_results = []
+ for test in self.tests:
+ test_names.append(test.name)
+ test_dates.append(test.latest_result_date)
+ test_latest_results.append(test.latest_result)
+ self.latest_test_results = pd.DataFrame(
+ {'Date': test_dates, 'Test Name': test_names, 'MPPS\Core (Norm)': test_latest_results},
+ index=range(1, len(test_latest_results) + 1))
+ self.latest_test_results = self.latest_test_results[[2, 1, 0]] # re-order columns to name|MPPS|date
+
+ def analyze_all_tests_stats(self):
+ test_names = []
+ all_test_stats = []
+ for test in self.tests:
+ test_names.append(test.name)
+ all_test_stats.append(test.stats)
+ self.setup_trend_stats = pd.DataFrame(all_test_stats, index=test_names,
+ columns=['Avg MPPS/Core (Norm)', 'Min', 'Max', 'Std','Error (%)', 'Total Results'])
+ self.setup_trend_stats.index.name = 'Test Name'
+
+ def analyze_all_tests_trend(self):
+ all_tests_trend_data = []
+ for test in self.tests:
+ all_tests_trend_data.append(test.results_df)
+ self.all_tests_data_table = reduce(lambda x, y: pd.merge(x, y, how='outer'), all_tests_trend_data)
+
+ def plot_trend_graph_all_tests(self, save_path='', file_name='_trend_graph.png'):
+ time_format1 = '%d-%m-%Y-%H:%M'
+ time_format2 = '%Y-%m-%d-%H:%M'
+ for test in self.tests:
+ test_data = test.results_df[test.results_df.columns[2]].tolist()
+ test_time_stamps = test.results_df[test.results_df.columns[3]].tolist()
+ start_date = test_time_stamps[0]
+ test_time_stamps.append(self.end_date + '-23:59')
+ test_data.append(test_data[-1])
+ float_test_time_stamps = []
+ for ts in test_time_stamps:
+ try:
+ float_test_time_stamps.append(matdates.date2num(datetime.strptime(ts, time_format1)))
+ except:
+ float_test_time_stamps.append(matdates.date2num(datetime.strptime(ts, time_format2)))
+ plt.plot_date(x=float_test_time_stamps, y=test_data, label=test.name, fmt='.-', xdate=True)
+ plt.legend(fontsize='small', loc='best')
+ plt.ylabel('MPPS/Core (Norm)')
+ plt.title('Setup: ' + self.name)
+ plt.tick_params(
+ axis='x',
+ which='both',
+ bottom='off',
+ top='off',
+ labelbottom='off')
+ plt.xlabel('Time Period: ' + start_date[:-6] + ' - ' + self.end_date)
+ if save_path:
+ plt.savefig(os.path.join(save_path, self.name + file_name))
+ if not self.setup_trend_stats.empty:
+ (self.setup_trend_stats.round(2)).to_csv(os.path.join(save_path, self.name +
+ '_trend_stats.csv'))
+ plt.close('all')
+
+ def plot_latest_test_results_bar_chart(self, save_path='', img_file_name='_latest_test_runs.png',
+ stats_file_name='_latest_test_runs_stats.csv'):
+ plt.figure()
+ colors_for_bars = ['b', 'g', 'r', 'c', 'm', 'y']
+ self.latest_test_results[[1]].plot(kind='bar', legend=False,
+ color=colors_for_bars) # plot only mpps data, which is in column 1
+ plt.xticks(rotation='horizontal')
+ plt.xlabel('Index of Tests')
+ plt.ylabel('MPPS/Core (Norm)')
+ plt.title("Test Runs for Setup: " + self.name)
+ if save_path:
+ plt.savefig(os.path.join(save_path, self.name + img_file_name))
+ (self.latest_test_results.round(2)).to_csv(
+ os.path.join(save_path, self.name + stats_file_name))
+ plt.close('all')
+
+ def analyze_all_setup_data(self):
+ self.analyze_all_tests()
+ self.analyze_latest_test_results()
+ self.analyze_all_tests_stats()
+ self.analyze_all_tests_trend()
+
+ def plot_all(self, save_path=''):
+ self.plot_latest_test_results_bar_chart(save_path)
+ self.plot_trend_graph_all_tests(save_path)
+
+
+def latest_runs_comparison_bar_chart(setup_name1, setup_name2, setup1_latest_result, setup2_latest_result,
+ save_path=''
+ ):
+ s1_res = setup1_latest_result[[0, 1]] # column0 is test name, column1 is MPPS\Core
+ s2_res = setup2_latest_result[[0, 1, 2]] # column0 is test name, column1 is MPPS\Core, column2 is Date
+ s1_res.columns = ['Test Name', setup_name1]
+ s2_res.columns = ['Test Name', setup_name2, 'Date']
+ compare_dframe = pd.merge(s1_res, s2_res, on='Test Name')
+ compare_dframe.plot(kind='bar')
+ plt.legend(fontsize='small', loc='best')
+ plt.xticks(rotation='horizontal')
+ plt.xlabel('Index of Tests')
+ plt.ylabel('MPPS/Core (Norm)')
+ plt.title("Comparison between " + setup_name1 + " and " + setup_name2)
+ if save_path:
+ plt.savefig(os.path.join(save_path, "_comparison.png"))
+ compare_dframe = compare_dframe.round(2)
+ compare_dframe.to_csv(os.path.join(save_path, '_comparison_stats_table.csv'))
+
+ # WARNING: if the file _all_stats.csv already exists, this script deletes it, to prevent overflowing of data
+
+
+def create_all_data(ga_data, end_date, save_path='', detailed_test_stats=''):
+ all_setups = {}
+ all_setups_data = []
+ setup_names = ga_data.keys()
+ for setup_name in setup_names:
+ s = Setup(setup_name, end_date, ga_data[setup_name])
+ s.analyze_all_setup_data()
+ s.plot_all(save_path)
+ all_setups_data.append(s.all_tests_data_table)
+ all_setups[setup_name] = s
+
+ if detailed_test_stats:
+ if os.path.exists(os.path.join(save_path, '_detailed_table.csv')):
+ os.remove(os.path.join(save_path, '_detailed_table.csv'))
+ all_setups_data_dframe = pd.DataFrame().append(all_setups_data)
+ all_setups_data_dframe.to_csv(os.path.join(save_path, '_detailed_table.csv'))
+
+ trex07setup = all_setups['trex07']
+ trex08setup = all_setups['trex08']
+ latest_runs_comparison_bar_chart('Mellanox ConnectX-4',
+ 'Intel XL710', trex07setup.latest_test_results,
+ trex08setup.latest_test_results,
+ save_path=save_path)