#!/scratch/Anaconda2.4.0/bin/python
"""
Analyze performance test results per setup and render them as charts/CSV files.

This module is structured to work with raw data in the following JSON format:

    {'setup_name':  {'test1_name': [QUERY1, QUERY2, QUERY3],
                     'test2_name': [QUERY1, QUERY2, QUERY3]},
     'setup_name2': {'test1_name': [QUERY1, QUERY2, QUERY3],
                     'test2_name': [QUERY1, QUERY2, QUERY3]}}

Each QUERY is an indexable record. The positions this module reads are named by
the TestQuery class:

    index 1 - timestamp string in TestQuery.QUERY_TIMEFORMAT
    index 2 - MPPS result (anything float() accepts)
    index 3 - build id

Index 0 is not read by this module.
example (first field shown as a placeholder):
    ["<unused>", "2016-12-26 01:39:00", "9.631898", "54289"]

To support another query layout, change the indexes (and the time format) in
the TestQuery class.
"""
import os
import time
from datetime import datetime
from functools import reduce  # builtin only on Python 2; this import works on 2 and 3

import matplotlib

matplotlib.use('Agg')  # the backend must be selected before pyplot is imported
from matplotlib import pyplot as plt
from matplotlib import dates as matdates
from matplotlib import lines as matlines
import numpy as np
import pandas as pd

# Column label of the latest-results table. The backslash is part of the label.
_MPPS_COLUMN = 'MPPS\\Core (Norm)'

# Timestamp layouts accepted on the trend graph, tried in this order.
_TREND_TIME_FORMATS = ('%d-%m-%Y-%H:%M', '%Y-%m-%d-%H:%M')


def _parse_trend_timestamp(timestamp):
    """Return *timestamp* as a datetime, trying each of _TREND_TIME_FORMATS.

    Raises ValueError when the string matches none of the formats.
    """
    for time_format in _TREND_TIME_FORMATS:
        try:
            return datetime.strptime(timestamp, time_format)
        except ValueError:
            continue
    raise ValueError('time data %r does not match any of %r' % (timestamp, _TREND_TIME_FORMATS))


class TestQuery(object):
    """Positions (and time format) of the fields read from a query record."""
    QUERY_TIMEFORMAT = "%Y-%m-%d %H:%M:%S"  # date format in the query
    QUERY_TIMESTAMP = 1
    QUERY_MPPS_RESULT = 2
    QUERY_BUILD_ID = 3


class Test(object):
    """Results and summary statistics of one named test on one setup."""

    def __init__(self, name, setup_name, end_date):
        self.name = name
        self.setup_name = setup_name
        self.end_date = end_date
        self.stats = []  # tuple, filled by analyze_all_test_data
        self.results_df = []  # dataFrame, filled by analyze_all_test_data
        self.latest_result = []  # float, filled by analyze_all_test_data
        self.latest_result_date = ''  # string, filled by analyze_all_test_data

    def analyze_all_test_data(self, raw_test_data):
        """Build the results table and statistics from the raw query records.

        Sets:
            results_df         - DataFrame with columns
                                 'Build Id', 'Setup', <name>, '<name> Date'
            stats              - (avg, min, max, std, error %, number of results)
                                 where error = ((max - min) / avg) * 100
                                 (NaN when the average is zero)
            latest_result      - result of the last record, as float
            latest_result_date - its timestamp formatted as 'dd-mm-YYYY-HH:MM'

        Raises:
            IndexError: if raw_test_data holds no records.
            ValueError: if a timestamp does not match TestQuery.QUERY_TIMEFORMAT
                or a result cannot be converted to float.
        """
        date_column = self.name + ' Date'
        test_results = []
        test_dates = []
        test_build_ids = []
        for query in raw_test_data:
            time_of_query = time.strptime(query[TestQuery.QUERY_TIMESTAMP], TestQuery.QUERY_TIMEFORMAT)
            test_dates.append(time.strftime("%d-%m-%Y-%H:%M", time_of_query))
            test_results.append(float(query[TestQuery.QUERY_MPPS_RESULT]))
            test_build_ids.append(query[TestQuery.QUERY_BUILD_ID])
        if not test_results:
            raise IndexError('no results to analyze for test %r' % (self.name,))

        # The column order is given explicitly so it does not depend on how the
        # installed pandas version orders dict keys. The result column is left
        # numeric so the statistics below can be computed on it.
        test_results_df = pd.DataFrame(
            {"Build Id": test_build_ids,
             "Setup": [self.setup_name] * len(test_results),
             self.name: test_results,
             date_column: test_dates},
            columns=["Build Id", "Setup", self.name, date_column])

        results = test_results_df[self.name]
        stats_avg = float(results.mean())
        stats_min = float(results.min())
        stats_max = float(results.max())
        stats_std = float(results.std())
        if stats_avg:
            stats_error = float(((stats_max - stats_min) / stats_avg) * 100)
        else:
            stats_error = float('nan')  # relative error is undefined for a zero average

        self.latest_result = test_results[-1]
        self.latest_result_date = test_dates[-1]
        self.results_df = test_results_df
        self.stats = (stats_avg, stats_min, stats_max, stats_std, stats_error, len(test_results))


class Setup(object):
    """All tests of one setup, with their aggregated tables and plots."""

    def __init__(self, name, end_date, raw_setup_data):
        self.name = name
        self.end_date = end_date  # string of date
        self.tests = []  # list of test objects
        self.all_tests_data_table = pd.DataFrame()  # dataframe
        self.setup_trend_stats = pd.DataFrame()  # dataframe
        self.latest_test_results = pd.DataFrame()  # dataframe
        self.raw_setup_data = raw_setup_data  # dictionary
        self.test_names = list(raw_setup_data.keys())  # list of names

    def analyze_all_tests(self):
        """Create and analyze one Test per test name found in the raw data.

        The list is rebuilt on every call, so calling this twice does not
        duplicate tests.
        """
        analyzed_tests = []
        for test_name in self.test_names:
            t = Test(test_name, self.name, self.end_date)
            t.analyze_all_test_data(self.raw_setup_data[test_name])
            analyzed_tests.append(t)
        self.tests = analyzed_tests

    def analyze_latest_test_results(self):
        """Collect the latest result of every analyzed test into one table.

        The table is indexed from 1 and its columns are ordered
        name | MPPS | date.
        """
        latest = {
            'Test Name': [test.name for test in self.tests],
            _MPPS_COLUMN: [test.latest_result for test in self.tests],
            'Date': [test.latest_result_date for test in self.tests],
        }
        self.latest_test_results = pd.DataFrame(
            latest,
            columns=['Test Name', _MPPS_COLUMN, 'Date'],
            index=range(1, len(self.tests) + 1))

    def analyze_all_tests_stats(self):
        """Collect the statistics tuple of every analyzed test into one table."""
        test_names = [test.name for test in self.tests]
        all_test_stats = [test.stats for test in self.tests]
        self.setup_trend_stats = pd.DataFrame(
            all_test_stats, index=test_names,
            columns=['Avg MPPS/Core (Norm)', 'Min', 'Max', 'Std', 'Error (%)', 'Total Results'])
        self.setup_trend_stats.index.name = 'Test Name'

    def analyze_all_tests_trend(self):
        """Outer-merge the result tables of all analyzed tests into one table.

        With no analyzed tests the table is an empty DataFrame.
        """
        all_tests_trend_data = [test.results_df for test in self.tests]
        if not all_tests_trend_data:
            self.all_tests_data_table = pd.DataFrame()
            return
        self.all_tests_data_table = reduce(lambda x, y: pd.merge(x, y, how='outer'), all_tests_trend_data)

    def plot_trend_graph_all_tests(self, save_path='', file_name='_trend_graph.png'):
        """Plot every test's results over time on one graph.

        When save_path is given, the graph is saved there as
        <setup name><file_name>, and the trend statistics (if any) as
        <setup name>_trend_stats.csv. All figures are closed afterwards.
        """
        start_date = ''
        for test in self.tests:
            test_data = test.results_df[test.name].tolist()
            test_time_stamps = test.results_df[test.name + ' Date'].tolist()
            start_date = test_time_stamps[0]
            # repeat the last result at the end of end_date so that every line
            # runs up to the end of the period
            test_time_stamps.append(self.end_date + '-23:59')
            test_data.append(test_data[-1])
            plot_times = [_parse_trend_timestamp(ts) for ts in test_time_stamps]
            plt.plot(plot_times, test_data, '.-', label=test.name)
        if self.tests:
            plt.legend(fontsize='small', loc='best')
        plt.ylabel('MPPS/Core (Norm)')
        plt.title('Setup: ' + self.name)
        plt.tick_params(
            axis='x',
            which='both',
            bottom=False,
            top=False,
            labelbottom=False)
        # start_date is 'dd-mm-YYYY-HH:MM'; [:-6] drops the '-HH:MM' part
        plt.xlabel('Time Period: ' + start_date[:-6] + ' - ' + self.end_date)
        if save_path:
            plt.savefig(os.path.join(save_path, self.name + file_name))
            if not self.setup_trend_stats.empty:
                (self.setup_trend_stats.round(2)).to_csv(os.path.join(save_path, self.name + '_trend_stats.csv'))
        plt.close('all')

    def plot_latest_test_results_bar_chart(self, save_path='', img_file_name='_latest_test_runs.png',
                                           stats_file_name='_latest_test_runs_stats.csv'):
        """Plot the latest result of every test as a bar chart.

        When save_path is given, the chart is saved there as
        <setup name><img_file_name> and the table as
        <setup name><stats_file_name>. Does nothing when there are no latest
        results. All figures are closed afterwards.
        """
        if self.latest_test_results.empty:
            return
        _, axes = plt.subplots()
        colors_for_bars = ['b', 'g', 'r', 'c', 'm', 'y']
        # plot only mpps data, which is in column 1
        self.latest_test_results.iloc[:, [1]].plot(kind='bar', legend=False, color=colors_for_bars, ax=axes)
        plt.xticks(rotation='horizontal')
        plt.xlabel('Index of Tests')
        plt.ylabel('MPPS/Core (Norm)')
        plt.title("Test Runs for Setup: " + self.name)
        if save_path:
            plt.savefig(os.path.join(save_path, self.name + img_file_name))
            (self.latest_test_results.round(2)).to_csv(
                os.path.join(save_path, self.name + stats_file_name))
        plt.close('all')

    def analyze_all_setup_data(self):
        """Run every analysis step; analyze_all_tests must come first."""
        self.analyze_all_tests()
        self.analyze_latest_test_results()
        self.analyze_all_tests_stats()
        self.analyze_all_tests_trend()

    def plot_all(self, save_path=''):
        """Produce the latest-results bar chart and the trend graph."""
        self.plot_latest_test_results_bar_chart(save_path)
        self.plot_trend_graph_all_tests(save_path)


def latest_runs_comparison_bar_chart(setup_name1, setup_name2, setup1_latest_result, setup2_latest_result,
                                     save_path=''):
    """Plot the latest results of two setups side by side, per test name.

    Both tables are expected in the layout of Setup.latest_test_results:
    column 0 is the test name, column 1 the MPPS\\Core value, column 2 the date.
    Only tests present in both tables are compared. When save_path is given,
    the chart is saved there as _comparison.png and the table as
    _comparison_stats_table.csv. All figures are closed afterwards.
    """
    # .copy() so that renaming the columns never touches the callers' tables
    s1_res = setup1_latest_result.iloc[:, [0, 1]].copy()  # test name, MPPS\Core
    s2_res = setup2_latest_result.iloc[:, [0, 1, 2]].copy()  # test name, MPPS\Core, Date
    s1_res.columns = ['Test Name', setup_name1]
    s2_res.columns = ['Test Name', setup_name2, 'Date']
    compare_dframe = pd.merge(s1_res, s2_res, on='Test Name')
    compare_dframe.plot(kind='bar')
    plt.legend(fontsize='small', loc='best')
    plt.xticks(rotation='horizontal')
    plt.xlabel('Index of Tests')
    plt.ylabel('MPPS/Core (Norm)')
    plt.title("Comparison between " + setup_name1 + " and " + setup_name2)
    if save_path:
        plt.savefig(os.path.join(save_path, "_comparison.png"))
        compare_dframe = compare_dframe.round(2)
        compare_dframe.to_csv(os.path.join(save_path, '_comparison_stats_table.csv'))
    plt.close('all')


# WARNING: if the file _detailed_table.csv already exists under save_path, this
# function deletes it before writing the new one, so data never accumulates
def create_all_data(ga_data, end_date, save_path='', detailed_test_stats=''):
    """Analyze and plot every setup in ga_data, then compare trex07 with trex08.

    Args:
        ga_data: raw data, {setup name: {test name: [query, ...]}}.
        end_date: string of the last date of the period; used on the graphs.
        save_path: directory for the produced images and CSV files; with an
            empty value the per-setup plots are not saved.
        detailed_test_stats: when truthy, the merged result tables of all
            setups are written to _detailed_table.csv under save_path.

    The comparison chart is produced only when both the 'trex07' and the
    'trex08' setups are present in ga_data.
    """
    all_setups = {}
    all_setups_data = []
    for setup_name in ga_data.keys():
        s = Setup(setup_name, end_date, ga_data[setup_name])
        s.analyze_all_setup_data()
        s.plot_all(save_path)
        all_setups_data.append(s.all_tests_data_table)
        all_setups[setup_name] = s

    if detailed_test_stats:
        detailed_table_path = os.path.join(save_path, '_detailed_table.csv')
        if os.path.exists(detailed_table_path):
            os.remove(detailed_table_path)
        if all_setups_data:
            all_setups_data_dframe = pd.concat(all_setups_data)
            all_setups_data_dframe.to_csv(detailed_table_path)

    if 'trex07' in all_setups and 'trex08' in all_setups:
        trex07setup = all_setups['trex07']
        trex08setup = all_setups['trex08']
        latest_runs_comparison_bar_chart('Mellanox ConnectX-4', 'Intel XL710',
                                         trex07setup.latest_test_results, trex08setup.latest_test_results,
                                         save_path=save_path)