new TRexDataAnalysis version, with OOP design 27/5227/1
author itraviv <[email protected]>
Thu, 5 Jan 2017 17:21:13 +0000 (19:21 +0200)
committer itraviv <[email protected]>
Sun, 8 Jan 2017 08:11:36 +0000 (10:11 +0200)
Signed-off-by: itraviv <[email protected]>
doc/AnalyticsWebReport.py
doc/TRexDataAnalysisV2.py [new file with mode: 0755]

index 1806cab..e3f6504 100755 (executable)
@@ -1,7 +1,7 @@
 import os\r
 import sys\r
 import AnalyticsConnect as ac\r
-import TRexDataAnalysis as tr\r
+import TRexDataAnalysisV2 as tr\r
 import time\r
 import datetime\r
 \r
@@ -20,8 +20,8 @@ def main(verbose=False, detailed_test_stats=''):
         print('Saving data to %s' % dest_path)\r
         if detailed_test_stats:\r
             print('generating detailed table for test results')\r
-    tr.create_all_data(ga_all_data_dict, setups, start_date, current_date, save_path=dest_path,\r
-                       add_stats='yes', detailed_test_stats=detailed_test_stats)\r
+    tr.create_all_data(ga_all_data_dict, start_date, current_date, save_path=dest_path,\r
+                       detailed_test_stats=detailed_test_stats)\r
     if verbose:\r
         print('Done without errors.')\r
 \r
diff --git a/doc/TRexDataAnalysisV2.py b/doc/TRexDataAnalysisV2.py
new file mode 100755 (executable)
index 0000000..e7e82b2
--- /dev/null
@@ -0,0 +1,185 @@
+#!/scratch/Anaconda2.4.0/bin/python\r
+import pandas as pd\r
+import numpy as np\r
+import matplotlib\r
+\r
+matplotlib.use('Agg')\r
+from matplotlib import pyplot as plt\r
+import os\r
+import time\r
+\r
+\r
+### TODO: insert a description of a test query\r
+\r
+class Test:\r
+    """Hold the analyzed results of one named test that ran on one setup.\r
+\r
+    The result attributes below are empty placeholders until\r
+    analyze_all_test_data() has been called.\r
+    """\r
+\r
+    def __init__(self, name, setup_name):\r
+        self.name = name\r
+        self.setup_name = setup_name\r
+        self.stats = []  # becomes the tuple (average result, min, max)\r
+        self.results_df = []  # becomes a DataFrame with one row per raw result\r
+        self.latest_result = []  # becomes a float: the last result in the raw data\r
+        self.latest_result_date = ''  # string: time stamp of the last result\r
+\r
+    def analyze_all_test_data(self, raw_test_data):\r
+        """Parse the raw rows of this test and fill in the result attributes.\r
+\r
+        Each row of raw_test_data is indexed by position:\r
+          [2] run date as 'YYYYMMDD'\r
+          [3], [4] joined as '[3]:[4]' after the date (presumably hour and minute - TODO confirm)\r
+          [5] the test result, [6] and [7] the values collected for the min and max stats\r
+          [8] build id\r
+\r
+        Raises ValueError when raw_test_data holds no rows.\r
+        """\r
+        test_results = []\r
+        test_dates = []\r
+        test_build_ids = []\r
+        test_mins = set()\r
+        test_maxs = set()\r
+        for query in raw_test_data:\r
+            test_results.append(float(query[5]))\r
+            date_formatted = time.strftime("%d-%m-%Y", time.strptime(query[2], "%Y%m%d"))\r
+            test_dates.append(date_formatted + '-' + query[3] + ':' + query[4])\r
+            test_build_ids.append(query[8])\r
+            test_mins.add(float(query[6]))\r
+            test_maxs.add(float(query[7]))\r
+        if not test_results:\r
+            # min()/max() of an empty set would raise a ValueError anyway; say why instead\r
+            raise ValueError('no results to analyze for test %r on setup %r' % (self.name, self.setup_name))\r
+        test_results_df = pd.DataFrame({self.name: test_results, (self.name + ' Date'): test_dates,\r
+                                        "Setup": ([self.setup_name] * len(test_results)), "Build Id": test_build_ids})\r
+        stats = (float(test_results_df[self.name].mean()), min(test_mins), max(test_maxs))  # (avg_mpps,min,max)\r
+        self.latest_result = test_results[-1]\r
+        # Take the date from the list it was collected in: the position of the date column\r
+        # inside the DataFrame depends on how pandas orders dict keys, so columns[3] is not reliable.\r
+        self.latest_result_date = str(test_dates[-1])\r
+        self.results_df = test_results_df\r
+        self.stats = stats\r
+\r
+\r
+class Setup:\r
+    """Collect, analyze and plot the results of every test that ran on one setup.\r
+\r
+    raw_setup_data maps each test name to the raw rows that are handed to\r
+    Test.analyze_all_test_data(). start_date and end_date are strings; they are\r
+    only used to label the x axis of the trend graph.\r
+    """\r
+\r
+    # Column labels of latest_test_results, in the order they are presented.\r
+    _NAME_COLUMN = 'Test Name'\r
+    _MPPS_COLUMN = 'MPPS\\Core (Norm)'  # same text as before; the backslash is now escaped properly\r
+    _DATE_COLUMN = 'Date'\r
+\r
+    def __init__(self, name, start_date, end_date, raw_setup_data):\r
+        self.name = name\r
+        self.start_date = start_date  # string of date\r
+        self.end_date = end_date  # string of date\r
+        self.tests = []  # list of Test objects, filled by analyze_all_tests()\r
+        self.all_tests_data_table = pd.DataFrame()  # outer merge of every test's results_df\r
+        self.setup_trend_stats = pd.DataFrame()  # one row of stats per test\r
+        self.latest_test_results = pd.DataFrame()  # latest result of each test\r
+        self.raw_setup_data = raw_setup_data  # dictionary: test name -> raw rows\r
+        self.test_names = list(raw_setup_data)  # a real list on both Python 2 and Python 3\r
+\r
+    def analyze_all_tests(self):\r
+        """Build and analyze one Test object per test name."""\r
+        self.tests = []  # start over, so a repeated call does not duplicate the tests\r
+        for test_name in self.test_names:\r
+            test = Test(test_name, self.name)\r
+            test.analyze_all_test_data(self.raw_setup_data[test_name])\r
+            self.tests.append(test)\r
+\r
+    def analyze_latest_test_results(self):\r
+        """Build latest_test_results: test name | MPPS | date, indexed from 1."""\r
+        latest = {\r
+            self._NAME_COLUMN: [test.name for test in self.tests],\r
+            self._MPPS_COLUMN: [test.latest_result for test in self.tests],\r
+            self._DATE_COLUMN: [test.latest_result_date for test in self.tests],\r
+        }\r
+        # Name the columns explicitly: ordering them by integer position relied on pandas\r
+        # sorting the dict keys and on integer lists being accepted by [] as positions.\r
+        self.latest_test_results = pd.DataFrame(\r
+            latest, index=range(1, len(self.tests) + 1),\r
+            columns=[self._NAME_COLUMN, self._MPPS_COLUMN, self._DATE_COLUMN])\r
+\r
+    def analyze_all_tests_stats(self):\r
+        """Build setup_trend_stats from the stats tuple of every analyzed test."""\r
+        test_names = [test.name for test in self.tests]\r
+        all_test_stats = [test.stats for test in self.tests]\r
+        self.setup_trend_stats = pd.DataFrame(all_test_stats, index=test_names,\r
+                                              columns=['Avg MPPS/Core (Norm)', 'Golden Min', 'Golden Max'])\r
+        self.setup_trend_stats.index.name = 'Test Name'\r
+\r
+    def analyze_all_tests_trend(self):\r
+        """Outer-merge the results_df of every test into all_tests_data_table."""\r
+        from functools import reduce  # reduce is a builtin on Python 2 only\r
+\r
+        all_tests_trend_data = [test.results_df for test in self.tests]\r
+        if not all_tests_trend_data:\r
+            # reduce() raises TypeError on an empty sequence; an empty table is the honest result\r
+            self.all_tests_data_table = pd.DataFrame()\r
+            return\r
+        self.all_tests_data_table = reduce(lambda left, right: pd.merge(left, right, how='outer'),\r
+                                           all_tests_trend_data)\r
+\r
+    def plot_trend_graph_all_tests(self, save_path='', file_name='_trend_graph.png'):\r
+        """Plot every test's results as one line each; save the image and the stats csv if save_path is set."""\r
+        plt.figure()  # Series.plot() draws on the current axes, so make sure they are fresh\r
+        for test_name in self.test_names:\r
+            self.all_tests_data_table[test_name].plot()\r
+        if self.test_names:\r
+            plt.legend(fontsize='small', loc='best')  # once, after all the lines exist\r
+        plt.ylabel('MPPS/Core (Norm)')\r
+        plt.title('Setup: ' + self.name)\r
+        # booleans instead of the 'off' strings, which newer matplotlib releases no longer accept\r
+        plt.tick_params(\r
+            axis='x',\r
+            which='both',\r
+            bottom=False,\r
+            top=False,\r
+            labelbottom=False)\r
+        plt.xlabel('Time Period: ' + self.start_date + ' - ' + self.end_date)\r
+        if save_path:\r
+            plt.savefig(os.path.join(save_path, self.name + file_name))\r
+            if not self.setup_trend_stats.empty:\r
+                (self.setup_trend_stats.round(2)).to_csv(os.path.join(save_path, self.name +\r
+                                                                      '_trend_stats.csv'))\r
+        # close even when nothing was saved, like plot_latest_test_results_bar_chart() does\r
+        plt.close('all')\r
+\r
+    def plot_latest_test_results_bar_chart(self, save_path='', img_file_name='_latest_test_runs.png',\r
+                                           stats_file_name='_latest_test_runs_stats.csv'):\r
+        """Plot the latest result of every test as a bar; save the image and the csv if save_path is set."""\r
+        plt.figure()\r
+        colors_for_bars = ['b', 'g', 'r', 'c', 'm', 'y']\r
+        # plot only the mpps data, selected by label rather than by position\r
+        self.latest_test_results[[self._MPPS_COLUMN]].plot(kind='bar', legend=False,\r
+                                                           color=colors_for_bars)\r
+        plt.xticks(rotation='horizontal')\r
+        plt.xlabel('Index of Tests')\r
+        plt.ylabel('MPPS/Core (Norm)')\r
+        plt.title("Test Runs for Setup: " + self.name)\r
+        if save_path:\r
+            plt.savefig(os.path.join(save_path, self.name + img_file_name))\r
+            (self.latest_test_results.round(2)).to_csv(\r
+                os.path.join(save_path, self.name + stats_file_name))\r
+        plt.close('all')\r
+\r
+    def analyze_all_setup_data(self):\r
+        """Run every analysis step; analyze_all_tests() must come first because the rest read self.tests."""\r
+        self.analyze_all_tests()\r
+        self.analyze_latest_test_results()\r
+        self.analyze_all_tests_stats()\r
+        self.analyze_all_tests_trend()\r
+\r
+    def plot_all(self, save_path=''):\r
+        """Produce both charts of this setup."""\r
+        self.plot_latest_test_results_bar_chart(save_path)\r
+        self.plot_trend_graph_all_tests(save_path)\r
+\r
+\r
+def latest_runs_comparison_bar_chart(setup_name1, setup_name2, setup1_latest_result, setup2_latest_result,\r
+                                     save_path=''\r
+                                     ):\r
+    """Plot the latest results of two setups side by side, one group of bars per test.\r
+\r
+    Both result frames are read by column position: column 0 is the test name,\r
+    column 1 is the MPPS\\Core value and (second frame only) column 2 is the date.\r
+    setup_name1 and setup_name2 become the labels of the two MPPS columns.\r
+    When save_path is set, '_comparison.png' and '_comparison_stats_table.csv'\r
+    are written into it. The caller's frames are not modified.\r
+    """\r
+    # .iloc selects by position on every pandas version; .copy() lets the columns be\r
+    # relabelled without touching (or warning about) the frames that were passed in.\r
+    s1_res = setup1_latest_result.iloc[:, [0, 1]].copy()\r
+    s2_res = setup2_latest_result.iloc[:, [0, 1, 2]].copy()\r
+    s1_res.columns = ['Test Name', setup_name1]\r
+    s2_res.columns = ['Test Name', setup_name2, 'Date']\r
+    compare_dframe = pd.merge(s1_res, s2_res, on='Test Name')\r
+    compare_dframe.plot(kind='bar')\r
+    plt.legend(fontsize='small', loc='best')\r
+    plt.xticks(rotation='horizontal')\r
+    plt.xlabel('Index of Tests')\r
+    plt.ylabel('MPPS/Core (Norm)')\r
+    plt.title("Comparison between " + setup_name1 + " and " + setup_name2)\r
+    if save_path:\r
+        plt.savefig(os.path.join(save_path, "_comparison.png"))\r
+        compare_dframe = compare_dframe.round(2)\r
+        compare_dframe.to_csv(os.path.join(save_path, '_comparison_stats_table.csv'))\r
+    # release the figure, as the per-setup bar chart does; otherwise it stays open\r
+    plt.close('all')\r
+\r
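+# WARNING: when detailed test stats are requested and the file _detailed_table.csv already exists in save_path,\r
+# create_all_data() deletes it before writing a fresh one, so that stale data is not carried over\r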
+\r
+\r
+def create_all_data(ga_data, start_date, end_date, save_path='', detailed_test_stats=''):\r
+    """Analyze and plot every setup found in ga_data, then compare trex07 with trex08.\r
+\r
+    ga_data maps a setup name to that setup's raw data (test name -> raw rows).\r
+    start_date and end_date are passed on to every Setup. Output files are written\r
+    by the plotting calls into save_path. When detailed_test_stats is truthy, the\r
+    results of all setups are also written to '_detailed_table.csv' in save_path,\r
+    replacing any existing file of that name.\r
+    """\r
+    import warnings\r
+\r
+    all_setups = {}\r
+    all_setups_data = []\r
+    for setup_name in ga_data:\r
+        setup = Setup(setup_name, start_date, end_date, ga_data[setup_name])\r
+        setup.analyze_all_setup_data()\r
+        setup.plot_all(save_path)\r
+        all_setups_data.append(setup.all_tests_data_table)\r
+        all_setups[setup_name] = setup\r
+\r
+    if detailed_test_stats:\r
+        detailed_table_path = os.path.join(save_path, '_detailed_table.csv')\r
+        if os.path.exists(detailed_table_path):\r
+            os.remove(detailed_table_path)\r
+        # DataFrame.append() no longer exists in pandas 2.0; concat() is the supported way to\r
+        # stack the tables, but it refuses an empty list, hence the fallback.\r
+        if all_setups_data:\r
+            all_setups_data_dframe = pd.concat(all_setups_data)\r
+        else:\r
+            all_setups_data_dframe = pd.DataFrame()\r
+        all_setups_data_dframe.to_csv(detailed_table_path)\r
+\r
+    # The comparison needs both setups; without this check a missing one raised a bare\r
+    # KeyError here, after every other output had already been produced.\r
+    missing_setups = [name for name in ('trex07', 'trex08') if name not in all_setups]\r
+    if missing_setups:\r
+        warnings.warn('skipping the setup comparison chart, no data for: ' + ', '.join(missing_setups))\r
+        return\r
+    trex07setup = all_setups['trex07']\r
+    trex08setup = all_setups['trex08']\r
+    latest_runs_comparison_bar_chart('Mellanox ConnectX-4',\r
+                                     'Intel XL710', trex07setup.latest_test_results,\r
+                                     trex08setup.latest_test_results,\r
+                                     save_path=save_path)\r