changeset 1269:d7209c062524

flow - do not lowercase paths by default; add opt-in lowercasePath parameter and --lcase flag
author Devel 1
date Fri, 03 Jul 2020 10:24:23 +0200
parents f3ef02718781
children 15c0fcd63815
files stress-tester/src/main/resources/flow_analyzer.py
diffstat 1 files changed, 9 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/stress-tester/src/main/resources/flow_analyzer.py	Thu Jul 02 16:49:17 2020 +0200
+++ b/stress-tester/src/main/resources/flow_analyzer.py	Fri Jul 03 10:24:23 2020 +0200
@@ -154,7 +154,7 @@
         df["datetime"] = pd.to_datetime(df.datetime)
         self.data = df
 
-    def prep_data(self, cleanRows = True, cleanRules = None, dropMissing = True, debug=False):
+    def prep_data(self, cleanRows = True, cleanRules = None, dropMissing = True, lowercasePath=False, debug=False):
         if cleanRules is None:
             self.cleanRules = [
                 ".css", ".js",
@@ -181,8 +181,12 @@
 
 
         # Proste czyszczenie i normalizowanie stringów (url, request method)
-        self.data["pathArgs"] = self.data["pathArgs"].str.lower().str.strip()
-        self.data["pathBase"] = self.data["pathBase"].str.lower().str.strip()
+        if lowercasePath:
+            self.data["pathArgs"] = self.data["pathArgs"].str.lower().str.strip()
+            self.data["pathBase"] = self.data["pathBase"].str.lower().str.strip()
+        else:
+            self.data["pathArgs"] = self.data["pathArgs"].str.strip()
+            self.data["pathBase"] = self.data["pathBase"].str.strip()
         self.data["requestMethod"] = self.data["requestMethod"].str.lower().str.strip()
 
         if dropMissing:
@@ -682,6 +686,7 @@
     parser.add_argument('-lmax', type=int, default=7)
     parser.add_argument('-maxSeq', type=int, default=100)
     parser.add_argument('--fold', action='store_true')
+    parser.add_argument('--lcase', action='store_true')
     args = parser.parse_args()
 
     cleanRules = [
@@ -701,7 +706,7 @@
     analyzer = FlowAnalyzer()
     analyzer.get_files_list(pathString=args.input)
     analyzer.get_events_line_csv(dateFormat='%Y-%m-%d_%H_%M_%S_%f') #yyyy-MM-dd_HH_mm_ss_SSS000
-    analyzer.prep_data(cleanRules=cleanRules)
+    analyzer.prep_data(cleanRules=cleanRules, lowercasePath=args.lcase)
     analyzer.prep_sequences(dropDuplicates=True)
 
     for length in range(args.lmin, args.lmax + 1):