changeset 1219:b1c1adbf6538

HttpUrlSequencePayloadAnalyzer - more config options
author Devel 1
date Wed, 24 Jun 2020 15:50:00 +0200
parents bb727af8a7ee
children 6f6f6c8c790e
files stress-tester/src/main/java/com/passus/st/scanner/FlowAnalyzerCommand.java stress-tester/src/main/java/com/passus/st/scanner/HttpUrlSequencePayloadAnalyzer.java stress-tester/src/main/resources/flow_analyzer.py stress-tester/src/main/resources/flow_analyzer_fold.py
diffstat 4 files changed, 82 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/stress-tester/src/main/java/com/passus/st/scanner/FlowAnalyzerCommand.java	Wed Jun 24 13:28:06 2020 +0200
+++ b/stress-tester/src/main/java/com/passus/st/scanner/FlowAnalyzerCommand.java	Wed Jun 24 15:50:00 2020 +0200
@@ -6,6 +6,7 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
@@ -19,10 +20,15 @@
     public static final String DEFAULT_PYTHON_CMD = IS_WINDOWS ? "python" : "python3";
     public static final String DEFAULT_SCRIPT_PATH = "flow_analyzer.py";
     public static final String RESOURCE = "/flow_analyzer.py";
+    public static final int DEFAULT_LMIN = 3;
+    public static final int DEFAULT_LMAX = 7;
 
     String pythonCmd = DEFAULT_PYTHON_CMD;
     String scriptPath = DEFAULT_SCRIPT_PATH;
     String dataPath;
+    int lmin = DEFAULT_LMIN;
+    int lmax = DEFAULT_LMAX;
+    List<String> cleanRules;
 
     static void extractEmbeddedScript(File target) throws IOException {
         target.delete();
@@ -41,7 +47,15 @@
         checkExists(dataPath, "dataPath");
         checkCmd(pythonCmd);
 
-        List<String> commandLine = Arrays.asList(pythonCmd, scriptPath, dataPath);
+        List<String> commandLine = new ArrayList<>(Arrays.asList(
+                pythonCmd, scriptPath, dataPath, "-lmin", Integer.toString(lmin), "-lmax", Integer.toString(lmax)
+        ));
+        // The script declares -c with nargs='+': a bare "-c" (empty rule list) would abort it with a usage error.
+        if (cleanRules != null && !cleanRules.isEmpty()) {
+            commandLine.add("-c");
+            commandLine.addAll(cleanRules);
+        }
+
         ProcessBuilder pb = new ProcessBuilder(commandLine);
 
         boolean showOutput = true;
--- a/stress-tester/src/main/java/com/passus/st/scanner/HttpUrlSequencePayloadAnalyzer.java	Wed Jun 24 13:28:06 2020 +0200
+++ b/stress-tester/src/main/java/com/passus/st/scanner/HttpUrlSequencePayloadAnalyzer.java	Wed Jun 24 15:50:00 2020 +0200
@@ -33,10 +33,13 @@
 import java.util.Date;
 import java.util.LinkedHashMap;
 
+import static com.passus.config.schema.ConfigurationSchemaBuilder.listDef;
 import static com.passus.config.schema.ConfigurationSchemaBuilder.mapDef;
 import static com.passus.config.schema.ConfigurationSchemaBuilder.tupleDef;
 import static com.passus.st.Protocols.HTTP;
+import static com.passus.st.config.CommonNodeDefs.INT_GREATER_THAN_ZERO_DEF;
 import static com.passus.st.config.CommonNodeDefs.STRING_DEF;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -52,7 +55,10 @@
 
     private UserIdExtractor userIdExtractor;
 
-    private String dataFile;
+    private String dataPath;
+    private List<String> cleanRules;
+    private int lmin = FlowAnalyzerCommand.DEFAULT_LMIN;
+    private int lmax = FlowAnalyzerCommand.DEFAULT_LMAX;
 
     private CSVWriter dataWriter;
 
@@ -64,7 +70,10 @@
     public void configure(Configuration config, ConfigurationContext context) {
         String userIdSource = config.getString("userIdSource", "ip");
         userIdExtractor = resolveUserIdExtractor(userIdSource);
-        dataFile = config.getString("dataFile", "sequence_data.csv");
+        dataPath = config.getString("dataPath", "sequence_data.csv");
+        lmin = config.getInteger("lmin", FlowAnalyzerCommand.DEFAULT_LMIN);
+        lmax = config.getInteger("lmax", FlowAnalyzerCommand.DEFAULT_LMAX);
+        cleanRules = config.getList("cleanRules");
     }
 
     @Override
@@ -73,7 +82,7 @@
         metric.activate();
 
         try {
-            FileOutputStream fos = new FileOutputStream(dataFile);
+            FileOutputStream fos = new FileOutputStream(dataPath);
             OutputStreamWriter osw = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
             dataWriter = new CSVWriter(osw);
         } catch (IOException ex) {
@@ -141,20 +150,23 @@
         try {
             dataWriter.flush(); // TODO: ???
             FlowAnalyzerCommand command = new FlowAnalyzerCommand();
-            command.dataPath = dataFile;
             FlowAnalyzerCommand.extractEmbeddedScript(new File(command.scriptPath));
+            command.dataPath = dataPath;
+            command.cleanRules = cleanRules;
+            command.lmin = lmin;
+            command.lmax = lmax;
             command.run();
-            
+
             File dir = new File(".");
             Gson gson = new Gson();
             Pattern pattern = Pattern.compile("^seq_(L\\d+)[.]json$");
-            for(String fname : dir.list()) {
+            for (String fname : dir.list()) {
                 Matcher m = pattern.matcher(fname);
                 if (m.matches()) {
                     metric.addSequences(m.group(1), readSequences(new File(dir, fname), gson));
                 }
             }
-            
+
         } catch (Exception ex) {
             ex.printStackTrace();
         }
@@ -234,7 +246,10 @@
         public NodeDefinition create() {
             return mapDef(
                     tupleDef("userIdSource", STRING_DEF).setRequired(false),
-                    tupleDef("dataFile", STRING_DEF).setRequired(false)
+                    tupleDef("dataPath", STRING_DEF).setRequired(false),
+                    tupleDef("cleanRules", listDef()).setRequired(false),
+                    tupleDef("lmin", INT_GREATER_THAN_ZERO_DEF).setRequired(false),
+                    tupleDef("lmax", INT_GREATER_THAN_ZERO_DEF).setRequired(false)
             );
         }
     }
--- a/stress-tester/src/main/resources/flow_analyzer.py	Wed Jun 24 13:28:06 2020 +0200
+++ b/stress-tester/src/main/resources/flow_analyzer.py	Wed Jun 24 15:50:00 2020 +0200
@@ -532,16 +532,33 @@
 
 if __name__ == "__main__":
     import sys
-    if len(sys.argv) < 2:
-        raise Exception('Input expected')
+    import argparse
+
+    parser = argparse.ArgumentParser('Flow Analyzer')
+    parser.add_argument('input')
+    parser.add_argument('-c', nargs='+')
+    parser.add_argument('-lmin', type=int, default=3)
+    parser.add_argument('-lmax', type=int, default=7)
+    args = parser.parse_args()
+
+    cleanRules = [
+        ".css", ".js", ".jpg", ".png", ".svg", ".gif", ".ico",
+        ".ttf", ".woff", ".woff2", ".eot",
+        ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".txt",
+        "captcha"
+    ]
+
+    if args.c:
+        for cleanRule in args.c:
+            cleanRules.append(cleanRule)
 
     analyzer = flowAnalyzer()
-    analyzer.get_files_list(pathString=sys.argv[1])
+    analyzer.get_files_list(pathString=args.input)
     analyzer.get_events_line_csv(dateFormat='%Y-%m-%d_%H_%M_%S_%f') #yyyy-MM-dd_HH_mm_ss_SSS000
-    analyzer.prep_data()
+    analyzer.prep_data(cleanRules=cleanRules)
     analyzer.prep_sequences(dropDuplicates=True)
 
-    for length in range(3, 8):
+    for length in range(args.lmin, args.lmax + 1):
         name = "seq_L{}.json".format(length)
         seq = analyzer.analyze_markov(targetLength=length)
         with open(name, "w") as f:
--- a/stress-tester/src/main/resources/flow_analyzer_fold.py	Wed Jun 24 13:28:06 2020 +0200
+++ b/stress-tester/src/main/resources/flow_analyzer_fold.py	Wed Jun 24 15:50:00 2020 +0200
@@ -622,16 +622,33 @@
 
 if __name__ == "__main__":
     import sys
-    if len(sys.argv) < 2:
-        raise Exception('Input expected')
+    import argparse
+
+    parser = argparse.ArgumentParser('Flow Analyzer')
+    parser.add_argument('input')
+    parser.add_argument('-c', nargs='+')
+    parser.add_argument('-lmin', type=int, default=3)
+    parser.add_argument('-lmax', type=int, default=7)
+    args = parser.parse_args()
+
+    cleanRules = [
+        ".css", ".js", ".jpg", ".png", ".svg", ".gif", ".ico",
+        ".ttf", ".woff", ".woff2", ".eot",
+        ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".txt",
+        "captcha"
+    ]
+
+    if args.c:
+        for cleanRule in args.c:
+            cleanRules.append(cleanRule)
 
     analyzer = flowAnalyzer()
-    analyzer.get_files_list(pathString=sys.argv[1])
+    analyzer.get_files_list(pathString=args.input)
     analyzer.get_events_line_csv(dateFormat='%Y-%m-%d_%H_%M_%S_%f') #yyyy-MM-dd_HH_mm_ss_SSS000
-    analyzer.prep_data()
+    analyzer.prep_data(cleanRules=cleanRules)
     analyzer.prep_sequences(dropDuplicates=True)
 
-    for length in range(3, 8):
+    for length in range(args.lmin, args.lmax + 1):
         name = "seq_L{}.json".format(length)
         seq = analyzer.analyze_markov2(targetLength=length, foldLoops=True)
         with open(name, "w") as f: