changeset 1250:0aaec16bfed7

flow - unified py output, passing ignoredUrls, cleanup
author Devel 1
date Wed, 01 Jul 2020 13:59:15 +0200
parents f9aa26c8f638
children 974d2217d08c
files stress-tester/src/main/java/com/passus/st/scanner/FlowAnalyzerUtils.java stress-tester/src/main/java/com/passus/st/scanner/HttpUrlSequencePayloadAnalyzer.java stress-tester/src/main/java/com/passus/st/scanner/SequenceConverter.java stress-tester/src/main/resources/flow_analyzer.py
diffstat 4 files changed, 40 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/stress-tester/src/main/java/com/passus/st/scanner/FlowAnalyzerUtils.java	Wed Jul 01 13:09:39 2020 +0200
+++ b/stress-tester/src/main/java/com/passus/st/scanner/FlowAnalyzerUtils.java	Wed Jul 01 13:59:15 2020 +0200
@@ -7,8 +7,6 @@
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
 
 /**
  *
@@ -16,25 +14,31 @@
  */
 public class FlowAnalyzerUtils {
 
-    static ArrayList<ArrayList<String>> readSequences(File file, Gson gson) throws IOException {
+    static AnalyzerResult readResult(File file, Gson gson) throws IOException { // parses one analyzer output file as UTF-8 JSON
         try (FileInputStream fis = new FileInputStream(file);
                 InputStreamReader isr = new InputStreamReader(fis, StandardCharsets.UTF_8)) {
-            return gson.fromJson(isr, ArrayList.class);
-        }
-    }
-
-    static List<SeqWithFolds> readFolds(File file, Gson gson) throws IOException {
-        try (FileInputStream fis = new FileInputStream(file);
-                InputStreamReader isr = new InputStreamReader(fis, StandardCharsets.UTF_8)) {
-            SeqWithFolds[] arr = gson.fromJson(isr, SeqWithFolds[].class);
-            return Arrays.asList(arr);
+            return gson.fromJson(isr, AnalyzerResult.class); // null for an empty file (Gson returns null at EOF); streams are closed by the try
         }
     }
 
     static final class SeqWithFolds {
 
-        public List<String> sequence;
+        public ArrayList<String> sequence; // concrete ArrayList so getSequences() can add it without copying
         public float prob;
         public ArrayList<ArrayList<String>> unfolded;
     }
+
+    static final class AnalyzerResult { // Gson target for the root object of a seq_L*_out.json file
+        public String[] ignoredUrls; // JSON key "ignoredUrls"
+        public SeqWithFolds[] sequences; // JSON key "sequences"; left null by Gson when the key is absent
+        /** Returns each entry's {@code sequence}, in order; an empty list when {@code sequences} is null. */
+        public ArrayList<ArrayList<String>> getSequences() {
+            int count = sequences == null ? 0 : sequences.length;
+            ArrayList<ArrayList<String>> result = new ArrayList<>(count);
+            for (int i = 0; i < count; i++) {
+                result.add(sequences[i].sequence);
+            }
+            return result;
+        }
+    }
 }
--- a/stress-tester/src/main/java/com/passus/st/scanner/HttpUrlSequencePayloadAnalyzer.java	Wed Jul 01 13:09:39 2020 +0200
+++ b/stress-tester/src/main/java/com/passus/st/scanner/HttpUrlSequencePayloadAnalyzer.java	Wed Jul 01 13:59:15 2020 +0200
@@ -151,12 +151,13 @@
 
             File dir = new File(".");
             Gson gson = new Gson();
-            String patternStr = command.fold ? "^seq_(L\\d+)f[.]json$" : "^seq_(L\\d+)[.]json$";
+            String patternStr = command.fold ? "^seq_(L\\d+)f_out[.]json$" : "^seq_(L\\d+)_out[.]json$";
             Pattern pattern = Pattern.compile(patternStr);
             for (String fname : dir.list()) {
                 Matcher m = pattern.matcher(fname);
                 if (m.matches()) {
-                    metric.addSequences(m.group(1), FlowAnalyzerUtils.readSequences(new File(dir, fname), gson));
+                    FlowAnalyzerUtils.AnalyzerResult result = FlowAnalyzerUtils.readResult(new File(dir, fname), gson);
+                    metric.addSequences(m.group(1), result.getSequences());
                 }
             }
 
--- a/stress-tester/src/main/java/com/passus/st/scanner/SequenceConverter.java	Wed Jul 01 13:09:39 2020 +0200
+++ b/stress-tester/src/main/java/com/passus/st/scanner/SequenceConverter.java	Wed Jul 01 13:59:15 2020 +0200
@@ -133,15 +133,15 @@
 
             boolean fold = cl.hasOption('f');
             ArrayList<ArrayList<String>> sequences;
+            FlowAnalyzerUtils.AnalyzerResult result = FlowAnalyzerUtils.readResult(input, new Gson());
             if (fold) {
-                sequences = FlowAnalyzerUtils.readFolds(input, new Gson()).get(1).unfolded;
+                sequences = result.sequences[1].unfolded;
             } else {
-                sequences = FlowAnalyzerUtils.readSequences(input, new Gson());
+                sequences = result.getSequences();
             }
 
-            List<String> ignoredUrls = Arrays.asList("/user/check", "/stacje/podpowiedzi", "/api/mobile");
             SequenceConverter converter = new SequenceConverter();
-            converter.writeSequenceFilterConf(sequences, ignoredUrls, output);
+            converter.writeSequenceFilterConf(sequences, Arrays.asList(result.ignoredUrls), output);
 
         } catch (ParseException e) {
             System.out.println(e.getMessage());
--- a/stress-tester/src/main/resources/flow_analyzer.py	Wed Jul 01 13:09:39 2020 +0200
+++ b/stress-tester/src/main/resources/flow_analyzer.py	Wed Jul 01 13:59:15 2020 +0200
@@ -705,13 +705,11 @@
         if args.fold:
             suffix = 'f'
             res = analyzer.analyze(targetLength=length, foldLoops=True)
-            seqs_prob = [(s, prob) for s, prob in res if prob >= args.pmin]
-            seqs = [s for s, prob in res if prob >= args.pmin]
-            seqs_id = [(s, prob) for s, prob in analyzer.analyzed if prob >= args.pmin]
+            seqs_prob = [(s, prob) for s, prob in analyzer.analyzed if prob >= args.pmin]
 
             unfolded = []
             num_unfolded = 0
-            for (s, prob) in seqs_id:
+            for (s, prob) in seqs_prob:
                 seq_mapped = analyzer.seq_id_to_path(s)
                 si = [int(x) for x in s]
                 unfolded_ids = analyzer.get_unfolded(si)
@@ -725,34 +723,31 @@
 
             stats = {
                 'seqCount': len(res),
-                'seqCountP': len(seqs),
+                'seqCountP': len(seqs_prob),
                 'unfoldedCount': num_unfolded
             }
-            seqs = seqs[:args.maxSeq]
-            seqs_prob = seqs_prob[:args.maxSeq]
+            unfolded = unfolded[:args.maxSeq]
+            out = {
+                'ignoredUrls': args.c,
+                'sequences': unfolded
+            }
         else:
             suffix = ''
             res = analyzer.analyze_markov(targetLength=length)
             seqs_prob = [(s, prob) for s, prob in res if prob >= args.pmin]
-            seqs = [s for s, prob in res if prob >= args.pmin]
-            unfolded = None
             stats = {
                 'seqCount': len(res),
-                'seqCountP': len(seqs)
+                'seqCountP': len(seqs_prob)
             }
-            seqs = seqs[:args.maxSeq]
             seqs_prob = seqs_prob[:args.maxSeq]
+            out = {
+                'ignoredUrls': args.c,
+                'sequences': [{'sequence': seq, 'prob': prob} for seq, prob in seqs_prob]
+            }
 
-        # for metrics
-        with open("seq_L{}{}.json".format(length, suffix), "w") as f:
-            json.dump(seqs, f)
-        # input to next stage
-        with open("seq_L{}{}_prob.json".format(length, suffix), "w") as f:
-            json.dump(seqs_prob, f)
+        # for metrics and generator
+        with open("seq_L{}{}_out.json".format(length, suffix), "w") as f:
+            json.dump(out, f)
         # debug
         with open("seq_L{}{}_stats.json".format(length, suffix), "w") as f:
             json.dump(stats, f)
-        # input to next stage
-        if unfolded:
-            with open("seq_L{}{}_unfolded.json".format(length, suffix), "w") as f:
-                json.dump(unfolded, f)