diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend')
-rw-r--r-- | Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend new file mode 100644 index 00000000..c486a328 --- /dev/null +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend | |||
@@ -0,0 +1,102 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | ||
2 | |||
import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup
import java.util.HashMap
import java.util.HashSet
import java.util.List
import java.util.Map
import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest
8 | |||
/**
 * Cost distance built on the Kolmogorov-Smirnov (KS) statistic.
 *
 * Every method compares values supplied by the caller against the reference
 * values stored in the {@link MetricSampleGroup} given at construction time.
 */
class KSDistance extends CostDistance {
	/** Shared tester; this class only uses its two-sample KS statistic. */
	static val ksTester = new KolmogorovSmirnovTest

	/**
	 * Amount added per sample of a key that the compared map contains but the
	 * reference group does not (see {@link #typedOutDegreeDistance}).
	 */
	static val UNREFERENCED_TYPE_PENALTY = 100

	/** Reference sample group that all distances are measured against. */
	val MetricSampleGroup g

	/**
	 * @param g the reference sample group to compare against
	 */
	new(MetricSampleGroup g) {
		this.g = g
	}

	/**
	 * KS statistic between the reference {@code mpcSamples} and {@code samples}.
	 *
	 * @param samples the values to compare with the reference
	 * @return 1 when either side has fewer than two values, otherwise the KS statistic
	 */
	override double mpcDistance(List<Double> samples) {
		return sampleDistance(g.mpcSamples, samples)
	}

	/**
	 * KS statistic between the reference {@code naSamples} and {@code samples}.
	 *
	 * @param samples the values to compare with the reference
	 * @return 1 when either side has fewer than two values, otherwise the KS statistic
	 */
	override double naDistance(List<Double> samples) {
		return sampleDistance(g.naSamples as double[], samples)
	}

	/**
	 * KS statistic between the reference {@code outDegreeSamples} and {@code samples}.
	 *
	 * @param samples the values to compare with the reference
	 * @return 1 when either side has fewer than two values, otherwise the KS statistic
	 */
	override double outDegreeDistance(List<Double> samples) {
		return sampleDistance(g.outDegreeSamples, samples)
	}

	/**
	 * Sums a per-key distance over the union of the keys of {@code map} and of
	 * the reference {@code typedOutDegreeSamples}:
	 * <ul>
	 * <li>key missing from {@code map}: adds 1;</li>
	 * <li>key missing from the reference: adds the number of samples under
	 *     that key times {@link #UNREFERENCED_TYPE_PENALTY};</li>
	 * <li>key on both sides, either side with fewer than two samples: adds 0
	 *     if the reference has fewer than two samples and contains every
	 *     value of {@code map}'s list, otherwise adds 1;</li>
	 * <li>otherwise: adds the two-sample KS statistic of the two lists.</li>
	 * </ul>
	 *
	 * @param map samples to compare, grouped by key
	 * @return the accumulated distance (not normalised by the number of keys)
	 */
	def double typedOutDegreeDistance(HashMap<String, List<Integer>> map) {
		val reference = g.typedOutDegreeSamples
		val keys = new HashSet<String>(map.keySet)
		keys.addAll(reference.keySet)

		var value = 0.0
		for (key : keys) {
			if (!map.containsKey(key)) {
				value += 1
			} else if (!reference.containsKey(key)) {
				value += map.get(key).size * UNREFERENCED_TYPE_PENALTY
			} else {
				// the KS tester works on double[], so widen the Integer samples
				val double[] rep = reference.get(key).stream().mapToDouble([it|it]).toArray()
				val double[] ins = map.get(key).stream().mapToDouble([it|it]).toArray()
				if (rep.size < 2 || ins.size < 2) {
					// too few values for the KS test: fall back to a 0/1 decision
					val degenerateMatch = rep.size < 2 && rep.containsAll(ins)
					if (!degenerateMatch) {
						value += 1
					}
				} else {
					value += ksTester.kolmogorovSmirnovStatistic(rep, ins)
				}
			}
		}
		return value
	}

	/**
	 * Distance between the reference {@code nodeTypeSamples} and {@code samples}.
	 *
	 * @param samples value per key; keys absent from it count as 0.0
	 * @return the result of {@link #ks_distance_two_dist} over the reference keys
	 */
	def nodeTypeDistance(HashMap<String, Double> samples) {
		return typeDistributionDistance(g.nodeTypeSamples, samples)
	}

	/**
	 * Distance between the reference {@code edgeTypeSamples} and {@code samples}.
	 *
	 * @param samples value per key; keys absent from it count as 0.0
	 * @return the result of {@link #ks_distance_two_dist} over the reference keys
	 */
	def edgeTypeDistance(HashMap<String, Double> samples) {
		return typeDistributionDistance(g.edgeTypeSamples, samples)
	}

	/**
	 * Largest absolute difference between the running sums of two lists that
	 * are compared position by position.
	 *
	 * The lists are expected to have the same length. If they do not, the
	 * shorter one is treated as having 0.0 at its missing trailing positions,
	 * so no entry of the longer list is ignored and no index error is raised.
	 *
	 * @param dist1 first list of values
	 * @param dist2 second list of values
	 * @return the maximum of |sum1 - sum2| over all prefixes; 0.0 for two empty lists
	 */
	def double ks_distance_two_dist(List<Double> dist1, List<Double> dist2) {
		// Since we already know the pdf, we compute the ks-test manually
		val length = Math.max(dist1.size, dist2.size)
		var ksStatistics = 0.0
		var sum1 = 0.0
		var sum2 = 0.0
		for (var i = 0; i < length; i++) {
			if (i < dist1.size) {
				sum1 += dist1.get(i)
			}
			if (i < dist2.size) {
				sum2 += dist2.get(i)
			}
			ksStatistics = Math.max(ksStatistics, Math.abs(sum1 - sum2))
		}
		return ksStatistics
	}

	/**
	 * Two-sample KS statistic with a fallback of 1 when the test cannot be run.
	 *
	 * The tester rejects any array with fewer than two values, so both sides
	 * are checked here. A null reference is deliberately not intercepted and
	 * is passed on to the tester unchanged.
	 */
	private def double sampleDistance(double[] reference, List<Double> samples) {
		if (samples.size < 2 || (reference !== null && reference.length < 2)) {
			return 1
		}
		return ksTester.kolmogorovSmirnovStatistic(reference, samples)
	}

	/**
	 * Pairs each reference value with the value stored in {@code samples}
	 * under the same key (0.0 if absent) and compares the two resulting lists.
	 *
	 * Only keys of {@code reference} are visited, in the iteration order of
	 * that map; the running sums, and therefore the result, follow that order.
	 */
	private def double typeDistributionDistance(Map<String, Double> reference, HashMap<String, Double> samples) {
		val sourceDist = <Double>newArrayList()
		val instanceDist = <Double>newArrayList()
		for (entry : reference.entrySet) {
			sourceDist.add(entry.value)
			instanceDist.add(samples.getOrDefault(entry.key, 0.0))
		}
		return ks_distance_two_dist(sourceDist, instanceDist)
	}
}