aboutsummaryrefslogtreecommitdiffstats
path: root/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend
diff options
context:
space:
mode:
Diffstat (limited to 'Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend')
-rw-r--r--Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend102
1 files changed, 102 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend
new file mode 100644
index 00000000..c486a328
--- /dev/null
+++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend
@@ -0,0 +1,102 @@
1package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance
2
3import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup
4import java.util.HashMap
5import java.util.HashSet
6import java.util.List
7import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest
8
/**
 * Distance measures between the metric samples of a model instance and the
 * representative samples stored in a {@link MetricSampleGroup}, based on the
 * Kolmogorov-Smirnov (KS) statistic.
 *
 * Numeric samples are compared with the two-sample KS statistic from Apache
 * Commons Math; type distributions (already given as probability masses) are
 * compared with a hand-computed maximum difference of cumulative sums.
 */
class KSDistance extends CostDistance {
    /** Distance reported when a sample is too small for the KS statistic to be computed. */
    static val double MAX_DISTANCE = 1.0;
    /** Per-element penalty for a type that occurs in the instance but not in the representative samples. */
    static val int MISSING_TYPE_PENALTY = 100;

    /** Shared tester; it is never reassigned, so it is declared final. */
    static val ksTester = new KolmogorovSmirnovTest();
    /** Representative samples every distance is measured against. */
    val MetricSampleGroup g;

    /**
     * @param g the representative sample group used as the reference side of every distance
     */
    new(MetricSampleGroup g){
        this.g = g;
    }

    /**
     * KS statistic between the representative MPC samples and {@code samples}.
     *
     * @param samples MPC samples of the instance
     * @return the KS statistic, or 1 when {@code samples} has fewer than two elements
     */
    override double mpcDistance(List<Double> samples){
        // The KS statistic needs at least two values per sample; fall back to the maximum distance.
        // NOTE(review): the size of g.mpcSamples is not checked here - confirm it always has >= 2 values.
        if(samples.size < 2) return MAX_DISTANCE;
        return ksTester.kolmogorovSmirnovStatistic(g.mpcSamples, samples);
    }

    /**
     * KS statistic between the representative NA samples and {@code samples}.
     *
     * @param samples NA samples of the instance
     * @return the KS statistic, or 1 when {@code samples} has fewer than two elements
     */
    override double naDistance(List<Double> samples){
        // The KS statistic needs at least two values per sample; fall back to the maximum distance.
        if(samples.size < 2) return MAX_DISTANCE;
        return ksTester.kolmogorovSmirnovStatistic(g.naSamples as double[], samples);
    }

    /**
     * KS statistic between the representative out-degree samples and {@code samples}.
     *
     * @param samples out-degree samples of the instance
     * @return the KS statistic, or 1 when {@code samples} has fewer than two elements
     */
    override double outDegreeDistance(List<Double> samples){
        // The KS statistic needs at least two values per sample; fall back to the maximum distance.
        if(samples.size < 2) return MAX_DISTANCE;
        return ksTester.kolmogorovSmirnovStatistic(g.outDegreeSamples, samples);
    }

    /**
     * Sums a per-type distance over every type key found in either {@code map}
     * or the representative typed out-degree samples.
     *
     * Contribution of each key:
     * - key missing from {@code map}: 1
     * - key missing from the representative samples: 100 per instance sample of that key
     * - both present and each has at least two samples: the KS statistic
     * - otherwise (too few samples for KS): 0 if the representative side has fewer
     *   than two samples and contains every instance value, else 1
     *
     * @param map typed out-degree samples of the instance, keyed by type
     * @return the accumulated distance over all keys
     */
    def double typedOutDegreeDistance(HashMap<String, List<Integer>> map){
        val reference = g.typedOutDegreeSamples;
        var value = 0.0;

        // Union of the keys of both sides, so a type missing on either side is penalised.
        val keySet = new HashSet<String>(map.keySet);
        keySet.addAll(reference.keySet);

        for(key : keySet){
            if(!map.containsKey(key)){
                value += 1;
            }else if(!reference.containsKey(key)){
                value += map.get(key).size * MISSING_TYPE_PENALTY;
            }else{
                // Convert the integer lists to the double arrays expected by the KS tester.
                val double[] rep = reference.get(key).stream().mapToDouble([it|it]).toArray();
                val double[] ins = map.get(key).stream().mapToDouble([it|it]).toArray();
                if(rep.size >= 2 && ins.size >= 2){
                    value += ksTester.kolmogorovSmirnovStatistic(rep, ins);
                }else if(!(rep.size < 2 && rep.containsAll(ins))){
                    // Too few values for KS and the instance values are not covered by the representative ones.
                    value += 1;
                }
            }
        }

        return value;
    }

    /**
     * Distance between the representative node type distribution and {@code samples}.
     *
     * Only keys of the representative distribution are considered; a key absent
     * from {@code samples} contributes a mass of 0.0.
     * NOTE(review): the cumulative sums follow the iteration order of the
     * representative key set - confirm that order is intended to be significant.
     *
     * @param samples node type distribution of the instance, keyed by type
     * @return the maximum absolute difference between the cumulative sums
     */
    def double nodeTypeDistance(HashMap<String, Double> samples){
        val typesDistMap = g.nodeTypeSamples;
        val sourceDist = newArrayList();
        val instanceDist = newArrayList();

        for(key : typesDistMap.keySet()){
            sourceDist.add(typesDistMap.get(key));
            instanceDist.add(samples.getOrDefault(key, 0.0));
        }

        return ks_distance_two_dist(sourceDist, instanceDist);
    }

    /**
     * Distance between the representative edge type distribution and {@code samples}.
     *
     * Only keys of the representative distribution are considered; a key absent
     * from {@code samples} contributes a mass of 0.0.
     * NOTE(review): the cumulative sums follow the iteration order of the
     * representative key set - confirm that order is intended to be significant.
     *
     * @param samples edge type distribution of the instance, keyed by type
     * @return the maximum absolute difference between the cumulative sums
     */
    def double edgeTypeDistance(HashMap<String, Double> samples){
        val typesDistMap = g.edgeTypeSamples;
        val sourceDist = newArrayList();
        val instanceDist = newArrayList();

        for(key : typesDistMap.keySet()){
            sourceDist.add(typesDistMap.get(key));
            instanceDist.add(samples.getOrDefault(key, 0.0));
        }

        return ks_distance_two_dist(sourceDist, instanceDist);
    }

    /**
     * Computes the KS statistic of two discrete distributions given as aligned
     * lists of probability masses: the maximum absolute difference between their
     * running (cumulative) sums.
     *
     * If the lists differ in length, the missing positions of the shorter list
     * are treated as zero mass, so the result does not depend on argument order
     * and a shorter second list no longer raises an IndexOutOfBoundsException.
     *
     * @param dist1 probability masses of the first distribution
     * @param dist2 probability masses of the second distribution, aligned by index with {@code dist1}
     * @return the largest absolute gap between the two cumulative sums; 0.0 for two empty lists
     */
    def double ks_distance_two_dist(List<Double> dist1, List<Double> dist2){
        // Since we already know the pdf, we compute the ks-test manually
        var ksStatistics = 0.0;
        var sum1 = 0.0;
        var sum2 = 0.0;
        val length = Math.max(dist1.size(), dist2.size());
        for(var i = 0; i < length; i++){
            if(i < dist1.size()) sum1 += dist1.get(i);
            if(i < dist2.size()) sum2 += dist2.get(i);

            ksStatistics = Math.max(ksStatistics, Math.abs(sum1 - sum2));
        }
        return ksStatistics;
    }
}