diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance')
4 files changed, 139 insertions, 42 deletions
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend index 33d10fa3..613f0f43 100644 --- a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend | |||
@@ -1,9 +1,25 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | 1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance |
2 | 2 | ||
3 | import java.text.DecimalFormat | ||
4 | import java.util.HashMap | ||
5 | import java.util.List | ||
3 | import org.eclipse.xtend.lib.annotations.Accessors | 6 | import org.eclipse.xtend.lib.annotations.Accessors |
4 | 7 | ||
/**
 * Base class for distance measures that compare metric samples of a model
 * against reference samples. Concrete subclasses decide which statistic is
 * used; this class only fixes the three entry points and offers a shared
 * helper for turning raw samples into a probability mass function.
 */
abstract class CostDistance {
    /** Returns the distance computed for the given NA metric samples. */
    def abstract double naDistance(List<Double> samples);

    /** Returns the distance computed for the given MPC metric samples. */
    def abstract double mpcDistance(List<Double> samples);

    /** Returns the distance computed for the given out-degree samples. */
    def abstract double outDegreeDistance(List<Double> samples);

    /**
     * Builds an empirical probability mass function from raw samples.
     *
     * Each sample is rendered to text with {@code formatter} and the text is
     * used as the bucket key, so samples that format to the same string share
     * one bucket. Every sample contributes {@code 1.0 / samples.length} to its
     * bucket.
     *
     * @param samples   the observed values
     * @param formatter number format that defines the bucket keys
     * @return map from formatted value to its relative frequency; empty when
     *         {@code samples} is empty
     */
    def protected pmfFromSamples(double[] samples, DecimalFormat formatter){
        // Every observation carries the same share of the total mass.
        val weight = 1.0 / samples.length;
        val histogram = new HashMap<String, Double>();

        for (observation : samples) {
            histogram.merge(formatter.format(observation), weight, [accumulated, increment | accumulated + increment]);
        }

        return histogram;
    }
}
8 | 24 | ||
9 | class StateData{ | 25 | class StateData{ |
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend new file mode 100644 index 00000000..b945d97b --- /dev/null +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend | |||
@@ -0,0 +1,73 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | ||
2 | |||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain | ||
4 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader | ||
5 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup | ||
6 | import com.google.common.collect.Sets | ||
7 | import java.text.DecimalFormat | ||
8 | import java.util.ArrayList | ||
9 | import java.util.HashMap | ||
10 | import java.util.List | ||
11 | import java.util.Map | ||
12 | import java.util.Set | ||
13 | |||
/**
 * Distance measure based on the Euclidean (L2) distance between two
 * probability mass functions: the one built from the representative metrics
 * read for a domain, and the one built from the samples handed to the
 * distance methods.
 */
class EuclideanDistance extends CostDistance{
    // Representative metrics as returned by RepMetricsReader; kept for reference.
    var MetricSampleGroup g;
    // Reference probability mass functions, keyed by the formatted sample value.
    var HashMap<String, Double> mpcPMF;
    var HashMap<String, Double> naPMF;
    var HashMap<String, Double> outDegreePMF;
    // Shared formatter so reference and candidate samples produce matching keys.
    var DecimalFormat formatter;

    /**
     * Reads the representative metrics for the domain and precomputes the
     * reference probability mass functions.
     *
     * @param d the domain whose representative metrics are loaded
     */
    new(Domain d){
        var metrics = RepMetricsReader.read(d);
        this.g = metrics;

        var mpcSamples = metrics.mpcSamples;
        var naSamples = metrics.naSamples.stream.mapToDouble([it]).toArray();
        var outDegreeSamples = metrics.outDegreeSamples.stream.mapToDouble([it]).toArray();

        // Values are formatted to fixed-precision strings so that floating
        // point noise does not split equal values into different buckets.
        formatter = new DecimalFormat("#0.00000");

        mpcPMF = pmfFromSamples(mpcSamples, formatter);
        naPMF = pmfFromSamples(naSamples, formatter);
        outDegreePMF = pmfFromSamples(outDegreeSamples, formatter);
    }

    /** Euclidean distance between the PMF of {@code samples} and the reference NA PMF. */
    override naDistance(List<Double> samples) {
        var pmfMap = pmfFromSamples(samples, formatter);
        return euclideanDistance(pmfMap, naPMF);
    }

    /** Euclidean distance between the PMF of {@code samples} and the reference MPC PMF. */
    override mpcDistance(List<Double> samples) {
        var pmfMap = pmfFromSamples(samples, formatter);
        return euclideanDistance(pmfMap, mpcPMF);
    }

    /** Euclidean distance between the PMF of {@code samples} and the reference out-degree PMF. */
    override outDegreeDistance(List<Double> samples) {
        var pmfMap = pmfFromSamples(samples, formatter);
        return euclideanDistance(pmfMap, outDegreePMF);
    }

    /**
     * Computes the Euclidean distance between two probability mass functions.
     *
     * The functions are compared over the union of their keys; a key missing
     * from one side counts as probability 0 there. The result is
     * {@code sqrt(sum((p1(k) - p2(k))^2))}, so identical distributions yield 0.
     *
     * @param pmf1 first probability mass function
     * @param pmf2 second probability mass function
     * @return the non-negative L2 distance between the two functions
     */
    def private double euclideanDistance(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){
        var sumOfSquares = 0.0;
        for (key : Sets.union(pmf1.keySet(), pmf2.keySet())) {
            // The distance is built from the per-key DIFFERENCE of the
            // probabilities; summing them would never produce 0 for equal inputs.
            val difference = pmf1.getOrDefault(key, 0.0) - pmf2.getOrDefault(key, 0.0);
            sumOfSquares += difference * difference;
        }

        return Math.sqrt(sumOfSquares);
    }
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend index ced9eadb..df65b81f 100644 --- a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend | |||
@@ -7,7 +7,7 @@ import java.text.DecimalFormat | |||
7 | import java.util.HashMap | 7 | import java.util.HashMap |
8 | import java.util.List | 8 | import java.util.List |
9 | 9 | ||
10 | class JSDistance { | 10 | class JSDistance extends CostDistance { |
11 | var HashMap<String, Double> mpcPMF; | 11 | var HashMap<String, Double> mpcPMF; |
12 | var HashMap<String, Double> naPMF; | 12 | var HashMap<String, Double> naPMF; |
13 | var HashMap<String, Double> outDegreePMF; | 13 | var HashMap<String, Double> outDegreePMF; |
@@ -27,17 +27,6 @@ class JSDistance { | |||
27 | outDegreePMF = pmfFromSamples(outDegreeSamples, formatter); | 27 | outDegreePMF = pmfFromSamples(outDegreeSamples, formatter); |
28 | } | 28 | } |
29 | 29 | ||
30 | def private pmfFromSamples(double[] samples, DecimalFormat formatter){ | ||
31 | var length = samples.length; | ||
32 | var pmfMap = new HashMap<String, Double>(); | ||
33 | |||
34 | for(sample : samples){ | ||
35 | pmfMap.put(formatter.format(sample), pmfMap.getOrDefault(formatter.format(sample), 0.0) + 1.0 / length); | ||
36 | } | ||
37 | |||
38 | return pmfMap; | ||
39 | } | ||
40 | |||
41 | def private combinePMF(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){ | 30 | def private combinePMF(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){ |
42 | var pmfMap = new HashMap<String, Double>(); | 31 | var pmfMap = new HashMap<String, Double>(); |
43 | 32 | ||
@@ -68,7 +57,7 @@ class JSDistance { | |||
68 | return distance; | 57 | return distance; |
69 | } | 58 | } |
70 | 59 | ||
71 | def double mpcDistance(List<Double> samples){ | 60 | override double mpcDistance(List<Double> samples){ |
72 | // map list to array | 61 | // map list to array |
73 | var map = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter); | 62 | var map = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter); |
74 | //if the size of array is smaller than 2, ks distance cannot be performed, simply return 1 | 63 | //if the size of array is smaller than 2, ks distance cannot be performed, simply return 1 |
@@ -76,7 +65,7 @@ class JSDistance { | |||
76 | return jsDivergence(map, mpcPMF); | 65 | return jsDivergence(map, mpcPMF); |
77 | } | 66 | } |
78 | 67 | ||
79 | def double naDistance(List<Double> samples){ | 68 | override double naDistance(List<Double> samples){ |
80 | // map list to array | 69 | // map list to array |
81 | var map = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter); | 70 | var map = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter); |
82 | 71 | ||
@@ -85,7 +74,7 @@ class JSDistance { | |||
85 | return jsDivergence(map, naPMF); | 74 | return jsDivergence(map, naPMF); |
86 | } | 75 | } |
87 | 76 | ||
88 | def double outDegreeDistance(List<Double> samples){ | 77 | override double outDegreeDistance(List<Double> samples){ |
89 | // map list to array | 78 | // map list to array |
90 | var map = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter); | 79 | var map = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter); |
91 | //if the size of array is smaller than 2, ks distance cannot be performed, simply return 1 | 80 | //if the size of array is smaller than 2, ks distance cannot be performed, simply return 1 |
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend index 58e0a8a3..86f5f23c 100644 --- a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend | |||
@@ -2,46 +2,65 @@ package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | |||
2 | 2 | ||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain | 3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain |
4 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader | 4 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader |
5 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup | ||
6 | import java.util.HashMap | ||
7 | import java.util.HashSet | ||
5 | import java.util.List | 8 | import java.util.List |
9 | import java.util.Set | ||
6 | import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest | 10 | import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest |
7 | 11 | ||
/**
 * Distance measure based on the two-sample Kolmogorov-Smirnov statistic,
 * comparing given samples against the representative metrics read for a
 * domain.
 */
class KSDistance extends CostDistance {
    // One tester instance is shared by all distance computations.
    static val ksTester = new KolmogorovSmirnovTest();
    // Representative metrics used as the reference side of every comparison.
    var MetricSampleGroup g;

    /**
     * Loads the representative metrics for the given domain.
     *
     * @param d the domain whose representative metrics are read
     */
    new(Domain d){
        this.g = RepMetricsReader.read(d);
    }

    /**
     * KS statistic between the reference MPC samples and {@code samples}.
     * Returns 1 when fewer than two samples are supplied, since the statistic
     * cannot be computed in that case.
     */
    override double mpcDistance(List<Double> samples){
        if (samples.size >= 2) {
            return ksTester.kolmogorovSmirnovStatistic(g.mpcSamples, samples);
        }
        return 1;
    }

    /**
     * KS statistic between the reference NA samples and {@code samples}.
     * Returns 1 when fewer than two samples are supplied.
     */
    override double naDistance(List<Double> samples){
        if (samples.size >= 2) {
            return ksTester.kolmogorovSmirnovStatistic(g.naSamples as double[], samples);
        }
        return 1;
    }

    /**
     * KS statistic between the reference out-degree samples and {@code samples}.
     * Returns 1 when fewer than two samples are supplied.
     */
    override double outDegreeDistance(List<Double> samples){
        if (samples.size >= 2) {
            return ksTester.kolmogorovSmirnovStatistic(g.outDegreeSamples, samples);
        }
        return 1;
    }

    /**
     * Sums a per-key distance over the union of the keys of {@code map} and of
     * the reference typed out-degree samples.
     *
     * Per key:
     * - key absent from {@code map}: adds 1;
     * - key absent from the reference: adds 100 times the number of values
     *   {@code map} holds for that key;
     * - both sides have at least two values: adds their KS statistic;
     * - otherwise: adds nothing when the reference has fewer than two values
     *   and contains every value of the instance, else adds 1.
     *
     * @param map out-degree samples grouped by key
     * @return the accumulated distance over all keys
     */
    def double typedOutDegreeDistance(HashMap<String, List<Integer>> map){
        val allKeys = new HashSet<String>(map.keySet);
        allKeys.addAll(g.typedOutDegreeSamples.keySet);

        var total = 0.0;
        for (typeName : allKeys) {
            if (!map.containsKey(typeName)) {
                total += 1;
            } else if (!g.typedOutDegreeSamples.containsKey(typeName)) {
                total += map.get(typeName).size * 100;
            } else {
                var double[] rep = g.typedOutDegreeSamples.get(typeName).stream().mapToDouble([it|it]).toArray();
                var double[] ins = map.get(typeName).stream().mapToDouble([it|it]).toArray();
                if (rep.size >= 2 && ins.size >= 2) {
                    total += ksTester.kolmogorovSmirnovStatistic(rep, ins);
                } else if (!(rep.size < 2 && rep.containsAll(ins))) {
                    // Too few values for a KS test and the reference does not
                    // cover the instance values: count a full unit of distance.
                    total += 1;
                }
            }
        }

        return total;
    }
}