diff options
author | 2020-11-03 22:52:26 -0500 | |
---|---|---|
committer | 2020-11-03 22:52:26 -0500 | |
commit | 945f487a08b643392a5d5918c631640b9a0e4605 (patch) | |
tree | b537c456e395950ce98daaabb9468c7c17d5a72b /Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance | |
parent | Fix numeric-solver-at-end (diff) | |
download | VIATRA-Generator-945f487a08b643392a5d5918c631640b9a0e4605.tar.gz VIATRA-Generator-945f487a08b643392a5d5918c631640b9a0e4605.tar.zst VIATRA-Generator-945f487a08b643392a5d5918c631640b9a0e4605.zip |
add realistic solver
Diffstat (limited to 'Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance')
4 files changed, 300 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend new file mode 100644 index 00000000..613f0f43 --- /dev/null +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend | |||
@@ -0,0 +1,38 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | ||
2 | |||
3 | import java.text.DecimalFormat | ||
4 | import java.util.HashMap | ||
5 | import java.util.List | ||
6 | import org.eclipse.xtend.lib.annotations.Accessors | ||
7 | |||
/**
 * Base type for cost functions that score a list of metric samples.
 *
 * Subclasses provide one distance per metric family ("na", "mpc" and
 * "out degree"); this class only contributes the shared helper that turns
 * raw samples into a probability mass function.
 */
abstract class CostDistance {
	/** Distance for the "na" metric samples. */
	def abstract double naDistance(List<Double> samples);

	/** Distance for the "mpc" metric samples. */
	def abstract double mpcDistance(List<Double> samples);

	/** Distance for the "out degree" metric samples. */
	def abstract double outDegreeDistance(List<Double> samples);

	/**
	 * Builds an empirical probability mass function from the given samples.
	 *
	 * Every sample is rendered to text with {@code formatter}; samples that
	 * render to the same text share one bucket, and each sample contributes
	 * {@code 1 / samples.length} to its bucket.
	 *
	 * @param samples   the raw sample values
	 * @param formatter turns a sample into its bucket key
	 * @return a map from formatted sample to accumulated probability; empty
	 *         when {@code samples} is empty
	 */
	def protected pmfFromSamples(double[] samples, DecimalFormat formatter){
		val pmf = new HashMap<String, Double>();
		val sampleCount = samples.length;

		for(sample : samples){
			val bucket = formatter.format(sample);
			pmf.put(bucket, pmf.getOrDefault(bucket, 0.0) + 1.0 / sampleCount);
		}

		return pmf;
	}
}
24 | |||
/**
 * Read-only bundle of a feature vector, a numeric value and a reference to
 * the last state, exposed through public getters only.
 */
@Accessors(PUBLIC_GETTER)
class StateData{
	val double[] features;
	val double value;
	val Object lastState;

	/**
	 * Stores the three components as given; the feature array is kept by
	 * reference, not copied.
	 */
	new(double[] features, double value, Object lastState){
		this.features = features;
		this.value = value;
		this.lastState = lastState;
	}
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend new file mode 100644 index 00000000..d6adcc9a --- /dev/null +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend | |||
@@ -0,0 +1,72 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | ||
2 | |||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain | ||
4 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader | ||
5 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup | ||
6 | import com.google.common.collect.Sets | ||
7 | import java.text.DecimalFormat | ||
8 | import java.util.ArrayList | ||
9 | import java.util.HashMap | ||
10 | import java.util.List | ||
11 | import java.util.Map | ||
12 | import java.util.Set | ||
13 | |||
/**
 * Cost function that compares sample distributions by the Euclidean (L2)
 * distance between their probability mass functions.
 *
 * The reference PMFs are computed once, in the constructor, from the given
 * {@link MetricSampleGroup}; each distance call builds a PMF from the
 * supplied samples and compares it with the matching reference PMF.
 */
class EuclideanDistance extends CostDistance{
	var MetricSampleGroup g;
	var HashMap<String, Double> mpcPMF;
	var HashMap<String, Double> naPMF;
	var HashMap<String, Double> outDegreePMF;
	var DecimalFormat formatter;

	/**
	 * Precomputes the reference PMFs for the "mpc", "na" and "out degree"
	 * samples of {@code g}.
	 */
	new(MetricSampleGroup g){
		this.g = g;

		var mpcSamples = g.mpcSamples;
		var naSamples = g.naSamples.stream.mapToDouble([it]).toArray();
		var outDegreeSamples = g.outDegreeSamples.stream.mapToDouble([it]).toArray();

		// Samples are bucketed by their formatted text (5 decimals) so that
		// values differing only by floating-point noise land in the same bucket.
		formatter = new DecimalFormat("#0.00000");

		mpcPMF = pmfFromSamples(mpcSamples, formatter);
		naPMF = pmfFromSamples(naSamples, formatter);
		outDegreePMF = pmfFromSamples(outDegreeSamples, formatter);
	}

	/** Euclidean distance between the PMF of {@code samples} and the reference "na" PMF. */
	override naDistance(List<Double> samples) {
		var pmfMap = pmfFromSamples(samples, formatter);
		return euclideanDistance(pmfMap, naPMF);
	}

	/** Euclidean distance between the PMF of {@code samples} and the reference "mpc" PMF. */
	override mpcDistance(List<Double> samples) {
		var pmfMap = pmfFromSamples(samples, formatter);
		return euclideanDistance(pmfMap, mpcPMF);
	}

	/** Euclidean distance between the PMF of {@code samples} and the reference "out degree" PMF. */
	override outDegreeDistance(List<Double> samples) {
		var pmfMap = pmfFromSamples(samples, formatter);
		return euclideanDistance(pmfMap, outDegreePMF);
	}

	/**
	 * Computes sqrt(sum over keys of (pmf1[key] - pmf2[key])^2), where the
	 * keys are the union of both maps' keys and a missing key counts as
	 * probability 0.
	 */
	def private euclideanDistance(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){
		var keys = Sets.union(pmf1.keySet(), pmf2.keySet());
		// Both lists are built from the same key set, so index i refers to
		// the same bucket in each of them.
		var pmfList1 = pmfMapToList(pmf1, keys);
		var pmfList2 = pmfMapToList(pmf2, keys);
		var distance = 0.0;
		for(var i = 0; i < pmfList1.size(); i++){
			// The distance is built from the DIFFERENCE of the probabilities;
			// summing them would give identical distributions a non-zero distance.
			val double difference = pmfList1.get(i) - pmfList2.get(i);
			distance += difference * difference;
		}

		return Math.sqrt(distance);
	}

	/**
	 * Lists the probabilities of {@code keys} in the iteration order of
	 * {@code keys}, using 0 for keys that {@code map} does not contain.
	 */
	def private pmfMapToList(Map<String, Double> map, Set<String> keys){
		var list = new ArrayList<Double>();
		for(key : keys){
			var value = map.getOrDefault(key, 0.0);
			list.add(value);
		}
		return list;
	}
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend new file mode 100644 index 00000000..4a0a0dc3 --- /dev/null +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend | |||
@@ -0,0 +1,88 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | ||
2 | |||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup | ||
4 | import com.google.common.collect.Sets | ||
5 | import java.text.DecimalFormat | ||
6 | import java.util.HashMap | ||
7 | import java.util.List | ||
8 | |||
/**
 * Cost function that compares sample distributions by the Jensen-Shannon
 * divergence (base-2 logarithm) between their probability mass functions.
 *
 * The reference PMFs are computed once, in the constructor, from the given
 * {@link MetricSampleGroup}.
 */
class JSDistance extends CostDistance {
	var HashMap<String, Double> mpcPMF;
	var HashMap<String, Double> naPMF;
	var HashMap<String, Double> outDegreePMF;
	var HashMap<String, Double> nodeTypesPMF;
	var DecimalFormat formatter;

	/**
	 * Precomputes the reference PMFs for the "mpc", "na" and "out degree"
	 * samples of {@code g} and keeps its node type distribution as is.
	 */
	new(MetricSampleGroup g){
		var mpcSamples = g.mpcSamples;
		var naSamples = g.naSamples.stream.mapToDouble([it]).toArray();
		var outDegreeSamples = g.outDegreeSamples.stream.mapToDouble([it]).toArray();

		// Samples are bucketed by their formatted text (5 decimals) so that
		// values differing only by floating-point noise land in the same bucket.
		formatter = new DecimalFormat("#0.00000");

		mpcPMF = pmfFromSamples(mpcSamples, formatter);
		naPMF = pmfFromSamples(naSamples, formatter);
		outDegreePMF = pmfFromSamples(outDegreeSamples, formatter);
		nodeTypesPMF = g.nodeTypeSamples;
	}

	/**
	 * Builds the mixture distribution M = (P1 + P2) / 2 over the union of
	 * both key sets; a key missing from one map counts as probability 0.
	 */
	def private combinePMF(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){
		var pmfMap = new HashMap<String, Double>();

		var union = Sets.union(pmf1.keySet(), pmf2.keySet());

		for(key : union){
			// corresponding to M in the JS divergence
			var value = 1.0/2 * (pmf1.getOrDefault(key, 0.0) + pmf2.getOrDefault(key, 0.0));
			pmfMap.put(key, value);
		}
		return pmfMap;
	}

	/**
	 * Jensen-Shannon divergence: the average of KL(p || m) and KL(q || m),
	 * where m is the mixture of p and q.
	 */
	def private jsDivergence(HashMap<String, Double> p, HashMap<String, Double> q){
		val m = combinePMF(q, p);
		var distance = 1.0/2 * klDivergence(p, m) + 1.0/2 * klDivergence(q, m);
		return distance;
	}

	/**
	 * Kullback-Leibler divergence KL(p || q) in bits, summed over the keys
	 * that are present in both maps.
	 *
	 * Terms with p(key) == 0 contribute nothing (the usual 0 * log 0 = 0
	 * convention) instead of producing NaN. A key with p(key) > 0 and
	 * q(key) == 0 still yields positive infinity.
	 */
	def klDivergence(HashMap<String, Double> p, HashMap<String, Double> q){
		var distance = 0.0;
		for(key : q.keySet()){
			if(p.containsKey(key)){
				val double pValue = p.get(key);
				// Without this guard, 0 * log(q / 0) evaluates to 0 * Infinity = NaN
				// and poisons the whole sum.
				if(pValue > 0.0){
					// dividing by log(2) converts the natural logarithm to base 2
					distance -= pValue * Math.log(q.get(key) / pValue) / Math.log(2);
				}
			}
		}
		return distance;
	}

	/**
	 * Shared implementation of the three metric distances: builds the PMF of
	 * {@code samples} and returns its JS divergence to {@code referencePMF}.
	 * When the samples fall into fewer than two distinct buckets the
	 * comparison is skipped and 1 is returned.
	 */
	def private double distanceToReference(List<Double> samples, HashMap<String, Double> referencePMF){
		// map list to array
		val pmf = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter);
		if(pmf.size < 2) return 1;
		return jsDivergence(pmf, referencePMF);
	}

	/** JS divergence between the PMF of {@code samples} and the reference "mpc" PMF. */
	override double mpcDistance(List<Double> samples){
		return distanceToReference(samples, mpcPMF);
	}

	/** JS divergence between the PMF of {@code samples} and the reference "na" PMF. */
	override double naDistance(List<Double> samples){
		return distanceToReference(samples, naPMF);
	}

	/** JS divergence between the PMF of {@code samples} and the reference "out degree" PMF. */
	override double outDegreeDistance(List<Double> samples){
		return distanceToReference(samples, outDegreePMF);
	}

	/**
	 * KL divergence (not the symmetric JS divergence) from the given node
	 * type distribution to the reference node type distribution.
	 */
	def nodeTypeDistance(HashMap<String, Double> samples){
		return klDivergence(samples, nodeTypesPMF);
	}
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend new file mode 100644 index 00000000..c486a328 --- /dev/null +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend | |||
@@ -0,0 +1,102 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | ||
2 | |||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup | ||
4 | import java.util.HashMap | ||
5 | import java.util.HashSet | ||
6 | import java.util.List | ||
7 | import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest | ||
8 | |||
/**
 * Cost function based on the Kolmogorov-Smirnov statistic between the
 * supplied samples and the reference samples of a {@link MetricSampleGroup}.
 */
class KSDistance extends CostDistance {
	static val ksTester = new KolmogorovSmirnovTest();
	var MetricSampleGroup g;

	new(MetricSampleGroup g){
		this.g = g;
	}

	/** KS statistic against the reference "mpc" samples; 1 when fewer than two samples are given. */
	override double mpcDistance(List<Double> samples){
		// the KS statistic is not computed for fewer than 2 samples
		if(samples.size >= 2){
			return ksTester.kolmogorovSmirnovStatistic(g.mpcSamples, samples);
		}
		return 1;
	}

	/** KS statistic against the reference "na" samples; 1 when fewer than two samples are given. */
	override double naDistance(List<Double> samples){
		// the KS statistic is not computed for fewer than 2 samples
		if(samples.size >= 2){
			return ksTester.kolmogorovSmirnovStatistic(g.naSamples as double[], samples);
		}
		return 1;
	}

	/** KS statistic against the reference "out degree" samples; 1 when fewer than two samples are given. */
	override double outDegreeDistance(List<Double> samples){
		// the KS statistic is not computed for fewer than 2 samples
		if(samples.size >= 2){
			return ksTester.kolmogorovSmirnovStatistic(g.outDegreeSamples, samples);
		}
		return 1;
	}

	/**
	 * Sums a per-type distance over every type that occurs in {@code map}
	 * or in the reference typed out-degree samples.
	 */
	def double typedOutDegreeDistance(HashMap<String, List<Integer>> map){
		val allTypes = new HashSet<String>(map.keySet);
		allTypes.addAll(g.typedOutDegreeSamples.keySet);

		var total = 0.0;
		for(type : allTypes){
			total += typedOutDegreeDistanceFor(type, map);
		}
		return total;
	}

	/**
	 * Distance contribution of a single type:
	 * 1 when the type is missing from {@code map};
	 * 100 per sample when the type is missing from the reference;
	 * the KS statistic when both sides have at least two samples;
	 * otherwise 0 if the reference has fewer than two samples and contains
	 * every instance sample, and 1 in all remaining cases.
	 */
	def private double typedOutDegreeDistanceFor(String type, HashMap<String, List<Integer>> map){
		val reference = g.typedOutDegreeSamples;
		if(!map.containsKey(type)){
			return 1;
		}
		if(!reference.containsKey(type)){
			return map.get(type).size * 100;
		}

		val double[] rep = reference.get(type).stream().mapToDouble([it|it]).toArray();
		val double[] ins = map.get(type).stream().mapToDouble([it|it]).toArray();
		if(rep.size >= 2 && ins.size >= 2){
			return ksTester.kolmogorovSmirnovStatistic(rep, ins);
		}
		// at least one side is too small for the KS statistic
		if(rep.size < 2 && rep.containsAll(ins)){
			return 0;
		}
		return 1;
	}

	/**
	 * KS statistic between the reference node type distribution and
	 * {@code samples}; only types known to the reference are compared, and a
	 * type missing from {@code samples} counts as 0.
	 */
	def nodeTypeDistance(HashMap<String, Double> samples){
		val List<Double> referenceDist = newArrayList();
		val List<Double> instanceDist = newArrayList();

		for(entry : g.nodeTypeSamples.entrySet){
			referenceDist.add(entry.value);
			instanceDist.add(samples.getOrDefault(entry.key, 0.0));
		}

		return ks_distance_two_dist(referenceDist, instanceDist);
	}

	/**
	 * KS statistic between the reference edge type distribution and
	 * {@code samples}; only types known to the reference are compared, and a
	 * type missing from {@code samples} counts as 0.
	 */
	def edgeTypeDistance(HashMap<String, Double> samples){
		val List<Double> referenceDist = newArrayList();
		val List<Double> instanceDist = newArrayList();

		for(entry : g.edgeTypeSamples.entrySet){
			referenceDist.add(entry.value);
			instanceDist.add(samples.getOrDefault(entry.key, 0.0));
		}

		return ks_distance_two_dist(referenceDist, instanceDist);
	}

	/**
	 * KS statistic of two distributions given directly as probability lists:
	 * the largest absolute gap between their running sums.
	 *
	 * {@code dist2} must have at least as many entries as {@code dist1}.
	 */
	def double ks_distance_two_dist(List<Double> dist1, List<Double> dist2){
		// The probabilities are already known, so the statistic is computed manually.
		var largestGap = 0.0;
		var cumulative1 = 0.0;
		var cumulative2 = 0.0;
		for(i : 0 ..< dist1.size){
			cumulative1 += dist1.get(i);
			cumulative2 += dist2.get(i);

			largestGap = Math.max(largestGap, Math.abs(cumulative1 - cumulative2));
		}
		return largestGap;
	}
}