aboutsummaryrefslogtreecommitdiffstats
path: root/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance
diff options
context:
space:
mode:
authorLibravatar 20001LastOrder <boqi.chen@mail.mcgill.ca>2020-11-03 22:52:26 -0500
committerLibravatar 20001LastOrder <boqi.chen@mail.mcgill.ca>2020-11-03 22:52:26 -0500
commit945f487a08b643392a5d5918c631640b9a0e4605 (patch)
treeb537c456e395950ce98daaabb9468c7c17d5a72b /Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance
parentFix numeric-solver-at-end (diff)
downloadVIATRA-Generator-945f487a08b643392a5d5918c631640b9a0e4605.tar.gz
VIATRA-Generator-945f487a08b643392a5d5918c631640b9a0e4605.tar.zst
VIATRA-Generator-945f487a08b643392a5d5918c631640b9a0e4605.zip
add realistic solver
Diffstat (limited to 'Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance')
-rw-r--r--Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend38
-rw-r--r--Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend72
-rw-r--r--Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend88
-rw-r--r--Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend102
4 files changed, 300 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend
new file mode 100644
index 00000000..613f0f43
--- /dev/null
+++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/CostDistance.xtend
@@ -0,0 +1,38 @@
1package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance
2
3import java.text.DecimalFormat
4import java.util.HashMap
5import java.util.List
6import org.eclipse.xtend.lib.annotations.Accessors
7
/**
 * Base type for the distance measures in this package. Subclasses provide one
 * distance per metric (node activity, MPC, out-degree); this class supplies the
 * shared helper that turns raw samples into an empirical probability mass function.
 */
abstract class CostDistance {
    /** Distance for the "na" metric computed from the given samples. */
    def abstract double naDistance(List<Double> samples)

    /** Distance for the "mpc" metric computed from the given samples. */
    def abstract double mpcDistance(List<Double> samples)

    /** Distance for the out-degree metric computed from the given samples. */
    def abstract double outDegreeDistance(List<Double> samples)

    /**
     * Builds an empirical PMF from {@code samples}.
     *
     * Every sample is rendered to text with {@code formatter} and that text is used
     * as the bucket key, so values that format identically share one bucket. Each
     * occurrence contributes {@code 1.0 / samples.length} to its bucket.
     *
     * @param samples   raw observations
     * @param formatter number format that defines the bucket keys
     * @return map from formatted value to accumulated probability; empty when
     *         {@code samples} is empty
     */
    def protected HashMap<String, Double> pmfFromSamples(double[] samples, DecimalFormat formatter) {
        val histogram = new HashMap<String, Double>()
        val sampleCount = samples.length

        for (observation : samples) {
            val bucket = formatter.format(observation)
            histogram.put(bucket, histogram.getOrDefault(bucket, 0.0) + 1.0 / sampleCount)
        }

        return histogram
    }
}
24
/**
 * Read-only holder bundling a feature vector, a numeric value and a reference
 * to a previous state object. All three are set once in the constructor and
 * exposed through generated public getters.
 */
class StateData {
    /** Feature vector supplied at construction (stored by reference, not copied). */
    @Accessors(PUBLIC_GETTER) var double[] features

    /** Numeric value supplied at construction. */
    @Accessors(PUBLIC_GETTER) var double value

    /** Opaque reference to the preceding state, as supplied at construction. */
    @Accessors(PUBLIC_GETTER) var Object lastState

    /**
     * @param features  feature vector to store
     * @param value     value to store
     * @param lastState previous-state reference to store
     */
    new(double[] features, double value, Object lastState) {
        this.lastState = lastState
        this.value = value
        this.features = features
    }
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend
new file mode 100644
index 00000000..d6adcc9a
--- /dev/null
+++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend
@@ -0,0 +1,72 @@
1package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance
2
3import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain
4import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader
5import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup
6import com.google.common.collect.Sets
7import java.text.DecimalFormat
8import java.util.ArrayList
9import java.util.HashMap
10import java.util.List
11import java.util.Map
12import java.util.Set
13
/**
 * Euclidean (L2) distance between the empirical PMF of a sample list and the
 * reference PMFs derived from a {@link MetricSampleGroup}.
 *
 * The reference PMFs for the mpc, na and out-degree metrics are computed once
 * in the constructor. Samples are bucketed by their text representation with
 * five decimal places.
 */
class EuclideanDistance extends CostDistance {
    var MetricSampleGroup g
    var HashMap<String, Double> mpcPMF
    var HashMap<String, Double> naPMF
    var HashMap<String, Double> outDegreePMF
    var DecimalFormat formatter

    /**
     * @param g sample group providing the reference mpc, na and out-degree samples
     */
    new(MetricSampleGroup g) {
        this.g = g

        var mpcSamples = g.mpcSamples
        var naSamples = g.naSamples.stream.mapToDouble([it]).toArray()
        var outDegreeSamples = g.outDegreeSamples.stream.mapToDouble([it]).toArray()

        // Bucket keys are formatted strings so that floating-point noise does not
        // split equal values into separate buckets.
        formatter = new DecimalFormat("#0.00000")

        mpcPMF = pmfFromSamples(mpcSamples, formatter)
        naPMF = pmfFromSamples(naSamples, formatter)
        outDegreePMF = pmfFromSamples(outDegreeSamples, formatter)
    }

    /** Euclidean distance between the PMF of {@code samples} and the reference na PMF. */
    override naDistance(List<Double> samples) {
        var pmfMap = pmfFromSamples(samples, formatter)
        return euclideanDistance(pmfMap, naPMF)
    }

    /** Euclidean distance between the PMF of {@code samples} and the reference mpc PMF. */
    override mpcDistance(List<Double> samples) {
        var pmfMap = pmfFromSamples(samples, formatter)
        return euclideanDistance(pmfMap, mpcPMF)
    }

    /** Euclidean distance between the PMF of {@code samples} and the reference out-degree PMF. */
    override outDegreeDistance(List<Double> samples) {
        var pmfMap = pmfFromSamples(samples, formatter)
        return euclideanDistance(pmfMap, outDegreePMF)
    }

    /**
     * Computes sqrt(sum over keys of (pmf1[key] - pmf2[key])^2), where the sum
     * runs over the union of both key sets and a missing key counts as
     * probability 0.
     *
     * The previous implementation added the two probabilities instead of
     * subtracting them, so identical distributions did not yield 0.
     */
    def private euclideanDistance(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2) {
        var sumOfSquares = 0.0
        for (key : Sets.union(pmf1.keySet(), pmf2.keySet())) {
            val double difference = pmf1.getOrDefault(key, 0.0) - pmf2.getOrDefault(key, 0.0)
            sumOfSquares += difference * difference
        }
        return Math.sqrt(sumOfSquares)
    }
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend
new file mode 100644
index 00000000..4a0a0dc3
--- /dev/null
+++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend
@@ -0,0 +1,88 @@
1package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance
2
3import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup
4import com.google.common.collect.Sets
5import java.text.DecimalFormat
6import java.util.HashMap
7import java.util.List
8
/**
 * Jensen-Shannon divergence (log base 2) between the empirical PMF of a sample
 * list and the reference PMFs derived from a {@link MetricSampleGroup}.
 *
 * The reference PMFs for the mpc, na and out-degree metrics are computed once
 * in the constructor; the node-type PMF is taken from the group as-is.
 */
class JSDistance extends CostDistance {
    var HashMap<String, Double> mpcPMF
    var HashMap<String, Double> naPMF
    var HashMap<String, Double> outDegreePMF
    var HashMap<String, Double> nodeTypesPMF
    var DecimalFormat formatter

    /**
     * @param g sample group providing the reference samples and node-type PMF
     */
    new(MetricSampleGroup g) {
        var mpcSamples = g.mpcSamples
        var naSamples = g.naSamples.stream.mapToDouble([it]).toArray()
        var outDegreeSamples = g.outDegreeSamples.stream.mapToDouble([it]).toArray()

        // Bucket keys are formatted strings so that floating-point noise does not
        // split equal values into separate buckets.
        formatter = new DecimalFormat("#0.00000")

        mpcPMF = pmfFromSamples(mpcSamples, formatter)
        naPMF = pmfFromSamples(naSamples, formatter)
        outDegreePMF = pmfFromSamples(outDegreeSamples, formatter)
        nodeTypesPMF = g.nodeTypeSamples
    }

    /**
     * Builds the mixture distribution M = (P + Q) / 2 over the union of both key
     * sets; a key missing from one side contributes probability 0 for that side.
     */
    def private combinePMF(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2) {
        var pmfMap = new HashMap<String, Double>()

        for (key : Sets.union(pmf1.keySet(), pmf2.keySet())) {
            // corresponds to M in the JS divergence
            var value = 1.0 / 2 * (pmf1.getOrDefault(key, 0.0) + pmf2.getOrDefault(key, 0.0))
            pmfMap.put(key, value)
        }
        return pmfMap
    }

    /** JS divergence: 1/2 * KL(p || m) + 1/2 * KL(q || m), with m the mixture of p and q. */
    def private jsDivergence(HashMap<String, Double> p, HashMap<String, Double> q) {
        val m = combinePMF(q, p)
        return 1.0 / 2 * klDivergence(p, m) + 1.0 / 2 * klDivergence(q, m)
    }

    /**
     * Kullback-Leibler divergence KL(p || q) in bits, summed over the keys that
     * occur in both maps. Keys present in only one map are ignored.
     *
     * Terms with p(key) == 0 are skipped (the usual 0 * log 0 = 0 convention);
     * evaluating them would produce NaN. If q(key) is 0 while p(key) is positive
     * the result is positive infinity.
     */
    def klDivergence(HashMap<String, Double> p, HashMap<String, Double> q) {
        var distance = 0.0
        for (key : q.keySet()) {
            if (p.containsKey(key)) {
                val double pValue = p.get(key)
                if (pValue > 0.0) {
                    // Math.log is the natural logarithm; divide by ln 2 to get log base 2
                    distance -= pValue * Math.log(q.get(key) / pValue) / Math.log(2)
                }
            }
        }
        return distance
    }

    /**
     * Shared implementation of the three metric distances: builds the PMF of
     * {@code samples} and returns its JS divergence from {@code referencePMF}.
     * When the sample PMF has fewer than two distinct buckets the fixed value 1
     * is returned instead.
     */
    def private double divergenceFrom(List<Double> samples, HashMap<String, Double> referencePMF) {
        // map list to array
        val samplePMF = pmfFromSamples(samples.stream().mapToDouble([it]).toArray(), formatter)
        if (samplePMF.size < 2) return 1.0
        return jsDivergence(samplePMF, referencePMF)
    }

    /** JS divergence of {@code samples} from the reference mpc PMF (1 if fewer than two buckets). */
    override double mpcDistance(List<Double> samples) {
        return divergenceFrom(samples, mpcPMF)
    }

    /** JS divergence of {@code samples} from the reference na PMF (1 if fewer than two buckets). */
    override double naDistance(List<Double> samples) {
        return divergenceFrom(samples, naPMF)
    }

    /** JS divergence of {@code samples} from the reference out-degree PMF (1 if fewer than two buckets). */
    override double outDegreeDistance(List<Double> samples) {
        return divergenceFrom(samples, outDegreePMF)
    }

    /** KL divergence (not JS) of the given node-type PMF from the reference node-type PMF. */
    def nodeTypeDistance(HashMap<String, Double> samples) {
        return klDivergence(samples, nodeTypesPMF)
    }
}
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend
new file mode 100644
index 00000000..c486a328
--- /dev/null
+++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend
@@ -0,0 +1,102 @@
1package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance
2
3import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup
4import java.util.HashMap
5import java.util.HashSet
6import java.util.List
7import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest
8
/**
 * Kolmogorov-Smirnov based distances between instance samples and the
 * reference samples held by a {@link MetricSampleGroup}.
 */
class KSDistance extends CostDistance {
    static var ksTester = new KolmogorovSmirnovTest()
    var MetricSampleGroup g

    /**
     * @param g sample group providing the reference samples
     */
    new(MetricSampleGroup g) {
        this.g = g
    }

    /** KS statistic against the reference mpc samples; 1 when fewer than two samples are given. */
    override double mpcDistance(List<Double> samples) {
        // the KS statistic is only computed for at least two samples
        if (samples.size < 2) {
            return 1
        }
        return ksTester.kolmogorovSmirnovStatistic(g.mpcSamples, samples)
    }

    /** KS statistic against the reference na samples; 1 when fewer than two samples are given. */
    override double naDistance(List<Double> samples) {
        // the KS statistic is only computed for at least two samples
        if (samples.size < 2) {
            return 1
        }
        return ksTester.kolmogorovSmirnovStatistic(g.naSamples as double[], samples)
    }

    /** KS statistic against the reference out-degree samples; 1 when fewer than two samples are given. */
    override double outDegreeDistance(List<Double> samples) {
        // the KS statistic is only computed for at least two samples
        if (samples.size < 2) {
            return 1
        }
        return ksTester.kolmogorovSmirnovStatistic(g.outDegreeSamples, samples)
    }

    /**
     * Sums a per-type penalty over every type key found in {@code map} or in the
     * reference typed out-degree samples:
     * - key missing from {@code map}: 1
     * - key missing from the reference: 100 times the number of instance samples
     * - both sides have at least two samples: their KS statistic
     * - otherwise: 0 if the reference has fewer than two samples and contains
     *   every instance sample, else 1
     */
    def double typedOutDegreeDistance(HashMap<String, List<Integer>> map) {
        val allKeys = new HashSet<String>(map.keySet)
        allKeys.addAll(g.typedOutDegreeSamples.keySet)

        var total = 0.0
        for (typeKey : allKeys) {
            if (!map.containsKey(typeKey)) {
                total += 1
            } else if (!g.typedOutDegreeSamples.containsKey(typeKey)) {
                total += map.get(typeKey).size * 100
            } else {
                // map list to array
                var double[] rep = g.typedOutDegreeSamples.get(typeKey).stream().mapToDouble([it|it]).toArray()
                var double[] ins = map.get(typeKey).stream().mapToDouble([it|it]).toArray()
                if (rep.size >= 2 && ins.size >= 2) {
                    total += ksTester.kolmogorovSmirnovStatistic(rep, ins)
                } else if (!(rep.size < 2 && rep.containsAll(ins))) {
                    total += 1
                }
            }
        }

        return total
    }

    /**
     * KS distance between the reference node-type distribution and {@code samples},
     * compared over the reference's keys; keys absent from {@code samples} count as 0.
     */
    def nodeTypeDistance(HashMap<String, Double> samples) {
        val reference = g.nodeTypeSamples
        val expected = newArrayList()
        val observed = newArrayList()

        for (typeKey : reference.keySet()) {
            expected.add(reference.get(typeKey))
            observed.add(samples.getOrDefault(typeKey, 0.0))
        }

        return ks_distance_two_dist(expected, observed)
    }

    /**
     * KS distance between the reference edge-type distribution and {@code samples},
     * compared over the reference's keys; keys absent from {@code samples} count as 0.
     */
    def edgeTypeDistance(HashMap<String, Double> samples) {
        val reference = g.edgeTypeSamples
        val expected = newArrayList()
        val observed = newArrayList()

        for (typeKey : reference.keySet()) {
            expected.add(reference.get(typeKey))
            observed.add(samples.getOrDefault(typeKey, 0.0))
        }

        return ks_distance_two_dist(expected, observed)
    }

    /**
     * Largest absolute gap between the running sums of two aligned probability
     * lists. The distributions are already known, so the statistic is computed
     * manually rather than from raw samples.
     *
     * Iterates over the indices of {@code dist1}; {@code dist2} must be at least as long.
     */
    def double ks_distance_two_dist(List<Double> dist1, List<Double> dist2) {
        var largestGap = 0.0
        var cumulative1 = 0.0
        var cumulative2 = 0.0
        for (index : 0 ..< dist1.size()) {
            cumulative1 += dist1.get(index)
            cumulative2 += dist2.get(index)
            largestGap = Math.max(largestGap, Math.abs(cumulative1 - cumulative2))
        }
        return largestGap
    }
}