diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance')
3 files changed, 42 insertions, 20 deletions
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend index b945d97b..d6adcc9a 100644 --- a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/EuclideanDistance.xtend | |||
@@ -18,13 +18,12 @@ class EuclideanDistance extends CostDistance{ | |||
18 | var HashMap<String, Double> outDegreePMF; | 18 | var HashMap<String, Double> outDegreePMF; |
19 | var DecimalFormat formatter; | 19 | var DecimalFormat formatter; |
20 | 20 | ||
21 | new(Domain d){ | 21 | new(MetricSampleGroup g){ |
22 | var metrics = RepMetricsReader.read(d); | 22 | this.g = g; |
23 | this.g = metrics; | ||
24 | 23 | ||
25 | var mpcSamples = metrics.mpcSamples; | 24 | var mpcSamples = g.mpcSamples; |
26 | var naSamples = metrics.naSamples.stream.mapToDouble([it]).toArray(); | 25 | var naSamples = g.naSamples.stream.mapToDouble([it]).toArray(); |
27 | var outDegreeSamples = metrics.outDegreeSamples.stream.mapToDouble([it]).toArray(); | 26 | var outDegreeSamples = g.outDegreeSamples.stream.mapToDouble([it]).toArray(); |
28 | 27 | ||
29 | //format the number as a string to avoid precision issues | 28 | //format the number as a string to avoid precision issues |
30 | formatter = new DecimalFormat("#0.00000"); | 29 | formatter = new DecimalFormat("#0.00000"); |
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend index df65b81f..4a0a0dc3 100644 --- a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/JSDistance.xtend | |||
@@ -1,7 +1,6 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | 1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance |
2 | 2 | ||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain | 3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup |
4 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader | ||
5 | import com.google.common.collect.Sets | 4 | import com.google.common.collect.Sets |
6 | import java.text.DecimalFormat | 5 | import java.text.DecimalFormat |
7 | import java.util.HashMap | 6 | import java.util.HashMap |
@@ -11,13 +10,13 @@ class JSDistance extends CostDistance { | |||
11 | var HashMap<String, Double> mpcPMF; | 10 | var HashMap<String, Double> mpcPMF; |
12 | var HashMap<String, Double> naPMF; | 11 | var HashMap<String, Double> naPMF; |
13 | var HashMap<String, Double> outDegreePMF; | 12 | var HashMap<String, Double> outDegreePMF; |
13 | var HashMap<String, Double> nodeTypesPMF; | ||
14 | var DecimalFormat formatter; | 14 | var DecimalFormat formatter; |
15 | 15 | ||
16 | new(Domain d){ | 16 | new(MetricSampleGroup g){ |
17 | var metrics = RepMetricsReader.read(d); | 17 | var mpcSamples = g.mpcSamples; |
18 | var mpcSamples = metrics.mpcSamples; | 18 | var naSamples = g.naSamples.stream.mapToDouble([it]).toArray(); |
19 | var naSamples = metrics.naSamples.stream.mapToDouble([it]).toArray(); | 19 | var outDegreeSamples = g.outDegreeSamples.stream.mapToDouble([it]).toArray(); |
20 | var outDegreeSamples = metrics.outDegreeSamples.stream.mapToDouble([it]).toArray(); | ||
21 | 20 | ||
22 | //format the number as a string to avoid precision issues | 21 | //format the number as a string to avoid precision issues |
23 | formatter = new DecimalFormat("#0.00000"); | 22 | formatter = new DecimalFormat("#0.00000"); |
@@ -25,6 +24,7 @@ class JSDistance extends CostDistance { | |||
25 | mpcPMF = pmfFromSamples(mpcSamples, formatter); | 24 | mpcPMF = pmfFromSamples(mpcSamples, formatter); |
26 | naPMF = pmfFromSamples(naSamples, formatter); | 25 | naPMF = pmfFromSamples(naSamples, formatter); |
27 | outDegreePMF = pmfFromSamples(outDegreeSamples, formatter); | 26 | outDegreePMF = pmfFromSamples(outDegreeSamples, formatter); |
27 | nodeTypesPMF = g.nodeTypeSamples; | ||
28 | } | 28 | } |
29 | 29 | ||
30 | def private combinePMF(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){ | 30 | def private combinePMF(HashMap<String, Double> pmf1, HashMap<String, Double> pmf2){ |
@@ -81,4 +81,8 @@ class JSDistance extends CostDistance { | |||
81 | if(map.size < 2) return 1; | 81 | if(map.size < 2) return 1; |
82 | return jsDivergence(map, outDegreePMF); | 82 | return jsDivergence(map, outDegreePMF); |
83 | } | 83 | } |
84 | |||
85 | def nodeTypeDistance(HashMap<String, Double> samples){ | ||
86 | return klDivergence(samples, nodeTypesPMF); | ||
87 | } | ||
84 | } \ No newline at end of file | 88 | } \ No newline at end of file |
diff --git a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend index 86f5f23c..08d8704a 100644 --- a/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend +++ b/Metrics/Metrics-Calculation/ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator/src/ca/mcgill/ecse/dslreasoner/realistic/metrics/calculator/distance/KSDistance.xtend | |||
@@ -1,22 +1,18 @@ | |||
1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance | 1 | package ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.distance |
2 | 2 | ||
3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.app.Domain | ||
4 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.io.RepMetricsReader | ||
5 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup | 3 | import ca.mcgill.ecse.dslreasoner.realistic.metrics.calculator.metrics.MetricSampleGroup |
6 | import java.util.HashMap | 4 | import java.util.HashMap |
7 | import java.util.HashSet | 5 | import java.util.HashSet |
8 | import java.util.List | 6 | import java.util.List |
9 | import java.util.Set | ||
10 | import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest | 7 | import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest |
11 | 8 | ||
12 | class KSDistance extends CostDistance { | 9 | class KSDistance extends CostDistance { |
13 | var static ksTester = new KolmogorovSmirnovTest(); | 10 | var static ksTester = new KolmogorovSmirnovTest(); |
14 | var MetricSampleGroup g; | 11 | var MetricSampleGroup g; |
15 | new(Domain d){ | ||
16 | var metrics = RepMetricsReader.read(d); | ||
17 | this.g = metrics; | ||
18 | } | ||
19 | 12 | ||
13 | new(MetricSampleGroup g){ | ||
14 | this.g = g; | ||
15 | } | ||
20 | override double mpcDistance(List<Double> samples){ | 16 | override double mpcDistance(List<Double> samples){ |
21 | //if the size of array is smaller than 2, ks distance cannot be performed, simply return 1 | 17 | //if the size of array is smaller than 2, ks distance cannot be performed, simply return 1 |
22 | if(samples.size < 2) return 1; | 18 | if(samples.size < 2) return 1; |
@@ -63,4 +59,27 @@ class KSDistance extends CostDistance { | |||
63 | 59 | ||
64 | return value; | 60 | return value; |
65 | } | 61 | } |
62 | |||
63 | def nodeTypeDistance(HashMap<String, Double> samples){ | ||
64 | var typesDistMap = g.nodeTypeSamples; | ||
65 | var sourceDist = newArrayList(); | ||
66 | var instanceDist = newArrayList(); | ||
67 | |||
68 | for(key : typesDistMap.keySet()){ | ||
69 | sourceDist.add(typesDistMap.get(key)); | ||
70 | instanceDist.add(samples.getOrDefault(key, 0.0)); | ||
71 | } | ||
72 | |||
73 | // Since we already know the pdf, we compute the ks-test manually | |||
74 | var ksStatistics = 0.0; | ||
75 | var sum1 = 0.0; | ||
76 | var sum2 = 0.0; | ||
77 | for(var i = 0; i < sourceDist.size(); i++){ | ||
78 | sum1 += sourceDist.get(i); | ||
79 | sum2 += instanceDist.get(i); | ||
80 | |||
81 | ksStatistics = Math.max(ksStatistics, Math.abs(sum1 - sum2)); | ||
82 | } | ||
83 | return ksStatistics; | ||
84 | } | ||
66 | } \ No newline at end of file | 85 | } \ No newline at end of file |