diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py new file mode 100644 index 00000000..cf532bc5 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py | |||
@@ -0,0 +1,53 @@ | |||
1 | from scipy import stats | ||
2 | from scipy.spatial import distance | ||
3 | |||
def ks_distance(samples1, samples2):
    """Run SciPy's two-sample Kolmogorov-Smirnov test on two sample sets.

    Args:
        samples1: First collection of observations, passed straight to
            ``scipy.stats.ks_2samp``.
        samples2: Second collection of observations.

    Returns:
        A plain ``(statistic, p_value)`` tuple taken from the SciPy result.
    """
    outcome = stats.ks_2samp(samples1, samples2)
    return (outcome.statistic, outcome.pvalue)
7 | |||
def manual_ks(pdf1, pdf2):
    """Return the largest absolute gap between the running sums of two sequences.

    Both inputs are walked in lockstep; after each pair the cumulative totals
    are compared and the biggest absolute difference seen so far is kept.
    Iteration stops at the end of the shorter input (``zip`` semantics).

    Args:
        pdf1: Sequence of numeric masses for the first distribution.
        pdf2: Sequence of numeric masses for the second distribution.

    Returns:
        The maximum absolute difference between the two running sums, or ``0``
        when there is nothing to compare.
    """
    cumulative_a = 0
    cumulative_b = 0
    largest_gap = 0
    for mass_a, mass_b in zip(pdf1, pdf2):
        cumulative_a += mass_a
        cumulative_b += mass_b
        gap = abs(cumulative_a - cumulative_b)
        # Strict comparison keeps the earlier value on ties, like max(old, new).
        if gap > largest_gap:
            largest_gap = gap
    return largest_gap
17 | |||
def js_distance(samples1, samples2):
    """Compute the base-2 Jensen-Shannon distance between two sample sets.

    Each sample set is turned into a relative-frequency map with
    ``fromSamples``; both maps are then expanded over the union of their keys
    with ``distributionFromMap`` so the two vectors line up element by
    element before being handed to ``scipy.spatial.distance.jensenshannon``.

    Args:
        samples1: First collection of hashable observations.
        samples2: Second collection of hashable observations.

    Returns:
        The value returned by ``distance.jensenshannon`` with ``base=2``.
    """
    freq_a = fromSamples(samples1)
    freq_b = fromSamples(samples2)
    # One shared key set guarantees both vectors use the same ordering.
    support = set(freq_a.keys()) | set(freq_b.keys())
    return distance.jensenshannon(
        distributionFromMap(freq_a, support),
        distributionFromMap(freq_b, support),
        2,
    )
25 | |||
def euclidean_distance(samples1, samples2):
    """Compute the Euclidean distance between two empirical distributions.

    Each sample set is converted to a relative-frequency map with
    ``fromSamples``; both maps are expanded over the union of their keys with
    ``distributionFromMap`` (missing keys count as 0) and the L2 norm of the
    element-wise difference is returned.

    Args:
        samples1: First collection of hashable observations.
        samples2: Second collection of hashable observations.

    Returns:
        The square root of the summed squared differences; ``0.0`` when both
        inputs are empty.
    """
    map1 = fromSamples(samples1)
    map2 = fromSamples(samples2)
    # One shared key set guarantees both vectors use the same ordering.
    all_keys = set(map1.keys()) | set(map2.keys())
    dist1 = distributionFromMap(map1, all_keys)
    dist2 = distributionFromMap(map2, all_keys)
    # Named ``squared_sum`` rather than ``distance`` so the module-level
    # ``scipy.spatial.distance`` import is not shadowed inside this function.
    squared_sum = sum((p - q) ** 2 for p, q in zip(dist1, dist2))
    return squared_sum ** 0.5
36 | |||
def fromSamples(samples):
    """Build the empirical probability mass function of ``samples``.

    Args:
        samples: Any iterable of hashable observations. Iterators and
            generators are accepted because the total is derived from the
            counts rather than from ``len(samples)``.

    Returns:
        A plain ``dict`` mapping each distinct observation to its relative
        frequency (count divided by the number of observations), in order of
        first appearance. An empty input yields an empty dict.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    counts = Counter(samples)
    total = sum(counts.values())
    # No division happens for empty input, so ``total == 0`` is harmless.
    return {value: count / total for value, count in counts.items()}
46 | |||
def distributionFromMap(m, allKeys):
    """Expand a frequency map into a list aligned with ``allKeys``.

    Args:
        m: Mapping from observation to its frequency.
        allKeys: Iterable of keys that defines the output order.

    Returns:
        A list holding ``m``'s value for every key of ``allKeys`` in iteration
        order, with ``0`` substituted for keys absent from ``m``.
    """
    return [m.get(key, 0) for key in allKeys]
53 | |||