1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
from collections import Counter

from scipy import stats
from scipy.spatial import distance
def ks_distance(samples1, samples2):
    """Run SciPy's two-sample Kolmogorov-Smirnov test on two sample sets.

    Args:
        samples1: First collection of observations, passed to
            ``scipy.stats.ks_2samp`` unchanged.
        samples2: Second collection of observations, passed unchanged.

    Returns:
        A 2-tuple holding the two values that ``ks_2samp`` unpacks to
        (the test statistic followed by the p-value).
    """
    statistic, p_value = stats.ks_2samp(samples1, samples2)
    return statistic, p_value
def manual_ks(pdf1, pdf2):
    """Return the largest absolute gap between two running sums.

    The two inputs are walked in lockstep; after each pair the cumulative
    totals are compared and the biggest absolute difference seen so far is
    kept. Iteration stops as soon as the shorter input is exhausted.

    Args:
        pdf1: Iterable of numeric weights for the first distribution.
        pdf2: Iterable of numeric weights for the second distribution.

    Returns:
        The maximum of ``abs(cumsum(pdf1) - cumsum(pdf2))`` over the paired
        positions, or ``0`` when there are no pairs.
    """
    running1 = 0
    running2 = 0
    widest_gap = 0
    for weight1, weight2 in zip(pdf1, pdf2):
        running1 += weight1
        running2 += weight2
        gap = abs(running1 - running2)
        # Strict comparison keeps the earlier value on ties.
        if gap > widest_gap:
            widest_gap = gap
    return widest_gap
def js_distance(samples1, samples2):
    """Compute the base-2 Jensen-Shannon distance between two sample sets.

    Each sample set is converted to relative frequencies with
    ``fromSamples``; both frequency maps are then expanded over the union of
    their keys with ``distributionFromMap`` so the two vectors line up
    position by position before being handed to SciPy.

    Args:
        samples1: First collection of hashable observations.
        samples2: Second collection of hashable observations.

    Returns:
        The value of ``scipy.spatial.distance.jensenshannon`` for the two
        aligned frequency vectors, using logarithm base 2.
    """
    freq1 = fromSamples(samples1)
    freq2 = fromSamples(samples2)
    # One shared key set drives both vectors, so they use the same ordering.
    support = set(freq1) | set(freq2)
    probs1 = distributionFromMap(freq1, support)
    probs2 = distributionFromMap(freq2, support)
    return distance.jensenshannon(probs1, probs2, base=2)
def euclidean_distance(samples1, samples2):
    """Compute the Euclidean distance between two empirical distributions.

    Each sample set is converted to relative frequencies with
    ``fromSamples``; both frequency maps are expanded over the union of
    their keys with ``distributionFromMap`` and the square root of the sum
    of squared component differences is returned.

    Args:
        samples1: First collection of hashable observations.
        samples2: Second collection of hashable observations.

    Returns:
        The L2 distance between the two aligned frequency vectors.
    """
    freq1 = fromSamples(samples1)
    freq2 = fromSamples(samples2)
    # One shared key set drives both vectors, so they use the same ordering.
    support = set(freq1) | set(freq2)
    probs1 = distributionFromMap(freq1, support)
    probs2 = distributionFromMap(freq2, support)
    squared_total = sum((p - q) ** 2 for p, q in zip(probs1, probs2))
    return squared_total ** 0.5
def fromSamples(samples):
    """Build the empirical probability distribution of *samples*.

    Args:
        samples: Iterable of hashable observations. It is traversed exactly
            once, so one-shot iterators and generators are accepted as well
            as lists and tuples.

    Returns:
        A dict mapping each distinct observation to its relative frequency
        (occurrences divided by the total number of observations), keyed in
        first-seen order. Empty input yields an empty dict.
    """
    counts = Counter(samples)
    # Derive the total from the tally rather than len(samples) so inputs
    # without __len__ (generators, iterators) work too.
    total = sum(counts.values())
    return {value: count / total for value, count in counts.items()}
def distributionFromMap(m, allKeys):
    """Expand a frequency map into a list aligned with *allKeys*.

    Args:
        m: Mapping from key to value (anything providing ``.get``).
        allKeys: Iterable of keys that fixes the order of the output.

    Returns:
        A list with one entry per key in *allKeys*, in iteration order:
        the mapped value when the key is present in *m*, otherwise ``0``.
    """
    return [m.get(key, 0) for key in allKeys]
|