From 991dacefdb8f78fccc359d3d2ec836dc2e7fc80a Mon Sep 17 00:00:00 2001 From: 20001LastOrder Date: Wed, 10 Jul 2019 10:56:00 -0400 Subject: measurements for the different violation types, comparison for different generation config --- .../metrics_plot/utils/DistributionMetrics.py | 43 ++++++++++++++++++++++ .../metrics_plot/utils/GraphType.py | 2 +- .../metrics_plot/utils/readCSV.py | 7 +++- 3 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py (limited to 'Metrics/Metrics-Calculation/metrics_plot/utils') diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py new file mode 100644 index 00000000..6e707108 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py @@ -0,0 +1,43 @@ +from scipy import stats +from scipy.spatial import distance + +def ks_distance(samples1, samples2): + value, p = stats.ks_2samp(samples1, samples2) + return (value, p) + +def js_distance(samples1, samples2): + map1 = fromSamples(samples1) + map2 = fromSamples(samples2) + allKeys = set(map1.keys()) | set(map2.keys()) + dist1 = distributionFromMap(map1, allKeys) + dist2 = distributionFromMap(map2, allKeys) + return distance.jensenshannon(dist1, dist2, 2) + +def euclidean_distance(samples1, samples2): + map1 = fromSamples(samples1) + map2 = fromSamples(samples2) + allKeys = set(map1.keys()) | set(map2.keys()) + dist1 = distributionFromMap(map1, allKeys) + dist2 = distributionFromMap(map2, allKeys) + distance = 0 + for i in range(len(dist2)): + distance += pow(dist1[i] - dist2[i], 2) + return pow(distance, 0.5) + +def fromSamples(samples): + m = {} + length = len(samples) + for sample in samples: + value = m.get(sample, 0) + m[sample] = value + 1 + for key in list(m.keys()): + m[key] /= length + return m + +def distributionFromMap(m, allKeys): + dist = [] + for key in allKeys: + 
value = m.get(key, 0) + dist.append(value) + return dist + diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py b/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py index 13754e80..827c2a5e 100644 --- a/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py @@ -11,13 +11,13 @@ class GraphCollection: self.nas = [] self.mpcs = [] self.name = name + self.size = number models = reader.readmultiplefiles(path, number, shouldShuffle) for i in range(len(models)): contents, out_d, na, mpc = reader.getmetrics(models[i]) self.out_ds.append(out_d) self.nas.append(na) self.mpcs.append(mpc) - print(len(self.out_ds)) #Graph stat for one graph class GraphStat: diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py index e0402519..b27a0ffc 100644 --- a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py @@ -24,9 +24,12 @@ def readcsvfile(filename): # meta models are string elif(arr[0] == constants.METAMODEL): contents[constants.METAMODEL] = arr[1:] - # all other contants are integer + # NA and OD are integers else: - contents[arr[0]] = list(map(int, arr[1:])) + try: + contents[arr[0]] = list(map(int, arr[1:])) + except: + print(arr[0], ' is not integer') f.close() return contents -- cgit v1.2.3-54-g00ecf