Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/utils')
-rw-r--r--   Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py    53
-rw-r--r--   Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py              46
-rw-r--r--   Metrics/Metrics-Calculation/metrics_plot/utils/constants.py              35
-rw-r--r--   Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py               186
4 files changed, 320 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py
new file mode 100644
index 00000000..cf532bc5
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py
@@ -0,0 +1,53 @@
from scipy import stats
from scipy.spatial import distance

def ks_distance(samples1, samples2):
    # two-sample Kolmogorov-Smirnov test: returns (statistic, p-value)
    value, p = stats.ks_2samp(samples1, samples2)
    return (value, p)

def manual_ks(pdf1, pdf2):
    # KS statistic computed by hand from two aligned PDFs:
    # maximum absolute difference of the running (cumulative) sums
    result = 0
    sum1 = 0
    sum2 = 0
    for (a, b) in zip(pdf1, pdf2):
        sum1 += a
        sum2 += b
        result = max(result, abs(sum1 - sum2))
    return result

def js_distance(samples1, samples2):
    # Jensen-Shannon distance (base 2) between the empirical distributions
    map1 = fromSamples(samples1)
    map2 = fromSamples(samples2)
    allKeys = set(map1.keys()) | set(map2.keys())
    dist1 = distributionFromMap(map1, allKeys)
    dist2 = distributionFromMap(map2, allKeys)
    return distance.jensenshannon(dist1, dist2, 2)

def euclidean_distance(samples1, samples2):
    # Euclidean distance between the empirical distributions
    map1 = fromSamples(samples1)
    map2 = fromSamples(samples2)
    allKeys = set(map1.keys()) | set(map2.keys())
    dist1 = distributionFromMap(map1, allKeys)
    dist2 = distributionFromMap(map2, allKeys)
    total = 0
    for i in range(len(dist2)):
        total += pow(dist1[i] - dist2[i], 2)
    return pow(total, 0.5)

def fromSamples(samples):
    # empirical PDF: value -> relative frequency
    m = {}
    length = len(samples)
    for sample in samples:
        value = m.get(sample, 0)
        m[sample] = value + 1
    for key in list(m.keys()):
        m[key] /= length
    return m

def distributionFromMap(m, allKeys):
    # align a PDF map to a common key set (missing keys get probability 0)
    dist = []
    for key in allKeys:
        value = m.get(key, 0)
        dist.append(value)
    return dist

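A minimal, hypothetical usage sketch of these helpers (the sample lists are invented for illustration; only scipy is required):

# illustrative only: two made-up integer samples
from DistributionMetrics import ks_distance, js_distance, euclidean_distance

samples_a = [1, 1, 2, 3, 3, 3, 4]
samples_b = [1, 2, 2, 2, 3, 4, 4]

ks_stat, p_value = ks_distance(samples_a, samples_b)  # two-sample KS statistic and p-value
js = js_distance(samples_a, samples_b)                # Jensen-Shannon distance, base 2
eu = euclidean_distance(samples_a, samples_b)         # Euclidean distance between empirical PDFs
print(ks_stat, p_value, js, eu)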
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py b/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py
new file mode 100644
index 00000000..48d96ccc
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py
@@ -0,0 +1,46 @@
import readCSV as reader
import constants
import numpy as np

# graph stats for a collection of graphs
class GraphCollection:

    # init with a path containing the CSV files and the number of files to read
    # (file reading is delegated to readCSV)
    def __init__(self, path, number, name, shouldShuffle = True):
        self.out_ds = []
        self.nas = []
        self.mpcs = []
        self.nts = []
        self.name = name
        self.tccs = []
        self.violations = []
        models = reader.readmultiplefiles(path, number, shouldShuffle)
        print(len(models))
        self.size = len(models)
        for i in range(len(models)):
            contents, out_d, na, mpc = reader.getmetrics(models[i])
            self.out_ds.append(out_d)
            self.nas.append(na)
            self.mpcs.append(mpc)
            if constants.Node_TYPE_KEY in contents:
                self.nts.append(contents[constants.Node_TYPE_KEY])
            if constants.TCC_VALUE in contents:
                self.tccs.append(contents[constants.TCC_VALUE])
            if constants.VIOLATION in contents:
                self.violations.append(contents[constants.VIOLATION][0])

# graph stats for one graph
class GraphStat:
    # init with the file name of the stat
    def __init__(self, filename):
        contents, self.out_d, self.na, self.mpc = reader.getmetrics(filename)
        self.numNodes = np.array(contents[constants.NUMBER_NODES])
        if constants.STATE_ID in contents:
            self.id = (contents[constants.STATE_ID])[0]
        if constants.Node_TYPE_KEY in contents:
            self.nodeTypeStat = contents[constants.Node_TYPE_KEY]
        if constants.VIOLATION in contents:
            self.violations = int(contents[constants.VIOLATION][0])
        if constants.TCC_VALUE in contents:
            self.tcc = contents[constants.TCC_VALUE]

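A hypothetical usage sketch (the directory path and file count are placeholders; the single-file path reuses HUMAN_OUT_D_REP from constants.py, and it assumes that CSV contains a 'Number Of Nodes' row):

# illustrative only: path and count below are assumptions, not repository values
from GraphType import GraphCollection, GraphStat
import constants

# aggregate out-degree / node-activity / MPC samples over up to 10 CSV files
human = GraphCollection('../input/humanOutput/', 10, 'human')
print(human.size, len(human.out_ds))

# stats for one CSV file
stat = GraphStat(constants.HUMAN_OUT_D_REP)
print(stat.numNodes, len(stat.out_d))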
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/constants.py b/Metrics/Metrics-Calculation/metrics_plot/utils/constants.py
new file mode 100644
index 00000000..e30cc583
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/utils/constants.py
@@ -0,0 +1,35 @@
NUMBER_EDGE_TYPES = 'Number of Edge types'

NUMBER_NODES = 'Number Of Nodes'

OUT_DEGREE_COUNT = 'OutDegreeCount'

OUT_DEGREE_VALUE = 'OutDegreeValue'

NA_COUNT = 'NACount'

NA_VALUE = 'NAValue'

MPC_VALUE = 'MPCValue'

MPC_COUNT = 'MPCCount'

METAMODEL = 'Meta Mode'

STATE_ID = 'State Id'

NODE_TYPE = 'Node Type'

Node_TYPE_KEY = 'NodeType'

HUMAN_OUT_D_REP = '../input/humanOutput/R_2015225_run_1.csv'

HUMAN_MPC_REP = '../input/humanOutput/R_2016324_run_1.csv'

HUMAN_NA_REP = '../input/humanOutput/R_2017419_run_1.csv'

VIOLATION = 'violations'

TCC_VALUE = 'TCCValue'

TCC_COUNT = 'TCCCount'
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
new file mode 100644
index 00000000..a56caf45
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
@@ -0,0 +1,186 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import glob
import random
import constants

#
# read a csv file into a dictionary: each metric name maps to its list of
# values; node types map to a {type: count} dictionary
#
def readcsvfile(filename):

    contents = {}
    with open(filename) as f:
        data = list(f)
    for i, line in enumerate(data):
        arr = line.replace('\n', '').split(',')
        # if there is no element in the line, continue
        if line.strip() == '': continue
        # else check for contents
        # if it is MPC then use float
        if arr[0] == constants.MPC_VALUE:
            contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
        # meta models are strings
        elif arr[0] == constants.METAMODEL:
            contents[constants.METAMODEL] = arr[1:]
        # node types span two extra rows: type names, then counts
        elif arr[0] == constants.NODE_TYPE:
            types = data[i+1].replace('\n', '').split(',')
            numbers = data[i+2].replace('\n', '').split(',')
            # convert counts to floats
            numbers = [float(n) for n in numbers]
            contents[constants.Node_TYPE_KEY] = {t: n for t, n in zip(types, numbers)}
        elif arr[0] == constants.TCC_VALUE:
            contents[constants.TCC_VALUE] = list(map(float, arr[1:]))
        # NA and OD are integers; store anything else as strings
        else:
            try:
                contents[arr[0]] = list(map(int, arr[1:]))
            except ValueError:
                contents[arr[0]] = arr[1:]
    return contents

def checkAndReshape(arr):
    # make sure a 1-D array becomes a column vector
    if len(arr.shape) < 2:
        arr = np.reshape(arr, (arr.shape[0], 1))
    return arr

def readTrajectory(filename):
    # read one state code (an integer) per line
    state_codes = []
    with open(filename) as f:
        for i, line in enumerate(f):
            if line.strip() == '': continue
            state_codes.append(int(line))
    return state_codes
#
# take a matrix as input (first row: values, second row: counts)
# return the expanded sample array
#
def getsample(dataMatrix):
    data = []
    value = dataMatrix[0, :]
    count = dataMatrix[1, :]
    for i, v in enumerate(value):
        for x in range(0, int(count[i])):
            data.append(v)
    return data

def reproduceSample(values, counts):
    arr = np.array([values, counts])
    return getsample(arr)

#
# take a file name as input
# return the parsed contents plus the samples of outdegree, na, mpc
#
def getmetrics(filename):
    contents = readcsvfile(filename)
    outdegree_sample = reproduceSample(contents[constants.OUT_DEGREE_VALUE], contents[constants.OUT_DEGREE_COUNT])
    na_sample = reproduceSample(contents[constants.NA_VALUE], contents[constants.NA_COUNT])
    mpc_sample = reproduceSample(contents[constants.MPC_VALUE], contents[constants.MPC_COUNT])

    if constants.TCC_VALUE in contents:
        contents[constants.TCC_VALUE] = reproduceSample(contents[constants.TCC_VALUE], contents[constants.TCC_COUNT])

    return contents, outdegree_sample, na_sample, mpc_sample

#
# return up to maxNumberOfFiles CSV file names from the given path,
# optionally in random order
#
def readmultiplefiles(dirName, maxNumberOfFiles, shouldShuffle = True):
    list_of_files = glob.glob(dirName + '*.csv')  # create the list of files
    if shouldShuffle:
        random.shuffle(list_of_files)
    # if the number of files is out of bound then just give the whole list
    file_names = list_of_files[:maxNumberOfFiles]
    # print(file_names)
    return file_names
101
102
def plotlines(x, y, ax):
    l1, = ax.plot(x, y)


def testgetsamplesfromfiles():
    files = readmultiplefiles('../statistics/viatraOutput/', 2)
    for file in files:
        getmetrics(file)

def probability(data):
    # normalize counts into a probability distribution
    total = np.sum(data)
    probabilityList = []
    for d in data:
        p = d / total
        probabilityList.append(p)
    a = np.array(probabilityList)
    return a


def cumulativeProbability(p):
    cdf = np.cumsum(p)
    return cdf

126
def plot():
    fig, ax = plt.subplots()
    fig, ax1 = plt.subplots()
    fig, ax2 = plt.subplots()
    fig, ax3 = plt.subplots()
    fig, ax4 = plt.subplots()
    fig, ax5 = plt.subplots()
    # cap of 10 files chosen arbitrarily; the required count was missing from the call
    list_of_files = readmultiplefiles('../statistics/viatraOutput/', 10)
    for file_name in list_of_files:
        contents = readcsvfile(file_name)
        outdegree = np.array([contents[constants.OUT_DEGREE_VALUE], contents[constants.OUT_DEGREE_COUNT]])
        na = np.array([contents[constants.NA_VALUE], contents[constants.NA_COUNT]])
        mpc = np.array([contents[constants.MPC_VALUE], contents[constants.MPC_COUNT]])
        outV = outdegree[0, :]
        outC = outdegree[1, :]
        outP = probability(outC)
        outCumP = cumulativeProbability(outP)
        plotlines(outV, outP, ax)
        naV = na[0, :]
        naC = na[1, :]
        naP = probability(naC)
        naCumP = cumulativeProbability(naP)
        plotlines(naV, naP, ax1)
        mpcV = mpc[0, :]
        mpcC = mpc[1, :]
        mpcP = probability(mpcC)
        mpcCumP = cumulativeProbability(mpcP)
        plotlines(mpcV, mpcP, ax2)
        plotlines(outV, outCumP, ax3)
        plotlines(naV, naCumP, ax4)
        plotlines(mpcV, mpcCumP, ax5)
    ax.set_xlabel('outdegree')
    ax.set_ylabel('pdf')
    ax.grid()

    ax1.set_xlabel('node activity')
    ax1.set_ylabel('pdf')
    ax1.grid()

    ax2.set_xlabel('multiplex participation coefficient')
    ax2.set_ylabel('pdf')
    ax2.grid()

    ax3.set_xlabel('outdegree')
    ax3.set_ylabel('cdf')
    ax3.grid()

    ax4.set_xlabel('node activity')
    ax4.set_ylabel('cdf')
    ax4.grid()

    ax5.set_xlabel('multiplex participation coefficient')
    ax5.set_ylabel('cdf')
    ax5.grid()

    plt.show()


# plot()
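A self-contained sketch of the CSV layout readcsvfile expects, using a temporary file (the row keys come from constants.py; every value below is made up for illustration):

# illustrative only: writes a tiny CSV in the layout readcsvfile parses
# (first cell = metric key, remaining cells = values), then reads it back
import tempfile, os
import readCSV as reader
import constants

rows = [
    'Number Of Nodes,25',
    'OutDegreeValue,0,1,2',
    'OutDegreeCount,5,12,8',
    'NAValue,1,2',
    'NACount,20,5',
    'MPCValue,0.0,0.5,1.0',
    'MPCCount,3,10,12',
]
fd, path = tempfile.mkstemp(suffix='.csv')
with os.fdopen(fd, 'w') as f:
    f.write('\n'.join(rows) + '\n')

contents, out_d, na, mpc = reader.getmetrics(path)
print(contents[constants.NUMBER_NODES])  # [25]
print(len(out_d), len(na), len(mpc))     # 25 25 25
os.remove(path)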