diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/utils')
4 files changed, 320 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py new file mode 100644 index 00000000..cf532bc5 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/DistributionMetrics.py | |||
@@ -0,0 +1,53 @@ | |||
1 | from scipy import stats | ||
2 | from scipy.spatial import distance | ||
3 | |||
def ks_distance(samples1, samples2):
    """Run the two-sample Kolmogorov-Smirnov test on two sample collections.

    Returns:
        A ``(statistic, p_value)`` tuple as computed by
        ``scipy.stats.ks_2samp``.
    """
    outcome = stats.ks_2samp(samples1, samples2)
    return (outcome.statistic, outcome.pvalue)
7 | |||
def manual_ks(pdf1, pdf2):
    """Return the largest absolute gap between the running sums of two PDFs.

    The two sequences are walked in lockstep (stopping at the shorter one);
    at every position the cumulative sums are compared and the maximum
    absolute difference seen so far is kept. Returns 0 for empty input.
    """
    cumulative1 = 0
    cumulative2 = 0
    largest_gap = 0
    for p1, p2 in zip(pdf1, pdf2):
        cumulative1 += p1
        cumulative2 += p2
        gap = abs(cumulative1 - cumulative2)
        if gap > largest_gap:
            largest_gap = gap
    return largest_gap
17 | |||
def js_distance(samples1, samples2):
    """Return the Jensen-Shannon distance (base 2) between two sample sets.

    Each sample collection is turned into a relative-frequency map, both
    maps are expanded over the union of observed values (missing values
    count as 0), and the resulting vectors are passed to
    ``scipy.spatial.distance.jensenshannon``.
    """
    freq1 = fromSamples(samples1)
    freq2 = fromSamples(samples2)
    # One shared key collection so both vectors are laid out in the same order.
    allKeys = set(freq1) | set(freq2)
    return distance.jensenshannon(
        distributionFromMap(freq1, allKeys),
        distributionFromMap(freq2, allKeys),
        2,
    )
25 | |||
def euclidean_distance(samples1, samples2):
    """Return the Euclidean distance between two empirical distributions.

    Both sample collections are converted to relative-frequency vectors over
    the union of their observed values; the result is the square root of the
    summed squared component differences.
    """
    freq1 = fromSamples(samples1)
    freq2 = fromSamples(samples2)
    # One shared key collection so both vectors are laid out in the same order.
    allKeys = set(freq1) | set(freq2)
    vector1 = distributionFromMap(freq1, allKeys)
    vector2 = distributionFromMap(freq2, allKeys)
    squared_sum = sum((a - b) ** 2 for a, b in zip(vector1, vector2))
    return squared_sum ** 0.5
36 | |||
def fromSamples(samples):
    """Map each distinct sample value to its relative frequency.

    Args:
        samples: a sized collection of hashable values.

    Returns:
        A dict ``{value: occurrences / len(samples)}``; empty for empty input.
    """
    total = len(samples)
    occurrences = {}
    for item in samples:
        occurrences[item] = occurrences.get(item, 0) + 1
    return {item: count / total for item, count in occurrences.items()}
46 | |||
def distributionFromMap(m, allKeys):
    """Lay out the values of ``m`` as a list following the order of ``allKeys``.

    Keys absent from ``m`` contribute 0.
    """
    return [m.get(key, 0) for key in allKeys]
53 | |||
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py b/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py new file mode 100644 index 00000000..48d96ccc --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/GraphType.py | |||
@@ -0,0 +1,46 @@ | |||
1 | import readCSV as reader | ||
2 | import constants | ||
3 | import numpy as np | ||
4 | |||
class GraphCollection:
    """Metric samples gathered from a collection of graph statistic files."""

    def __init__(self, path, number, name, shouldShuffle = True):
        """Load up to ``number`` stat files from ``path`` and collect metrics.

        Args:
            path: location handed to ``reader.readmultiplefiles``.
            number: maximum number of files to read.
            name: label stored on the collection.
            shouldShuffle: forwarded to ``reader.readmultiplefiles``.
        """
        self.out_ds = []
        self.nas = []
        self.mpcs = []
        self.nts = []
        self.name = name
        self.tccs = []
        self.violations = []

        models = reader.readmultiplefiles(path, number, shouldShuffle)
        self.size = len(models)
        print(self.size)

        # Optional entries are only recorded for files that provide them.
        optional_targets = (
            (constants.Node_TYPE_KEY, self.nts),
            (constants.TCC_VALUE, self.tccs),
        )
        for model in models:
            contents, out_d, na, mpc = reader.getmetrics(model)
            self.out_ds.append(out_d)
            self.nas.append(na)
            self.mpcs.append(mpc)
            for key, target in optional_targets:
                if key in contents:
                    target.append(contents[key])
            if constants.VIOLATION in contents:
                # Only the first value of the violations row is kept.
                self.violations.append(contents[constants.VIOLATION][0])
31 | |||
class GraphStat:
    """Metric samples and metadata of a single graph, read from one stat file."""

    def __init__(self, filename):
        """Read ``filename`` via ``reader.getmetrics`` and store its metrics.

        ``out_d``, ``na``, ``mpc`` and ``numNodes`` are always set. ``id``,
        ``nodeTypeStat``, ``violations`` and ``tcc`` are only set when the
        corresponding entry is present in the file.
        """
        contents, self.out_d, self.na, self.mpc = reader.getmetrics(filename)
        self.numNodes = np.array(contents[constants.NUMBER_NODES])
        if constants.STATE_ID in contents:
            self.id = contents[constants.STATE_ID][0]
        if constants.Node_TYPE_KEY in contents:
            self.nodeTypeStat = contents[constants.Node_TYPE_KEY]
        if constants.VIOLATION in contents:
            self.violations = int(contents[constants.VIOLATION][0])
        # The constants module defines TCC_VALUE (there is no TCC_VALUE_KEY);
        # it is the key under which the TCC data is stored in ``contents``.
        if constants.TCC_VALUE in contents:
            self.tcc = contents[constants.TCC_VALUE]
46 | |||
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/constants.py b/Metrics/Metrics-Calculation/metrics_plot/utils/constants.py new file mode 100644 index 00000000..e30cc583 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/constants.py | |||
@@ -0,0 +1,35 @@ | |||
# Row labels / dictionary keys used when reading the metrics CSV files.
NUMBER_EDGE_TYPES = 'Number of Edge types'
NUMBER_NODES = 'Number Of Nodes'
METAMODEL = 'Meta Mode'
STATE_ID = 'State Id'
VIOLATION = 'violations'

# Value / count row pairs of the individual metric distributions.
OUT_DEGREE_VALUE = 'OutDegreeValue'
OUT_DEGREE_COUNT = 'OutDegreeCount'
NA_VALUE = 'NAValue'
NA_COUNT = 'NACount'
MPC_VALUE = 'MPCValue'
MPC_COUNT = 'MPCCount'
TCC_VALUE = 'TCCValue'
TCC_COUNT = 'TCCCount'

# NODE_TYPE is the row label in the CSV file; Node_TYPE_KEY is the key under
# which the parsed node-type mapping is stored.
NODE_TYPE = 'Node Type'
Node_TYPE_KEY = 'NodeType'

# Paths to human-made reference outputs, one per metric
# (presumably the representative model for each metric -- confirm).
HUMAN_OUT_D_REP = '../input/humanOutput/R_2015225_run_1.csv'
HUMAN_MPC_REP = '../input/humanOutput/R_2016324_run_1.csv'
HUMAN_NA_REP = '../input/humanOutput/R_2017419_run_1.csv'
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py new file mode 100644 index 00000000..a56caf45 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py | |||
@@ -0,0 +1,186 @@ | |||
1 | import numpy as np | ||
2 | import matplotlib.pyplot as plt | ||
3 | from scipy import stats | ||
4 | import glob | ||
5 | import random | ||
6 | import constants | ||
7 | |||
def readcsvfile(filename):
    """Parse a metrics CSV file into a dict keyed by each row's first cell.

    Row handling:
      * ``constants.MPC_VALUE`` and ``constants.TCC_VALUE`` rows are stored
        as lists of floats.
      * ``constants.METAMODEL`` rows are stored as lists of strings.
      * A ``constants.NODE_TYPE`` row announces two following rows (type
        names, then numbers); they are combined into a ``{type: float}``
        dict stored under ``constants.Node_TYPE_KEY``.
      * Any other row is stored as a list of ints when every cell parses as
        an int, otherwise as the raw list of strings.
      * Blank lines are ignored.

    Args:
        filename: path of the CSV file to read.

    Returns:
        dict mapping the first cell of each row to its parsed remaining cells.

    Raises:
        IndexError: if a node-type header is not followed by two more lines.
        ValueError: if a float-typed row contains a non-numeric cell.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(filename) as f:
        data = list(f)

    contents = {}
    for i, line in enumerate(data):
        # Skip lines without any content (the former `len(line) < 0` check
        # could never be true, so blank lines were stored under the key '').
        if not line.strip():
            continue
        arr = line.replace('\n', '').split(',')
        key = arr[0]
        if key == constants.MPC_VALUE:
            # MPC values are fractional, so they are parsed as floats.
            contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
        elif key == constants.METAMODEL:
            # Meta model entries are kept as strings.
            contents[constants.METAMODEL] = arr[1:]
        elif key == constants.NODE_TYPE:
            # The two lines after the header hold the type names and numbers.
            # NOTE: the loop still visits those two lines afterwards, so they
            # are additionally stored by the generic branch below.
            types = data[i + 1].replace('\n', '').split(',')
            numbers = [float(n) for n in data[i + 2].replace('\n', '').split(',')]
            contents[constants.Node_TYPE_KEY] = dict(zip(types, numbers))
        elif key == constants.TCC_VALUE:
            contents[constants.TCC_VALUE] = list(map(float, arr[1:]))
        else:
            # Integer rows (e.g. counts) become ints; anything that does not
            # parse as int is kept as strings.
            try:
                contents[key] = list(map(int, arr[1:]))
            except ValueError:
                contents[key] = arr[1:]
    return contents
45 | |||
def checkAndReshape(arr):
    """Return ``arr`` as a column matrix when it has fewer than two dimensions.

    Arrays that already have two or more dimensions are returned unchanged.
    """
    if len(arr.shape) >= 2:
        return arr
    column_shape = (arr.shape[0], 1)
    return np.reshape(arr, column_shape)
50 | |||
def readTrajectory(filename):
    """Read a trajectory file containing one integer state code per line.

    Blank (or whitespace-only) lines are skipped.

    Args:
        filename: path of the trajectory file.

    Returns:
        list of int state codes in file order.

    Raises:
        ValueError: if a non-blank line is not a valid integer.
    """
    state_codes = []
    with open(filename) as f:
        for line in f:
            # Lines read from a file keep their trailing newline, so compare
            # the stripped text; a bare '\n' would otherwise reach int().
            text = line.strip()
            if not text:
                continue
            state_codes.append(int(text))
    return state_codes
def getsample(dataMatrix):
    """Expand a value/count matrix into a flat list of samples.

    Args:
        dataMatrix: 2-D array whose row 0 holds the values and row 1 the
            number of occurrences of each value.

    Returns:
        list in which every value is repeated ``int(count)`` times, in
        column order.
    """
    values = dataMatrix[0, :]
    counts = dataMatrix[1, :]
    samples = []
    for v, c in zip(values, counts):
        samples.extend([v] * int(c))
    return samples
70 | |||
def reproduceSample(values, counts):
    """Rebuild the flat sample list from parallel value and count sequences.

    The two sequences are stacked into a two-row array (values, then counts)
    and expanded with ``getsample``.
    """
    value_count_matrix = np.array([values, counts])
    return getsample(value_count_matrix)
74 | |||
def getmetrics(filename):
    """Read one metrics file and rebuild the samples of its distributions.

    Args:
        filename: path of a single metrics CSV file.

    Returns:
        ``(contents, outdegree_sample, na_sample, mpc_sample)`` where
        ``contents`` is the dict produced by ``readcsvfile``. When the file
        has a TCC entry, ``contents[constants.TCC_VALUE]`` is replaced by the
        rebuilt TCC sample.
    """
    contents = readcsvfile(filename)

    def sample_for(value_key, count_key):
        # Expand a value row / count row pair into a flat sample list.
        return reproduceSample(contents[value_key], contents[count_key])

    outdegree_sample = sample_for(constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT)
    na_sample = sample_for(constants.NA_VALUE, constants.NA_COUNT)
    mpc_sample = sample_for(constants.MPC_VALUE, constants.MPC_COUNT)

    if constants.TCC_VALUE in contents:
        contents[constants.TCC_VALUE] = sample_for(constants.TCC_VALUE, constants.TCC_COUNT)

    return contents, outdegree_sample, na_sample, mpc_sample
89 | |||
def readmultiplefiles(dirName, maxNumberOfFiles=None, shouldShuffle=True):
    """Return the paths of up to ``maxNumberOfFiles`` CSV files.

    Args:
        dirName: prefix that ``'*.csv'`` is appended to; for a directory it
            must end with a path separator.
        maxNumberOfFiles: maximum number of paths to return; ``None`` (the
            default) or a number larger than the amount of matching files
            returns all of them.
        shouldShuffle: when true the selection is random; otherwise the
            first files in sorted order are returned.

    Returns:
        list of matching file paths.
    """
    # glob returns matches in arbitrary order; sorting makes the unshuffled
    # selection deterministic (and a seeded shuffle reproducible).
    list_of_files = sorted(glob.glob(dirName + '*.csv'))
    if shouldShuffle:
        random.shuffle(list_of_files)
    # Slicing never goes out of bounds, and a None limit keeps everything.
    return list_of_files[:maxNumberOfFiles]
101 | |||
102 | |||
def plotlines(x, y, ax):
    """Draw ``y`` against ``x`` on the axes ``ax``."""
    # The single-element unpacking requires ax.plot to return exactly one item.
    (_line,) = ax.plot(x, y)
105 | |||
106 | |||
def testgetsamplesfromfiles():
    """Smoke check: load metrics from two files of the viatra output folder."""
    for stat_file in readmultiplefiles('../statistics/viatraOutput/', 2):
        getmetrics(stat_file)
111 | |||
def probability(data):
    """Normalise counts into probabilities.

    Args:
        data: iterable of counts.

    Returns:
        numpy array in which every entry of ``data`` is divided by the sum
        of all entries.
    """
    total = np.sum(data)
    return np.array([count / total for count in data])
120 | |||
121 | |||
def cumulativeProbability(p):
    """Return the running (cumulative) sum of the probabilities ``p``."""
    return np.cumsum(p)
125 | |||
126 | |||
def plot():
    """Plot the PDF and CDF of out-degree, node activity and MPC.

    Every CSV file found in the viatra output folder contributes one line to
    each of six figures: three probability-density plots followed by three
    cumulative-distribution plots. The figures are shown with ``plt.show()``.
    """
    # (axis label, key of the value row, key of the count row) per metric.
    metrics = (
        ('outdegree', constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT),
        ('node activity', constants.NA_VALUE, constants.NA_COUNT),
        ('multiplex participation coefficient', constants.MPC_VALUE, constants.MPC_COUNT),
    )
    # One figure per metric for the PDFs, then one per metric for the CDFs.
    pdf_axes = [plt.subplots()[1] for _ in metrics]
    cdf_axes = [plt.subplots()[1] for _ in metrics]

    # Passing None as the limit selects every file in the folder.
    list_of_files = readmultiplefiles('../statistics/viatraOutput/', None)
    for file_name in list_of_files:
        contents = readcsvfile(file_name)
        for (_, value_key, count_key), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
            # Values and counts are plain lists, so they are used directly
            # instead of being sliced out of a 2-D array.
            values = contents[value_key]
            pdf = probability(contents[count_key])
            plotlines(values, pdf, pdf_ax)
            plotlines(values, cumulativeProbability(pdf), cdf_ax)

    for (label, _, _), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
        pdf_ax.set_xlabel(label)
        pdf_ax.set_ylabel('pdf')
        pdf_ax.grid()
        cdf_ax.set_xlabel(label)
        cdf_ax.set_ylabel('cdf')
        cdf_ax.grid()

    plt.show()
183 | |||
184 | |||
185 | # plot() | ||
186 | |||