diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py new file mode 100644 index 00000000..a56caf45 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py | |||
@@ -0,0 +1,186 @@ | |||
1 | import numpy as np | ||
2 | import matplotlib.pyplot as plt | ||
3 | from scipy import stats | ||
4 | import glob | ||
5 | import random | ||
6 | import constants | ||
7 | |||
#
# readcsvfile parses one metrics CSV file and returns a dict that maps each
# row label (outdegree, node activity, mpc, ...) to the values on that row
#
def readcsvfile(filename):
    """Parse a metrics CSV file into a dictionary keyed by row label.

    Every line is split on commas; the first field is the row label and
    the remaining fields are its payload:

    * ``constants.MPC_VALUE`` and ``constants.TCC_VALUE`` rows are parsed
      as floats.
    * ``constants.METAMODEL`` rows are kept as strings.
    * A ``constants.NODE_TYPE`` row announces two payload lines that follow
      it (type names, then one number per type). They are stored as a
      ``{type: float}`` mapping under ``constants.Node_TYPE_KEY`` and are
      not parsed again as ordinary rows.
    * Any other row is parsed as integers, falling back to the raw strings
      when a field is not an integer.

    Blank (empty or whitespace-only) lines are skipped.

    :param filename: path of the CSV file to read.
    :return: dict mapping each row label to its parsed payload.
    :raises IndexError: if a node-type row is not followed by two lines.
    :raises ValueError: if a float row or a node-type number is not numeric.
    """
    with open(filename) as f:
        data = [line.replace('\n', '') for line in f]

    contents = {}
    payload_lines_left = 0  # lines already consumed by a node-type row
    for i, line in enumerate(data):
        if payload_lines_left:
            payload_lines_left -= 1
            continue
        # str.split never returns an empty list, so blank lines have to be
        # detected on the raw text.
        if not line.strip():
            continue

        arr = line.split(',')
        # MPC values are floats
        if arr[0] == constants.MPC_VALUE:
            contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
        # meta models are strings
        elif arr[0] == constants.METAMODEL:
            contents[constants.METAMODEL] = arr[1:]
        # node types: names on the next line, numbers on the one after
        elif arr[0] == constants.NODE_TYPE:
            types = data[i + 1].split(',')
            numbers = [float(n) for n in data[i + 2].split(',')]
            contents[constants.Node_TYPE_KEY] = dict(zip(types, numbers))
            payload_lines_left = 2
        elif arr[0] == constants.TCC_VALUE:
            contents[constants.TCC_VALUE] = list(map(float, arr[1:]))
        # NA and OD are integers; anything else is stored as strings
        else:
            try:
                contents[arr[0]] = list(map(int, arr[1:]))
            except ValueError:
                contents[arr[0]] = arr[1:]
    return contents
45 | |||
def checkAndReshape(arr):
    """Return ``arr`` as an array with at least two dimensions.

    Inputs with fewer than two dimensions are reshaped into a single
    column of shape ``(n, 1)``; inputs that already have two or more
    dimensions are returned unchanged.

    :param arr: a NumPy array or any array-like (list, tuple, scalar).
    :return: an array with ``ndim >= 2``.
    """
    # asanyarray hands ndarrays (and subclasses) back untouched, while
    # also letting plain sequences and scalars through.
    arr = np.asanyarray(arr)
    if arr.ndim < 2:
        # arr.size instead of arr.shape[0] so 0-d inputs become (1, 1)
        arr = np.reshape(arr, (arr.size, 1))
    return arr
50 | |||
def readTrajectory(filename):
    """Read a trajectory file holding one integer state code per line.

    Blank (empty or whitespace-only) lines are ignored.

    :param filename: path of the trajectory file.
    :return: list of state codes as ints, in file order.
    :raises ValueError: if a non-blank line is not an integer.
    """
    with open(filename) as f:
        # Lines read from a file keep their trailing newline, so blank
        # lines must be detected after stripping whitespace.
        return [int(line) for line in f if line.strip()]
58 | # | ||
59 | # take a matrix as input | ||
60 | # return the sample array | ||
61 | # | ||
def getsample(dataMatrix):
    """Expand a value/count matrix into the flat sample it summarises.

    :param dataMatrix: 2-row matrix (array or list of two equal-length
        rows); row 0 holds the values and row 1 holds how many times each
        value occurs.
    :return: list in which every value of row 0 is repeated
        ``int(count)`` times, in column order.
    """
    dataMatrix = np.asarray(dataMatrix)
    values = dataMatrix[0, :]
    counts = dataMatrix[1, :]
    # Counts may be stored as floats, hence the int() conversion.
    return [v for v, c in zip(values, counts) for _ in range(int(c))]
70 | |||
def reproduceSample(values, counts):
    """Rebuild the flat sample described by parallel value/count lists.

    :param values: sequence of distinct values.
    :param counts: sequence giving the number of occurrences of each value.
    :return: whatever ``getsample`` returns for the stacked 2-row matrix.
    """
    # Row 0 carries the values, row 1 the counts, as getsample expects.
    stacked = np.array([values, counts])
    return getsample(stacked)
74 | |||
#
# take a single filename as input
# return the parsed contents plus the samples of outdegree, na, mpc
#
def getmetrics(filename):
    """Read one metrics file and expand its histograms into samples.

    :param filename: path handed to ``readcsvfile``.
    :return: tuple ``(contents, outdegree_sample, na_sample, mpc_sample)``.
        ``contents`` is the parsed dict; when it holds a
        ``constants.TCC_VALUE`` entry, that entry is replaced in place by
        the expanded TCC sample.
    """
    parsed = readcsvfile(filename)

    od_sample = reproduceSample(
        parsed[constants.OUT_DEGREE_VALUE],
        parsed[constants.OUT_DEGREE_COUNT],
    )
    node_activity_sample = reproduceSample(
        parsed[constants.NA_VALUE],
        parsed[constants.NA_COUNT],
    )
    participation_sample = reproduceSample(
        parsed[constants.MPC_VALUE],
        parsed[constants.MPC_COUNT],
    )

    # TCC is optional: expand it only when the file provides it.
    if constants.TCC_VALUE in parsed:
        parsed[constants.TCC_VALUE] = reproduceSample(
            parsed[constants.TCC_VALUE],
            parsed[constants.TCC_COUNT],
        )

    return parsed, od_sample, node_activity_sample, participation_sample
89 | |||
#
# list up to maxNumberOfFiles CSV files found under the given path,
# in random order unless shouldShuffle is False
#
def readmultiplefiles(dirName, maxNumberOfFiles=None, shouldShuffle=True):
    """List CSV files found directly inside a directory.

    :param dirName: directory to search; a trailing path separator is
        optional.
    :param maxNumberOfFiles: maximum number of paths to return. ``None``
        (the default) or a value larger than the number of files returns
        every match.
    :param shouldShuffle: shuffle the matches before truncating, so the
        returned subset is random. When False, the order is whatever
        ``glob.glob`` yields.
    :return: list of matching file paths.
    """
    import os  # local import: only this function needs it

    # os.path.join adds the separator when dirName lacks one; plain string
    # concatenation would turn 'dir' into the pattern 'dir*.csv'.
    list_of_files = glob.glob(os.path.join(dirName, '*.csv'))
    if shouldShuffle:
        random.shuffle(list_of_files)
    # Slicing past the end simply yields the whole list.
    return list_of_files[:maxNumberOfFiles]
101 | |||
102 | |||
def plotlines(x, y, ax):
    """Plot ``y`` against ``x`` on the given axes.

    :param x: x coordinates.
    :param y: y coordinates.
    :param ax: object exposing a ``plot(x, y)`` method, e.g. a matplotlib
        ``Axes``.
    """
    # The returned line handles are not needed; unpacking them into a
    # single name would fail whenever plot() draws more than one line.
    ax.plot(x, y)
105 | |||
106 | |||
def testgetsamplesfromfiles():
    """Smoke check: run ``getmetrics`` on up to two CSV files.

    The files are picked by ``readmultiplefiles`` from
    ``../statistics/viatraOutput/``; the results are discarded.
    """
    for csv_path in readmultiplefiles('../statistics/viatraOutput/', 2):
        getmetrics(csv_path)
111 | |||
def probability(data):
    """Normalise counts into probabilities.

    :param data: array-like of counts.
    :return: NumPy array in which each entry of ``data`` is divided by the
        sum of all entries.
    """
    counts = np.asarray(data)
    # One vectorised division replaces the per-element loop and avoids
    # shadowing the builtin ``sum``.
    return counts / np.sum(counts)
120 | |||
121 | |||
def cumulativeProbability(p):
    """Return the running total of ``p`` as computed by ``np.cumsum``.

    :param p: array-like of probabilities.
    :return: NumPy array of cumulative sums.
    """
    return np.cumsum(p)
125 | |||
126 | |||
def _plot_distribution(values, counts, pdf_ax, cdf_ax):
    """Draw the PDF and the CDF of one value/count histogram."""
    pdf = probability(counts)
    plotlines(values, pdf, pdf_ax)
    plotlines(values, cumulativeProbability(pdf), cdf_ax)


def _label_axis(ax, xlabel, ylabel):
    """Set both axis labels and switch the grid on."""
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid()


def plot():
    """Plot PDFs and CDFs of outdegree, node activity and MPC.

    Every CSV file returned by ``readmultiplefiles`` for
    ``../statistics/viatraOutput/`` contributes one curve to each of six
    figures (three PDFs, three CDFs). The figures are then shown with
    ``plt.show()``.
    """
    metrics = (
        ('outdegree',
         constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT),
        ('node activity',
         constants.NA_VALUE, constants.NA_COUNT),
        ('multiplex participation coefficient',
         constants.MPC_VALUE, constants.MPC_COUNT),
    )

    # One figure per curve family: the three PDF figures are created
    # first, then the three CDF figures.
    pdf_axes = [plt.subplots()[1] for _ in metrics]
    cdf_axes = [plt.subplots()[1] for _ in metrics]

    # None as the limit makes the slice in readmultiplefiles keep every file.
    list_of_files = readmultiplefiles('../statistics/viatraOutput/', None)
    for file_name in list_of_files:
        contents = readcsvfile(file_name)
        for (_, value_key, count_key), pdf_ax, cdf_ax in zip(
                metrics, pdf_axes, cdf_axes):
            # Values and counts are used as separate sequences; the parsed
            # rows are plain lists and cannot be indexed like a 2-D array.
            _plot_distribution(
                contents[value_key], contents[count_key], pdf_ax, cdf_ax)

    for (label, _, _), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
        _label_axis(pdf_ax, label, 'pdf')
        _label_axis(cdf_ax, label, 'cdf')

    plt.show()
183 | |||
184 | |||
185 | # plot() | ||
186 | |||