diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/src/readCSV.py')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/src/readCSV.py | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/src/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/src/readCSV.py new file mode 100644 index 00000000..8627ad4a --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/src/readCSV.py | |||
@@ -0,0 +1,169 @@ | |||
1 | import numpy as np | ||
2 | import matplotlib.pyplot as plt | ||
3 | from scipy import stats | ||
4 | import glob | ||
5 | import random | ||
6 | import constants | ||
7 | |||
def readcsvfile(filename):
    """Parse a metrics CSV file into a dictionary keyed by row label.

    Each non-blank line is split on commas. The first field is used as the
    dictionary key and the remaining fields become the value list:

    * the ``constants.MPC_VALUE`` row is parsed as floats,
    * the ``constants.METAMODEL`` row is kept as strings,
    * every other row is parsed as integers.

    If the same key appears on several lines, the last line wins.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict mapping each row label to the list of parsed fields.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a numeric row contains a field that cannot be parsed.
    """
    contents = {}
    with open(filename) as f:
        for line in f:
            # Strip the line terminator so the last field is clean and so
            # blank lines are really detected as empty.
            line = line.strip()
            if not line:
                continue
            key, *fields = line.split(',')
            if key == constants.MPC_VALUE:
                # MPC values are fractional.
                contents[key] = [float(field) for field in fields]
            elif key == constants.METAMODEL:
                # Meta-model names stay as strings.
                contents[key] = fields
            else:
                # All remaining rows hold integer data.
                contents[key] = [int(field) for field in fields]
    return contents
32 | |||
def checkAndReshape(arr):
    """Return ``arr`` as an array with at least two dimensions.

    An array that already has two or more dimensions is returned unchanged;
    a one-dimensional array of length ``n`` is reshaped to ``(n, 1)``.
    """
    if len(arr.shape) >= 2:
        return arr
    column_shape = (arr.shape[0], 1)
    return np.reshape(arr, column_shape)
37 | |||
def readTrajectory(filename):
    """Read a trajectory file containing one integer state code per line.

    Blank and whitespace-only lines are skipped.

    Args:
        filename: Path of the trajectory file.

    Returns:
        list of int state codes, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a valid integer.
    """
    with open(filename) as f:
        # Lines yielded by a file object keep their '\n', so they must be
        # stripped before testing for emptiness.
        stripped_lines = (line.strip() for line in f)
        return [int(line) for line in stripped_lines if line]
def getsample(dataMatrix):
    """Expand a value/count matrix into a flat sample list.

    Row 0 of ``dataMatrix`` holds the values and row 1 holds how many times
    each value occurs. Every value is repeated ``int(count)`` times, in
    column order.

    Returns:
        list containing the repeated values.
    """
    values = dataMatrix[0, :]
    counts = dataMatrix[1, :]
    sample = []
    for v, c in zip(values, counts):
        # A zero (or negative) count contributes nothing.
        sample.extend([v] * int(c))
    return sample
57 | |||
def reproduceSample(values, counts):
    """Rebuild the raw sample from parallel ``values`` and ``counts`` lists.

    The two sequences are stacked into a two-row array (values on top,
    counts below) and expanded by ``getsample``.
    """
    value_count_matrix = np.array([values, counts])
    return getsample(value_count_matrix)
61 | |||
def getmetrics(filename):
    """Load one metrics CSV file and rebuild its three samples.

    Args:
        filename: Path of a single CSV file readable by ``readcsvfile``.

    Returns:
        tuple ``(contents, outdegree_sample, na_sample, mpc_sample)`` where
        ``contents`` is the parsed file and each sample is the list produced
        by ``reproduceSample`` from the matching value/count rows.
    """
    contents = readcsvfile(filename)
    # (value row, count row) key pairs, in the order the samples are returned.
    value_count_keys = (
        (constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT),
        (constants.NA_VALUE, constants.NA_COUNT),
        (constants.MPC_VALUE, constants.MPC_COUNT),
    )
    outdegree_sample, na_sample, mpc_sample = [
        reproduceSample(contents[value_key], contents[count_key])
        for value_key, count_key in value_count_keys
    ]
    return contents, outdegree_sample, na_sample, mpc_sample
72 | |||
def readmultiplefiles(dirName, numberOfFiles=None, shouldShuffle=True):
    """Return the paths of up to ``numberOfFiles`` CSV files under a prefix.

    Args:
        dirName: Path prefix that is concatenated directly with ``'*.csv'``,
            so a directory must be given with its trailing separator.
        numberOfFiles: Maximum number of paths to return. ``None`` (the
            default) or a value larger than the number of matches returns
            every match; negative values are treated as 0.
        shouldShuffle: When true, the matches are shuffled with
            ``random.shuffle`` before the selection is made, so the selection
            is random. When false, the first paths in sorted order are
            returned.

    Returns:
        list of matching file paths.
    """
    # glob gives no ordering guarantee; sorting first makes the unshuffled
    # result deterministic and a seeded shuffle reproducible.
    list_of_files = sorted(glob.glob(dirName + '*.csv'))
    if shouldShuffle:
        random.shuffle(list_of_files)
    if numberOfFiles is None:
        return list_of_files
    # Slicing already copes with a count larger than the list.
    return list_of_files[:max(numberOfFiles, 0)]
84 | |||
85 | |||
def plotlines(x, y, ax):
    """Draw ``y`` against ``x`` on the axes object ``ax``.

    The handles returned by ``ax.plot`` are not needed and are discarded;
    the function returns ``None``.
    """
    # No unpacking of the result: ax.plot may return any number of handles.
    ax.plot(x, y)
88 | |||
89 | |||
def testgetsamplesfromfiles():
    """Run ``getmetrics`` on the files selected from the viatraOutput folder.

    The files are chosen by ``readmultiplefiles`` with a requested count
    of 2; the results of ``getmetrics`` are discarded.
    """
    for csv_path in readmultiplefiles('../statistics/viatraOutput/', 2):
        getmetrics(csv_path)
94 | |||
def probability(data):
    """Normalise ``data`` so that its entries sum to one.

    Args:
        data: Array-like of counts (or other non-negative weights).

    Returns:
        numpy.ndarray with each entry divided by the total of ``data``.
        There is no guard for a zero total; in that case the result follows
        NumPy's division-by-zero rules.
    """
    counts = np.asarray(data)
    # One vectorised division replaces the per-element loop and avoids
    # shadowing the builtin ``sum``.
    return counts / np.sum(counts)
103 | |||
104 | |||
def cumulativeProbability(p):
    """Return the running (cumulative) sum of the probabilities in ``p``."""
    return np.cumsum(p)
108 | |||
109 | |||
def plot(dirName='../statistics/viatraOutput/'):
    """Plot the pdf and cdf of each metric for every CSV file under a prefix.

    Six figures are created: a pdf figure for out-degree, node activity and
    multiplex participation coefficient (in that order), followed by a cdf
    figure for each of the same metrics. Every CSV file contributes one line
    to each figure. The call blocks in ``plt.show()``.

    Args:
        dirName: Path prefix concatenated directly with ``'*.csv'`` to find
            the input files (a directory needs its trailing separator).
    """
    # (axis label, key of the value row, key of the count row) per metric.
    metrics = [
        ('outdegree', constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT),
        ('node activity', constants.NA_VALUE, constants.NA_COUNT),
        ('multiplex participation coefficient',
         constants.MPC_VALUE, constants.MPC_COUNT),
    ]
    # One figure per metric for the pdf, then one per metric for the cdf.
    pdf_axes = [plt.subplots()[1] for _ in metrics]
    cdf_axes = [plt.subplots()[1] for _ in metrics]

    # Files are globbed here so that all of them are plotted, in a stable
    # order, without relying on a file-count argument.
    for file_name in sorted(glob.glob(dirName + '*.csv')):
        contents = readcsvfile(file_name)
        for (_, value_key, count_key), pdf_ax, cdf_ax in zip(
                metrics, pdf_axes, cdf_axes):
            # readcsvfile yields plain lists; convert them to arrays.
            values = np.asarray(contents[value_key])
            counts = np.asarray(contents[count_key])
            pdf = probability(counts)
            plotlines(values, pdf, pdf_ax)
            plotlines(values, cumulativeProbability(pdf), cdf_ax)

    for (label, _, _), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
        pdf_ax.set_xlabel(label)
        pdf_ax.set_ylabel('pdf')
        pdf_ax.grid()

        cdf_ax.set_xlabel(label)
        cdf_ax.set_ylabel('cdf')
        cdf_ax.grid()

    plt.show()
166 | |||
167 | |||
168 | # plot() | ||
169 | |||