1 files changed, 186 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
new file mode 100644
index 00000000..a56caf45
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
@@ -0,0 +1,186 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy import stats
+import glob
+import random
+import constants
+#
+# readcsvfile parses a metrics CSV file and returns a dictionary that maps
+# each row label (out-degree, node activity, MPC, ...) to the values on that row
+#
+def readcsvfile(filename):
+    """Parse a metrics CSV file into a dictionary keyed by row label.
+
+    Every non-blank line is split on commas and its first field selects
+    how the remaining fields are stored:
+
+    * ``constants.MPC_VALUE`` and ``constants.TCC_VALUE`` rows become
+      lists of floats.
+    * ``constants.METAMODEL`` rows are kept as lists of strings.
+    * a ``constants.NODE_TYPE`` row announces two following rows (type
+      names, then numbers); they are zipped into a ``{type: float}``
+      dictionary stored under ``constants.Node_TYPE_KEY``.
+    * any other row becomes a list of ints, or the raw strings when a
+      field is not an integer.
+
+    Args:
+        filename: path of the CSV file to read.
+
+    Returns:
+        dict mapping row labels to the parsed row contents.
+
+    Raises:
+        IndexError: if a ``NODE_TYPE`` row is not followed by two rows.
+        ValueError: if a float row or a node-type number is not numeric.
+    """
+    contents = {}
+    # The with-block closes the file, so no explicit close() is needed.
+    with open(filename) as f:
+        data = [line.replace('\n', '') for line in f]
+
+    for i, line in enumerate(data):
+        # Skip blank lines. The former `len(line) < 0` guard could never be
+        # true, so blank lines used to be stored under an empty-string key.
+        if not line.strip():
+            continue
+        arr = line.split(',')
+        key = arr[0]
+        if key == constants.MPC_VALUE:
+            # MPC values are fractional, so parse them as floats.
+            contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
+        elif key == constants.METAMODEL:
+            # Metamodel names stay as strings.
+            contents[constants.METAMODEL] = arr[1:]
+        elif key == constants.NODE_TYPE:
+            # The next two rows hold the type names and their numbers.
+            # NOTE: those rows are not consumed here; the loop still visits
+            # them on later iterations, exactly as before.
+            types = data[i + 1].split(',')
+            numbers = [float(n) for n in data[i + 2].split(',')]
+            contents[constants.Node_TYPE_KEY] = dict(zip(types, numbers))
+        elif key == constants.TCC_VALUE:
+            contents[constants.TCC_VALUE] = list(map(float, arr[1:]))
+        else:
+            # NA and OD rows are integers; anything else is kept as text.
+            try:
+                contents[key] = list(map(int, arr[1:]))
+            except ValueError:
+                contents[key] = arr[1:]
+    return contents
+def checkAndReshape(arr):
+    """Return ``arr`` as an array with at least two dimensions.
+
+    Inputs with fewer than two dimensions are reshaped into a single
+    column of shape ``(n, 1)``; a 0-d scalar becomes ``(1, 1)``. Inputs
+    that already have two or more dimensions are returned unchanged.
+
+    Args:
+        arr: numpy array or array-like (e.g. a list).
+
+    Returns:
+        numpy array with ``ndim >= 2``.
+    """
+    # asanyarray accepts plain sequences and passes existing arrays
+    # (including ndarray subclasses) through without copying.
+    arr = np.asanyarray(arr)
+    if arr.ndim < 2:
+        # arr.size, unlike arr.shape[0], is also defined for 0-d input.
+        arr = np.reshape(arr, (arr.size, 1))
+    return arr
+def readTrajectory(filename):
+    """Read a trajectory file holding one integer state code per line.
+
+    Blank and whitespace-only lines are skipped.
+
+    Args:
+        filename: path of the trajectory file.
+
+    Returns:
+        list of int state codes in file order.
+
+    Raises:
+        ValueError: if a non-blank line is not an integer.
+    """
+    state_codes = []
+    with open(filename) as f:
+        for line in f:
+            # Lines read from a file keep their trailing newline, so the
+            # old `line == ''` test never matched and a blank line reached
+            # int(), which raised ValueError. Strip before testing.
+            text = line.strip()
+            if not text:
+                continue
+            state_codes.append(int(text))
+    return state_codes
+#
+# take a matrix as input
+# return the sample array
+#
+def getsample(dataMatrix):
+    """Expand a value/count matrix into the flat sample it summarises.
+
+    Args:
+        dataMatrix: object indexable as ``dataMatrix[row, :]`` (a 2-D
+            numpy array in this file); row 0 holds the values and row 1
+            the number of times each value occurs.
+
+    Returns:
+        list in which every value is repeated ``int(count)`` times, in
+        the order of row 0.
+    """
+    values = dataMatrix[0, :]
+    counts = dataMatrix[1, :]
+    # Emit each value once per occurrence recorded in the count row.
+    return [v for v, c in zip(values, counts) for _ in range(int(c))]
+def reproduceSample(values, counts):
+    """Rebuild the flat sample described by parallel value/count lists.
+
+    The two sequences are stacked into one two-row numpy array (values
+    first, counts second) and handed to ``getsample``.
+
+    Args:
+        values: the distinct values.
+        counts: how many times each value occurs.
+
+    Returns:
+        list with every value repeated according to its count.
+    """
+    return getsample(np.array([values, counts]))
+#
+# take a single filename as input
+# return the parsed contents plus the samples of outdegree, na, mpc
+#
+def getmetrics(filename):
+    """Load one metrics file and rebuild its out-degree, NA and MPC samples.
+
+    When the file also carries a TCC row, ``contents[constants.TCC_VALUE]``
+    is replaced in place by the rebuilt TCC sample.
+
+    Args:
+        filename: path of a single metrics CSV file.
+
+    Returns:
+        tuple ``(contents, outdegree_sample, na_sample, mpc_sample)``
+        where ``contents`` is the dictionary from ``readcsvfile``.
+    """
+    contents = readcsvfile(filename)
+
+    def rebuild(value_key, count_key):
+        # Expand one value/count pair of rows into its flat sample.
+        return reproduceSample(contents[value_key], contents[count_key])
+
+    outdegree_sample = rebuild(constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT)
+    na_sample = rebuild(constants.NA_VALUE, constants.NA_COUNT)
+    mpc_sample = rebuild(constants.MPC_VALUE, constants.MPC_COUNT)
+
+    # TCC is optional; expand it only when the file provides it.
+    if constants.TCC_VALUE in contents:
+        contents[constants.TCC_VALUE] = rebuild(constants.TCC_VALUE, constants.TCC_COUNT)
+
+    return contents, outdegree_sample, na_sample, mpc_sample
+#
+# list up to maxNumberOfFiles CSV files found in the given path, in random order unless shouldShuffle is False
+#
+def readmultiplefiles(dirName, maxNumberOfFiles=None, shouldShuffle=True):
+    """List the ``*.csv`` files of a directory, optionally shuffled and capped.
+
+    Args:
+        dirName: directory to search; a trailing path separator is optional.
+        maxNumberOfFiles: maximum number of paths to return. ``None`` (the
+            default) or a value larger than the number of files returns
+            every file.
+        shouldShuffle: when true the paths are shuffled with ``random``
+            before the cap is applied; otherwise they are returned sorted.
+
+    Returns:
+        list of matching file paths.
+    """
+    import os  # local import: the module header does not import os
+
+    # os.path.join builds the right pattern with or without a trailing
+    # separator (plain concatenation turned 'dir' into 'dir*.csv').
+    # glob returns names in arbitrary order, so sort for a stable baseline.
+    list_of_files = sorted(glob.glob(os.path.join(dirName, '*.csv')))
+    if shouldShuffle:
+        random.shuffle(list_of_files)
+    # Slicing never raises: an out-of-range (or None) limit yields the whole list.
+    return list_of_files[:maxNumberOfFiles]
+def plotlines(x, y, ax):
+    """Draw ``y`` against ``x`` as one line on ``ax`` and return that line.
+
+    Args:
+        x: x coordinates.
+        y: y coordinates.
+        ax: object exposing ``plot(x, y)``; ``plot()`` in this file passes
+            axes created by ``plt.subplots()``.
+
+    Returns:
+        the single line object produced by ``ax.plot`` (it used to be
+        bound to an unused local and dropped).
+    """
+    # The one-element unpacking still requires exactly one line, as before.
+    line, = ax.plot(x, y)
+    return line
+def testgetsamplesfromfiles():
+    """Smoke-run ``getmetrics`` on up to two files from the viatra output directory.
+
+    Nothing is asserted or returned; the run only shows that the selected
+    files can be read without raising.
+    """
+    for path in readmultiplefiles('../statistics/viatraOutput/', 2):
+        getmetrics(path)
+def probability(data):
+    """Normalise counts into relative frequencies.
+
+    Args:
+        data: sequence or array of counts.
+
+    Returns:
+        numpy array holding every entry of ``data`` divided by the total
+        of all entries.
+    """
+    # `total` instead of `sum`, so the builtin is not shadowed.
+    total = np.sum(data)
+    # One vectorised division replaces the per-element Python loop.
+    return np.array(data) / total
+def cumulativeProbability(p):
+    """Return the running total of ``p`` as computed by ``np.cumsum``.
+
+    Args:
+        p: sequence or array of probabilities.
+
+    Returns:
+        numpy array whose i-th entry is the sum of ``p[0]`` .. ``p[i]``.
+    """
+    return np.cumsum(p)
+def plot(dirName='../statistics/viatraOutput/'):
+    """Plot the pdf and cdf of out-degree, node activity and MPC for every file.
+
+    Six figures are created (three pdf, three cdf). Each CSV file found in
+    ``dirName`` adds one line to every figure, then all figures are shown.
+
+    Args:
+        dirName: directory holding the metrics CSV files. The default
+            replaces the former hard-coded '../statistics/iatraOutput/',
+            a typo of the directory used by ``testgetsamplesfromfiles``.
+    """
+    # (x-axis label, value-row key, count-row key) for each plotted metric.
+    metrics = (
+        ('outdegree', constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT),
+        ('node activity', constants.NA_VALUE, constants.NA_COUNT),
+        ('multiplex participation coefficient', constants.MPC_VALUE, constants.MPC_COUNT),
+    )
+    # One figure per metric and curve kind, created in the original order.
+    pdf_axes = [plt.subplots()[1] for _ in metrics]
+    cdf_axes = [plt.subplots()[1] for _ in metrics]
+
+    # The old call left out the required file limit and raised TypeError.
+    # A limit of None keeps every file (list slice [:None]).
+    for file_name in readmultiplefiles(dirName, None):
+        contents = readcsvfile(file_name)
+        for (_, value_key, count_key), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
+            # The rows are plain lists, which the old `[0, :]` indexing
+            # could not handle; convert each row to an array instead.
+            values = np.array(contents[value_key])
+            pdf = probability(np.array(contents[count_key]))
+            plotlines(values, pdf, pdf_ax)
+            plotlines(values, cumulativeProbability(pdf), cdf_ax)
+
+    for (label, _, _), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
+        for axis, ylabel in ((pdf_ax, 'pdf'), (cdf_ax, 'cdf')):
+            axis.set_xlabel(label)
+            axis.set_ylabel(ylabel)
+            axis.grid()
+    plt.show()
+# plot()

diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py new file mode 100644 index 00000000..a56caf45 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
@@ -0,0 +1,186 @@
	1	import numpy as np
	2	import matplotlib.pyplot as plt
	3	from scipy import stats
	4	import glob
	5	import random
	6	import constants
	7
	8	#
	9	# readcsvfile parses a metrics CSV file and returns a dictionary that maps
	10	# each row label (out-degree, node activity, MPC, ...) to the values on that row
	11	#
	12	def readcsvfile(filename):
	13
	14	contents = {}
	15	with open(filename) as f:
	16	data = list(f)
	17	f.close()
	18	for i, line in enumerate(data):
	19	arr = line.replace('\n', '').split(',')
	20	# if there is no element in the line, continue
	21	if len(line) < 0: continue
	22	# else check for contents
	23	# if it is MPC then use float
	24	if arr[0] == constants.MPC_VALUE:
	25	contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
	26	# meta models are string
	27	elif(arr[0] == constants.METAMODEL):
	28	contents[constants.METAMODEL] = arr[1:]
	29	# Node types
	30	elif(arr[0] == constants.NODE_TYPE):
	31	types = data[i+1].replace('\n', '').split(',')
	32	numbers = data[i+2].replace('\n', '').split(',')
	33	#convert number to floats
	34	numbers = [float(n) for n in numbers]
	35	contents[constants.Node_TYPE_KEY] = {t : n for t, n in zip(types, numbers)}
	36	elif(arr[0] == constants.TCC_VALUE):
	37	contents[constants.TCC_VALUE] = list(map(float, arr[1:]))
	38	# NA and OD are integers, and store other information as string
	39	else:
	40	try:
	41	contents[arr[0]] = list(map(int, arr[1:]))
	42	except:
	43	contents[arr[0]] = arr[1:]
	44	return contents
	45
	46	def checkAndReshape(arr):
	47	if len(arr.shape) < 2:
	48	arr = np.reshape(arr, (arr.shape[0],1))
	49	return arr
	50
	51	def readTrajectory(filename):
	52	state_codes = []
	53	with open(filename) as f:
	54	for i, line in enumerate(f):
	55	if(line == ''): continue
	56	state_codes.append(int(line))
	57	return state_codes
	58	#
	59	# take a matrix as input
	60	# return the sample array
	61	#
	62	def getsample(dataMatrix):
	63	data = []
	64	value = dataMatrix[0, :]
	65	count = dataMatrix[1, :]
	66	for i, v in enumerate(value):
	67	for x in range(0, int(count[i])):
	68	data.append(v)
	69	return data
	70
	71	def reproduceSample(values, counts):
	72	arr = np.array([values, counts])
	73	return getsample(arr)
	74
	75	#
	76	# take a single filename as input
	77	# return the parsed contents plus the samples of outdegree, na, mpc
	78	#
	79	def getmetrics(filename):
	80	contents = readcsvfile(filename)
	81	outdegree_sample = reproduceSample(contents[constants.OUT_DEGREE_VALUE], contents[constants.OUT_DEGREE_COUNT])
	82	na_sample = reproduceSample(contents[constants.NA_VALUE], contents[constants.NA_COUNT])
	83	mpc_sample = reproduceSample(contents[constants.MPC_VALUE], contents[constants.MPC_COUNT])
	84
	85	if(constants.TCC_VALUE in contents):
	86	contents[constants.TCC_VALUE] = reproduceSample(contents[constants.TCC_VALUE], contents[constants.TCC_COUNT])
	87
	88	return contents,outdegree_sample, na_sample, mpc_sample
	89
	90	#
	91	# list up to maxNumberOfFiles CSV files found in the given path, in random order unless shouldShuffle is False
	92	#
	93	def readmultiplefiles(dirName, maxNumberOfFiles, shouldShuffle = True):
	94	list_of_files = glob.glob(dirName + '*.csv') # create the list of file
	95	if shouldShuffle:
	96	random.shuffle(list_of_files)
	97	#if the number of files is out of bound then just give the whole list
	98	file_names = list_of_files[:maxNumberOfFiles]
	99	# print(file_names)
	100	return file_names
	101
	102
	103	def plotlines(x, y, ax):
	104	l1, = ax.plot(x, y)
	105
	106
	107	def testgetsamplesfromfiles():
	108	files = readmultiplefiles('../statistics/viatraOutput/', 2)
	109	for file in files:
	110	getmetrics(file)
	111
	112	def probability(data):
	113	sum = np.sum(data)
	114	probabilityList = []
	115	for d in data:
	116	p = d/sum
	117	probabilityList.append(p)
	118	a = np.array(probabilityList)
	119	return a
	120
	121
	122	def cumulativeProbability(p):
	123	cdf = np.cumsum(p)
	124	return cdf
	125
	126
	127	def plot():
	128	fig, ax = plt.subplots()
	129	fig, ax1 = plt.subplots()
	130	fig, ax2 = plt.subplots()
	131	fig, ax3 = plt.subplots()
	132	fig, ax4 = plt.subplots()
	133	fig, ax5 = plt.subplots()
	134	list_of_files = readmultiplefiles('../statistics/iatraOutput/')
	135	for file_name in list_of_files:
	136	contents = readcsvfile(file_name)
	137	outdegree = [contents[constants.OUT_DEGREE_VALUE], contents[constants.OUT_DEGREE_COUNT]]
	138	na = [contents[constants.NA_VALUE], contents[constants.NA_COUNT]]
	139	mpc = [contents[constants.MPC_VALUE], contents[constants.MPC_COUNT]]
	140	outV = outdegree[0, :]
	141	outC = outdegree[1, :]
	142	outP = probability(outC)
	143	outCumP = cumulativeProbability(outP)
	144	plotlines(outV, outP, ax)
	145	naV = na[0, :]
	146	naC = na[1, :]
	147	naP = probability(naC)
	148	naCumP = cumulativeProbability(naP)
	149	plotlines(naV, naP, ax1)
	150	mpcV = mpc[0, :]
	151	mpcC = mpc[1, :]
	152	mpcP = probability(mpcC)
	153	mpcCumP = cumulativeProbability(mpcP)
	154	plotlines(mpcV, mpcP, ax2)
	155	plotlines(outV, outCumP, ax3)
	156	plotlines(naV, naCumP, ax4)
	157	plotlines(mpcV, mpcCumP, ax5)
	158	ax.set_xlabel('ourdegree')
	159	ax.set_ylabel('pdf')
	160	ax.grid()
	161
	162	ax1.set_xlabel('node activity')
	163	ax1.set_ylabel('pdf')
	164	ax1.grid()
	165
	166	ax2.set_xlabel('multiplex participation coefficient')
	167	ax2.set_ylabel('pdf')
	168	ax2.grid()
	169
	170	ax3.set_xlabel('ourdegree')
	171	ax3.set_ylabel('cdf')
	172	ax3.grid()
	173
	174	ax4.set_xlabel('node activity')
	175	ax4.set_ylabel('cdf')
	176	ax4.grid()
	177
	178	ax5.set_xlabel('multiplex participation coefficient')
	179	ax5.set_ylabel('cdf')
	180	ax5.grid()
	181
	182	plt.show()
	183
	184
	185	# plot()
	186