From 98e0479f26ce0bc54016c4fba8e74e3223203b9a Mon Sep 17 00:00:00 2001
From: 20001LastOrder <boqi.chen@mail.mcgill.ca>
Date: Wed, 5 Jun 2019 11:38:23 -0400
Subject: plotting for metrics during generation

---
 .../metrics_plot/utils/readCSV.py                  | 169 +++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py

(limited to 'Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py')

diff --git a/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
new file mode 100644
index 00000000..e0402519
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/utils/readCSV.py
@@ -0,0 +1,169 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy import stats
+import glob
+import random
+import constants
+
+#
+# read csvfile returns outdegree, node activity, mpc
+# as matrix with the first row of values and second row of count
+#
+def readcsvfile(filename):
+    
+    contents = {}
+    with open(filename) as f:
+        for i, line in enumerate(f):
+            arr = line.split(',')
+            # if there is no element in the line, continue
+            if len(line) < 0: continue
+            # else check for contents
+            # if it is MPC then use float
+            if arr[0] == constants.MPC_VALUE:
+                contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
+            # meta models are string
+            elif(arr[0] == constants.METAMODEL):
+                contents[constants.METAMODEL] = arr[1:]
+            # all other contants are integer
+            else:
+                contents[arr[0]] = list(map(int, arr[1:]))
+    f.close()
+    return contents
+
+def checkAndReshape(arr):
+    if len(arr.shape) < 2:
+        arr = np.reshape(arr, (arr.shape[0],1))
+    return arr
+
+def readTrajectory(filename):
+    state_codes = []
+    with open(filename) as f:
+        for i, line in enumerate(f):
+            if(line == ''): continue
+            state_codes.append(int(line))
+    return state_codes
+#
+# take a matrix as input
+# return the sample array
+#
+def getsample(dataMatrix):
+    data = []
+    value = dataMatrix[0, :]
+    count = dataMatrix[1, :]
+    for i, v in enumerate(value):
+        for x in range(0, int(count[i])):
+            data.append(v)
+    return data
+
+def reproduceSample(values, counts):
+    arr = np.array([values, counts])
+    return getsample(arr)
+
+#
+# take an array of filenames as input
+# return the samples of outdegree, na, mpc
+#
+def getmetrics(filename):
+    contents = readcsvfile(filename)
+    outdegree_sample = reproduceSample(contents[constants.OUT_DEGREE_VALUE], contents[constants.OUT_DEGREE_COUNT])
+    na_sample = reproduceSample(contents[constants.NA_VALUE], contents[constants.NA_COUNT])
+    mpc_sample = reproduceSample(contents[constants.MPC_VALUE], contents[constants.MPC_COUNT])
+    return contents,outdegree_sample, na_sample, mpc_sample
+
+#
+# read number of files in the given path RANDOMLY
+#
+def readmultiplefiles(dirName, maxNumberOfFiles, shouldShuffle = True):
+    list_of_files = glob.glob(dirName + '*.csv')  # create the list of file
+    if shouldShuffle: 
+        random.shuffle(list_of_files)
+    #if the number of files is out of bound then just give the whole list
+    file_names =  list_of_files[:maxNumberOfFiles]
+    # print(file_names)
+    return file_names
+
+
+def plotlines(x, y, ax):
+    l1, = ax.plot(x, y)
+
+
+def testgetsamplesfromfiles():
+    files = readmultiplefiles('../statistics/viatraOutput/', 2)
+    for file in files:
+        getmetrics(file)
+
+def probability(data):
+    sum = np.sum(data)
+    probabilityList = []
+    for d in data:
+        p = d/sum
+        probabilityList.append(p)
+    a = np.array(probabilityList)
+    return a
+
+
+def cumulativeProbability(p):
+    cdf = np.cumsum(p)
+    return cdf
+
+
+def plot():
+    fig, ax = plt.subplots()
+    fig, ax1 = plt.subplots()
+    fig, ax2 = plt.subplots()
+    fig, ax3 = plt.subplots()
+    fig, ax4 = plt.subplots()
+    fig, ax5 = plt.subplots()
+    list_of_files = readmultiplefiles('../statistics/iatraOutput/')
+    for file_name in list_of_files:
+        contents = readcsvfile(file_name)
+        outdegree = [contents[constants.OUT_DEGREE_VALUE], contents[constants.OUT_DEGREE_COUNT]]
+        na = [contents[constants.NA_VALUE], contents[constants.NA_COUNT]]
+        mpc = [contents[constants.MPC_VALUE], contents[constants.MPC_COUNT]]
+        outV = outdegree[0, :]
+        outC = outdegree[1, :]
+        outP = probability(outC)
+        outCumP = cumulativeProbability(outP)
+        plotlines(outV, outP, ax)
+        naV = na[0, :]
+        naC = na[1, :]
+        naP = probability(naC)
+        naCumP = cumulativeProbability(naP)
+        plotlines(naV, naP, ax1)
+        mpcV = mpc[0, :]
+        mpcC = mpc[1, :]
+        mpcP = probability(mpcC)
+        mpcCumP = cumulativeProbability(mpcP)
+        plotlines(mpcV, mpcP, ax2)
+        plotlines(outV, outCumP, ax3)
+        plotlines(naV, naCumP, ax4)
+        plotlines(mpcV, mpcCumP, ax5)
+    ax.set_xlabel('ourdegree')
+    ax.set_ylabel('pdf')
+    ax.grid()
+
+    ax1.set_xlabel('node activity')
+    ax1.set_ylabel('pdf')
+    ax1.grid()
+
+    ax2.set_xlabel('multiplex participation coefficient')
+    ax2.set_ylabel('pdf')
+    ax2.grid()
+
+    ax3.set_xlabel('ourdegree')
+    ax3.set_ylabel('cdf')
+    ax3.grid()
+
+    ax4.set_xlabel('node activity')
+    ax4.set_ylabel('cdf')
+    ax4.grid()
+
+    ax5.set_xlabel('multiplex participation coefficient')
+    ax5.set_ylabel('cdf')
+    ax5.grid()
+
+    plt.show()
+
+
+# plot()
+
-- 
cgit v1.2.3-54-g00ecf