import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import glob
import random
import constants

#
# read a csv metrics file and return its rows as a dict keyed by the
# first field of each row (e.g. outdegree, node activity and mpc values/counts)
#
def readcsvfile(filename):
    """Parse a metrics CSV file into a dictionary keyed by row label.

    Every row is split on commas; the first field is the row label and the
    remaining fields are its values:

    * the ``constants.MPC_VALUE`` row is stored as a list of floats,
    * the ``constants.METAMODEL`` row is stored as a list of strings,
    * a ``constants.NODE_TYPE`` row opens a three-row section: the next row
      holds the type names and the one after it the matching numbers. They
      are stored as a ``{type: number}`` dict (numbers kept as strings)
      under ``constants.Node_TYPE_KEY``,
    * any other row is stored as a list of ints, or as a list of strings
      when at least one field is not an integer.

    Blank lines are ignored.

    Args:
        filename: path of the CSV file to read.

    Returns:
        dict mapping row labels to the parsed values described above.

    Raises:
        IndexError: if a node-type header is not followed by two more rows.
        ValueError: if a field of the MPC row is not a valid float.
    """
    with open(filename) as f:
        # The node-type section needs look-ahead, so keep every row in memory.
        data = [line.rstrip('\n') for line in f]

    contents = {}
    # Index of the first row that has not been consumed by a look-ahead.
    next_unread = 0
    for i, line in enumerate(data):
        if i < next_unread:
            # Already stored as part of a node-type section; re-parsing it
            # would add entries under meaningless keys.
            continue
        if not line.strip():
            continue

        arr = line.split(',')
        key = arr[0]
        if key == constants.MPC_VALUE:
            contents[constants.MPC_VALUE] = list(map(float, arr[1:]))
        elif key == constants.METAMODEL:
            contents[constants.METAMODEL] = arr[1:]
        elif key == constants.NODE_TYPE:
            types = data[i + 1].split(',')
            numbers = data[i + 2].split(',')
            contents[constants.Node_TYPE_KEY] = dict(zip(types, numbers))
            next_unread = i + 3
        else:
            # NA and OD rows are integers; anything else is kept as strings.
            try:
                contents[key] = list(map(int, arr[1:]))
            except ValueError:
                contents[key] = arr[1:]
    return contents

def checkAndReshape(arr):
    """Return ``arr`` with at least two dimensions.

    An array with fewer than two dimensions is reshaped into a column of
    shape ``(arr.shape[0], 1)``; any other array is returned unchanged.
    """
    if len(arr.shape) >= 2:
        return arr
    n_rows = arr.shape[0]
    return np.reshape(arr, (n_rows, 1))

def readTrajectory(filename):
    """Read a trajectory file that holds one integer state code per line.

    Blank and whitespace-only lines are skipped. Lines yielded by a file
    keep their trailing newline, so comparing a line with ``''`` never
    detects them.

    Args:
        filename: path of the trajectory file.

    Returns:
        list of int state codes, in file order.

    Raises:
        ValueError: if a non-blank line is not a valid integer.
    """
    with open(filename) as f:
        return [int(line) for line in f if line.strip()]
#
# take a matrix as input
# return the sample array
#
def getsample(dataMatrix):
    """Expand a (values, counts) matrix into the flat sample it encodes.

    Row 0 of ``dataMatrix`` holds the values and row 1 holds how many times
    each value occurs. Each count is truncated with ``int`` before use.

    Returns:
        list in which every value is repeated according to its count,
        in the order of the values row.
    """
    values, counts = dataMatrix[0, :], dataMatrix[1, :]
    sample = []
    for value, occurrences in zip(values, counts):
        sample.extend([value] * int(occurrences))
    return sample

def reproduceSample(values, counts):
    """Rebuild a sample from parallel sequences of values and counts.

    The two sequences are stacked into a two-row array (values first,
    counts second) and handed to ``getsample``.
    """
    value_count_matrix = np.array([values, counts])
    return getsample(value_count_matrix)

#
# take a single csv filename as input
# return the parsed contents and the samples of outdegree, na, mpc
#
def getmetrics(filename):
    """Read one metrics file and rebuild its three samples.

    Args:
        filename: path of the CSV file, passed to ``readcsvfile``.

    Returns:
        tuple ``(contents, outdegree_sample, na_sample, mpc_sample)`` where
        ``contents`` is the dict returned by ``readcsvfile`` and each sample
        is rebuilt from the matching value/count rows.
    """
    contents = readcsvfile(filename)
    # (value row, count row) for outdegree, node activity and MPC, in the
    # order the samples are returned.
    value_count_keys = (
        (constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT),
        (constants.NA_VALUE, constants.NA_COUNT),
        (constants.MPC_VALUE, constants.MPC_COUNT),
    )
    outdegree_sample, na_sample, mpc_sample = [
        reproduceSample(contents[value_key], contents[count_key])
        for value_key, count_key in value_count_keys
    ]
    return contents, outdegree_sample, na_sample, mpc_sample

#
# list up to maxNumberOfFiles csv files found in the given path, in RANDOM order by default
#
def readmultiplefiles(dirName, maxNumberOfFiles=None, shouldShuffle = True):
    """List CSV files under a path prefix, optionally shuffled and truncated.

    Args:
        dirName: prefix that ``'*.csv'`` is appended to. It is concatenated
            as-is, so a directory must end with a path separator.
        maxNumberOfFiles: maximum number of file names to return. ``None``
            (the default), or a number larger than the number of matches,
            returns every match.
        shouldShuffle: when true the matches are shuffled with ``random``
            before truncation, so a random subset is returned.

    Returns:
        list of matching file names.
    """
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # first makes the unshuffled result (and seeded shuffles) reproducible.
    list_of_files = sorted(glob.glob(dirName + '*.csv'))
    if shouldShuffle:
        random.shuffle(list_of_files)
    # Slicing past the end (or with None) simply yields the whole list.
    return list_of_files[:maxNumberOfFiles]


def plotlines(x, y, ax):
    """Draw ``y`` against ``x`` on the given axes.

    Args:
        x: x coordinates.
        y: y coordinates.
        ax: object exposing a ``plot(x, y)`` method (the callers in this
            file pass matplotlib axes).

    Returns:
        whatever ``ax.plot`` returns, so callers can style the drawn lines.
        The result is not unpacked here, so inputs that produce more than
        one line are accepted too.
    """
    return ax.plot(x, y)


def testgetsamplesfromfiles():
    """Smoke check: run ``getmetrics`` on up to two files of the output directory.

    The results are discarded; the call only verifies that the files can be
    read and expanded without raising.
    """
    for csv_path in readmultiplefiles('../statistics/viatraOutput/', 2):
        getmetrics(csv_path)

def probability(data):
    """Normalise counts so that they sum to one.

    Args:
        data: array-like of numeric counts.

    Returns:
        numpy array in which every entry of ``data`` is divided by the sum
        of all entries. If the counts sum to zero the division yields
        NaN/inf, following numpy's rules.
    """
    # Named `total` so the builtin `sum` is not shadowed.
    total = np.sum(data)
    return np.asarray(data) / total


def cumulativeProbability(p):
    """Return the running total of ``p`` as computed by ``np.cumsum``.

    Applied to a probability mass function this gives the cumulative
    distribution function.
    """
    return np.cumsum(p)


def plot():
    """Plot the pdf and cdf of outdegree, node activity and MPC for every file.

    Six figures are created, in this order: the pdf of outdegree, node
    activity and multiplex participation coefficient, then the cdf of the
    same three metrics. Each CSV file found in the output directory adds
    one line to every figure. The figures are then shown with ``plt.show()``.
    """
    # (value row, count row, x-axis label) of each metric, in figure order.
    metrics = (
        (constants.OUT_DEGREE_VALUE, constants.OUT_DEGREE_COUNT, 'outdegree'),
        (constants.NA_VALUE, constants.NA_COUNT, 'node activity'),
        (constants.MPC_VALUE, constants.MPC_COUNT,
         'multiplex participation coefficient'),
    )
    # One figure per axes; only the axes are needed afterwards.
    pdf_axes = [plt.subplots()[1] for _ in metrics]
    cdf_axes = [plt.subplots()[1] for _ in metrics]

    # None as the limit means "every file": slicing with None keeps the
    # whole list.
    # NOTE(review): the directory used to be spelled 'iatraOutput'; it is
    # assumed to be the same 'viatraOutput' directory used by
    # testgetsamplesfromfiles -- confirm.
    list_of_files = readmultiplefiles('../statistics/viatraOutput/', None)
    for file_name in list_of_files:
        contents = readcsvfile(file_name)
        for (value_key, count_key, _), pdf_ax, cdf_ax in zip(
                metrics, pdf_axes, cdf_axes):
            # The rows are plain lists, so they are used directly instead
            # of being indexed like a 2-D array.
            values = contents[value_key]
            pdf = probability(contents[count_key])
            cdf = cumulativeProbability(pdf)
            plotlines(values, pdf, pdf_ax)
            plotlines(values, cdf, cdf_ax)

    for (_, _, label), pdf_ax, cdf_ax in zip(metrics, pdf_axes, cdf_axes):
        for axes, y_label in ((pdf_ax, 'pdf'), (cdf_ax, 'cdf')):
            axes.set_xlabel(label)
            axes.set_ylabel(y_label)
            axes.grid()

    plt.show()


# plot()