# Compare Metric Distances to the Human Models

### Imports

In [1]:
from GraphType import GraphStat
from GraphType import GraphCollection
from scipy import stats
from ipywidgets import interact, fixed, interactive
import readCSV as reader
import ipywidgets as widgets
import matplotlib.pyplot as plt
import random
import numpy as np


### Classes

* Record the average distances of different metrics for a model to the human models 

In [2]:
class GraphDistance:
    """Average KS distances from one graph's metrics to those of a collection.

    Attributes:
        graph: The graph statistics object passed to the constructor.
        collection: The collection the graph is compared against.
        out_d_distance: Average KS distance between ``graphStat.out_d`` and
            every entry of ``collection.out_ds``.
        na_distance: Same, for ``graphStat.na`` against ``collection.nas``.
        mpc_distance: Same, for ``graphStat.mpc`` against ``collection.mpcs``.
    """
    # Initialise with a graph stat and a collection of graph stats.
    # All three distances are computed eagerly here, once per instance.
    def __init__(self, graphStat, collection):
        self.graph = graphStat
        self.collection = collection
        self.out_d_distance = average_ks_distance(collection.out_ds, graphStat.out_d)
        self.na_distance = average_ks_distance(collection.nas, graphStat.na)
        self.mpc_distance = average_ks_distance(collection.mpcs, graphStat.mpc)

### Methods

* Calculate the average ks distance

In [3]:
def average_ks_distance(targets, sample):
    """Return the mean two-sample KS statistic between ``sample`` and each target.

    Args:
        targets: Iterable of samples to compare against. Any iterable is
            accepted, including generators, because the items are counted
            while iterating instead of calling ``len``.
        sample: The sample that is compared with every target.

    Returns:
        The arithmetic mean of the KS statistics over all targets.

    Raises:
        ValueError: If ``targets`` yields no items, because the mean is
            undefined in that case.
    """
    total = 0.0
    count = 0
    for target in targets:
        # Only the KS statistic is needed; the p-value is ignored.
        statistic, _p_value = stats.ks_2samp(target, sample)
        total += statistic
        count += 1

    if count == 0:
        raise ValueError(
            "average_ks_distance requires at least one target sample"
        )
    return total / count


* Find the median KS distance among models with the same number of nodes

In [None]:
def find_median(x, metric_distances):
    """Group distances by node count and return the median of each group.

    Args:
        x: Sequence whose items are indexed at position 0 to obtain the
            grouping key (the number of nodes).
        metric_distances: Sequence of distances, aligned with ``x`` by index.

    Returns:
        A pair of numpy arrays ``(keys, medians)``: the distinct grouping
        keys in ascending order and the median distance for each key.
    """
    grouped = {}
    for position, node_counts in enumerate(x):
        grouped.setdefault(node_counts[0], []).append(metric_distances[position])

    keys = np.array(list(grouped))
    medians = np.array([np.median(values) for values in grouped.values()])

    # Sort both arrays by ascending key so they can be drawn as a line.
    ranking = np.argsort(keys)
    return keys[ranking], medians[ranking]


* Plot Diagram

In [4]:
# metric_selector: GraphDistance -> float
def plot(infos, lines, id, metric_selector,colors, title):
    """Scatter one metric against node count and overlay median and trajectories.

    Args:
        infos: Mapping from a key to a distance record; every record exposes
            ``graph.num_nodes`` and is accepted by ``metric_selector``.
        lines: Sequence of trajectories, each an iterable of keys into ``infos``.
        id: Figure identifier handed to ``plt.figure``.
        metric_selector: Callable that extracts the plotted value from a record.
        colors: Sequence of colors; ``colors[i]`` is used for ``lines[i]``.
        title: Title of the plot.
    """
    records = list(infos.values())
    metric_distances = [metric_selector(record) for record in records]
    x = [record.graph.num_nodes for record in records]

    plt.figure(id, figsize=(18, 10))
    plt.title(title)
    # Every record as an unconnected red marker
    plt.plot(x, metric_distances, color='red', linestyle='', marker='o', alpha=0.7)

    # Median distance per node count, drawn as a black line
    median_x, median_y = find_median(x, metric_distances)
    plt.plot(median_x, median_y, color='black', marker='o')

    # One connected line per selected trajectory
    for position, trajectory in enumerate(lines):
        line_infos = [infos[key] for key in trajectory]
        line_y = [metric_selector(record) for record in line_infos]
        line_x = [record.graph.num_nodes for record in line_infos]
        plt.plot(line_x, line_y, marker='o', color=colors[position])

* Retrieve information from a list 

In [5]:
def retrive_info_from_list(selector, distances):
    """Apply ``selector`` to every item of ``distances`` and return the results as a list."""
    return [selector(item) for item in distances]

### Read Models

In [6]:
# Collection of human models; every other model is compared against it.
# NOTE(review): the meaning of 300 is not visible here — confirm in GraphCollection.
human = GraphCollection('../statistics/humanOutput/', 300, 'Human')
# Names of the statistics files under viatraEvolve; each is later passed to GraphStat.
# NOTE(review): the meaning of 1000 and False is not visible here — confirm in readCSV.
file_names = reader.readmultiplefiles('../statistics/viatraEvolve/', 1000, False)

### Calculate Distances

In [7]:
# Progress widget: a horizontal bar whose value runs from 0 to 1.0.
# The loading cell sets its value to the fraction of items processed.
w2 = widgets.FloatProgress(
    value=0,
    min=0,
    max=1.0,
    step=0.1,
    description='Loading Files...:',
    bar_style='info',
    orientation='horizontal'
)

In [8]:
infos = []
# Number of files to process; used as the denominator of the progress fraction
size = len(file_names)
counter = 0.0
# Display the progress bar before any work starts
display(w2)

# Read the statistics of every file
for name in file_names:
    infos.append(GraphStat(name))

# Compute the distances to the human models, keyed by graph id.
# The progress bar tracks this phase only.
info_dic = {}
for graph_stat in infos:
    info = GraphDistance(graph_stat, human)
    info_dic[info.graph.id] = info
    # Advance after the item is finished so the bar reaches 1.0 on the last
    # item instead of stopping at (size - 1) / size.
    counter += 1
    w2.value = counter / size

FloatProgress(value=0.0, bar_style='info', description='Loading Files...:', max=1.0)

#### Plot Graphs

* Widget for selecting trajectories

In [9]:
# Read every trajectory file and key the parsed trajectory by its file name
filenames = reader.readmultiplefiles('../statistics/trajectories/', 10, False)
trajectories = {path: reader.readTrajectory(path) for path in filenames}

# Multi-select widget over the trajectories; the first one starts selected
initial_selection = [trajectories[filenames[0]]]
w = widgets.SelectMultiple(
    options=trajectories,
    value=initial_selection,
    description='Trajectory:',
    disabled=False,
)

# One random hex color per trajectory
colors = ["#%06x" % random.randint(0, 0xFFFFFF) for _ in range(len(trajectories))]

#### Out Degree

In [10]:
def plot_out_degree(lines):
    """Plot the out-degree distance of every model and overlay the trajectories in ``lines``."""
    plot(info_dic, lines, 0, lambda a: a.out_d_distance, colors, 'out degree')
# Bind the callback to the trajectory selector ``w``; its selection is passed as ``lines``
interact(plot_out_degree, lines=w)

interactive(children=(SelectMultiple(description='Trajectory:', index=(0,), options={'../statistics/trajectori…

<function __main__.plot_out_degree(lines)>

#### Node Activity

In [11]:
# NOTE(review): this callback plots node activity but reuses the name
# plot_out_degree from the out-degree cell, rebinding that global.
def plot_out_degree(lines):
    """Plot the node-activity distance of every model and overlay the trajectories in ``lines``."""
    plot(info_dic, lines, 0, lambda a: a.na_distance, colors, 'node activity')
# Bind the callback to the trajectory selector ``w``; its selection is passed as ``lines``
interact(plot_out_degree, lines=w)

interactive(children=(SelectMultiple(description='Trajectory:', index=(0,), options={'../statistics/trajectori…

<function __main__.plot_out_degree(lines)>

#### MPC

In [12]:
# NOTE(review): this callback plots the MPC distance but reuses the name
# plot_out_degree from the out-degree cell, rebinding that global.
def plot_out_degree(lines):
    """Plot the MPC distance of every model and overlay the trajectories in ``lines``."""
    plot(info_dic, lines, 0, lambda a: a.mpc_distance, colors, 'MPC')
# Bind the callback to the trajectory selector ``w``; its selection is passed as ``lines``
interact(plot_out_degree, lines=w)

interactive(children=(SelectMultiple(description='Trajectory:', index=(0,), options={'../statistics/trajectori…

<function __main__.plot_out_degree(lines)>

In [42]:
# Print the name of every file whose first 'State Id' entry equals the target id
target_state_id = 1032396643
for name in file_names:
    contents = reader.readcsvfile(name)
    if contents['State Id'][0] != target_state_id:
        continue
    print(name)

../statistics/viatraEvolve\state_735.csv
