## Use the k-medoids algorithm to find suitable representatives of the human models

### Imports

In [2]:
import os, sys
lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))
sys.path.append(lib_path)
from GraphType import GraphStat
import readCSV as reader
from scipy import stats
from ipywidgets import interact, fixed, interactive
import ipywidgets as widgets
from pyclustering.cluster.kmedoids import kmedoids
from pyclustering.utils.metric import distance_metric, type_metric
import random
import numpy as np

### Define a new distance metric

In [3]:
def ks_value(dest1, dest2):
    """Return the two-sample Kolmogorov-Smirnov statistic of two samples.

    Args:
        dest1: first sample, forwarded unchanged to ``scipy.stats.ks_2samp``.
        dest2: second sample, forwarded unchanged to ``scipy.stats.ks_2samp``.

    Returns:
        The KS statistic only; the p-value computed by scipy is discarded.
    """
    # ks_2samp yields (statistic, pvalue); only the statistic serves as distance.
    return stats.ks_2samp(dest1, dest2)[0]


# Wrap ks_value as a user-defined pyclustering distance metric so it can be
# passed to clustering algorithms through their `metric` argument.
ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)

### Read Human Models

In [4]:
# Progress widget (only constructed here; this cell neither displays nor updates it)
w = widgets.FloatProgress(
    value=0, min=0, max=1.0, step=0.1,
    description='Loading Files...:',
    bar_style='info',
    orientation='horizontal',
)

# Read the human model files.
# NOTE(review): the meaning of the `1300` and `False` arguments is defined by
# readCSV.readmultiplefiles, which is not visible in this notebook -- confirm.
humanFiles = reader.readmultiplefiles('../input/Human/', 1300, False)

# Map each GraphStat (built from one entry of humanFiles) back to that entry,
# so a model found later can be traced to the file it came from.
modelToFileName = {GraphStat(name): name for name in humanFiles}

# Iterating a dict yields its keys, i.e. the GraphStat objects in insertion order.
models = list(modelToFileName)
len(humanFiles)

304

### Find the representative by k-medoids for different distributions on GraphStat

* Returns the index of the representative

In [5]:
def findRep(graphStats, func):
    """Return the index of the k-medoids representative among ``graphStats``.

    Every element of ``graphStats`` is mapped through ``func`` to obtain the
    sample that represents it. A k-medoids run with a single medoid, using
    ``ks_metric`` as the distance, then selects the representative.

    Args:
        graphStats: iterable of models to choose the representative from.
        func: callable applied to each model; its return value is what
            ``ks_metric`` compares (e.g. ``lambda m: m.out_d``).

    Returns:
        The first medoid index reported by the clustering run, i.e. a
        position within ``graphStats``.

    Raises:
        ValueError: if ``graphStats`` is empty.
    """
    # Use the argument that was passed in; the previous version silently read
    # the notebook-level `models` and ignored `graphStats` altogether.
    samples = [func(stat) for stat in graphStats]
    if not samples:
        raise ValueError('findRep requires at least one model')

    # Choose a random starting medoid. randrange excludes its upper bound,
    # whereas randint(0, len(samples)) could return len(samples), which is
    # one past the last valid index.
    start_index = random.randrange(len(samples))

    # Start with a single initial medoid: [start_index].
    clusterer = kmedoids(samples, [start_index], metric=ks_metric)
    clusterer.process()

    medoids = clusterer.get_medoids()
    return medoids[0]

## Find representative for out degree

### For Yakindumm
#### For all human models
* the rep found is ../input/humanOutput\R_20158_run_1.csv
* the average distance between it and others is 0.05515988287586802

#### For human models with $100 \pm 10$ nodes
* the rep found is ../input/human_output_100\R_2015225_run_1.csv
* the average distance between it and others is $0.046150929558524685$

#### for human model with $100 \pm 10$ nodes and new metric
* the rep found is ../input/human_output_100\R_2015248_run_1.csv
* average distance: 0.052753778714861366
* median: 0.0468131868131868
* std: 0.0246917800149673
* max: 0.15993907083015996
* min: 0.0

In [6]:
# Position (within `models`) of the representative for the out-degree samples.
od_rep_index = findRep(models, lambda graph: graph.out_d)
od_rep_model = models[od_rep_index]

# Show the file name twice: once looked up by position, once by model key.
# Both lookups go through the same dict, so the two lines should match.
print(list(modelToFileName.values())[od_rep_index])
print(modelToFileName[od_rep_model])


../input/Human\33_run_1.csv
../input/Human\33_run_1.csv


In [7]:
# KS distance from the out-degree representative to every model (itself included).
distances = [ks_value(od_rep_model.out_d, model.out_d) for model in models]

# Summary statistics of those distances.
print('average distance: ', np.mean(distances))
print('median: ', np.median(distances))
print('std: ', np.std(distances))
print('max:', max(distances))
print('min:', min(distances))

average distance: 0.04615092955852465
median: 0.04402137483980782
std: 0.017305709419913242
max: 0.1411706837186424
min: 0.0


## Find Representative for node activities

### For Yakindumm
#### For all human models
* the rep found is ../input/humanOutput\R_2016176_run_1.csv
* the average distance between it and others is 0.05275267434589047

#### For human models with $100 \pm 10$ nodes
* the rep found is ../input/human_output_100\R_2017419_run_1.csv
* the average distance between it and others is $0.04679429311806747$

#### for human model with $100 \pm 10$ nodes and new metric
* the rep found is ../input/human_output_100\R_2017131_run_1.csv
* average distance: 0.024629205820449567
* median: 0.023787888564682946
* std: 0.013845547883198073
* max: 0.09044674910251294
* min: 0.0

In [8]:
# Position (within `models`) of the representative for the `na` samples.
na_rep_index = findRep(models, lambda graph: graph.na)
na_rep_model = models[na_rep_index]

# Show the file name twice: once looked up by position, once by model key.
# Both lookups go through the same dict, so the two lines should match.
print(list(modelToFileName.values())[na_rep_index])
print(modelToFileName[na_rep_model])


../input/Human\288_run_1.csv
../input/Human\288_run_1.csv


In [9]:
# KS distance from the `na` representative to every model (itself included).
distances = [ks_value(na_rep_model.na, model.na) for model in models]

# Summary statistics of those distances.
print('average distance: ', np.mean(distances))
print('median: ', np.median(distances))
print('std: ', np.std(distances))
print('max:', max(distances))
print('min:', min(distances))

average distance: 0.046794293118067494
median: 0.03898868458274401
std: 0.02880119213919405
max: 0.18702970297029703
min: 0.0


## Find Representative for MPC

### For Yakindumm

#### For all human models
* the rep found is ../input/humanOutput\R_2015246_run_1.csv
* the average distance between it and others is 0.08556632702185384

#### For human models with $100 \pm 10$ nodes
* the rep found is ../input/human_output_100\R_2016324_run_1.csv
* the average distance between it and others is $0.07028909225833631$

#### for human model with $100 \pm 10$ nodes and new metric
* average distance: 0.054782550772603904
* median: 0.048330503678551184
* std: 0.028208257424907526
* max: 0.21181525241675614
* min: 0.0

In [10]:
# Position (within `models`) of the representative for the `mpc` samples.
mpc_rep_index = findRep(models, lambda graph: graph.mpc)
mpc_rep_model = models[mpc_rep_index]

# Show the file name twice: once looked up by position, once by model key.
# Both lookups go through the same dict, so the two lines should match.
print(list(modelToFileName.values())[mpc_rep_index])
print(modelToFileName[mpc_rep_model])

../input/Human\151_run_1.csv
../input/Human\151_run_1.csv


In [11]:
# KS distance from the `mpc` representative to every model (itself included).
distances = [ks_value(mpc_rep_model.mpc, model.mpc) for model in models]

# Summary statistics of those distances.
print('average distance: ', np.mean(distances))
print('median: ', np.median(distances))
print('std: ', np.std(distances))
print('max:', max(distances))
print('min:', min(distances))

average distance: 0.07028909225833632
median: 0.06254480286738351
std: 0.037281890512224164
max: 0.21961550993809065
min: 0.0
