{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Use K-medoid algorithm to find the suitable human model representitives" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os, sys\n", "lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))\n", "sys.path.append(lib_path)\n", "from GraphType import GraphStat\n", "import readCSV as reader\n", "from scipy import stats\n", "from ipywidgets import interact, fixed, interactive\n", "import ipywidgets as widgets\n", "from pyclustering.cluster.kmedoids import kmedoids\n", "from pyclustering.utils.metric import distance_metric, type_metric\n", "import random\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define a new distance metric" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def ks_value(dest1, dest2):\n", " value, p = stats.ks_2samp(dest1, dest2)\n", " return value\n", "\n", "\n", "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read Human Models" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "304" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Progress Widge\n", "w = widgets.FloatProgress(\n", " value=0,\n", " min=0,\n", " max=1.0,\n", " step=0.1,\n", " description='Loading Files...:',\n", " bar_style='info',\n", " orientation='horizontal'\n", ")\n", "\n", "humanFiles = reader.readmultiplefiles('../input/Human/', 1300, False)\n", "modelToFileName = {}\n", "for name in humanFiles:\n", " modelToFileName[GraphStat(name)] = name\n", "\n", "models = list(modelToFileName.keys())\n", "len(humanFiles)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find Representative by K-medroid for different dists on GraphStat" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Returns the index of the representative" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def findRep(graphStats, func):\n", " out_ds = list(map(func, models))\n", "\n", " #choose a random starting point\n", " start_index = random.randint(0, len(out_ds))\n", "\n", " # start with one initial metrid [start_index]\n", " outdegree_kmedoid = kmedoids(out_ds, [start_index], metric=ks_metric)\n", "\n", " outdegree_kmedoid.process()\n", " centoids = outdegree_kmedoid.get_medoids()\n", " return centoids[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Find representative for out degree" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### For Yakindumm\n", "#### For all human models\n", "* the rep found is ../input/humanOutput\\R_20158_run_1.csv\n", "* the average distance between it and others is 0.05515988287586802\n", "\n", "#### For human models with $100 \\pm 10$ nodes\n", "* the rep found is ../input/human_output_100\\R_2015225_run_1.csv\n", "* the average distance between it and others is $0.046150929558524685$\n", "\n", "#### for human model with $100 \\pm 10$ nodes and new metric\n", "* the rep found is ../input/human_output_100\\R_2015248_run_1.csv\n", "* average distance: 0.052753778714861366\n", "* median: 0.0468131868131868\n", "* std: 0.0246917800149673\n", "* max: 0.15993907083015996\n", "* min: 0.0" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../input/Human\\33_run_1.csv\n", "../input/Human\\33_run_1.csv\n" ] } ], "source": [ "od_rep_index = findRep(models, lambda m: m.out_d)\n", "print(list(modelToFileName.values())[od_rep_index])\n", "od_rep_model = models[od_rep_index]\n", "print(modelToFileName[od_rep_model])\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "average distance: 0.04615092955852465\n", "median: 0.04402137483980782\n", "std: 0.017305709419913242\n", "max: 0.1411706837186424\n", "min: 0.0\n" ] } ], "source": [ "distances = []\n", "for model in models:\n", " distances.append(ks_value(od_rep_model.out_d, model.out_d))\n", "print('average distance: ', np.mean(distances))\n", "print('median: ', np.median(distances))\n", "print('std: ', np.std(distances))\n", "print('max:', max(distances))\n", "print('min:', min(distances))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Find Representative for node activities" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### For Yakindumm\n", "#### For all human models\n", "* the rep found is ../input/humanOutput\\R_2016176_run_1.csv\n", "* the average distance between it and others is 0.05275267434589047\n", "\n", "#### For human models with $100 \\pm 10$ nodes\n", "* the rep found is ../input/human_output_100\\R_2017419_run_1.csv\n", "* the average distance between it and others is $0.04679429311806747$\n", "\n", "#### for human model with $100 \\pm 10$ nodes and new metric\n", "* the rep found is ../input/human_output_100\\R_2017131_run_1.csv\n", "* average distance: 0.024629205820449567\n", "* median: 0.023787888564682946\n", "* std: 0.013845547883198073\n", "* max: 0.09044674910251294\n", "* min: 0.0" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../input/Human\\288_run_1.csv\n", "../input/Human\\288_run_1.csv\n" ] } ], "source": [ "na_rep_index = findRep(models, lambda m: m.na)\n", "print(list(modelToFileName.values())[na_rep_index])\n", "na_rep_model = models[na_rep_index]\n", "print(modelToFileName[na_rep_model])\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "average distance: 0.046794293118067494\n", "median: 0.03898868458274401\n", "std: 0.02880119213919405\n", "max: 0.18702970297029703\n", "min: 0.0\n" ] } ], "source": [ "distances = []\n", "for model in models:\n", " distances.append(ks_value(na_rep_model.na, model.na))\n", "print('average distance: ', np.mean(distances))\n", "print('median: ', np.median(distances))\n", "print('std: ', np.std(distances))\n", "print('max:', max(distances))\n", "print('min:', min(distances))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Find Representative for MPC" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### For Yakindumm\n", "\n", "#### For all human models\n", "* the rep found is ../input/humanOutput\\R_2015246_run_1.csv\n", "* the average distance between it and others is 0.08556632702185384\n", "\n", "#### For human models with $100 \\pm 10$ nodes\n", "* the rep found is ../input/human_output_100\\R_2016324_run_1.csv\n", "* the average distance between it and others is $0.07028909225833631$\n", "\n", "#### for human model with $100 \\pm 10$ nodes and new metric\n", "* average distance: 0.054782550772603904\n", "* median: 0.048330503678551184\n", "* std: 0.028208257424907526\n", "* max: 0.21181525241675614\n", "* min: 0.0" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../input/Human\\151_run_1.csv\n", "../input/Human\\151_run_1.csv\n" ] } ], "source": [ "mpc_rep_index = findRep(models, lambda m: m.mpc)\n", "print(list(modelToFileName.values())[mpc_rep_index])\n", "mpc_rep_model = models[mpc_rep_index]\n", "print(modelToFileName[mpc_rep_model])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "average distance: 0.07028909225833632\n", "median: 0.06254480286738351\n", "std: 0.037281890512224164\n", "max: 0.21961550993809065\n", "min: 0.0\n" ] } ], "source": [ "distances = []\n", "for model in models:\n", " distances.append(ks_value(mpc_rep_model.mpc, model.mpc))\n", "print('average distance: ', np.mean(distances))\n", "print('median: ', np.median(distances))\n", "print('std: ', np.std(distances))\n", "print('max:', max(distances))\n", "print('min:', min(distances))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }