{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Use the K-medoids algorithm to find suitable human model representatives" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os, sys\n", "lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))\n", "sys.path.append(lib_path)\n", "from GraphType import GraphStat\n", "import readCSV as reader\n", "from scipy import stats\n", "from ipywidgets import interact, fixed, interactive\n", "import ipywidgets as widgets\n", "from pyclustering.cluster.kmedoids import kmedoids\n", "from pyclustering.utils.metric import distance_metric, type_metric\n", "import random\n", "\n", "# Seed the RNG so the random K-medoids starting index is the same on every run\n", "random.seed(0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define a new distance metric" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def ks_value(dest1, dest2):\n", "    \"\"\"Return the two-sample Kolmogorov-Smirnov statistic of dest1 and dest2.\n", "\n", "    Only the statistic from scipy.stats.ks_2samp is used; the p-value is discarded.\n", "    \"\"\"\n", "    statistic, _ = stats.ks_2samp(dest1, dest2)\n", "    return statistic\n", "\n", "\n", "# Wrap the KS statistic as a user-defined pyclustering distance metric\n", "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read Human Models" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1253" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Progress widget\n", "# NOTE(review): the widget is created here but never displayed or updated in this notebook\n", "w = widgets.FloatProgress(\n", "    value=0,\n", "    min=0,\n", "    max=1.0,\n", "    step=0.1,\n", "    description='Loading Files...:',\n", "    bar_style='info',\n", "    orientation='horizontal'\n", ")\n", "\n", "\n", "humanFiles = reader.readmultiplefiles('../input/humanOutput/', 1300, False)\n", "\n", "# Map the GraphStat built from each entry of humanFiles back to that entry\n", "modelToFileName = {}\n", "for name in humanFiles:\n", "    modelToFileName[GraphStat(name)] = name\n", "\n", "models = list(modelToFileName.keys())\n", "len(humanFiles)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find Representative by K-medoids for
different dists on GraphStat" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Returns the index of the representative" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def findRep(graphStats, func):\n", "    \"\"\"Return the index (into graphStats) of the K-medoids representative.\n", "\n", "    func maps each element of graphStats to the sample that is compared\n", "    with the KS distance (ks_metric). Clustering starts from a single\n", "    randomly chosen initial medoid, and the first medoid reported by\n", "    pyclustering is returned.\n", "\n", "    Raises ValueError when graphStats is empty.\n", "    \"\"\"\n", "    samples = [func(stat) for stat in graphStats]\n", "    if not samples:\n", "        raise ValueError('findRep needs at least one graph stat')\n", "\n", "    # choose a random starting point; randrange excludes the upper bound,\n", "    # whereas randint(0, len(samples)) could return len(samples), an invalid index\n", "    start_index = random.randrange(len(samples))\n", "\n", "    # start with one initial medoid [start_index]\n", "    clusterer = kmedoids(samples, [start_index], metric=ks_metric)\n", "\n", "    clusterer.process()\n", "    medoids = clusterer.get_medoids()\n", "    return medoids[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find representative for out degree" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* the rep found is ../input/humanOutput\\R_20158_run_1.csv\n", "* the average distance between it and others is 0.05515988287586802" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../input/humanOutput\\R_20158_run_1.csv\n", "../input/humanOutput\\R_20158_run_1.csv\n" ] } ], "source": [ "od_rep_index = findRep(models, lambda m: m.out_d)\n", "print(list(modelToFileName.values())[od_rep_index])\n", "od_rep_model = models[od_rep_index]\n", "print(modelToFileName[od_rep_model])\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.05515988287586802\n" ] } ], "source": [ "# Average KS distance between the out-degree representative and every model\n", "total_distance = 0\n", "for model in models:\n", "    total_distance += ks_value(od_rep_model.out_d, model.out_d)\n", "print(total_distance / len(models))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find Representative for node activities" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* the rep found is ../input/humanOutput\\R_2016176_run_1.csv\n", "* the average distance between it and others
is 0.05275267434589047" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../input/humanOutput\\R_2016176_run_1.csv\n", "../input/humanOutput\\R_2016176_run_1.csv\n" ] } ], "source": [ "# Find the representative for the node-activity samples (the .na attribute)\n", "na_rep_index = findRep(models, lambda m: m.na)\n", "print(list(modelToFileName.values())[na_rep_index])\n", "na_rep_model = models[na_rep_index]\n", "print(modelToFileName[na_rep_model])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.05275267434589047\n" ] } ], "source": [ "# Average KS distance between the node-activity representative and every model\n", "total_distance = 0\n", "for model in models:\n", "    total_distance += ks_value(na_rep_model.na, model.na)\n", "print(total_distance / len(models))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Find Representative for MPC" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* the rep found is ../input/humanOutput\\R_2015246_run_1.csv\n", "* the average distance between it and others is 0.08556632702185384" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../input/humanOutput\\R_2015246_run_1.csv\n", "../input/humanOutput\\R_2015246_run_1.csv\n" ] } ], "source": [ "mpc_rep_index = findRep(models, lambda m: m.mpc)\n", "print(list(modelToFileName.values())[mpc_rep_index])\n", "mpc_rep_model = models[mpc_rep_index]\n", "print(modelToFileName[mpc_rep_model])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.08556632702185384\n" ] } ], "source": [ "# Average KS distance between the MPC representative and every model\n", "total_distance = 0\n", "for model in models:\n", "    total_distance += ks_value(mpc_rep_model.mpc, model.mpc)\n", "print(total_distance / len(models))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3",
"language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }