aboutsummaryrefslogtreecommitdiffstats
path: root/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb')
-rw-r--r--Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb392
1 files changed, 392 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb
new file mode 100644
index 00000000..32edb00c
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb
@@ -0,0 +1,392 @@
1{
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "## Use the k-medoids algorithm to find suitable human model representatives"
8 ]
9 },
10 {
11 "cell_type": "markdown",
12 "metadata": {},
13 "source": [
14 "### Imports"
15 ]
16 },
17 {
18 "cell_type": "code",
19 "execution_count": 2,
20 "metadata": {},
21 "outputs": [],
22 "source": [
23 "import os, sys\n",
24 "lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))\n",
25 "sys.path.append(lib_path)\n",
26 "from GraphType import GraphStat\n",
27 "import readCSV as reader\n",
28 "from scipy import stats\n",
29 "from ipywidgets import interact, fixed, interactive\n",
30 "import ipywidgets as widgets\n",
31 "from pyclustering.cluster.kmedoids import kmedoids\n",
32 "from pyclustering.utils.metric import distance_metric, type_metric\n",
33 "import random\n",
34 "import numpy as np"
35 ]
36 },
37 {
38 "cell_type": "markdown",
39 "metadata": {},
40 "source": [
41 "### Define a new distance metric"
42 ]
43 },
44 {
45 "cell_type": "code",
46 "execution_count": 3,
47 "metadata": {},
48 "outputs": [],
49 "source": [
50 "def ks_value(dest1, dest2):\n",
51 " value, p = stats.ks_2samp(dest1, dest2)\n",
52 " return value\n",
53 "\n",
54 "\n",
55 "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)"
56 ]
57 },
58 {
59 "cell_type": "markdown",
60 "metadata": {},
61 "source": [
62 "### Read Human Models"
63 ]
64 },
65 {
66 "cell_type": "code",
67 "execution_count": 4,
68 "metadata": {},
69 "outputs": [
70 {
71 "data": {
72 "text/plain": [
73 "304"
74 ]
75 },
76 "execution_count": 4,
77 "metadata": {},
78 "output_type": "execute_result"
79 }
80 ],
81 "source": [
82 "# Progress widget (created here; not displayed or updated by this cell)\n",
83 "w = widgets.FloatProgress(\n",
84 " value=0,\n",
85 " min=0,\n",
86 " max=1.0,\n",
87 " step=0.1,\n",
88 " description='Loading Files...:',\n",
89 " bar_style='info',\n",
90 " orientation='horizontal'\n",
91 ")\n",
92 "\n",
93 "humanFiles = reader.readmultiplefiles('../input/Human/', 1300, False)\n",
94 "modelToFileName = {}\n",
95 "for name in humanFiles:\n",
96 " modelToFileName[GraphStat(name)] = name\n",
97 "\n",
98 "models = list(modelToFileName.keys())\n",
99 "len(humanFiles)"
100 ]
101 },
102 {
103 "cell_type": "markdown",
104 "metadata": {},
105 "source": [
106 "### Find the representative by k-medoids for different distributions on GraphStat"
107 ]
108 },
109 {
110 "cell_type": "markdown",
111 "metadata": {},
112 "source": [
113 "* Returns the index of the representative"
114 ]
115 },
116 {
117 "cell_type": "code",
118 "execution_count": 5,
119 "metadata": {},
120 "outputs": [],
121 "source": [
122 "def findRep(graphStats, func):\n",
123 " out_ds = list(map(func, models))\n",
124 "\n",
125 " #choose a random starting point\n",
126 " start_index = random.randint(0, len(out_ds))\n",
127 "\n",
128 " # start with one initial metrid [start_index]\n",
129 " outdegree_kmedoid = kmedoids(out_ds, [start_index], metric=ks_metric)\n",
130 "\n",
131 " outdegree_kmedoid.process()\n",
132 " centoids = outdegree_kmedoid.get_medoids()\n",
133 " return centoids[0]"
134 ]
135 },
136 {
137 "cell_type": "markdown",
138 "metadata": {},
139 "source": [
140 "## Find representative for out degree"
141 ]
142 },
143 {
144 "cell_type": "markdown",
145 "metadata": {},
146 "source": [
147 "### For Yakindumm\n",
148 "#### For all human models\n",
149 "* the rep found is ../input/humanOutput\\R_20158_run_1.csv\n",
150 "* the average distance between it and others is 0.05515988287586802\n",
151 "\n",
152 "#### For human models with $100 \\pm 10$ nodes\n",
153 "* the rep found is ../input/human_output_100\\R_2015225_run_1.csv\n",
154 "* the average distance between it and others is $0.046150929558524685$\n",
155 "\n",
156 "#### For human models with $100 \\pm 10$ nodes and new metric\n",
157 "* the rep found is ../input/human_output_100\\R_2015248_run_1.csv\n",
158 "* average distance: 0.052753778714861366\n",
159 "* median: 0.0468131868131868\n",
160 "* std: 0.0246917800149673\n",
161 "* max: 0.15993907083015996\n",
162 "* min: 0.0"
163 ]
164 },
165 {
166 "cell_type": "code",
167 "execution_count": 6,
168 "metadata": {},
169 "outputs": [
170 {
171 "name": "stdout",
172 "output_type": "stream",
173 "text": [
174 "../input/Human\\33_run_1.csv\n",
175 "../input/Human\\33_run_1.csv\n"
176 ]
177 }
178 ],
179 "source": [
180 "od_rep_index = findRep(models, lambda m: m.out_d)\n",
181 "print(list(modelToFileName.values())[od_rep_index])\n",
182 "od_rep_model = models[od_rep_index]\n",
183 "print(modelToFileName[od_rep_model])\n"
184 ]
185 },
186 {
187 "cell_type": "code",
188 "execution_count": 7,
189 "metadata": {},
190 "outputs": [
191 {
192 "name": "stdout",
193 "output_type": "stream",
194 "text": [
195 "average distance: 0.04615092955852465\n",
196 "median: 0.04402137483980782\n",
197 "std: 0.017305709419913242\n",
198 "max: 0.1411706837186424\n",
199 "min: 0.0\n"
200 ]
201 }
202 ],
203 "source": [
204 "distances = []\n",
205 "for model in models:\n",
206 " distances.append(ks_value(od_rep_model.out_d, model.out_d))\n",
207 "print('average distance: ', np.mean(distances))\n",
208 "print('median: ', np.median(distances))\n",
209 "print('std: ', np.std(distances))\n",
210 "print('max:', max(distances))\n",
211 "print('min:', min(distances))"
212 ]
213 },
214 {
215 "cell_type": "markdown",
216 "metadata": {},
217 "source": [
218 "## Find Representative for node activities"
219 ]
220 },
221 {
222 "cell_type": "markdown",
223 "metadata": {},
224 "source": [
225 "### For Yakindumm\n",
226 "#### For all human models\n",
227 "* the rep found is ../input/humanOutput\\R_2016176_run_1.csv\n",
228 "* the average distance between it and others is 0.05275267434589047\n",
229 "\n",
230 "#### For human models with $100 \\pm 10$ nodes\n",
231 "* the rep found is ../input/human_output_100\\R_2017419_run_1.csv\n",
232 "* the average distance between it and others is $0.04679429311806747$\n",
233 "\n",
234 "#### For human models with $100 \\pm 10$ nodes and new metric\n",
235 "* the rep found is ../input/human_output_100\\R_2017131_run_1.csv\n",
236 "* average distance: 0.024629205820449567\n",
237 "* median: 0.023787888564682946\n",
238 "* std: 0.013845547883198073\n",
239 "* max: 0.09044674910251294\n",
240 "* min: 0.0"
241 ]
242 },
243 {
244 "cell_type": "code",
245 "execution_count": 8,
246 "metadata": {},
247 "outputs": [
248 {
249 "name": "stdout",
250 "output_type": "stream",
251 "text": [
252 "../input/Human\\288_run_1.csv\n",
253 "../input/Human\\288_run_1.csv\n"
254 ]
255 }
256 ],
257 "source": [
258 "na_rep_index = findRep(models, lambda m: m.na)\n",
259 "print(list(modelToFileName.values())[na_rep_index])\n",
260 "na_rep_model = models[na_rep_index]\n",
261 "print(modelToFileName[na_rep_model])\n"
262 ]
263 },
264 {
265 "cell_type": "code",
266 "execution_count": 9,
267 "metadata": {},
268 "outputs": [
269 {
270 "name": "stdout",
271 "output_type": "stream",
272 "text": [
273 "average distance: 0.046794293118067494\n",
274 "median: 0.03898868458274401\n",
275 "std: 0.02880119213919405\n",
276 "max: 0.18702970297029703\n",
277 "min: 0.0\n"
278 ]
279 }
280 ],
281 "source": [
282 "distances = []\n",
283 "for model in models:\n",
284 " distances.append(ks_value(na_rep_model.na, model.na))\n",
285 "print('average distance: ', np.mean(distances))\n",
286 "print('median: ', np.median(distances))\n",
287 "print('std: ', np.std(distances))\n",
288 "print('max:', max(distances))\n",
289 "print('min:', min(distances))"
290 ]
291 },
292 {
293 "cell_type": "markdown",
294 "metadata": {},
295 "source": [
296 "## Find Representative for MPC"
297 ]
298 },
299 {
300 "cell_type": "markdown",
301 "metadata": {},
302 "source": [
303 "### For Yakindumm\n",
304 "\n",
305 "#### For all human models\n",
306 "* the rep found is ../input/humanOutput\\R_2015246_run_1.csv\n",
307 "* the average distance between it and others is 0.08556632702185384\n",
308 "\n",
309 "#### For human models with $100 \\pm 10$ nodes\n",
310 "* the rep found is ../input/human_output_100\\R_2016324_run_1.csv\n",
311 "* the average distance between it and others is $0.07028909225833631$\n",
312 "\n",
313 "#### For human models with $100 \\pm 10$ nodes and new metric\n",
314 "* average distance: 0.054782550772603904\n",
315 "* median: 0.048330503678551184\n",
316 "* std: 0.028208257424907526\n",
317 "* max: 0.21181525241675614\n",
318 "* min: 0.0"
319 ]
320 },
321 {
322 "cell_type": "code",
323 "execution_count": 10,
324 "metadata": {},
325 "outputs": [
326 {
327 "name": "stdout",
328 "output_type": "stream",
329 "text": [
330 "../input/Human\\151_run_1.csv\n",
331 "../input/Human\\151_run_1.csv\n"
332 ]
333 }
334 ],
335 "source": [
336 "mpc_rep_index = findRep(models, lambda m: m.mpc)\n",
337 "print(list(modelToFileName.values())[mpc_rep_index])\n",
338 "mpc_rep_model = models[mpc_rep_index]\n",
339 "print(modelToFileName[mpc_rep_model])"
340 ]
341 },
342 {
343 "cell_type": "code",
344 "execution_count": 11,
345 "metadata": {},
346 "outputs": [
347 {
348 "name": "stdout",
349 "output_type": "stream",
350 "text": [
351 "average distance: 0.07028909225833632\n",
352 "median: 0.06254480286738351\n",
353 "std: 0.037281890512224164\n",
354 "max: 0.21961550993809065\n",
355 "min: 0.0\n"
356 ]
357 }
358 ],
359 "source": [
360 "distances = []\n",
361 "for model in models:\n",
362 " distances.append(ks_value(mpc_rep_model.mpc, model.mpc))\n",
363 "print('average distance: ', np.mean(distances))\n",
364 "print('median: ', np.median(distances))\n",
365 "print('std: ', np.std(distances))\n",
366 "print('max:', max(distances))\n",
367 "print('min:', min(distances))"
368 ]
369 }
370 ],
371 "metadata": {
372 "kernelspec": {
373 "display_name": "Python 3",
374 "language": "python",
375 "name": "python3"
376 },
377 "language_info": {
378 "codemirror_mode": {
379 "name": "ipython",
380 "version": 3
381 },
382 "file_extension": ".py",
383 "mimetype": "text/x-python",
384 "name": "python",
385 "nbconvert_exporter": "python",
386 "pygments_lexer": "ipython3",
387 "version": "3.7.3"
388 }
389 },
390 "nbformat": 4,
391 "nbformat_minor": 2
392}