diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb new file mode 100644 index 00000000..32edb00c --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb | |||
@@ -0,0 +1,392 @@ | |||
1 | { | ||
2 | "cells": [ | ||
3 | { | ||
4 | "cell_type": "markdown", | ||
5 | "metadata": {}, | ||
6 | "source": [ | ||
7 | "## Use the K-medoids algorithm to find suitable human model representatives" | ||
8 | ] | ||
9 | }, | ||
10 | { | ||
11 | "cell_type": "markdown", | ||
12 | "metadata": {}, | ||
13 | "source": [ | ||
14 | "### Imports" | ||
15 | ] | ||
16 | }, | ||
17 | { | ||
18 | "cell_type": "code", | ||
19 | "execution_count": 2, | ||
20 | "metadata": {}, | ||
21 | "outputs": [], | ||
22 | "source": [ | ||
23 | "import os, sys\n", | ||
24 | "lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))\n", | ||
25 | "sys.path.append(lib_path)\n", | ||
26 | "from GraphType import GraphStat\n", | ||
27 | "import readCSV as reader\n", | ||
28 | "from scipy import stats\n", | ||
29 | "from ipywidgets import interact, fixed, interactive\n", | ||
30 | "import ipywidgets as widgets\n", | ||
31 | "from pyclustering.cluster.kmedoids import kmedoids\n", | ||
32 | "from pyclustering.utils.metric import distance_metric, type_metric\n", | ||
33 | "import random\n", | ||
34 | "import numpy as np" | ||
35 | ] | ||
36 | }, | ||
37 | { | ||
38 | "cell_type": "markdown", | ||
39 | "metadata": {}, | ||
40 | "source": [ | ||
41 | "### Define a new distance metric" | ||
42 | ] | ||
43 | }, | ||
44 | { | ||
45 | "cell_type": "code", | ||
46 | "execution_count": 3, | ||
47 | "metadata": {}, | ||
48 | "outputs": [], | ||
49 | "source": [ | ||
50 | "def ks_value(dest1, dest2):\n", | ||
51 | " value, p = stats.ks_2samp(dest1, dest2)\n", | ||
52 | " return value\n", | ||
53 | "\n", | ||
54 | "\n", | ||
55 | "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)" | ||
56 | ] | ||
57 | }, | ||
58 | { | ||
59 | "cell_type": "markdown", | ||
60 | "metadata": {}, | ||
61 | "source": [ | ||
62 | "### Read Human Models" | ||
63 | ] | ||
64 | }, | ||
65 | { | ||
66 | "cell_type": "code", | ||
67 | "execution_count": 4, | ||
68 | "metadata": {}, | ||
69 | "outputs": [ | ||
70 | { | ||
71 | "data": { | ||
72 | "text/plain": [ | ||
73 | "304" | ||
74 | ] | ||
75 | }, | ||
76 | "execution_count": 4, | ||
77 | "metadata": {}, | ||
78 | "output_type": "execute_result" | ||
79 | } | ||
80 | ], | ||
81 | "source": [ | ||
82 | "# Progress Widge\n", | ||
83 | "w = widgets.FloatProgress(\n", | ||
84 | " value=0,\n", | ||
85 | " min=0,\n", | ||
86 | " max=1.0,\n", | ||
87 | " step=0.1,\n", | ||
88 | " description='Loading Files...:',\n", | ||
89 | " bar_style='info',\n", | ||
90 | " orientation='horizontal'\n", | ||
91 | ")\n", | ||
92 | "\n", | ||
93 | "humanFiles = reader.readmultiplefiles('../input/Human/', 1300, False)\n", | ||
94 | "modelToFileName = {}\n", | ||
95 | "for name in humanFiles:\n", | ||
96 | " modelToFileName[GraphStat(name)] = name\n", | ||
97 | "\n", | ||
98 | "models = list(modelToFileName.keys())\n", | ||
99 | "len(humanFiles)" | ||
100 | ] | ||
101 | }, | ||
102 | { | ||
103 | "cell_type": "markdown", | ||
104 | "metadata": {}, | ||
105 | "source": [ | ||
106 | "### Find the representative by K-medoids for different distributions on GraphStat" | ||
107 | ] | ||
108 | }, | ||
109 | { | ||
110 | "cell_type": "markdown", | ||
111 | "metadata": {}, | ||
112 | "source": [ | ||
113 | "* Returns the index of the representative" | ||
114 | ] | ||
115 | }, | ||
116 | { | ||
117 | "cell_type": "code", | ||
118 | "execution_count": 5, | ||
119 | "metadata": {}, | ||
120 | "outputs": [], | ||
121 | "source": [ | ||
122 | "def findRep(graphStats, func):\n", | ||
123 | " out_ds = list(map(func, models))\n", | ||
124 | "\n", | ||
125 | " #choose a random starting point\n", | ||
126 | " start_index = random.randint(0, len(out_ds))\n", | ||
127 | "\n", | ||
128 | " # start with one initial metrid [start_index]\n", | ||
129 | " outdegree_kmedoid = kmedoids(out_ds, [start_index], metric=ks_metric)\n", | ||
130 | "\n", | ||
131 | " outdegree_kmedoid.process()\n", | ||
132 | " centoids = outdegree_kmedoid.get_medoids()\n", | ||
133 | " return centoids[0]" | ||
134 | ] | ||
135 | }, | ||
136 | { | ||
137 | "cell_type": "markdown", | ||
138 | "metadata": {}, | ||
139 | "source": [ | ||
140 | "## Find representative for out degree" | ||
141 | ] | ||
142 | }, | ||
143 | { | ||
144 | "cell_type": "markdown", | ||
145 | "metadata": {}, | ||
146 | "source": [ | ||
147 | "### For Yakindumm\n", | ||
148 | "#### For all human models\n", | ||
149 | "* the rep found is ../input/humanOutput\\R_20158_run_1.csv\n", | ||
150 | "* the average distance between it and others is 0.05515988287586802\n", | ||
151 | "\n", | ||
152 | "#### For human models with $100 \\pm 10$ nodes\n", | ||
153 | "* the rep found is ../input/human_output_100\\R_2015225_run_1.csv\n", | ||
154 | "* the average distance between it and others is $0.046150929558524685$\n", | ||
155 | "\n", | ||
156 | "#### For human models with $100 \\pm 10$ nodes and the new metric\n", | ||
157 | "* the rep found is ../input/human_output_100\\R_2015248_run_1.csv\n", | ||
158 | "* average distance: 0.052753778714861366\n", | ||
159 | "* median: 0.0468131868131868\n", | ||
160 | "* std: 0.0246917800149673\n", | ||
161 | "* max: 0.15993907083015996\n", | ||
162 | "* min: 0.0" | ||
163 | ] | ||
164 | }, | ||
165 | { | ||
166 | "cell_type": "code", | ||
167 | "execution_count": 6, | ||
168 | "metadata": {}, | ||
169 | "outputs": [ | ||
170 | { | ||
171 | "name": "stdout", | ||
172 | "output_type": "stream", | ||
173 | "text": [ | ||
174 | "../input/Human\\33_run_1.csv\n", | ||
175 | "../input/Human\\33_run_1.csv\n" | ||
176 | ] | ||
177 | } | ||
178 | ], | ||
179 | "source": [ | ||
180 | "od_rep_index = findRep(models, lambda m: m.out_d)\n", | ||
181 | "print(list(modelToFileName.values())[od_rep_index])\n", | ||
182 | "od_rep_model = models[od_rep_index]\n", | ||
183 | "print(modelToFileName[od_rep_model])\n" | ||
184 | ] | ||
185 | }, | ||
186 | { | ||
187 | "cell_type": "code", | ||
188 | "execution_count": 7, | ||
189 | "metadata": {}, | ||
190 | "outputs": [ | ||
191 | { | ||
192 | "name": "stdout", | ||
193 | "output_type": "stream", | ||
194 | "text": [ | ||
195 | "average distance: 0.04615092955852465\n", | ||
196 | "median: 0.04402137483980782\n", | ||
197 | "std: 0.017305709419913242\n", | ||
198 | "max: 0.1411706837186424\n", | ||
199 | "min: 0.0\n" | ||
200 | ] | ||
201 | } | ||
202 | ], | ||
203 | "source": [ | ||
204 | "distances = []\n", | ||
205 | "for model in models:\n", | ||
206 | " distances.append(ks_value(od_rep_model.out_d, model.out_d))\n", | ||
207 | "print('average distance: ', np.mean(distances))\n", | ||
208 | "print('median: ', np.median(distances))\n", | ||
209 | "print('std: ', np.std(distances))\n", | ||
210 | "print('max:', max(distances))\n", | ||
211 | "print('min:', min(distances))" | ||
212 | ] | ||
213 | }, | ||
214 | { | ||
215 | "cell_type": "markdown", | ||
216 | "metadata": {}, | ||
217 | "source": [ | ||
218 | "## Find Representative for node activities" | ||
219 | ] | ||
220 | }, | ||
221 | { | ||
222 | "cell_type": "markdown", | ||
223 | "metadata": {}, | ||
224 | "source": [ | ||
225 | "### For Yakindumm\n", | ||
226 | "#### For all human models\n", | ||
227 | "* the rep found is ../input/humanOutput\\R_2016176_run_1.csv\n", | ||
228 | "* the average distance between it and others is 0.05275267434589047\n", | ||
229 | "\n", | ||
230 | "#### For human models with $100 \\pm 10$ nodes\n", | ||
231 | "* the rep found is ../input/human_output_100\\R_2017419_run_1.csv\n", | ||
232 | "* the average distance between it and others is $0.04679429311806747$\n", | ||
233 | "\n", | ||
234 | "#### For human models with $100 \\pm 10$ nodes and the new metric\n", | ||
235 | "* the rep found is ../input/human_output_100\\R_2017131_run_1.csv\n", | ||
236 | "* average distance: 0.024629205820449567\n", | ||
237 | "* median: 0.023787888564682946\n", | ||
238 | "* std: 0.013845547883198073\n", | ||
239 | "* max: 0.09044674910251294\n", | ||
240 | "* min: 0.0" | ||
241 | ] | ||
242 | }, | ||
243 | { | ||
244 | "cell_type": "code", | ||
245 | "execution_count": 8, | ||
246 | "metadata": {}, | ||
247 | "outputs": [ | ||
248 | { | ||
249 | "name": "stdout", | ||
250 | "output_type": "stream", | ||
251 | "text": [ | ||
252 | "../input/Human\\288_run_1.csv\n", | ||
253 | "../input/Human\\288_run_1.csv\n" | ||
254 | ] | ||
255 | } | ||
256 | ], | ||
257 | "source": [ | ||
258 | "na_rep_index = findRep(models, lambda m: m.na)\n", | ||
259 | "print(list(modelToFileName.values())[na_rep_index])\n", | ||
260 | "na_rep_model = models[na_rep_index]\n", | ||
261 | "print(modelToFileName[na_rep_model])\n" | ||
262 | ] | ||
263 | }, | ||
264 | { | ||
265 | "cell_type": "code", | ||
266 | "execution_count": 9, | ||
267 | "metadata": {}, | ||
268 | "outputs": [ | ||
269 | { | ||
270 | "name": "stdout", | ||
271 | "output_type": "stream", | ||
272 | "text": [ | ||
273 | "average distance: 0.046794293118067494\n", | ||
274 | "median: 0.03898868458274401\n", | ||
275 | "std: 0.02880119213919405\n", | ||
276 | "max: 0.18702970297029703\n", | ||
277 | "min: 0.0\n" | ||
278 | ] | ||
279 | } | ||
280 | ], | ||
281 | "source": [ | ||
282 | "distances = []\n", | ||
283 | "for model in models:\n", | ||
284 | " distances.append(ks_value(na_rep_model.na, model.na))\n", | ||
285 | "print('average distance: ', np.mean(distances))\n", | ||
286 | "print('median: ', np.median(distances))\n", | ||
287 | "print('std: ', np.std(distances))\n", | ||
288 | "print('max:', max(distances))\n", | ||
289 | "print('min:', min(distances))" | ||
290 | ] | ||
291 | }, | ||
292 | { | ||
293 | "cell_type": "markdown", | ||
294 | "metadata": {}, | ||
295 | "source": [ | ||
296 | "## Find Representative for MPC" | ||
297 | ] | ||
298 | }, | ||
299 | { | ||
300 | "cell_type": "markdown", | ||
301 | "metadata": {}, | ||
302 | "source": [ | ||
303 | "### For Yakindumm\n", | ||
304 | "\n", | ||
305 | "#### For all human models\n", | ||
306 | "* the rep found is ../input/humanOutput\\R_2015246_run_1.csv\n", | ||
307 | "* the average distance between it and others is 0.08556632702185384\n", | ||
308 | "\n", | ||
309 | "#### For human models with $100 \\pm 10$ nodes\n", | ||
310 | "* the rep found is ../input/human_output_100\\R_2016324_run_1.csv\n", | ||
311 | "* the average distance between it and others is $0.07028909225833631$\n", | ||
312 | "\n", | ||
313 | "#### For human models with $100 \\pm 10$ nodes and the new metric\n", | ||
314 | "* average distance: 0.054782550772603904\n", | ||
315 | "* median: 0.048330503678551184\n", | ||
316 | "* std: 0.028208257424907526\n", | ||
317 | "* max: 0.21181525241675614\n", | ||
318 | "* min: 0.0" | ||
319 | ] | ||
320 | }, | ||
321 | { | ||
322 | "cell_type": "code", | ||
323 | "execution_count": 10, | ||
324 | "metadata": {}, | ||
325 | "outputs": [ | ||
326 | { | ||
327 | "name": "stdout", | ||
328 | "output_type": "stream", | ||
329 | "text": [ | ||
330 | "../input/Human\\151_run_1.csv\n", | ||
331 | "../input/Human\\151_run_1.csv\n" | ||
332 | ] | ||
333 | } | ||
334 | ], | ||
335 | "source": [ | ||
336 | "mpc_rep_index = findRep(models, lambda m: m.mpc)\n", | ||
337 | "print(list(modelToFileName.values())[mpc_rep_index])\n", | ||
338 | "mpc_rep_model = models[mpc_rep_index]\n", | ||
339 | "print(modelToFileName[mpc_rep_model])" | ||
340 | ] | ||
341 | }, | ||
342 | { | ||
343 | "cell_type": "code", | ||
344 | "execution_count": 11, | ||
345 | "metadata": {}, | ||
346 | "outputs": [ | ||
347 | { | ||
348 | "name": "stdout", | ||
349 | "output_type": "stream", | ||
350 | "text": [ | ||
351 | "average distance: 0.07028909225833632\n", | ||
352 | "median: 0.06254480286738351\n", | ||
353 | "std: 0.037281890512224164\n", | ||
354 | "max: 0.21961550993809065\n", | ||
355 | "min: 0.0\n" | ||
356 | ] | ||
357 | } | ||
358 | ], | ||
359 | "source": [ | ||
360 | "distances = []\n", | ||
361 | "for model in models:\n", | ||
362 | " distances.append(ks_value(mpc_rep_model.mpc, model.mpc))\n", | ||
363 | "print('average distance: ', np.mean(distances))\n", | ||
364 | "print('median: ', np.median(distances))\n", | ||
365 | "print('std: ', np.std(distances))\n", | ||
366 | "print('max:', max(distances))\n", | ||
367 | "print('min:', min(distances))" | ||
368 | ] | ||
369 | } | ||
370 | ], | ||
371 | "metadata": { | ||
372 | "kernelspec": { | ||
373 | "display_name": "Python 3", | ||
374 | "language": "python", | ||
375 | "name": "python3" | ||
376 | }, | ||
377 | "language_info": { | ||
378 | "codemirror_mode": { | ||
379 | "name": "ipython", | ||
380 | "version": 3 | ||
381 | }, | ||
382 | "file_extension": ".py", | ||
383 | "mimetype": "text/x-python", | ||
384 | "name": "python", | ||
385 | "nbconvert_exporter": "python", | ||
386 | "pygments_lexer": "ipython3", | ||
387 | "version": "3.7.3" | ||
388 | } | ||
389 | }, | ||
390 | "nbformat": 4, | ||
391 | "nbformat_minor": 2 | ||
392 | } | ||