diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb new file mode 100644 index 00000000..9653b2a0 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb | |||
@@ -0,0 +1,336 @@ | |||
1 | { | ||
2 | "cells": [ | ||
3 | { | ||
4 | "cell_type": "markdown", | ||
5 | "metadata": {}, | ||
6 | "source": [ | ||
7 | "## Use the K-medoids algorithm to find suitable human model representatives" | ||
8 | ] | ||
9 | }, | ||
10 | { | ||
11 | "cell_type": "markdown", | ||
12 | "metadata": {}, | ||
13 | "source": [ | ||
14 | "### Imports" | ||
15 | ] | ||
16 | }, | ||
17 | { | ||
18 | "cell_type": "code", | ||
19 | "execution_count": 1, | ||
20 | "metadata": {}, | ||
21 | "outputs": [], | ||
22 | "source": [ | ||
23 | "import os, sys\n", | ||
24 | "lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))\n", | ||
25 | "sys.path.append(lib_path)\n", | ||
26 | "from GraphType import GraphStat\n", | ||
27 | "import readCSV as reader\n", | ||
28 | "from scipy import stats\n", | ||
29 | "from ipywidgets import interact, fixed, interactive\n", | ||
30 | "import ipywidgets as widgets\n", | ||
31 | "from pyclustering.cluster.kmedoids import kmedoids\n", | ||
32 | "from pyclustering.utils.metric import distance_metric, type_metric\n", | ||
33 | "import random" | ||
34 | ] | ||
35 | }, | ||
36 | { | ||
37 | "cell_type": "markdown", | ||
38 | "metadata": {}, | ||
39 | "source": [ | ||
40 | "### Define a new distance metric" | ||
41 | ] | ||
42 | }, | ||
43 | { | ||
44 | "cell_type": "code", | ||
45 | "execution_count": 2, | ||
46 | "metadata": {}, | ||
47 | "outputs": [], | ||
48 | "source": [ | ||
49 | "def ks_value(dest1, dest2):\n", | ||
50 | " \"\"\"Return the two-sample Kolmogorov-Smirnov statistic of dest1 vs dest2 (p-value dropped).\"\"\"\n", | ||
51 | " return stats.ks_2samp(dest1, dest2)[0]\n", | ||
52 | "\n", | ||
53 | "# user-defined pyclustering metric that measures distance with ks_value\n", | ||
54 | "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)" | ||
55 | ] | ||
56 | }, | ||
57 | { | ||
58 | "cell_type": "markdown", | ||
59 | "metadata": {}, | ||
60 | "source": [ | ||
61 | "### Read Human Models" | ||
62 | ] | ||
63 | }, | ||
64 | { | ||
65 | "cell_type": "code", | ||
66 | "execution_count": 4, | ||
67 | "metadata": {}, | ||
68 | "outputs": [ | ||
69 | { | ||
70 | "data": { | ||
71 | "text/plain": [ | ||
72 | "1253" | ||
73 | ] | ||
74 | }, | ||
75 | "execution_count": 4, | ||
76 | "metadata": {}, | ||
77 | "output_type": "execute_result" | ||
78 | } | ||
79 | ], | ||
80 | "source": [ | ||
81 | "# Progress bar widget (built here; this cell never displays or updates it)\n", | ||
82 | "w = widgets.FloatProgress(\n", | ||
83 | " value=0, min=0, max=1.0, step=0.1,\n", | ||
84 | " description='Loading Files...:',\n", | ||
85 | " bar_style='info', orientation='horizontal',\n", | ||
86 | ")\n", | ||
87 | "\n", | ||
88 | "# 1300 and False are passed through as-is; their meaning is defined in readCSV (not shown here)\n", | ||
89 | "humanFiles = reader.readmultiplefiles('../input/humanOutput/', 1300, False)\n", | ||
90 | "\n", | ||
91 | "# Map each GraphStat built from a file entry back to that entry\n", | ||
92 | "modelToFileName = {}\n", | ||
93 | "for name in humanFiles:\n", | ||
94 | " modelToFileName[GraphStat(name)] = name\n", | ||
95 | "\n", | ||
96 | "# Positional list of the models, in dict insertion order\n", | ||
97 | "models = list(modelToFileName)\n", | ||
98 | "\n", | ||
99 | "len(humanFiles)" | ||
100 | ] | ||
101 | }, | ||
102 | { | ||
103 | "cell_type": "markdown", | ||
104 | "metadata": {}, | ||
105 | "source": [ | ||
106 | "### Find the representative by K-medoids for different distributions on GraphStat" | ||
107 | ] | ||
108 | }, | ||
109 | { | ||
110 | "cell_type": "markdown", | ||
111 | "metadata": {}, | ||
112 | "source": [ | ||
113 | "* Returns the index of the representative" | ||
114 | ] | ||
115 | }, | ||
116 | { | ||
117 | "cell_type": "code", | ||
118 | "execution_count": 5, | ||
119 | "metadata": {}, | ||
120 | "outputs": [], | ||
121 | "source": [ | ||
122 | "def findRep(graphStats, func):\n", | ||
123 | " \"\"\"Return the index in graphStats of the single K-medoid of func(stat) under the KS distance.\"\"\"\n", | ||
124 | " # cluster the caller's graphStats rather than the notebook-global models\n", | ||
125 | " out_ds = list(map(func, graphStats))\n", | ||
126 | "\n", | ||
127 | " # random initial medoid; randrange excludes len(out_ds), which randint(0, len) could return\n", | ||
128 | " start_index = random.randrange(len(out_ds))\n", | ||
129 | "\n", | ||
130 | " # one initial medoid means one cluster, so its final medoid is the representative\n", | ||
131 | " rep_kmedoid = kmedoids(out_ds, [start_index], metric=ks_metric)\n", | ||
132 | " rep_kmedoid.process()\n", | ||
133 | " return rep_kmedoid.get_medoids()[0]" | ||
134 | ] | ||
135 | }, | ||
136 | { | ||
137 | "cell_type": "markdown", | ||
138 | "metadata": {}, | ||
139 | "source": [ | ||
140 | "### Find representative for out degree" | ||
141 | ] | ||
142 | }, | ||
143 | { | ||
144 | "cell_type": "markdown", | ||
145 | "metadata": {}, | ||
146 | "source": [ | ||
147 | "* the rep found is ../input/humanOutput\\R_20158_run_1.csv\n", | ||
148 | "* the average distance between it and others is 0.05515988287586802" | ||
149 | ] | ||
150 | }, | ||
151 | { | ||
152 | "cell_type": "code", | ||
153 | "execution_count": 6, | ||
154 | "metadata": {}, | ||
155 | "outputs": [ | ||
156 | { | ||
157 | "name": "stdout", | ||
158 | "output_type": "stream", | ||
159 | "text": [ | ||
160 | "../input/humanOutput\\R_20158_run_1.csv\n", | ||
161 | "../input/humanOutput\\R_20158_run_1.csv\n" | ||
162 | ] | ||
163 | } | ||
164 | ], | ||
165 | "source": [ | ||
166 | "od_rep_index = findRep(models, lambda m: m.out_d)  # index of the out_d medoid\n", | ||
167 | "od_rep_model = models[od_rep_index]\n", | ||
168 | "print(list(modelToFileName.values())[od_rep_index])  # file name looked up by position\n", | ||
169 | "print(modelToFileName[od_rep_model])  # same file name looked up by model key\n" | ||
170 | ] | ||
171 | }, | ||
172 | { | ||
173 | "cell_type": "code", | ||
174 | "execution_count": 19, | ||
175 | "metadata": {}, | ||
176 | "outputs": [ | ||
177 | { | ||
178 | "name": "stdout", | ||
179 | "output_type": "stream", | ||
180 | "text": [ | ||
181 | "0.05515988287586802\n" | ||
182 | ] | ||
183 | } | ||
184 | ], | ||
185 | "source": [ | ||
186 | "# Mean KS distance on out_d from the out-degree representative to every model (itself included)\n", | ||
187 | "total_distance = 0\n", | ||
188 | "for model in models:\n", | ||
189 | " total_distance += ks_value(od_rep_model.out_d, model.out_d)\n", | ||
190 | "print(total_distance / len(models))" | ||
191 | ] | ||
192 | }, | ||
193 | { | ||
194 | "cell_type": "markdown", | ||
195 | "metadata": {}, | ||
196 | "source": [ | ||
197 | "### Find Representative for node activities" | ||
198 | ] | ||
199 | }, | ||
200 | { | ||
201 | "cell_type": "markdown", | ||
202 | "metadata": {}, | ||
203 | "source": [ | ||
204 | "* the rep found is ../input/humanOutput\\R_2016176_run_1.csv\n", | ||
205 | "* the average distance between it and others is 0.05275267434589047" | ||
206 | ] | ||
207 | }, | ||
208 | { | ||
209 | "cell_type": "code", | ||
210 | "execution_count": 7, | ||
211 | "metadata": {}, | ||
212 | "outputs": [ | ||
213 | { | ||
214 | "name": "stdout", | ||
215 | "output_type": "stream", | ||
216 | "text": [ | ||
217 | "../input/humanOutput\\R_2016176_run_1.csv\n", | ||
218 | "../input/humanOutput\\R_2016176_run_1.csv\n" | ||
219 | ] | ||
220 | } | ||
221 | ], | ||
222 | "source": [ | ||
223 | "na_rep_index = findRep(models, lambda m: m.na)  # index of the na (node activity) medoid\n", | ||
224 | "print(list(modelToFileName.values())[na_rep_index])  # file name looked up by position\n", | ||
225 | "na_rep_model = models[na_rep_index]\n", | ||
226 | "print(modelToFileName[na_rep_model])  # same file name looked up by model key" | ||
227 | ] | ||
228 | }, | ||
229 | { | ||
230 | "cell_type": "code", | ||
231 | "execution_count": 18, | ||
232 | "metadata": {}, | ||
233 | "outputs": [ | ||
234 | { | ||
235 | "name": "stdout", | ||
236 | "output_type": "stream", | ||
237 | "text": [ | ||
238 | "0.05275267434589047\n" | ||
239 | ] | ||
240 | } | ||
241 | ], | ||
242 | "source": [ | ||
243 | "# Mean KS distance on na from the node-activity representative to every model (itself included)\n", | ||
244 | "total_distance = 0\n", | ||
245 | "for model in models:\n", | ||
246 | " total_distance += ks_value(na_rep_model.na, model.na)\n", | ||
247 | "print(total_distance / len(models))" | ||
248 | ] | ||
249 | }, | ||
250 | { | ||
251 | "cell_type": "markdown", | ||
252 | "metadata": {}, | ||
253 | "source": [ | ||
254 | "### Find Representative for MPC" | ||
255 | ] | ||
256 | }, | ||
257 | { | ||
258 | "cell_type": "markdown", | ||
259 | "metadata": {}, | ||
260 | "source": [ | ||
261 | "* the rep found is ../input/humanOutput\\R_2015246_run_1.csv\n", | ||
262 | "* the average distance between it and others is 0.08556632702185384" | ||
263 | ] | ||
264 | }, | ||
265 | { | ||
266 | "cell_type": "code", | ||
267 | "execution_count": 8, | ||
268 | "metadata": {}, | ||
269 | "outputs": [ | ||
270 | { | ||
271 | "name": "stdout", | ||
272 | "output_type": "stream", | ||
273 | "text": [ | ||
274 | "../input/humanOutput\\R_2015246_run_1.csv\n", | ||
275 | "../input/humanOutput\\R_2015246_run_1.csv\n" | ||
276 | ] | ||
277 | } | ||
278 | ], | ||
279 | "source": [ | ||
280 | "mpc_rep_index = findRep(models, lambda m: m.mpc)  # index of the mpc medoid\n", | ||
281 | "mpc_rep_model = models[mpc_rep_index]\n", | ||
282 | "print(list(modelToFileName.values())[mpc_rep_index])  # file name looked up by position\n", | ||
283 | "print(modelToFileName[mpc_rep_model])  # same file name looked up by model key" | ||
284 | ] | ||
285 | }, | ||
286 | { | ||
287 | "cell_type": "code", | ||
288 | "execution_count": 20, | ||
289 | "metadata": {}, | ||
290 | "outputs": [ | ||
291 | { | ||
292 | "name": "stdout", | ||
293 | "output_type": "stream", | ||
294 | "text": [ | ||
295 | "0.08556632702185384\n" | ||
296 | ] | ||
297 | } | ||
298 | ], | ||
299 | "source": [ | ||
300 | "# Mean KS distance on mpc from the MPC representative to every model (itself included)\n", | ||
301 | "total_distance = 0\n", | ||
302 | "for model in models:\n", | ||
303 | " total_distance += ks_value(mpc_rep_model.mpc, model.mpc)\n", | ||
304 | "print(total_distance / len(models))" | ||
305 | ] | ||
306 | }, | ||
307 | { | ||
308 | "cell_type": "code", | ||
309 | "execution_count": null, | ||
310 | "metadata": {}, | ||
311 | "outputs": [], | ||
312 | "source": [] | ||
313 | } | ||
314 | ], | ||
315 | "metadata": { | ||
316 | "kernelspec": { | ||
317 | "display_name": "Python 3", | ||
318 | "language": "python", | ||
319 | "name": "python3" | ||
320 | }, | ||
321 | "language_info": { | ||
322 | "codemirror_mode": { | ||
323 | "name": "ipython", | ||
324 | "version": 3 | ||
325 | }, | ||
326 | "file_extension": ".py", | ||
327 | "mimetype": "text/x-python", | ||
328 | "name": "python", | ||
329 | "nbconvert_exporter": "python", | ||
330 | "pygments_lexer": "ipython3", | ||
331 | "version": "3.7.3" | ||
332 | } | ||
333 | }, | ||
334 | "nbformat": 4, | ||
335 | "nbformat_minor": 2 | ||
336 | } | ||