aboutsummaryrefslogtreecommitdiffstats
path: root/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb')
-rw-r--r--Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb336
1 files changed, 336 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb
new file mode 100644
index 00000000..9653b2a0
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/model_evolve_comparison/src/representative_selector .ipynb
@@ -0,0 +1,336 @@
1{
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "## Use the K-medoids algorithm to find suitable human model representatives"
8 ]
9 },
10 {
11 "cell_type": "markdown",
12 "metadata": {},
13 "source": [
14 "### Imports"
15 ]
16 },
17 {
18 "cell_type": "code",
19 "execution_count": 1,
20 "metadata": {},
21 "outputs": [],
22 "source": [
23 "import os, sys\n",
24 "lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))\n",
25 "sys.path.append(lib_path)\n",
26 "from GraphType import GraphStat\n",
27 "import readCSV as reader\n",
28 "from scipy import stats\n",
29 "from ipywidgets import interact, fixed, interactive\n",
30 "import ipywidgets as widgets\n",
31 "from pyclustering.cluster.kmedoids import kmedoids\n",
32 "from pyclustering.utils.metric import distance_metric, type_metric\n",
33 "import random"
34 ]
35 },
36 {
37 "cell_type": "markdown",
38 "metadata": {},
39 "source": [
40 "### Define a new distance metric"
41 ]
42 },
43 {
44 "cell_type": "code",
45 "execution_count": 2,
46 "metadata": {},
47 "outputs": [],
48 "source": [
49 "def ks_value(dest1, dest2):\n",
50 "    \"\"\"Return the two-sample Kolmogorov-Smirnov statistic between dest1 and dest2 (the p-value is discarded).\"\"\"\n",
51 "    return stats.ks_2samp(dest1, dest2).statistic\n",
52 "\n",
53 "# Expose the KS statistic as a user-defined pyclustering distance metric\n",
54 "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)"
55 ]
56 },
57 {
58 "cell_type": "markdown",
59 "metadata": {},
60 "source": [
61 "### Read Human Models"
62 ]
63 },
64 {
65 "cell_type": "code",
66 "execution_count": 4,
67 "metadata": {},
68 "outputs": [
69 {
70 "data": {
71 "text/plain": [
72 "1253"
73 ]
74 },
75 "execution_count": 4,
76 "metadata": {},
77 "output_type": "execute_result"
78 }
79 ],
80 "source": [
81 "# Progress widget (created here; this cell does not display or update it)\n",
82 "w = widgets.FloatProgress(\n",
83 " value=0,\n",
84 " min=0,\n",
85 " max=1.0,\n",
86 " step=0.1,\n",
87 " description='Loading Files...:',\n",
88 " bar_style='info',\n",
89 " orientation='horizontal'\n",
90 ")\n",
91 "\n",
92 "# Build one GraphStat per entry returned by the reader and map it back to that entry\n",
93 "humanFiles = reader.readmultiplefiles('../input/humanOutput/', 1300, False)\n",
94 "modelToFileName = {}\n",
95 "for name in humanFiles:\n",
96 " modelToFileName[GraphStat(name)] = name\n",
97 "# models shares the dict's key order, so models[i] pairs with list(modelToFileName.values())[i]\n",
98 "models = list(modelToFileName.keys())\n",
99 "len(humanFiles)"
100 ]
101 },
102 {
103 "cell_type": "markdown",
104 "metadata": {},
105 "source": [
106 "### Find Representative by K-medoids for different dists on GraphStat"
107 ]
108 },
109 {
110 "cell_type": "markdown",
111 "metadata": {},
112 "source": [
113 "* Returns the index of the representative"
114 ]
115 },
116 {
117 "cell_type": "code",
118 "execution_count": 5,
119 "metadata": {},
120 "outputs": [],
121 "source": [
122 "def findRep(graphStats, func):\n",
123 "    \"\"\"Return the index in graphStats of the K-medoids representative.\n",
124 "\n",
125 "    func maps each element of graphStats to the sample compared with ks_metric; one random initial medoid index is passed to kmedoids. Raises ValueError if graphStats is empty.\n",
126 "    \"\"\"\n",
127 "    out_ds = list(map(func, graphStats))\n",
128 "    # randrange excludes the upper bound; randint(0, len(out_ds)) could return an out-of-range index\n",
129 "    start_index = random.randrange(len(out_ds))\n",
130 "    # start with a single initial medoid [start_index]\n",
131 "    outdegree_kmedoid = kmedoids(out_ds, [start_index], metric=ks_metric)\n",
132 "    outdegree_kmedoid.process()\n",
133 "    return outdegree_kmedoid.get_medoids()[0]"
134 ]
135 },
136 {
137 "cell_type": "markdown",
138 "metadata": {},
139 "source": [
140 "### Find representative for out degree"
141 ]
142 },
143 {
144 "cell_type": "markdown",
145 "metadata": {},
146 "source": [
147 "* the rep found is ../input/humanOutput\\R_20158_run_1.csv\n",
148 "* the average distance between it and others is 0.05515988287586802"
149 ]
150 },
151 {
152 "cell_type": "code",
153 "execution_count": 6,
154 "metadata": {},
155 "outputs": [
156 {
157 "name": "stdout",
158 "output_type": "stream",
159 "text": [
160 "../input/humanOutput\\R_20158_run_1.csv\n",
161 "../input/humanOutput\\R_20158_run_1.csv\n"
162 ]
163 }
164 ],
165 "source": [
166 "od_rep_index = findRep(models, lambda model: model.out_d)\n",
167 "od_rep_model = models[od_rep_index]\n",
168 "print(list(modelToFileName.values())[od_rep_index])\n",
169 "print(modelToFileName[od_rep_model])\n"
170 ]
171 },
172 {
173 "cell_type": "code",
174 "execution_count": 19,
175 "metadata": {},
176 "outputs": [
177 {
178 "name": "stdout",
179 "output_type": "stream",
180 "text": [
181 "0.05515988287586802\n"
182 ]
183 }
184 ],
185 "source": [
186 "# Mean KS distance between the out-degree representative and every human model\n",
187 "total_distance = 0\n",
188 "for model in models:\n",
189 "    total_distance += ks_value(od_rep_model.out_d, model.out_d)\n",
190 "print(total_distance / len(models))"
191 ]
192 },
193 {
194 "cell_type": "markdown",
195 "metadata": {},
196 "source": [
197 "### Find Representative for node activities"
198 ]
199 },
200 {
201 "cell_type": "markdown",
202 "metadata": {},
203 "source": [
204 "* the rep found is ../input/humanOutput\\R_2016176_run_1.csv\n",
205 "* the average distance between it and others is 0.05275267434589047"
206 ]
207 },
208 {
209 "cell_type": "code",
210 "execution_count": 7,
211 "metadata": {},
212 "outputs": [
213 {
214 "name": "stdout",
215 "output_type": "stream",
216 "text": [
217 "../input/humanOutput\\R_2016176_run_1.csv\n",
218 "../input/humanOutput\\R_2016176_run_1.csv\n"
219 ]
220 }
221 ],
222 "source": [
223 "na_rep_index = findRep(models, lambda m: m.na)\n",
224 "print(list(modelToFileName.values())[na_rep_index])\n",
225 "na_rep_model = models[na_rep_index]\n",
226 "print(modelToFileName[na_rep_model])"
227 ]
228 },
229 {
230 "cell_type": "code",
231 "execution_count": 18,
232 "metadata": {},
233 "outputs": [
234 {
235 "name": "stdout",
236 "output_type": "stream",
237 "text": [
238 "0.05275267434589047\n"
239 ]
240 }
241 ],
242 "source": [
243 "# Mean KS distance between the node-activity representative and every human model\n",
244 "total_distance = 0\n",
245 "for model in models:\n",
246 "    total_distance += ks_value(na_rep_model.na, model.na)\n",
247 "print(total_distance / len(models))"
248 ]
249 },
250 {
251 "cell_type": "markdown",
252 "metadata": {},
253 "source": [
254 "### Find Representative for MPC"
255 ]
256 },
257 {
258 "cell_type": "markdown",
259 "metadata": {},
260 "source": [
261 "* the rep found is ../input/humanOutput\\R_2015246_run_1.csv\n",
262 "* the average distance between it and others is 0.08556632702185384"
263 ]
264 },
265 {
266 "cell_type": "code",
267 "execution_count": 8,
268 "metadata": {},
269 "outputs": [
270 {
271 "name": "stdout",
272 "output_type": "stream",
273 "text": [
274 "../input/humanOutput\\R_2015246_run_1.csv\n",
275 "../input/humanOutput\\R_2015246_run_1.csv\n"
276 ]
277 }
278 ],
279 "source": [
280 "mpc_rep_index = findRep(models, lambda model: model.mpc)\n",
281 "mpc_rep_model = models[mpc_rep_index]\n",
282 "print(list(modelToFileName.values())[mpc_rep_index])\n",
283 "print(modelToFileName[mpc_rep_model])"
284 ]
285 },
286 {
287 "cell_type": "code",
288 "execution_count": 20,
289 "metadata": {},
290 "outputs": [
291 {
292 "name": "stdout",
293 "output_type": "stream",
294 "text": [
295 "0.08556632702185384\n"
296 ]
297 }
298 ],
299 "source": [
300 "# Mean KS distance between the MPC representative and every human model\n",
301 "total_distance = 0\n",
302 "for model in models:\n",
303 "    total_distance += ks_value(mpc_rep_model.mpc, model.mpc)\n",
304 "print(total_distance / len(models))"
305 ]
306 },
307 {
308 "cell_type": "code",
309 "execution_count": null,
310 "metadata": {},
311 "outputs": [],
312 "source": []
313 }
314 ],
315 "metadata": {
316 "kernelspec": {
317 "display_name": "Python 3",
318 "language": "python",
319 "name": "python3"
320 },
321 "language_info": {
322 "codemirror_mode": {
323 "name": "ipython",
324 "version": 3
325 },
326 "file_extension": ".py",
327 "mimetype": "text/x-python",
328 "name": "python",
329 "nbconvert_exporter": "python",
330 "pygments_lexer": "ipython3",
331 "version": "3.7.3"
332 }
333 },
334 "nbformat": 4,
335 "nbformat_minor": 2
336}