diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/src/representative_selector .ipynb')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/src/representative_selector .ipynb | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/src/representative_selector .ipynb b/Metrics/Metrics-Calculation/metrics_plot/src/representative_selector .ipynb new file mode 100644 index 00000000..4886c215 --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/src/representative_selector .ipynb | |||
@@ -0,0 +1,262 @@ | |||
1 | { | ||
2 | "cells": [ | ||
3 | { | ||
4 | "cell_type": "markdown", | ||
5 | "metadata": {}, | ||
6 | "source": [ | ||
7 | "## Use the K-medoids algorithm to find suitable human model representatives" | ||
8 | ] | ||
9 | }, | ||
10 | { | ||
11 | "cell_type": "markdown", | ||
12 | "metadata": {}, | ||
13 | "source": [ | ||
14 | "### Imports" | ||
15 | ] | ||
16 | }, | ||
17 | { | ||
18 | "cell_type": "code", | ||
19 | "execution_count": 1, | ||
20 | "metadata": {}, | ||
21 | "outputs": [], | ||
22 | "source": [ | ||
23 | "from GraphType import GraphStat\n", | ||
24 | "import readCSV as reader\n", | ||
25 | "from scipy import stats\n", | ||
26 | "from ipywidgets import interact, fixed, interactive\n", | ||
27 | "import ipywidgets as widgets\n", | ||
28 | "from pyclustering.cluster.kmedoids import kmedoids\n", | ||
29 | "from pyclustering.utils.metric import distance_metric, type_metric\n", | ||
30 | "import random" | ||
31 | ] | ||
32 | }, | ||
33 | { | ||
34 | "cell_type": "markdown", | ||
35 | "metadata": {}, | ||
36 | "source": [ | ||
37 | "### Define a new distance metric" | ||
38 | ] | ||
39 | }, | ||
40 | { | ||
41 | "cell_type": "code", | ||
42 | "execution_count": 2, | ||
43 | "metadata": {}, | ||
44 | "outputs": [], | ||
45 | "source": [ | ||
46 | "def ks_value(dest1, dest2):\n", | ||
47 | "    \"\"\"Return the two-sample Kolmogorov-Smirnov statistic between dest1 and dest2 (the p-value is discarded).\"\"\"\n", | ||
48 | "    return stats.ks_2samp(dest1, dest2)[0]\n", | ||
49 | "\n", | ||
50 | "\n", | ||
51 | "ks_metric = distance_metric(type_metric.USER_DEFINED, func=ks_value)" | ||
52 | ] | ||
53 | }, | ||
54 | { | ||
55 | "cell_type": "markdown", | ||
56 | "metadata": {}, | ||
57 | "source": [ | ||
58 | "### Read Human Models" | ||
59 | ] | ||
60 | }, | ||
61 | { | ||
62 | "cell_type": "code", | ||
63 | "execution_count": 3, | ||
64 | "metadata": {}, | ||
65 | "outputs": [ | ||
66 | { | ||
67 | "data": { | ||
68 | "text/plain": [ | ||
69 | "1253" | ||
70 | ] | ||
71 | }, | ||
72 | "execution_count": 3, | ||
73 | "metadata": {}, | ||
74 | "output_type": "execute_result" | ||
75 | } | ||
76 | ], | ||
77 | "source": [ | ||
78 | "# Progress widget (created here, but not displayed or updated anywhere in this cell)\n", | ||
79 | "w = widgets.FloatProgress(\n", | ||
80 | " value=0,\n", | ||
81 | " min=0,\n", | ||
82 | " max=1.0,\n", | ||
83 | " step=0.1,\n", | ||
84 | " description='Loading Files...:',\n", | ||
85 | " bar_style='info',\n", | ||
86 | " orientation='horizontal'\n", | ||
87 | ")\n", | ||
88 | "\n", | ||
89 | "# NOTE(review): the meaning of the 1300 and False arguments is defined in readCSV, not visible here - confirm\n", | ||
90 | "humanFiles = reader.readmultiplefiles('../statistics/humanOutput/', 1300, False)\n", | ||
91 | "modelToFileName = {}  # maps each GraphStat to the name it was built from\n", | ||
92 | "for name in humanFiles:\n", | ||
93 | " modelToFileName[GraphStat(name)] = name\n", | ||
94 | "\n", | ||
95 | "models = list(modelToFileName.keys())  # same order as modelToFileName.values()\n", | ||
96 | "len(humanFiles)" | ||
97 | ] | ||
98 | }, | ||
99 | { | ||
100 | "cell_type": "markdown", | ||
101 | "metadata": {}, | ||
102 | "source": [ | ||
103 | "### Find Representative by K-medoids for different dists on GraphStat" | ||
104 | ] | ||
105 | }, | ||
106 | { | ||
107 | "cell_type": "markdown", | ||
108 | "metadata": {}, | ||
109 | "source": [ | ||
110 | "* Returns the index of the representative" | ||
111 | ] | ||
112 | }, | ||
113 | { | ||
114 | "cell_type": "code", | ||
115 | "execution_count": 7, | ||
116 | "metadata": {}, | ||
117 | "outputs": [], | ||
118 | "source": [ | ||
119 | "def findRep(graphStats, func):\n", | ||
120 | "    \"\"\"Return the index in graphStats of the K-medoids representative (one cluster, KS distance).\n", | ||
121 | "\n", | ||
122 | "    func maps each element to the sample compared by ks_metric; raises ValueError if graphStats is empty.\n", | ||
123 | "    \"\"\"\n", | ||
124 | "    out_ds = list(map(func, graphStats))  # use the argument, not the notebook-level models\n", | ||
125 | "    # randrange excludes len(out_ds); randint(0, len(out_ds)) could yield an out-of-range start index\n", | ||
126 | "    start_index = random.randrange(len(out_ds))\n", | ||
127 | "    # start with one initial medoid [start_index]\n", | ||
128 | "    outdegree_kmedoid = kmedoids(out_ds, [start_index], metric=ks_metric)\n", | ||
129 | "    outdegree_kmedoid.process()\n", | ||
130 | "    return outdegree_kmedoid.get_medoids()[0]" | ||
131 | ] | ||
132 | }, | ||
133 | { | ||
134 | "cell_type": "markdown", | ||
135 | "metadata": {}, | ||
136 | "source": [ | ||
137 | "### Find representative for out degree" | ||
138 | ] | ||
139 | }, | ||
140 | { | ||
141 | "cell_type": "code", | ||
142 | "execution_count": 8, | ||
143 | "metadata": {}, | ||
144 | "outputs": [ | ||
145 | { | ||
146 | "name": "stdout", | ||
147 | "output_type": "stream", | ||
148 | "text": [ | ||
149 | "../statistics/humanOutput\\R_20158_run_1.csv\n", | ||
150 | "../statistics/humanOutput\\R_20158_run_1.csv\n" | ||
151 | ] | ||
152 | } | ||
153 | ], | ||
154 | "source": [ | ||
155 | "od_rep_index = findRep(models, lambda m: m.out_d)  # position of the representative in models\n", | ||
156 | "od_rep_model = models[od_rep_index]\n", | ||
157 | "print(list(modelToFileName.values())[od_rep_index])  # file name looked up by position\n", | ||
158 | "print(modelToFileName[od_rep_model])  # same entry looked up by model key\n" | ||
159 | ] | ||
160 | }, | ||
161 | { | ||
162 | "cell_type": "markdown", | ||
163 | "metadata": {}, | ||
164 | "source": [ | ||
165 | "### Find Representative for node activities" | ||
166 | ] | ||
167 | }, | ||
168 | { | ||
169 | "cell_type": "code", | ||
170 | "execution_count": 9, | ||
171 | "metadata": {}, | ||
172 | "outputs": [ | ||
173 | { | ||
174 | "ename": "NameError", | ||
175 | "evalue": "name 'na_rep_index' is not defined", | ||
176 | "output_type": "error", | ||
177 | "traceback": [ | ||
178 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | ||
179 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | ||
180 | "\u001b[1;32m<ipython-input-9-7899480190c8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mna_rp_index\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfindRep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0mm\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mna\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodelToFileName\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mna_rep_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mna_rep_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodels\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mna_rep_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodelToFileName\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mna_rep_model\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | ||
181 | "\u001b[1;31mNameError\u001b[0m: name 'na_rep_index' is not defined" | ||
182 | ] | ||
183 | } | ||
184 | ], | ||
185 | "source": [ | ||
186 | "na_rep_index = findRep(models, lambda m: m.na)  # position of the representative in models\n", | ||
187 | "na_rep_model = models[na_rep_index]\n", | ||
188 | "print(list(modelToFileName.values())[na_rep_index])  # file name looked up by position\n", | ||
189 | "print(modelToFileName[na_rep_model])  # same entry looked up by model key" | ||
190 | ] | ||
191 | }, | ||
192 | { | ||
193 | "cell_type": "code", | ||
194 | "execution_count": 11, | ||
195 | "metadata": {}, | ||
196 | "outputs": [ | ||
197 | { | ||
198 | "name": "stdout", | ||
199 | "output_type": "stream", | ||
200 | "text": [ | ||
201 | "../statistics/humanOutput\\R_2016176_run_1.csv\n", | ||
202 | "../statistics/humanOutput\\R_2016176_run_1.csv\n" | ||
203 | ] | ||
204 | } | ||
205 | ], | ||
206 | "source": [ | ||
207 | "print(list(modelToFileName.values())[na_rep_index])  # na_rep_index is assigned by the previous cell\n", | ||
208 | "na_rep_model = models[na_rep_index]\n", | ||
209 | "print(modelToFileName[na_rep_model])  # same entry looked up by model key" | ||
210 | ] | ||
211 | }, | ||
212 | { | ||
213 | "cell_type": "markdown", | ||
214 | "metadata": {}, | ||
215 | "source": [ | ||
216 | "### Find Representative for MPC" | ||
217 | ] | ||
218 | }, | ||
219 | { | ||
220 | "cell_type": "code", | ||
221 | "execution_count": 12, | ||
222 | "metadata": {}, | ||
223 | "outputs": [ | ||
224 | { | ||
225 | "name": "stdout", | ||
226 | "output_type": "stream", | ||
227 | "text": [ | ||
228 | "../statistics/humanOutput\\R_2015246_run_1.csv\n", | ||
229 | "../statistics/humanOutput\\R_2015246_run_1.csv\n" | ||
230 | ] | ||
231 | } | ||
232 | ], | ||
233 | "source": [ | ||
234 | "mpc_rep_index = findRep(models, lambda m: m.mpc)  # position of the representative in models\n", | ||
235 | "mpc_rep_model = models[mpc_rep_index]\n", | ||
236 | "print(list(modelToFileName.values())[mpc_rep_index])  # file name looked up by position\n", | ||
237 | "print(modelToFileName[mpc_rep_model])  # same entry looked up by model key" | ||
238 | ] | ||
239 | } | ||
240 | ], | ||
241 | "metadata": { | ||
242 | "kernelspec": { | ||
243 | "display_name": "Python 3", | ||
244 | "language": "python", | ||
245 | "name": "python3" | ||
246 | }, | ||
247 | "language_info": { | ||
248 | "codemirror_mode": { | ||
249 | "name": "ipython", | ||
250 | "version": 3 | ||
251 | }, | ||
252 | "file_extension": ".py", | ||
253 | "mimetype": "text/x-python", | ||
254 | "name": "python", | ||
255 | "nbconvert_exporter": "python", | ||
256 | "pygments_lexer": "ipython3", | ||
257 | "version": "3.7.3" | ||
258 | } | ||
259 | }, | ||
260 | "nbformat": 4, | ||
261 | "nbformat_minor": 2 | ||
262 | } | ||