diff options
Diffstat (limited to 'Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py')
-rw-r--r-- | Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py b/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py new file mode 100644 index 00000000..75fe78eb --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py | |||
@@ -0,0 +1,158 @@ | |||
1 | import os, sys | ||
2 | lib_path = os.path.abspath(os.path.join('..', '..', 'utils')) | ||
3 | sys.path.append(lib_path) | ||
4 | import glob | ||
5 | import random | ||
6 | from sklearn.manifold import MDS | ||
7 | import matplotlib.pyplot as plt | ||
8 | from scipy import stats | ||
9 | import numpy as np | ||
10 | from GraphType import GraphCollection | ||
11 | import DistributionMetrics as metrics | ||
12 | |||
def main():
    """Load the model collections of one domain and plot their pairwise distances.

    Steps, in order:
      1. Build a GraphCollection for every model source of ``domain``.
      2. Print, per main collection, its name and the share of entries in
         ``violations`` that are equal to 0.
      3. Turn every ``nts`` entry (a dict keyed by node type) into a list of
         values ordered by the sorted type names of the reference distribution.
      4. Call ``metricStat`` once per metric to compute and save the plots.
    """
    domain = 'github'
    input_root = '../input/measurement2/{}/'.format(domain)

    def load(sub_folder, count, label):
        # NOTE(review): `count` is presumably the number of models to read and
        # `label` the display name -- confirm against GraphCollection.
        return GraphCollection(input_root + sub_folder, count, label)

    # read models
    alloy = load('Alloy/', 100, 'All')
    human = load('Human/', 304, 'Hum')
    base = load('BaseViatra/', 100, 'GS')
    real = load('RealViatra/', 100, 'Real')
    random_models = load('Random/', 100, 'Rand')
    na_rep = load('Human/na_rep/', 1, 'Med')
    mpc_rep = load('Human/mpc_rep/', 1, 'Med')
    od_rep = load('Human/od_rep/', 1, 'Med')

    # HACK: the node-type representative is loaded from an existing model
    # folder and its type distribution is then replaced by hard-coded values.
    type_rep = load('Human/od_rep/', 1, 'Med')
    reference_type_shares = {
        'yakindu': {'Entry': 0.04257802080554814, 'Choice': 0.1267671379034409, 'State': 0.1596092291277674, 'Transition': 0.6138636969858629, 'Statechart': 0.010136036276340358, 'Region': 0.04467858095492131, 'Exit': 0.0018338223526273673, 'FinalState': 0.0005334755934915977},
        'ecore': {'EAttribute': 0.23539778449144008, 'EClass': 0.30996978851963747, 'EReference': 0.33081570996978854, 'EPackage': 0.012789526686807653, 'EAnnotation': 0.002517623363544813, 'EEnumLiteral': 0.07275931520644502, 'EEnum': 0.013645518630412891, 'EDataType': 0.004028197381671702, 'EParameter': 0.005941591137965764, 'EGenericType': 0.002014098690835851, 'EOperation': 0.009415911379657605, 'ETypeParameter': 0.0007049345417925478},
        'github': {'Project': 0.012636538873420432, 'Commit': 0.5525808524309276, 'User': 0.05847076461769116, 'Issue': 0.12743628185907047, 'PullRequest': 0.07560505461554937, 'IssueEvent': 0.17327050760334123},
    }
    # For an unknown domain the distribution read from disk is kept as is.
    if domain in reference_type_shares:
        type_rep.nts = [reference_type_shares[domain]]

    types = sorted(type_rep.nts[0].keys())

    main_collections = [human, alloy, random_models, base, real]
    for collection in main_collections:
        print(collection.name)
        violations = collection.violations
        total = len(violations)
        print(sum(int(v == 0) for v in violations) / total)

    models_to_compare_na = main_collections + [na_rep]
    models_to_compare_mpc = main_collections + [mpc_rep]
    models_to_compare_od = main_collections + [od_rep]
    models_to_compare_nt = main_collections + [type_rep]

    # Replace each per-model type dict by a vector aligned on `types`;
    # a type missing from a model counts as 0.0.
    for collection in models_to_compare_nt:
        collection.nts = [
            [type_shares.get(key, 0.0) for key in types]
            for type_shares in collection.nts
        ]

    # define output folder
    outputFolder = '../output/{}/'.format(domain)

    # calculate metrics
    metricStat(models_to_compare_na, 'Node_Activity', nodeActivity, 0, outputFolder, calculateKSMatrix)
    metricStat(models_to_compare_od, 'Out_Degree', outDegree, 1, outputFolder, calculateKSMatrix)
    metricStat(models_to_compare_mpc, 'MPC', mpc, 2, outputFolder, calculateKSMatrix)
    metricStat(models_to_compare_nt, 'Node_Types', nodeType, 3, outputFolder, calculateManualKSMatrix)
66 | |||
def calculateKSMatrix(dists):
    """Build the symmetric pairwise distance matrix of all given samples.

    Args:
        dists: iterable of groups, each group being an iterable of samples.
            The groups are flattened in order, so row/column ``k`` of the
            result belongs to the ``k``-th sample overall.

    Returns:
        A square ``numpy`` array with one row per sample. The diagonal is 0
        and entry ``[i, j]`` is the first element of the pair returned by
        ``metrics.ks_distance(sample_i, sample_j)``; the second element of
        that pair is discarded.
    """
    # Flatten in one pass: repeated `list + list` is quadratic and only
    # works when every group is a list.
    samples = [sample for group in dists for sample in group]
    count = len(samples)

    # Zero-initialised, so the diagonal needs no explicit assignment.
    matrix = np.zeros((count, count))
    for i in range(count):
        for j in range(i + 1, count):
            value, _p = metrics.ks_distance(samples[i], samples[j])
            matrix[i, j] = value
            matrix[j, i] = value
    return matrix
81 | |||
def calculateManualKSMatrix(dists):
    """Build the symmetric pairwise distance matrix using ``metrics.manual_ks``.

    Args:
        dists: iterable of groups, each group being an iterable of samples.
            The groups are flattened in order, so row/column ``k`` of the
            result belongs to the ``k``-th sample overall.

    Returns:
        A square ``numpy`` array with one row per sample. The diagonal is 0
        and entry ``[i, j]`` is ``metrics.manual_ks(sample_i, sample_j)``.
    """
    # Flatten in one pass: repeated `list + list` is quadratic and only
    # works when every group is a list.
    samples = [sample for group in dists for sample in group]
    count = len(samples)

    # Zero-initialised, so the diagonal needs no explicit assignment.
    matrix = np.zeros((count, count))
    for i in range(count):
        for j in range(i + 1, count):
            value = metrics.manual_ks(samples[i], samples[j])
            matrix[i, j] = value
            matrix[j, i] = value
    return matrix
96 | |||
97 | |||
def calculateMDS(dissimilarities, random_state=None):
    """Embed a precomputed dissimilarity matrix into two dimensions with MDS.

    Args:
        dissimilarities: square matrix of pairwise dissimilarities, passed to
            ``MDS.fit_transform`` as ``X``.
        random_state: optional seed forwarded to ``MDS``. The default ``None``
            keeps the previous unseeded behaviour; pass an int to get the
            same embedding on every run.

    Returns:
        The value returned by ``MDS.fit_transform``; callers in this file
        index it as ``[row, 0]`` / ``[row, 1]``.
    """
    embedding = MDS(
        n_components=2,
        dissimilarity='precomputed',
        random_state=random_state,
    )
    return embedding.fit_transform(X=dissimilarities)
102 | |||
def plot(graphTypes, coords, title='', index=0, savePath=''):
    """Scatter-plot the 2-D coordinates of each collection and save two PNGs.

    Args:
        graphTypes: sequence of collections; each must expose ``size`` (number
            of rows it owns in ``coords``) and ``name`` (legend label).
        coords: 2-D array whose rows follow the order of ``graphTypes``, each
            collection occupying ``size`` consecutive rows; column 0 is x and
            column 1 is y.
        title: currently unused (the title is not drawn).
        index: matplotlib figure number to draw into.
        savePath: path prefix; ``savePath + '.png'`` is written without a
            legend and ``savePath + '_lengend.png'`` with one.
    """
    colors = ['#377eb8', '#e41a1c', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33']
    markers = ['o', '+', 'x', '^', 'v', '*']
    fill_styles = ['full', 'full', 'full', 'none', 'none', 'full']

    plt.figure(index, figsize=(5, 2))

    # Row offset into `coords`, kept separate from the figure number `index`.
    start = 0
    for i, graph_type in enumerate(graphTypes):
        stop = start + graph_type.size
        xs = coords[start:stop, 0].tolist()
        ys = coords[start:stop, 1].tolist()
        start = stop

        # Cycle through the style tables so more than six collections do not
        # raise IndexError.
        style = i % len(colors)
        plt.plot(
            xs,
            ys,
            color=colors[style],
            marker=markers[style],
            label=graph_type.name,
            linestyle='',
            alpha=0.7,
            fillstyle=fill_styles[style],
        )

    # First image without legend, second one with it.
    plt.savefig(fname=savePath + '.png', dpi=500)
    plt.legend(loc='upper right')
    # NOTE: the misspelled suffix is kept on purpose so existing output file
    # names do not change.
    plt.savefig(fname=savePath + '_lengend.png', dpi=500)
118 | |||
def mkdir_p(mypath):
    """Create directory ``mypath`` with all missing parents, like ``mkdir -p``.

    An already existing directory is accepted silently. Any other ``OSError``
    (for example when the path exists but is not a directory) is re-raised.
    """
    import errno
    import os

    try:
        os.makedirs(mypath)
    except OSError as exc:
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(mypath)
        if not already_a_directory:
            raise
131 | |||
def metricStat(graphTypes, metricName, metric, graphIndex, outputFolder, matrix_calculator):
    """Compute one metric for all collections, embed the distances and plot them.

    Args:
        graphTypes: sequence of collections to compare.
        metricName: name used in the progress message and as output file stem.
        metric: callable returning the metric samples of one collection.
        graphIndex: figure number forwarded to ``plot``.
        outputFolder: base path; the names of all collections, each followed
            by ``-``, are appended to form the folder that is created.
        matrix_calculator: callable turning the list of per-collection samples
            into a dissimilarity matrix.
    """
    samples_per_collection = [metric(graph) for graph in graphTypes]
    target_folder = outputFolder + ''.join(graph.name + '-' for graph in graphTypes)

    print('calculate' + metricName +' for ' + target_folder)
    mkdir_p(target_folder)

    dissimilarities = matrix_calculator(samples_per_collection)
    coords = calculateMDS(dissimilarities)
    plot(graphTypes, coords, metricName, graphIndex, target_folder + '/'+ metricName)
141 | |||
def nodeActivity(graphType):
    """Return the ``nas`` attribute of ``graphType`` (used for 'Node_Activity')."""
    node_activities = graphType.nas
    return node_activities
144 | |||
def outDegree(graphType):
    """Return the ``out_ds`` attribute of ``graphType`` (used for 'Out_Degree')."""
    out_degrees = graphType.out_ds
    return out_degrees
147 | |||
def mpc(graphType):
    """Return the ``mpcs`` attribute of ``graphType`` (used for 'MPC')."""
    mpc_values = graphType.mpcs
    return mpc_values
150 | |||
def nodeType(graphType):
    """Return the ``nts`` attribute of ``graphType`` (used for 'Node_Types')."""
    node_types = graphType.nts
    return node_types
153 | |||
def tcc(graphType):
    """Return the ``tccs`` attribute of ``graphType``."""
    tcc_values = graphType.tccs
    return tcc_values
156 | |||
# Run the comparison only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    main()