1 files changed, 158 insertions, 0 deletions
diff --git a/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py b/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py
new file mode 100644
index 00000000..75fe78eb
--- /dev/null
+++ b/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py
@@ -0,0 +1,158 @@
+import os, sys
+lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))
+sys.path.append(lib_path)
+import glob
+import random 
+from sklearn.manifold import MDS
+import matplotlib.pyplot as plt
+from scipy import stats
+import numpy as np
+from GraphType import GraphCollection
+import DistributionMetrics as metrics
+def main():
+    domain = 'github'
+    # read models
+    alloy = GraphCollection('../input/measurement2/{}/Alloy/'.format(domain), 100, 'All')
+    human = GraphCollection('../input/measurement2/{}/Human/'.format(domain), 304, 'Hum')
+    base = GraphCollection('../input/measurement2/{}/BaseViatra/'.format(domain), 100, 'GS')
+    real = GraphCollection('../input/measurement2/{}/RealViatra/'.format(domain), 100, 'Real')
+    random = GraphCollection('../input/measurement2/{}/Random/'.format(domain), 100, 'Rand')
+    na_rep = GraphCollection('../input/measurement2/{}/Human/na_rep/'.format(domain), 1, 'Med')
+    mpc_rep = GraphCollection('../input/measurement2/{}/Human/mpc_rep/'.format(domain), 1, 'Med')
+    od_rep = GraphCollection('../input/measurement2/{}/Human/od_rep/'.format(domain), 1, 'Med')
+    # a hack to make the node type as the same as an exiting model
+    type_rep = GraphCollection('../input/measurement2/{}/Human/od_rep/'.format(domain), 1, 'Med')
+    if(domain == 'yakindu'):
+        type_rep.nts = [{'Entry': 0.04257802080554814, 'Choice': 0.1267671379034409, 'State': 0.1596092291277674, 'Transition': 0.6138636969858629, 'Statechart': 0.010136036276340358, 'Region': 0.04467858095492131, 'Exit': 0.0018338223526273673, 'FinalState': 0.0005334755934915977}]
+    elif (domain == 'ecore'):
+        type_rep.nts = [{'EAttribute': 0.23539778449144008, 'EClass': 0.30996978851963747, 'EReference': 0.33081570996978854, 'EPackage': 0.012789526686807653, 'EAnnotation': 0.002517623363544813, 'EEnumLiteral': 0.07275931520644502, 'EEnum': 0.013645518630412891, 'EDataType': 0.004028197381671702, 'EParameter': 0.005941591137965764, 'EGenericType': 0.002014098690835851, 'EOperation': 0.009415911379657605, 'ETypeParameter': 0.0007049345417925478}]
+    elif (domain == 'github'):
+        type_rep.nts = [{'Project': 0.012636538873420432, 'Commit': 0.5525808524309276, 'User': 0.05847076461769116, 'Issue': 0.12743628185907047, 'PullRequest': 0.07560505461554937, 'IssueEvent': 0.17327050760334123}]
+    types = sorted(type_rep.nts[0].keys())
+    model_collections = [human, alloy, random, base,  real]
+    for model_collection in model_collections:
+        print(model_collection.name)
+        length = len(model_collection.violations)
+        percentage = sum(map(lambda v: int(v==0), model_collection.violations)) / length
+        print(percentage)
+    models_to_compare_na = [human, alloy, random, base,  real, na_rep]
+    models_to_compare_mpc = [human, alloy, random, base,  real, mpc_rep]
+    models_to_compare_od = [human, alloy, random, base,  real, od_rep]
+    models_to_compare_nt = [human, alloy, random, base,  real, type_rep]
+    for modelCollection in models_to_compare_nt:
+        type_dists = []
+        for nt in modelCollection.nts:
+            type_dist = []
+            for key in types:
+                type_dist.append(nt.get(key, 0.0))
+            type_dists.append(type_dist)
+        modelCollection.nts = type_dists
+    # define output folder
+    outputFolder = '../output/{}/'.format(domain)
+    #calculate metrics
+    metricStat(models_to_compare_na, 'Node_Activity', nodeActivity, 0, outputFolder, calculateKSMatrix)
+    metricStat(models_to_compare_od, 'Out_Degree', outDegree, 1, outputFolder, calculateKSMatrix)
+    metricStat(models_to_compare_mpc, 'MPC', mpc, 2, outputFolder, calculateKSMatrix)
+    metricStat(models_to_compare_nt, 'Node_Types', nodeType, 3, outputFolder, calculateManualKSMatrix)   
+def calculateKSMatrix(dists):
+    dist = []
+    for i in range(len(dists)):
+        dist = dist + dists[i]
+    matrix = np.empty((len(dist),len(dist)))
+    for i in range(len(dist)):
+        matrix[i,i] = 0
+        for j in range(i+1, len(dist)):
+            value, p= metrics.ks_distance(dist[i], dist[j])
+            matrix[i, j] = value
+            matrix[j, i] = value
+    return matrix
+def calculateManualKSMatrix(dists):
+    dist = []
+    for i in range(len(dists)):
+        dist = dist + dists[i]
+    matrix = np.empty((len(dist),len(dist)))
+    for i in range(len(dist)):
+        matrix[i,i] = 0
+        for j in range(i+1, len(dist)):
+            value = metrics.manual_ks(dist[i], dist[j])
+            matrix[i, j] = value
+            matrix[j, i] = value
+    return matrix
+def calculateMDS(dissimilarities):
+    embedding = MDS(n_components=2, dissimilarity='precomputed')
+    trans = embedding.fit_transform(X=dissimilarities)
+    return trans
+def plot(graphTypes, coords, title='',index = 0, savePath = ''):
+    color = ['#377eb8' , '#e41a1c', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33']
+    markers = ['o', '+', 'x', '^', 'v', '*']
+    fill_styles = ['full', 'full', 'full', 'none', 'none', 'full']
+    plt.figure(index, figsize=(5, 2))
+    # plt.title(title)
+    index = 0
+    for i in range(len(graphTypes)):
+        x = (coords[index:index+graphTypes[i].size, 0].tolist())
+        y = (coords[index:index+graphTypes[i].size, 1].tolist())
+        index += graphTypes[i].size
+        plt.plot(x, y, color=color[i], marker=markers[i], label = graphTypes[i].name, linestyle='', alpha=0.7, fillstyle = fill_styles[i])
+    plt.savefig(fname = savePath+'.png', dpi=500)
+    plt.legend(loc='upper right')
+    plt.savefig(fname = savePath+'_lengend.png', dpi=500)
+def mkdir_p(mypath):
+    '''Creates a directory. equivalent to using mkdir -p on the command line'''
+    from errno import EEXIST
+    from os import makedirs,path
+    try:
+        makedirs(mypath)
+    except OSError as exc: # Python >2.5
+        if exc.errno == EEXIST and path.isdir(mypath):
+            pass
+        else: raise
+def metricStat(graphTypes, metricName, metric, graphIndex, outputFolder, matrix_calculator):
+    metrics = []
+    for graph in graphTypes:
+        metrics.append(metric(graph))
+        outputFolder = outputFolder + graph.name + '-'
+    print('calculate' + metricName +' for ' + outputFolder)
+    mkdir_p(outputFolder)
+    out_d_coords = calculateMDS(matrix_calculator(metrics))
+    plot(graphTypes, out_d_coords, metricName, graphIndex,outputFolder + '/'+ metricName)
+def nodeActivity(graphType):
+    return graphType.nas
+def outDegree(graphType):
+    return graphType.out_ds
+def mpc(graphType):
+    return graphType.mpcs
+def nodeType(graphType):
+    return graphType.nts
+def tcc(graphType):
+    return graphType.tccs
+# Entry point: run the comparison only when executed as a script, not on import.
+if __name__ == '__main__':
+    main()
+\ No newline at end of file

diff --git a/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py b/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py new file mode 100644 index 00000000..75fe78eb --- /dev/null +++ b/Metrics/Metrics-Calculation/metrics_plot/model comparison/src/plot_ks_stats.py
@@ -0,0 +1,158 @@
	1	import os, sys
	2	lib_path = os.path.abspath(os.path.join('..', '..', 'utils'))
	3	sys.path.append(lib_path)
	4	import glob
	5	import random
	6	from sklearn.manifold import MDS
	7	import matplotlib.pyplot as plt
	8	from scipy import stats
	9	import numpy as np
	10	from GraphType import GraphCollection
	11	import DistributionMetrics as metrics
	12
	13	def main():
	14	domain = 'github'
	15	# read models
	16	alloy = GraphCollection('../input/measurement2/{}/Alloy/'.format(domain), 100, 'All')
	17	human = GraphCollection('../input/measurement2/{}/Human/'.format(domain), 304, 'Hum')
	18	base = GraphCollection('../input/measurement2/{}/BaseViatra/'.format(domain), 100, 'GS')
	19	real = GraphCollection('../input/measurement2/{}/RealViatra/'.format(domain), 100, 'Real')
	20	random = GraphCollection('../input/measurement2/{}/Random/'.format(domain), 100, 'Rand')
	21	na_rep = GraphCollection('../input/measurement2/{}/Human/na_rep/'.format(domain), 1, 'Med')
	22	mpc_rep = GraphCollection('../input/measurement2/{}/Human/mpc_rep/'.format(domain), 1, 'Med')
	23	od_rep = GraphCollection('../input/measurement2/{}/Human/od_rep/'.format(domain), 1, 'Med')
	24
	25	# a hack to make the node type as the same as an exiting model
	26	type_rep = GraphCollection('../input/measurement2/{}/Human/od_rep/'.format(domain), 1, 'Med')
	27	if(domain == 'yakindu'):
	28	type_rep.nts = [{'Entry': 0.04257802080554814, 'Choice': 0.1267671379034409, 'State': 0.1596092291277674, 'Transition': 0.6138636969858629, 'Statechart': 0.010136036276340358, 'Region': 0.04467858095492131, 'Exit': 0.0018338223526273673, 'FinalState': 0.0005334755934915977}]
	29	elif (domain == 'ecore'):
	30	type_rep.nts = [{'EAttribute': 0.23539778449144008, 'EClass': 0.30996978851963747, 'EReference': 0.33081570996978854, 'EPackage': 0.012789526686807653, 'EAnnotation': 0.002517623363544813, 'EEnumLiteral': 0.07275931520644502, 'EEnum': 0.013645518630412891, 'EDataType': 0.004028197381671702, 'EParameter': 0.005941591137965764, 'EGenericType': 0.002014098690835851, 'EOperation': 0.009415911379657605, 'ETypeParameter': 0.0007049345417925478}]
	31	elif (domain == 'github'):
	32	type_rep.nts = [{'Project': 0.012636538873420432, 'Commit': 0.5525808524309276, 'User': 0.05847076461769116, 'Issue': 0.12743628185907047, 'PullRequest': 0.07560505461554937, 'IssueEvent': 0.17327050760334123}]
	33
	34	types = sorted(type_rep.nts[0].keys())
	35
	36	model_collections = [human, alloy, random, base, real]
	37	for model_collection in model_collections:
	38	print(model_collection.name)
	39	length = len(model_collection.violations)
	40	percentage = sum(map(lambda v: int(v==0), model_collection.violations)) / length
	41	print(percentage)
	42
	43
	44	models_to_compare_na = [human, alloy, random, base, real, na_rep]
	45	models_to_compare_mpc = [human, alloy, random, base, real, mpc_rep]
	46	models_to_compare_od = [human, alloy, random, base, real, od_rep]
	47	models_to_compare_nt = [human, alloy, random, base, real, type_rep]
	48	for modelCollection in models_to_compare_nt:
	49	type_dists = []
	50	for nt in modelCollection.nts:
	51	type_dist = []
	52	for key in types:
	53	type_dist.append(nt.get(key, 0.0))
	54	type_dists.append(type_dist)
	55	modelCollection.nts = type_dists
	56
	57
	58	# define output folder
	59	outputFolder = '../output/{}/'.format(domain)
	60
	61	#calculate metrics
	62	metricStat(models_to_compare_na, 'Node_Activity', nodeActivity, 0, outputFolder, calculateKSMatrix)
	63	metricStat(models_to_compare_od, 'Out_Degree', outDegree, 1, outputFolder, calculateKSMatrix)
	64	metricStat(models_to_compare_mpc, 'MPC', mpc, 2, outputFolder, calculateKSMatrix)
	65	metricStat(models_to_compare_nt, 'Node_Types', nodeType, 3, outputFolder, calculateManualKSMatrix)
	66
	67	def calculateKSMatrix(dists):
	68	dist = []
	69
	70	for i in range(len(dists)):
	71	dist = dist + dists[i]
	72	matrix = np.empty((len(dist),len(dist)))
	73
	74	for i in range(len(dist)):
	75	matrix[i,i] = 0
	76	for j in range(i+1, len(dist)):
	77	value, p= metrics.ks_distance(dist[i], dist[j])
	78	matrix[i, j] = value
	79	matrix[j, i] = value
	80	return matrix
	81
	82	def calculateManualKSMatrix(dists):
	83	dist = []
	84
	85	for i in range(len(dists)):
	86	dist = dist + dists[i]
	87	matrix = np.empty((len(dist),len(dist)))
	88
	89	for i in range(len(dist)):
	90	matrix[i,i] = 0
	91	for j in range(i+1, len(dist)):
	92	value = metrics.manual_ks(dist[i], dist[j])
	93	matrix[i, j] = value
	94	matrix[j, i] = value
	95	return matrix
	96
	97
	98	def calculateMDS(dissimilarities):
	99	embedding = MDS(n_components=2, dissimilarity='precomputed')
	100	trans = embedding.fit_transform(X=dissimilarities)
	101	return trans
	102
	103	def plot(graphTypes, coords, title='',index = 0, savePath = ''):
	104	color = ['#377eb8' , '#e41a1c', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33']
	105	markers = ['o', '+', 'x', '^', 'v', '*']
	106	fill_styles = ['full', 'full', 'full', 'none', 'none', 'full']
	107	plt.figure(index, figsize=(5, 2))
	108	# plt.title(title)
	109	index = 0
	110	for i in range(len(graphTypes)):
	111	x = (coords[index:index+graphTypes[i].size, 0].tolist())
	112	y = (coords[index:index+graphTypes[i].size, 1].tolist())
	113	index += graphTypes[i].size
	114	plt.plot(x, y, color=color[i], marker=markers[i], label = graphTypes[i].name, linestyle='', alpha=0.7, fillstyle = fill_styles[i])
	115	plt.savefig(fname = savePath+'.png', dpi=500)
	116	plt.legend(loc='upper right')
	117	plt.savefig(fname = savePath+'_lengend.png', dpi=500)
	118
	119	def mkdir_p(mypath):
	120	'''Creates a directory. equivalent to using mkdir -p on the command line'''
	121
	122	from errno import EEXIST
	123	from os import makedirs,path
	124
	125	try:
	126	makedirs(mypath)
	127	except OSError as exc: # Python >2.5
	128	if exc.errno == EEXIST and path.isdir(mypath):
	129	pass
	130	else: raise
	131
	132	def metricStat(graphTypes, metricName, metric, graphIndex, outputFolder, matrix_calculator):
	133	metrics = []
	134	for graph in graphTypes:
	135	metrics.append(metric(graph))
	136	outputFolder = outputFolder + graph.name + '-'
	137	print('calculate' + metricName +' for ' + outputFolder)
	138	mkdir_p(outputFolder)
	139	out_d_coords = calculateMDS(matrix_calculator(metrics))
	140	plot(graphTypes, out_d_coords, metricName, graphIndex,outputFolder + '/'+ metricName)
	141
	142	def nodeActivity(graphType):
	143	return graphType.nas
	144
	145	def outDegree(graphType):
	146	return graphType.out_ds
	147
	148	def mpc(graphType):
	149	return graphType.mpcs
	150
	151	def nodeType(graphType):
	152	return graphType.nts
	153
	154	def tcc(graphType):
	155	return graphType.tccs
	156
	157	if __name__ == '__main__':
	158	main() \ No newline at end of file