{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data analysis for the paper \"Automated Generation of Consistent Models with Structural and Attribute Constraints\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, let's load some packages." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning message in dir.create(\"plots\"):\n", "“'plots' already exists”\n" ] } ], "source": [ "require(tidyverse)\n", "dir.create('plots')" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "scrolled": false }, "outputs": [], "source": [ "ProcessDetailedStatistics <- function(str) {\n", " str <- sub('TransformationExecutionTime', 'TransformationExecutionTime:', str)\n", " str <- sub('Backtrackingtime', 'BacktrackingTime', str)\n", " str <- gsub('\\\\(|\\\\)', '', str)\n", " str <- lapply(strsplit(str, '\\\\||:'), function (v) {\n", " dim(v) <- c(2, 11)\n", " values <- as.double(v[2,])\n", " names(values) <- v[1,]\n", " as.data.frame(t(values))\n", " })\n", " str\n", "}\n", "Load10Log <- function(filename, size) {\n", " read_csv(filename, col_types = cols(\n", " .default = col_double(),\n", " Result = col_character(),\n", " Solution1DetailedStatistics = col_character(),\n", " Solution2DetailedStatistics = col_character(),\n", " Solution3DetailedStatistics = col_character(),\n", " Solution4DetailedStatistics = col_character(),\n", " Solution5DetailedStatistics = col_character(),\n", " Solution6DetailedStatistics = col_character(),\n", " Solution7DetailedStatistics = col_character(),\n", " Solution8DetailedStatistics = col_character(),\n", " Solution9DetailedStatistics = col_character(),\n", " Solution10DetailedStatistics = col_character()\n", " )) %>% transmute(\n", " n = size,\n", " Run = Run,\n", " preprocessingTime = get('Domain to logic transformation time') + get('Logic to solver transformation time') + ExplorationInitializationTime,\n", " Solution0FoundAt = Solution0FoundAt,\n", " additionalTime = Solution9FoundAt - Solution0FoundAt,\n", " Solution1DetailedStatistics = ProcessDetailedStatistics(Solution1DetailedStatistics)\n", " ) %>% unnest()\n", "# %>% unnest() %>% mutate(\n", "# # (Logical) constraint evluation should count as refinement.\n", "# ForwardTime = ForwardTime + GlobalConstraintEvaluationTime + FitnessCalculationTime,\n", "# preprocessingTime = preprocessingTime,\n", "# BacktrackingTime = Solution0FoundAt - (StateCoderTime + ForwardTime + NumericalSolverSumTime)\n", "# ) %>% select(n, Run, preprocessingTime, StateCoderTime, ForwardTime, BacktrackingTime, NumericalSolverSumTime, additionalTime)\n", "}\n", "Load1Log <- function(filename, size) {\n", " read_csv(filename, col_types = cols(\n", " .default = col_double(),\n", " Result = col_character(),\n", " Solution1DetailedStatistics = col_character()\n", " )) %>% filter(Result == \"ModelResultImpl\") %>% transmute(\n", " n = size,\n", " Run = Run,\n", " preprocessingTime = get('Domain to logic transformation time') + get('Logic to solver transformation time') + ExplorationInitializationTime,\n", " Solution0FoundAt = Solution0FoundAt,\n", " Solution1DetailedStatistics = ProcessDetailedStatistics(Solution1DetailedStatistics)\n", " ) %>% unnest(cols = c(Solution1DetailedStatistics)) %>% mutate(\n", " ForwardTime = ForwardTime + GlobalConstraintEvaluationTime + FitnessCalculationTime,\n", " BacktrackingTime = Solution0FoundAt - (StateCoderTime + ForwardTime + NumericalSolverSumTime)\n", " ) %>% select(n, Run, preprocessingTime, StateCoderTime, ForwardTime, BacktrackingTime, NumericalSolverSumTime)\n", "}" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "ProcessRQ1 <- function(df) {\n", " df %>% group_by(n) %>% summarize(\n", " .groups = 'drop',\n", " time = median(preprocessingTime + StateCoderTime + ForwardTime + BacktrackingTime + NumericalSolverSumTime) / 1000.0\n", " )\n", "}\n", "ProcessRQ2 <- function(df) {\n", " df %>% group_by(n) %>% summarize(\n", " .groups = 'drop',\n", " preprocessingTime = median(preprocessingTime) / 1000.0,\n", " StateCoderTime = median(StateCoderTime) / 1000.0,\n", " ForwardTime = median(ForwardTime) / 1000.0,\n", " BacktrackingTime = median(BacktrackingTime) / 1000.0,\n", " NumericalSolverSumTime = median(NumericalSolverSumTime) / 1000.0,\n", " additionalTime = median(additionalTime) / 1000.0\n", " )\n", "}\n", "ProcessRQ3 <- ProcessRQ1" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "RQ2Plot <- function(df, name) {\n", " df <- df %>% gather(name, value, -n) %>% filter(name != \"preprocessingTime\")\n", " df$name <- factor(df$name, levels=rev(c('ForwardTime', 'BacktrackingTime', 'StateCoderTime', 'NumericalSolverSumTime', 'additionalTime')))\n", " plot <- df %>% ggplot(aes(x=n, y=value, fill=name)) +\n", " geom_bar(stat='identity') +\n", " scale_fill_brewer(palette='Set2',\n", " labels=rev(c('Refinement', 'Backtracking', 'State Coding', 'SMT Solver Calls', 'Additional Model Generation')),\n", " guide=FALSE) +\n", " scale_x_continuous(breaks=c(20, 40, 60, 80, 100), name=\"Model Size (# nodes)\") +\n", " scale_y_continuous(name=\"Runtime (s)\") +\n", " theme_bw()\n", " ggsave(plot=plot, filename=paste0('plots/plot_RQ2_', name, '.pdf'), width=3.5, height=2.5)\n", " plot\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Fam domain" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning message:\n", "“`cols` is now required when using unnest().\n", "Please use `cols = c(Solution1DetailedStatistics)`”\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A tibble: 10 × 16
nRunpreprocessingTimeSolution0FoundAtadditionalTimeTransformationExecutionTimeForwardTimeBacktrackingTimeGlobalConstraintEvaluationTimeFitnessCalculationTimeActivationSelectionTimeSolutionCopyTimeNumericalSolverSumTimeNumericalSolverProblemFormingTimeNumericalSolverSolvingTimeNumericalSolverInterpretingSolution
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
20 110385248398031225108101014675223922390
20 2 7244132362820155 76 61003780201920190
20 3 7355329443720195106 62004876258825880
20 4 6944261492016150 69 51003925208520850
20 5 8885959508618200 93 81005490294629460
20 6 6656310432017195 90 61005870316731670
20 7 6045024573814165 74 41004662246424640
20 8 5895733391715181 81 41005337282728270
20 9 7054719425914156 71 41024367239023900
2010 5544061399013141 68 31003741199119910
\n" ], "text/latex": [ "A tibble: 10 × 16\n", "\\begin{tabular}{llllllllllllllll}\n", " n & Run & preprocessingTime & Solution0FoundAt & additionalTime & TransformationExecutionTime & ForwardTime & BacktrackingTime & GlobalConstraintEvaluationTime & FitnessCalculationTime & ActivationSelectionTime & SolutionCopyTime & NumericalSolverSumTime & NumericalSolverProblemFormingTime & NumericalSolverSolvingTime & NumericalSolverInterpretingSolution\\\\\n", " & & & & & & & & & & & & & & & \\\\\n", "\\hline\n", "\t 20 & 1 & 1038 & 5248 & 3980 & 31 & 225 & 108 & 10 & 1 & 0 & 1 & 4675 & 2239 & 2239 & 0\\\\\n", "\t 20 & 2 & 724 & 4132 & 3628 & 20 & 155 & 76 & 6 & 1 & 0 & 0 & 3780 & 2019 & 2019 & 0\\\\\n", "\t 20 & 3 & 735 & 5329 & 4437 & 20 & 195 & 106 & 6 & 2 & 0 & 0 & 4876 & 2588 & 2588 & 0\\\\\n", "\t 20 & 4 & 694 & 4261 & 4920 & 16 & 150 & 69 & 5 & 1 & 0 & 0 & 3925 & 2085 & 2085 & 0\\\\\n", "\t 20 & 5 & 888 & 5959 & 5086 & 18 & 200 & 93 & 8 & 1 & 0 & 0 & 5490 & 2946 & 2946 & 0\\\\\n", "\t 20 & 6 & 665 & 6310 & 4320 & 17 & 195 & 90 & 6 & 1 & 0 & 0 & 5870 & 3167 & 3167 & 0\\\\\n", "\t 20 & 7 & 604 & 5024 & 5738 & 14 & 165 & 74 & 4 & 1 & 0 & 0 & 4662 & 2464 & 2464 & 0\\\\\n", "\t 20 & 8 & 589 & 5733 & 3917 & 15 & 181 & 81 & 4 & 1 & 0 & 0 & 5337 & 2827 & 2827 & 0\\\\\n", "\t 20 & 9 & 705 & 4719 & 4259 & 14 & 156 & 71 & 4 & 1 & 0 & 2 & 4367 & 2390 & 2390 & 0\\\\\n", "\t 20 & 10 & 554 & 4061 & 3990 & 13 & 141 & 68 & 3 & 1 & 0 & 0 & 3741 & 1991 & 1991 & 0\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A tibble: 10 × 16\n", "\n", "| n <dbl> | Run <dbl> | preprocessingTime <dbl> | Solution0FoundAt <dbl> | additionalTime <dbl> | TransformationExecutionTime <dbl> | ForwardTime <dbl> | BacktrackingTime <dbl> | GlobalConstraintEvaluationTime <dbl> | FitnessCalculationTime <dbl> | ActivationSelectionTime <dbl> | SolutionCopyTime <dbl> | NumericalSolverSumTime <dbl> | NumericalSolverProblemFormingTime <dbl> | NumericalSolverSolvingTime <dbl> | NumericalSolverInterpretingSolution <dbl> |\n", "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", "| 20 | 1 | 1038 | 5248 | 3980 | 31 | 225 | 108 | 10 | 1 | 0 | 1 | 4675 | 2239 | 2239 | 0 |\n", "| 20 | 2 | 724 | 4132 | 3628 | 20 | 155 | 76 | 6 | 1 | 0 | 0 | 3780 | 2019 | 2019 | 0 |\n", "| 20 | 3 | 735 | 5329 | 4437 | 20 | 195 | 106 | 6 | 2 | 0 | 0 | 4876 | 2588 | 2588 | 0 |\n", "| 20 | 4 | 694 | 4261 | 4920 | 16 | 150 | 69 | 5 | 1 | 0 | 0 | 3925 | 2085 | 2085 | 0 |\n", "| 20 | 5 | 888 | 5959 | 5086 | 18 | 200 | 93 | 8 | 1 | 0 | 0 | 5490 | 2946 | 2946 | 0 |\n", "| 20 | 6 | 665 | 6310 | 4320 | 17 | 195 | 90 | 6 | 1 | 0 | 0 | 5870 | 3167 | 3167 | 0 |\n", "| 20 | 7 | 604 | 5024 | 5738 | 14 | 165 | 74 | 4 | 1 | 0 | 0 | 4662 | 2464 | 2464 | 0 |\n", "| 20 | 8 | 589 | 5733 | 3917 | 15 | 181 | 81 | 4 | 1 | 0 | 0 | 5337 | 2827 | 2827 | 0 |\n", "| 20 | 9 | 705 | 4719 | 4259 | 14 | 156 | 71 | 4 | 1 | 0 | 2 | 4367 | 2390 | 2390 | 0 |\n", "| 20 | 10 | 554 | 4061 | 3990 | 13 | 141 | 68 | 3 | 1 | 0 | 0 | 3741 | 1991 | 1991 | 0 |\n", "\n" ], "text/plain": [ " n Run preprocessingTime Solution0FoundAt additionalTime\n", "1 20 1 1038 5248 3980 \n", "2 20 2 724 4132 3628 \n", "3 20 3 735 5329 4437 \n", "4 20 4 694 4261 4920 \n", "5 20 5 888 5959 5086 \n", "6 20 6 665 6310 4320 \n", "7 20 7 604 5024 5738 \n", "8 20 8 589 5733 3917 \n", "9 20 9 705 4719 4259 \n", "10 20 10 554 4061 3990 \n", " TransformationExecutionTime ForwardTime BacktrackingTime\n", "1 31 225 108 \n", "2 20 155 76 \n", "3 20 195 106 \n", "4 16 150 69 \n", "5 18 200 93 \n", "6 17 195 90 \n", "7 14 165 74 \n", "8 15 181 81 \n", "9 14 156 71 \n", "10 13 141 68 \n", " GlobalConstraintEvaluationTime FitnessCalculationTime\n", "1 10 1 \n", "2 6 1 \n", "3 6 2 \n", "4 5 1 \n", "5 8 1 \n", "6 6 1 \n", "7 4 1 \n", "8 4 1 \n", "9 4 1 \n", "10 3 1 \n", " ActivationSelectionTime SolutionCopyTime NumericalSolverSumTime\n", "1 0 1 4675 \n", "2 0 0 3780 \n", "3 0 0 4876 \n", "4 0 0 3925 \n", "5 0 0 5490 \n", "6 0 0 5870 \n", "7 0 0 4662 \n", "8 0 0 5337 \n", "9 0 2 4367 \n", "10 0 0 3741 \n", " NumericalSolverProblemFormingTime NumericalSolverSolvingTime\n", "1 2239 2239 \n", "2 2019 2019 \n", "3 2588 2588 \n", "4 2085 2085 \n", "5 2946 2946 \n", "6 3167 3167 \n", "7 2464 2464 \n", "8 2827 2827 \n", "9 2390 2390 \n", "10 1991 1991 \n", " NumericalSolverInterpretingSolution\n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "5 0 \n", "6 0 \n", "7 0 \n", "8 0 \n", "9 0 \n", "10 0 " ] }, "metadata": {}, "output_type": "display_data" }, { "ename": "ERROR", "evalue": "Error: Problem with `summarise()` input `StateCoderTime`.\n\u001b[31m✖\u001b[39m object 'StateCoderTime' not found\n\u001b[34mℹ\u001b[39m Input `StateCoderTime` is `median(StateCoderTime)/1000`.\n\u001b[34mℹ\u001b[39m The error occurred in group 1: n = 20.\n", "output_type": "error", "traceback": [ "Error: Problem with `summarise()` input `StateCoderTime`.\n\u001b[31m✖\u001b[39m object 'StateCoderTime' not found\n\u001b[34mℹ\u001b[39m Input `StateCoderTime` is `median(StateCoderTime)/1000`.\n\u001b[34mℹ\u001b[39m The error occurred in group 1: n = 20.\nTraceback:\n", "1. FamilyTreeRQ2Raw %>% ProcessRQ2", "2. ProcessRQ2(.)", "3. df %>% group_by(n) %>% summarize(.groups = \"drop\", preprocessingTime = median(preprocessingTime)/1000, \n . StateCoderTime = median(StateCoderTime)/1000, ForwardTime = median(ForwardTime)/1000, \n . BacktrackingTime = median(BacktrackingTime)/1000, NumericalSolverSumTime = median(NumericalSolverSumTime)/1000, \n . additionalTime = median(additionalTime)/1000) # at line 8-16 of file ", "4. summarize(., .groups = \"drop\", preprocessingTime = median(preprocessingTime)/1000, \n . StateCoderTime = median(StateCoderTime)/1000, ForwardTime = median(ForwardTime)/1000, \n . BacktrackingTime = median(BacktrackingTime)/1000, NumericalSolverSumTime = median(NumericalSolverSumTime)/1000, \n . additionalTime = median(additionalTime)/1000)", "5. summarise.grouped_df(., .groups = \"drop\", preprocessingTime = median(preprocessingTime)/1000, \n . StateCoderTime = median(StateCoderTime)/1000, ForwardTime = median(ForwardTime)/1000, \n . BacktrackingTime = median(BacktrackingTime)/1000, NumericalSolverSumTime = median(NumericalSolverSumTime)/1000, \n . additionalTime = median(additionalTime)/1000)", "6. summarise_cols(.data, ...)", "7. withCallingHandlers({\n . for (i in seq_along(dots)) {\n . quo <- dots[[i]]\n . chunks[[i]] <- mask$eval_all_summarise(quo)\n . mask$across_cache_reset()\n . result_type <- types[[i]] <- withCallingHandlers(vec_ptype_common(!!!chunks[[i]]), \n . vctrs_error_incompatible_type = function(cnd) {\n . abort(class = \"dplyr:::error_summarise_incompatible_combine\", \n . parent = cnd)\n . })\n . if ((is.null(dots_names) || dots_names[i] == \"\") && is.data.frame(result_type)) {\n . map2(seq_along(result_type), names(result_type), \n . function(j, nm) {\n . mask$add(nm, pluck(chunks[[i]], j))\n . })\n . }\n . else {\n . mask$add(auto_named_dots[i], chunks[[i]])\n . }\n . }\n . recycle_info <- .Call(dplyr_summarise_recycle_chunks, chunks, \n . mask$get_rows(), types)\n . chunks <- recycle_info$chunks\n . sizes <- recycle_info$sizes\n . for (i in seq_along(dots)) {\n . result <- vec_c(!!!chunks[[i]], .ptype = types[[i]])\n . if ((is.null(dots_names) || dots_names[i] == \"\") && is.data.frame(result)) {\n . cols[names(result)] <- result\n . }\n . else {\n . cols[[auto_named_dots[i]]] <- result\n . }\n . }\n . }, error = function(e) {\n . local_call_step(dots = dots, .index = i, .fn = \"summarise\", \n . .dot_data = inherits(e, \"rlang_error_data_pronoun_not_found\"))\n . call_step <- peek_call_step()\n . error_name <- call_step$error_name\n . if (inherits(e, \"dplyr:::error_summarise_incompatible_combine\")) {\n . bullets <- c(x = glue(\"Input `{error_name}` must return compatible vectors across groups\", \n . .envir = peek_call_step()), i = cnd_bullet_combine_details(e$parent$x, \n . e$parent$x_arg), i = cnd_bullet_combine_details(e$parent$y, \n . e$parent$y_arg))\n . }\n . else if (inherits(e, \"dplyr:::summarise_unsupported_type\")) {\n . bullets <- c(x = glue(\"Input `{error_name}` must be a vector, not {friendly_type_of(result)}.\", \n . result = e$result), i = cnd_bullet_rowwise_unlist())\n . }\n . else if (inherits(e, \"dplyr:::summarise_incompatible_size\")) {\n . peek_mask()$set_current_group(e$group)\n . bullets <- c(x = glue(\"Input `{error_name}` must be size {or_1(expected_size)}, not {size}.\", \n . expected_size = e$expected_size, size = e$size), \n . i = glue(\"An earlier column had size {expected_size}.\", \n . expected_size = e$expected_size))\n . }\n . else {\n . bullets <- c(x = conditionMessage(e))\n . }\n . bullets <- c(cnd_bullet_header(), bullets, i = cnd_bullet_input_info())\n . if (!inherits(e, \"dplyr:::error_summarise_incompatible_combine\")) {\n . bullets <- c(bullets, i = cnd_bullet_cur_group_label())\n . }\n . abort(bullets, class = \"dplyr_error\")\n . })", "8. mask$eval_all_summarise(quo)", "9. median(StateCoderTime)", "10. .handleSimpleError(function (e) \n . {\n . local_call_step(dots = dots, .index = i, .fn = \"summarise\", \n . .dot_data = inherits(e, \"rlang_error_data_pronoun_not_found\"))\n . call_step <- peek_call_step()\n . error_name <- call_step$error_name\n . if (inherits(e, \"dplyr:::error_summarise_incompatible_combine\")) {\n . bullets <- c(x = glue(\"Input `{error_name}` must return compatible vectors across groups\", \n . .envir = peek_call_step()), i = cnd_bullet_combine_details(e$parent$x, \n . e$parent$x_arg), i = cnd_bullet_combine_details(e$parent$y, \n . e$parent$y_arg))\n . }\n . else if (inherits(e, \"dplyr:::summarise_unsupported_type\")) {\n . bullets <- c(x = glue(\"Input `{error_name}` must be a vector, not {friendly_type_of(result)}.\", \n . result = e$result), i = cnd_bullet_rowwise_unlist())\n . }\n . else if (inherits(e, \"dplyr:::summarise_incompatible_size\")) {\n . peek_mask()$set_current_group(e$group)\n . bullets <- c(x = glue(\"Input `{error_name}` must be size {or_1(expected_size)}, not {size}.\", \n . expected_size = e$expected_size, size = e$size), \n . i = glue(\"An earlier column had size {expected_size}.\", \n . expected_size = e$expected_size))\n . }\n . else {\n . bullets <- c(x = conditionMessage(e))\n . }\n . bullets <- c(cnd_bullet_header(), bullets, i = cnd_bullet_input_info())\n . if (!inherits(e, \"dplyr:::error_summarise_incompatible_combine\")) {\n . bullets <- c(bullets, i = cnd_bullet_cur_group_label())\n . }\n . abort(bullets, class = \"dplyr_error\")\n . }, \"object 'StateCoderTime' not found\", base::quote(median(StateCoderTime)))", "11. h(simpleError(msg, call))", "12. abort(bullets, class = \"dplyr_error\")", "13. signal_abort(cnd)" ] } ], "source": [ "FamilyTreeRQ2Raw <- rbind(\n", "# Load10Log(\"measurements/stats/FamilyTree//size010to-1r10n10rt300nsdrealstats_06-0249.csv\", 10),\n", " Load10Log(\"measurements/stats/FamilyTree//size020to-1r10n10rt3600nsz3stats_06-0205.csv\", 20)\n", ")\n", "FamilyTreeRQ2Raw\n", "FamilyTreeRQ2 <- FamilyTreeRQ2Raw %>% ProcessRQ2\n", "FamilyTreeRQ2\n", "# median(FamilyTreeRQ2Raw$preprocessingTime) / 1000.0\n", "# FamilyTreeRQ2 %>% RQ2Plot('FamilyTree')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.0.3" } }, "nbformat": 4, "nbformat_minor": 2 }