# Data analysis for the paper "Automated Generation of Consistent Models with Structural and Attribute Constraints"

First, let's load some packages.

In [7]:
require(tidyverse)
dir.create('plots')

“'plots' already exists”


In [28]:
ProcessDetailedStatistics <- function(str) {
    str <- sub('TransformationExecutionTime', 'TransformationExecutionTime:', str)
    str <- sub('Backtrackingtime', 'BacktrackingTime', str)
    str <- gsub('\\(|\\)', '', str)
    str <- lapply(strsplit(str, '\\||:'), function (v) {
        dim(v) <- c(2, 11)
        values <- as.double(v[2,])
        names(values) <- v[1,]
        as.data.frame(t(values))
    })
    str
}
Load10Log <- function(filename, size) {
    read_csv(filename, col_types = cols(
      .default = col_double(),
      Result = col_character(),
      Solution1DetailedStatistics = col_character(),
      Solution2DetailedStatistics = col_character(),
      Solution3DetailedStatistics = col_character(),
      Solution4DetailedStatistics = col_character(),
      Solution5DetailedStatistics = col_character(),
      Solution6DetailedStatistics = col_character(),
      Solution7DetailedStatistics = col_character(),
      Solution8DetailedStatistics = col_character(),
      Solution9DetailedStatistics = col_character(),
      Solution10DetailedStatistics = col_character()
    )) %>% transmute(
        n = size,
        Run = Run,
        preprocessingTime = get('Domain to logic transformation time') + get('Logic to solver transformation time') + ExplorationInitializationTime,
        Solution0FoundAt = Solution0FoundAt,
        additionalTime = Solution9FoundAt - Solution0FoundAt,
        Solution1DetailedStatistics = ProcessDetailedStatistics(Solution1DetailedStatistics)
    ) %>% unnest()
#     %>% unnest() %>% mutate(
#         # (Logical) constraint evluation should count as refinement.
#         ForwardTime = ForwardTime + GlobalConstraintEvaluationTime + FitnessCalculationTime,
#         preprocessingTime = preprocessingTime,
#         BacktrackingTime = Solution0FoundAt - (StateCoderTime + ForwardTime + NumericalSolverSumTime)
#     ) %>% select(n, Run, preprocessingTime, StateCoderTime, ForwardTime, BacktrackingTime, NumericalSolverSumTime, additionalTime)
}
Load1Log <- function(filename, size) {
    read_csv(filename, col_types = cols(
      .default = col_double(),
      Result = col_character(),
      Solution1DetailedStatistics = col_character()
    )) %>% filter(Result == "ModelResultImpl") %>% transmute(
        n = size,
        Run = Run,
        preprocessingTime = get('Domain to logic transformation time') + get('Logic to solver transformation time') + ExplorationInitializationTime,
        Solution0FoundAt = Solution0FoundAt,
        Solution1DetailedStatistics = ProcessDetailedStatistics(Solution1DetailedStatistics)
    ) %>% unnest(cols = c(Solution1DetailedStatistics)) %>% mutate(
        ForwardTime = ForwardTime + GlobalConstraintEvaluationTime + FitnessCalculationTime,
        BacktrackingTime = Solution0FoundAt - (StateCoderTime + ForwardTime + NumericalSolverSumTime)
    ) %>% select(n, Run, preprocessingTime, StateCoderTime, ForwardTime, BacktrackingTime, NumericalSolverSumTime)
}

In [13]:
ProcessRQ1 <- function(df) {
    df %>% group_by(n) %>% summarize(
        .groups = 'drop',
        time = median(preprocessingTime + StateCoderTime + ForwardTime + BacktrackingTime + NumericalSolverSumTime) / 1000.0
    )
}
ProcessRQ2 <- function(df) {
    df %>% group_by(n) %>% summarize(
        .groups = 'drop',
        preprocessingTime = median(preprocessingTime) / 1000.0,
        StateCoderTime = median(StateCoderTime) / 1000.0,
        ForwardTime = median(ForwardTime) / 1000.0,
        BacktrackingTime = median(BacktrackingTime) / 1000.0,
        NumericalSolverSumTime = median(NumericalSolverSumTime) / 1000.0,
        additionalTime = median(additionalTime) / 1000.0
    )
}
ProcessRQ3 <- ProcessRQ1

In [14]:
RQ2Plot <- function(df, name) {
    df <- df %>% gather(name, value, -n) %>% filter(name != "preprocessingTime")
    df$name <- factor(df$name, levels=rev(c('ForwardTime', 'BacktrackingTime', 'StateCoderTime', 'NumericalSolverSumTime', 'additionalTime')))
    plot <- df %>% ggplot(aes(x=n, y=value, fill=name)) +
        geom_bar(stat='identity') +
        scale_fill_brewer(palette='Set2',
                          labels=rev(c('Refinement', 'Backtracking', 'State Coding', 'SMT Solver Calls', 'Additional Model Generation')),
                          guide=FALSE) +
        scale_x_continuous(breaks=c(20, 40, 60, 80, 100), name="Model Size (# nodes)") +
        scale_y_continuous(name="Runtime (s)") +
        theme_bw()
    ggsave(plot=plot, filename=paste0('plots/plot_RQ2_', name, '.pdf'), width=3.5, height=2.5)
    plot
}

### Fam domain

In [29]:
FamilyTreeRQ2Raw <- rbind(
#     Load10Log("measurements/stats/FamilyTree//size010to-1r10n10rt300nsdrealstats_06-0249.csv", 10),
    Load10Log("measurements/stats/FamilyTree//size020to-1r10n10rt3600nsz3stats_06-0205.csv", 20)
)
FamilyTreeRQ2Raw
FamilyTreeRQ2 <- FamilyTreeRQ2Raw %>% ProcessRQ2
FamilyTreeRQ2
# median(FamilyTreeRQ2Raw$preprocessingTime) / 1000.0
# FamilyTreeRQ2 %>% RQ2Plot('FamilyTree')

“`cols` is now required when using unnest().
Please use `cols = c(Solution1DetailedStatistics)`”


n,Run,preprocessingTime,Solution0FoundAt,additionalTime,TransformationExecutionTime,ForwardTime,BacktrackingTime,GlobalConstraintEvaluationTime,FitnessCalculationTime,ActivationSelectionTime,SolutionCopyTime,NumericalSolverSumTime,NumericalSolverProblemFormingTime,NumericalSolverSolvingTime,NumericalSolverInterpretingSolution
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
20,1,1038,5248,3980,31,225,108,10,1,0,1,4675,2239,2239,0
20,2,724,4132,3628,20,155,76,6,1,0,0,3780,2019,2019,0
20,3,735,5329,4437,20,195,106,6,2,0,0,4876,2588,2588,0
20,4,694,4261,4920,16,150,69,5,1,0,0,3925,2085,2085,0
20,5,888,5959,5086,18,200,93,8,1,0,0,5490,2946,2946,0
20,6,665,6310,4320,17,195,90,6,1,0,0,5870,3167,3167,0
20,7,604,5024,5738,14,165,74,4,1,0,0,4662,2464,2464,0
20,8,589,5733,3917,15,181,81,4,1,0,0,5337,2827,2827,0
20,9,705,4719,4259,14,156,71,4,1,0,2,4367,2390,2390,0
20,10,554,4061,3990,13,141,68,3,1,0,0,3741,1991,1991,0


ERROR: Error: Problem with `summarise()` input `StateCoderTime`.
[31m✖[39m object 'StateCoderTime' not found
[34mℹ[39m Input `StateCoderTime` is `median(StateCoderTime)/1000`.
[34mℹ[39m The error occurred in group 1: n = 20.
