Systematic benchmarking of clustering methods on complete and incomplete multi-modal data
1 Abstract
We present a comprehensive roadmap of recommendations for guiding an optimal clustering analysis of multi-modal datasets, tailored to the specific characteristics of the data. Although clustering is one of the most common data analysis tasks in multi-modal settings, no clear guidelines exist on how to choose the best algorithm to partition a given dataset into clusters. To fill this gap, we conducted a systematic empirical benchmarking study using 20 multi-modal datasets, both fully observed and with modality-wise missing data. We evaluated 29 algorithms from multiple perspectives (ground-truth agreement, cluster stability, robustness to missing data, cluster structure and computational efficiency) using diverse and robust metrics. Our findings highlight that IMSR and SNF deliver the best overall performance on complete datasets, while NEMO, PIMVC, and IMSR were the most effective methods when working with incomplete multi-modal data. All results have been made publicly available through an interactive website: https://mmcbench.netlify.app.
2 Setting environment
Code
if (!require('ggplot2')) install.packages('ggplot2', version="3.5.1"); library('ggplot2')
if (!require('dplyr')) install.packages('dplyr', version="1.1.4"); library('dplyr')
if (!require('gghighlight')) install.packages('gghighlight', version="0.4.1"); library('gghighlight')
if (!require('mlr3benchmark')) install.packages('mlr3benchmark', version="0.1.6"); library('mlr3benchmark')
if (!require('pals')) install.packages('pals', version="1.8"); library('pals')
if (!require('ggdist')) install.packages('ggdist', version="3.3.2"); library('ggdist')
if (!require('tidyr')) install.packages('tidyr', version="1.3.1"); library('tidyr')
if (!require('readr')) install.packages('readr', version="2.1.5"); library('readr')
if (!require('cowplot')) install.packages('cowplot', version="1.1.3"); library('cowplot')
if (!require('ggiraphExtra')) install.packages('ggiraphExtra', version="0.3.0"); library('ggiraphExtra')
if (!require('corrplot')) install.packages('corrplot', version="0.92"); library('corrplot')
if (!require('plotly')) install.packages('plotly', version="4.10.4"); library('plotly')
if (!require('htmlwidgets')) install.packages('htmlwidgets', version= "1.6.4"); library('htmlwidgets')
if (!require('reactable')) install.packages('reactable', version= "0.4.4"); library('reactable')
if (!require('svglite')) install.packages('svglite', version= "2.1.3"); library('svglite')
if (!require('PMCMRplus')) install.packages('PMCMRplus', version= "1.9.10"); library('PMCMRplus')
if (!require('igraph')) install.packages('igraph', version= "2.0.3"); library('igraph')
if (!require('ggraph')) install.packages('ggraph', version= "2.2.1"); library('ggraph')
if (!require('xtable')) install.packages('xtable', version= "1.8.4"); library('xtable')
if (!require('caTools')) install.packages('caTools', version= "1.18.3"); library('caTools')
if (!require('Rtsne')) install.packages('Rtsne', version= "0.17"); library('Rtsne')
library('tibble')
library('ggrepel')
library('reshape2')
library('rlang')
library("stringr")
library("grid")
# if (!require('rmarkdown')) install.packages('rmarkdown', dependencies =T); library('rmarkdown')
3 Loading files
Code
paper_results_folder <- "paper_results"
paper_figures_folder <- "paper_figures"
incomplete_algs_metrics_file <- "incomplete_algorithms_metrics.csv"
incomplete_algs_metrics_path <- file.path(paper_results_folder, incomplete_algs_metrics_file)
complete_algs_metrics_file <- "complete_algorithms_metrics.csv"
complete_algs_metrics_path <- file.path(paper_results_folder, complete_algs_metrics_file)
uns_metrics_by_com_alg_file <- "uns_metrics_by_com_alg.csv"
uns_metrics_by_com_alg_path <- file.path(paper_results_folder, uns_metrics_by_com_alg_file)
uns_metrics_by_incom_alg_file <- "uns_metrics_by_inc_alg.csv"
uns_metrics_by_incom_alg_path <- file.path(paper_results_folder, uns_metrics_by_incom_alg_file)
incomplete_algs_rbmetrics_file <- "incomplete_algorithms_rbmetrics.csv"
incomplete_algs_rbmetrics_path <- file.path(paper_results_folder, incomplete_algs_rbmetrics_file)
complete_algs_rbmetrics_file <- "complete_algorithms_rbmetrics.csv"
complete_algs_rbmetrics_path <- file.path(paper_results_folder, complete_algs_rbmetrics_file)
alg_comparisons_file <- "alg_comparisons.csv"
alg_comparisons_path <- file.path(paper_results_folder, alg_comparisons_file)
resources_file <- "time_evaluation.csv"
resources_path <- file.path(paper_results_folder, resources_file)
dataset_table_file <- "dataset_table.csv"
dataset_table_path <- file.path(paper_results_folder, dataset_table_file)
com_results <- read_csv(complete_algs_metrics_path, show_col_types = FALSE)
incom_results <- read_csv(incomplete_algs_metrics_path, show_col_types = FALSE)
com_rbresults <- read_csv(complete_algs_rbmetrics_path, show_col_types = FALSE)
incom_rbresults <- read_csv(incomplete_algs_rbmetrics_path, show_col_types = FALSE)
incomplete_algorithms <- unique(incom_results$Algorithm)
complete_algorithms <- setdiff(unique(com_results$Algorithm),incomplete_algorithms)
uns_metrics_by_com_alg <- read_csv(uns_metrics_by_com_alg_path, show_col_types = FALSE)
uns_metrics_by_incom_alg <- read_csv(uns_metrics_by_incom_alg_path, show_col_types = FALSE)
alg_comparisons <- read_csv(alg_comparisons_path, show_col_types = FALSE)
resources_df <- read_csv(resources_path, show_col_types = FALSE)
dataset_table <- read_csv(dataset_table_path, show_col_types = FALSE)
dataset_table$Cells <- dataset_table$Samples * dataset_table[["Total features"]]
dataset_table <- rbind(
dataset_table %>%
mutate(Dataset = replace(Dataset, Dataset == "Nutrimouse", "nm_gen")) %>%
mutate(Clusters = replace(Clusters, Dataset == "nm_gen", 2)),
dataset_table[dataset_table[["Dataset"]] == "Nutrimouse",] %>%
mutate(Dataset = replace(Dataset, Dataset == "Nutrimouse", "nm_diet")) %>%
mutate(Clusters = replace(Clusters, Dataset == "nm_diet", 5)))
algorithm_families <- data.frame(
"Algorithm" = c("AJIVE", "COCA", "KMeans", "DFMF", "EEIMVC",
"GPCA", "IMSR", "jNMF", "LFIMVC", "MOFA",
"MRGCN", "MSNE", "MVCRSC", "MVSC", "NEMO",
"NMF", "Parea", "SIMCADC", "SNF", "DAIMC",
"IMSCAGL", "iNMF", "MKKMIK", "MONET_EO", "MONET", "MONET_IO", "OMVC",
"OPIMC", "OSLFIMVC", "PIMVC", "SUMO"),
"Strategy" = c("Subspace", "Ensemble", "Early-fusion", "Subspace", "Kernel",
"Subspace", "Subspace", "Subspace", "Kernel", "Subspace",
"Deep learning", "Graph", "Graph", "Graph", "Graph",
"Subspace", "Ensemble", "Graph", "Graph", "Subspace",
"Graph", "Subspace", "Kernel", "Graph", "Graph", "Graph", "Subspace",
"Subspace", "Kernel", "Subspace", "Subspace")
)
rename_var <- function(df) {
df <- df %>%
mutate(Dataset = replace(Dataset, Dataset == "forest", "Forest")) %>%
mutate(Dataset = replace(Dataset, Dataset == "derma", "Derm")) %>%
mutate(Dataset = replace(Dataset, Dataset == "buaa", "BUAA")) %>%
mutate(Dataset = replace(Dataset, Dataset == "statlog", "Statlog")) %>%
mutate(Dataset = replace(Dataset, Dataset == "prokaryotic", "Prokaryotic")) %>%
mutate(Dataset = replace(Dataset, Dataset == "wisconsin", "Wisconsin")) %>%
mutate(Dataset = replace(Dataset, Dataset == "bbcsport", "BBCSport")) %>%
mutate(Dataset = replace(Dataset, Dataset == "bdgp", "BDGP")) %>%
mutate(Dataset = replace(Dataset, Dataset == "webkb", "WebKB")) %>%
mutate(Dataset = replace(Dataset, Dataset == "metabric", "METABRIC")) %>%
mutate(Dataset = replace(Dataset, Dataset == "tcga", "TCGA")) %>%
mutate(Dataset = replace(Dataset, Dataset == "nuswide", "NUSWIDE")) %>%
mutate(Dataset = replace(Dataset, Dataset == "caltech101", "Caltech101"))
return(df)
}
com_results <- com_results %>%
left_join(algorithm_families, by = "Algorithm") %>%
rename_var()
incom_results <- incom_results %>%
rename_var()
uns_metrics_by_com_alg <- uns_metrics_by_com_alg %>%
left_join(algorithm_families, by = "Algorithm") %>%
rename_var()
uns_metrics_by_incom_alg <- uns_metrics_by_incom_alg %>%
rename_var()
com_rbresults <- com_rbresults %>%
left_join(algorithm_families, by = "Algorithm") %>%
rename_var()
incom_rbresults <- incom_rbresults %>%
rename_var()
resources_df <- resources_df %>%
mutate(
Algorithm = factor(algorithm),
Dataset = dataset,
Time = time,
Memory = memory_usage*1e-6,
Peak = peak*1e-6,
CPU = cpu_usage) %>%
rename_var() %>%
mutate(Dataset = factor(Dataset))
amputation_mechanisms <- c("All", "MEM", "PM", "MCAR", "MNAR")
unique_algorithms <- unique(com_results$Algorithm)
colors <- polychrome(length(unique_algorithms))
algorithm_colors <- setNames(colors, unique_algorithms)
# com_results$color_alg <- algorithm_colors[com_results$Algorithm]
unique_datasets <- unique(com_results$Dataset)
colors <- cols25(length(unique_datasets))
dataset_colors <- setNames(colors, unique_datasets)
# com_results$color_dat <- dataset_colors[com_results$Dataset]
marks <- seq(length(unique_algorithms))
algorithm_marks <- setNames(marks, unique_algorithms)
# com_results$mark_alg <- algorithm_marks[com_results$Algorithm]
metrics_performance <- c("MCC", "AMI", "ARI")
extraw_metrics_performance <- "_mean"
metrics_performance <- paste0(metrics_performance, extraw_metrics_performance)
metrics_stab <- c("AMI", "ARI")
extraw_metrics_stab <- "_Stab"
metrics_stab <- paste0(metrics_stab, extraw_metrics_stab)
metrics_rob <- c("AMI", "ARI")
metrics_rob <- paste0(metrics_rob, extraw_metrics_performance)
baseline <- "KMeans"4 Results
We aimed to provide a holistic assessment of clustering performance by evaluating the clustering solutions from multiple perspectives. Specific metrics were chosen for each aspect of performance, ensuring they were robust to group imbalance and to the number of clusters. We evaluated the following aspects (a minimal sketch of the agreement and stability computations follows this list):
Ground-truth agreement. We compared the clusters identified by the algorithms with the ground-truth labels of the original groups in each dataset using the following metrics: Matthews correlation coefficient (MCC), adjusted mutual information (AMI) and adjusted Rand index (ARI).
Cluster stability. To measure the consistency of the clustering solutions, we compared the pairwise clustering results across independent runs using AMI and ARI.
Internal cluster indices. We used unsupervised quality indices that assess cluster structure, such as intrinsic compactness, connectedness, and separation of the cluster partitions, using only information inherent to the dataset. Nine indices were computed: silhouette; variance ratio criterion (VRC); density-based clustering validation index (DBCV); Duda-Hart index (DHI); Dunn index; sum of squared error index (SSEI); R-squared index (RSI); Davies-Bouldin index (DB); and the Ball-Hall index (BHI).
Cluster robustness. For incomplete data, we evaluated how closely the clusters matched those obtained in the reference scenario with complete data, using AMI and ARI.
Computational efficiency. We measured computational efficiency in terms of elapsed time and resource consumption (CPU and memory usage).
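To make these computations concrete, the sketch below shows how ground-truth agreement (AMI, ARI) can be obtained and how stability can be summarized as the mean pairwise ARI across independent runs. It is an illustrative example on toy label vectors, assuming the aricode package (not part of the setup above); it is not the benchmark pipeline itself.
Code
# Illustrative sketch only, not the benchmark pipeline. Assumes the aricode
# package is installed (it is not loaded in the setup above).
library(aricode)
truth <- c(1, 1, 2, 2, 3, 3)  # ground-truth groups (toy example)
pred  <- c(1, 1, 2, 3, 3, 3)  # clusters found by an algorithm
ARI(truth, pred)  # adjusted Rand index
AMI(truth, pred)  # adjusted mutual information
# Stability: mean pairwise ARI across independent runs of the same algorithm.
runs <- list(c(1, 1, 2, 2, 3, 3), c(1, 1, 2, 2, 3, 2), c(2, 2, 1, 1, 3, 3))
run_pairs <- combn(length(runs), 2)
mean(apply(run_pairs, 2, function(p) ARI(runs[[p[1]]], runs[[p[2]]])))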
Conclusions from the results. Recommendation flowchart for choosing the best clustering algorithms based on multi-modal data characteristics and available computing resources. Limited resources refer to scenarios where certain methods are impractical due to time or hardware constraints. Symbols next to each method indicate the properties the algorithm is expected to satisfy.
4.1 Evaluation on complete data
4.1.1 Comparison with ground-truth labels
Code
results_complete <- com_results[com_results["Amputation_Mechanism"] == "Resampling",]
4.1.1.1 Aggregated
Code
metric <- metrics_performance[1]
results_summary <- results_complete %>%
select(Dataset, Algorithm, all_of(metric)) %>%
group_by(Dataset) %>%
mutate(rank = rank(-!!as.name(metric))) %>%
group_by(Algorithm) %>%
summarise(
"# in Top 3 Models" = sum(rank <= 3),
"# Best" = sum(rank == 1),
"Average Rank" = mean(rank),
"Sd Rank" = sd(rank),
) %>%
arrange(-`# in Top 3 Models`, -`# Best`, `Average Rank`) %>%
mutate_if(is.numeric, round, digits= 1) %>%
left_join(results_complete %>% count(Algorithm, name = "# Datasets"), by = "Algorithm")
sorted_algs <- results_summary$Algorithm
reactable::reactable(results_summary, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Code
# print(xtable(results_summary[1:10,] %>% mutate("Average Rank" = paste0(round(`Average Rank`, 1), "\u00B1", round(`Sd Rank`, 1))) %>% select(-c(`Sd Rank`))), include.rownames=FALSE)
External validation summary. Summary of performance, showing the number of times each method was within the top-3 methods, the number of times they achieved the best score, the average rank (mean ± standard deviation), and the number of datasets evaluated.
Code
ba <- expand.grid(Dataset = unique(results_complete$Dataset),
Algorithm = unique(results_complete$Algorithm),
Amputation_Mechanism = "Resampling") %>%
merge(results_complete, all = TRUE) %>%
select(Dataset, Algorithm, all_of(metrics_performance)) %>%
group_by(Dataset) %>%
mutate(across(where(is.numeric), ~ if_else(is.na(.), .[Algorithm == baseline], .))) %>%
mutate(across(where(is.numeric), ~ if_else(is.na(.), 0, .))) %>%
ungroup() %>%
droplevels() %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
rename(task_id = Dataset, learner_id = Algorithm) %>%
as_benchmark_aggr()
global_significance <- ba$friedman_test(p.adjust.method = "fdr")$p.value
y_height <- -0.5
minimize <- FALSE
for (metric in metrics_performance) {
cd <- ba$.__enclos_env__$private$.crit_differences(meas = metric, minimize = minimize, baseline= baseline, test = "bd", friedman_global = F)
cdx <- as.numeric(unlist(subset(cd$data, baseline == 1, "mean_rank")))
figure <- autoplot(ba, type = "cd", meas = metric, minimize = minimize, style= 2, friedman_global = F) +
# ggtitle(paste("Metric:", metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index")) +
# theme(plot.title = element_text(size=15, hjust = 0.5, vjust=-1)) +
scale_color_manual(values = algorithm_colors) +
annotate("segment", x = c(cdx - cd$cd, cdx + cd$cd), xend = cdx, y = y_height, yend = y_height, alpha = 0.9, color = "black", linewidth = 1.3) +
annotate("segment", x = c(cdx - cd$cd, cdx + cd$cd), xend = c(cdx - cd$cd, cdx + cd$cd), y = y_height+0.25, yend = y_height-0.25, alpha = 0.9, color = "black", linewidth = 1.3) +
annotate("text", label = paste("Critical Difference (baseline) =", round(cd$cd, 2), sep = " "),
y = y_height - 0.5, x = cdx) +
theme(text = element_text(family = "Times"))
figure$layers[[5]]$aes_params$label <- paste0(figure$layers[[5]]$aes_params$label, "; ", "Friedman test,p=", format(global_significance[[metric]], digits = 2))
figure$layers[[5]]$mapping$y <- figure$layers[[6]]$data$y + 0.4
figure$layers[[5]]$mapping$x <- mean(figure$data$mean_rank)
figure$layers[[4]]$aes_params$size <- 3
figure$data$xend <- pmin(pmax(figure$data$xend, 2), dim(figure$data)[1]-1)
figure$layers[[4]]$aes_params$colour <- "black"
figure$layers[[4]]$aes_params$hjust <- ifelse(figure$data$right, 1, 0)
filename <- paste0("rank_", gsub(extraw_metrics_performance, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, width = 20, height = 13.5, units = "cm", dpi = 300, bg = 'white')
if ((metric == metrics_performance[1]) | (metric == metrics_performance[2])) {
figure <- figure +
theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 18))
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, bg = 'white')
extrafont::loadfonts()
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure)
}
}
Critical difference plot. Critical difference plot showing the average rank of the methods. Missing results were estimated with the baseline (K-means), which may have introduced artifacts by making performance appear closer to the baseline than it actually is. The global Friedman test indicates a significant difference in the rankings of the methods across all datasets.
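For reference, the global test underlying this diagram can be reproduced with base R's friedman.test on a datasets-by-algorithms score matrix. The sketch below uses simulated scores and is only an approximation of the mlr3benchmark machinery used above.
Code
# Minimal sketch of the global Friedman test on simulated scores; rows are
# blocks (datasets) and columns are groups (algorithms).
set.seed(1)
scores <- matrix(runif(20 * 5), nrow = 20,
                 dimnames = list(paste0("d", 1:20), paste0("alg", 1:5)))
friedman.test(scores)
# Mean rank per algorithm (rank 1 = best, assuming higher scores are better).
colMeans(t(apply(-scores, 1, rank)))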
Code
for (metric in metrics_performance) {
figure <- results_complete %>%
select(Dataset, Algorithm, all_of(metric), Strategy) %>%
group_by(Dataset) %>%
mutate("{metric}" := !!as.name(metric) / max(!!as.name(metric))) %>%
ungroup() %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
arrange(desc(.data[[paste0("mean_", metric)]])) %>%
ungroup() %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm))) %>%
ggplot(aes(x=Algorithm, y=get(metric), fill=Strategy)) +
geom_boxplot(outliers = FALSE, varwidth = TRUE) +
ylim(0, 1) +
xlab("") + ylab(paste("Normalized", ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "adjusted mutual information", "adjusted rand index")))) +
theme_cowplot() +
scale_x_discrete(guide = guide_axis(angle = 60)) +
scale_fill_manual(values = c(
"Deep learning" = scales::hue_pal()(6)[1],
"Early-fusion" = scales::hue_pal()(6)[2],
"Ensemble" = scales::hue_pal()(6)[3],
"Graph" = scales::hue_pal()(6)[4],
"Kernel" = scales::hue_pal()(6)[5],
"Subspace" = scales::hue_pal()(6)[6]
)) +
geom_point(aes(x=Algorithm, y=.data[[paste0("mean_", metric)]]), size = 2, shape=2, color= "green", show.legend = FALSE) +
theme(legend.position = c(0.01, 0.14), legend.direction = "horizontal", legend.box.background = element_rect(color = "black"), legend.box.margin = margin(t = 3, r = 3, l=2), text = element_text(family = "Times")) +
guides(fill = guide_legend(nrow = 3, title.position="top"))
filename <- paste0("boxplots_", gsub(extraw_metrics_performance, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, width = 10, height = 8, bg = 'white')
if (metric == metrics_performance[1]) {
figure$data <- figure$data[figure$data$Algorithm %in% unique(figure$data$Algorithm)[1:10],]
figure <- figure +
theme(legend.position = c(0.03, 0.19), legend.text=element_text(size=12), axis.text.x = element_text(size = 15), axis.title.y = element_text(size = 15))
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure)
}
}
Soft-ranking: Normalized metrics across datasets. Boxplots show the 25th, 50th, 75th, and 100th percentiles, with the width indicating the number of datasets on which each algorithm was computed. The green triangle marks the mean score for each method. For the normalization step, each score was divided by the result of the best-performing algorithm on that dataset, so the best solution always received a normalized score of 1. Although iNMF emerged as the best-performing algorithm under this soft-ranking, the result may be influenced by the fact that the method was only evaluated on the smallest (and easiest) datasets.
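The normalization used here amounts to dividing each score by the best score obtained on the same dataset; a toy sketch with hypothetical values:
Code
# Toy illustration of the soft-ranking normalization (hypothetical scores).
toy <- tibble(Dataset = rep(c("A", "B"), each = 3),
              Algorithm = rep(c("m1", "m2", "m3"), 2),
              Score = c(0.9, 0.6, 0.3, 0.5, 0.25, 0.5))
toy %>%
  group_by(Dataset) %>%
  mutate(Normalized = Score / max(Score)) %>%  # best method per dataset gets 1
  ungroup()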
4.1.1.2 Individual
Code
for (metric in metrics_performance) {
figure <- results_complete %>%
select(Dataset, Algorithm, metric) %>%
group_by(Dataset) %>%
mutate(Best = (get(metric) == max(get(metric), na.rm = TRUE))) %>%
ungroup() %>%
mutate(
Algorithm = factor(Algorithm, levels = sorted_algs),
Dataset = factor(Dataset, levels = results_complete %>%
group_by(Dataset) %>% summarize(n = n()) %>%
ungroup() %>% arrange(n) %>% pull(Dataset)),
PointType = ifelse(Best, "Best", "Other")
) %>%
ggplot(aes(x = Algorithm, y = Dataset, size = get(metric), color = PointType)) +
geom_point(alpha = 0.75) +
scale_color_manual(values = c("Other" = "lightblue", "Best" = "purple"), name = "") +
xlab("") + ylab("") +
scale_size(name = gsub(extraw_metrics_performance, "", metric), range = c(1.5, 10), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
theme_cowplot(font_family = "Times") +
scale_x_discrete(guide = guide_axis(angle = 60))
filename <- paste0("bubble_", gsub(extraw_metrics_performance, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, width = 10, height = 6, bg = 'white')
if (metric == metrics_performance[1]) {
figure <- figure +
theme(axis.text.x = element_text(size = 10), axis.text.y = element_text(size = 10))
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, width = 10, height = 6, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure, width = 12, height = 6)
}
}
Individual performance of the algorithms. Bubble plot where the size of each bubble represents the average performance of the algorithm on each dataset.
Code
for (metric in metrics_performance) {
normalized_results_complete <- results_complete %>%
select(Dataset, Algorithm, all_of(metric)) %>%
group_by(Dataset) %>%
mutate("{metric}" := !!as.name(metric) / max(!!as.name(metric))) %>%
ungroup() %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
arrange(desc(.data[[paste0("mean_", metric)]])) %>%
ungroup()
alg_coms <- combs(sorted_algs, 2) %>%
as.data.frame() %>%
mutate(
Alg1 = V1,
Alg2 = V2,
p_value = NA
) %>%
ungroup() %>%
select(Alg1, Alg2, p_value)
for (pw_algs in 1:nrow(alg_coms)){
alg1 <- alg_coms[pw_algs,"Alg1"]
alg2 <- alg_coms[pw_algs,"Alg2"]
filtered_data <- normalized_results_complete %>%
filter(Algorithm %in% c(alg1, alg2)) %>%
group_by(Dataset) %>%
filter(n_distinct(Algorithm) == 2) %>%
ungroup()
if (nrow(filtered_data) > 0) {
kruskal_result <- kruskal.test(as.formula(paste(metric, "~ Algorithm")), data = filtered_data)
alg_coms[pw_algs,"p_value"] <- kruskal_result$p.value
}
}
figure <- alg_coms %>%
filter(!is.na(p_value)) %>%
mutate(
Alg1 = factor(Alg1, levels = sorted_algs[sorted_algs %in% Alg1]),
Alg2 = factor(Alg2, levels = rev(sorted_algs[sorted_algs %in% Alg2])),
p_value = round(p_value, digits = 2)
) %>%
arrange(Alg1, Alg2) %>%
ggplot(aes(x = Alg1, y = Alg2, fill = p_value)) +
geom_tile() +
geom_text(aes(label = ifelse(p_value < 0.05, ifelse(p_value <= 0.01, ifelse(p_value <= 0.001, "***", "**"), "*"), "")), color = "black", size=6) +
scale_fill_gradientn(
colors = c("red", "orange", "yellow", "white", "white"),
values = scales::rescale(c(0, 0.05, 0.1, 0.15, 1)),
limits = c(0, 1),
name = "p-value",
na.value = "grey",
guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
) +
xlab("") + ylab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust= 0.5), legend.position = c(0.8,0.8), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- paste0("diff_", gsub(extraw_metrics_performance, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, width = 12, height = 9, bg = 'white')
if (metric == metrics_performance[1]) {
figure <- figure + theme(axis.text.x=element_text(angle=90, hjust=1, vjust= 0.5, size=13), axis.text.y=element_text(size=13), legend.position = c(0.1,0.9), plot.margin=grid::unit(c(0,0,-5,-5), "mm"))
# figure$layers[[2]]$aes_params$size <- 4.3
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, width = 12, height = 9, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 12, height = 9)
}
}
Statistical differences in scoring. Pairwise comparison of the methods with the Kruskal-Wallis test, computed only on the datasets evaluated by both methods (*: p < 0.05; **: p ≤ 0.01; ***: p ≤ 0.001).
Code
interesting_algs <- c("IMSR", "SNF", "NEMO", baseline)
for (metric in metrics_performance) {
figure <- results_complete %>%
select(Dataset, Algorithm, metric) %>%
filter(Algorithm %in% interesting_algs) %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
arrange(desc(.data[[paste0("mean_", metric)]])) %>%
ungroup() %>%
mutate(Algorithm = factor(Algorithm, levels = interesting_algs)) %>%
ggplot(aes(x=Algorithm, y=get(metric), color=Dataset)) +
geom_point(aes(), size=4) +
geom_line(aes(group=Dataset), linewidth = 1.4) +
ylim(0, 1) +
xlab("") + ylab(ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index"))) +
theme_cowplot() +
scale_color_manual(values = dataset_colors) +
theme(legend.position = "None", text = element_text(family = "Times"))
filename <- paste0("top_", gsub(extraw_metrics_performance, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, bg = 'white')
if (metric == metrics_performance[1]) {
figure <- figure +
theme(axis.text.x = element_text(size = 18), axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 18), plot.margin=grid::unit(c(0,-3,-5,0), "mm"))
# figure <- figure + theme(legend.position = "right") + guides(color= guide_legend(ncol=3))
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure, height = 5)
# ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure)
}
}
Top methods comparison. Score for the top algorithms and the baseline method (K-means).
Code
table_show <- results_complete %>%
group_by(Dataset) %>%
mutate(across(all_of(metrics_performance), ~ rank(-.), .names ="{.col}_rank")) %>%
ungroup() %>%
rename_with(~ str_remove(., extraw_metrics_performance), ends_with("_rank")) %>%
{
metric_cols <- grep(paste0("^(", paste(gsub(extraw_metrics_performance, "", metrics_performance), collapse = "|"), ")_(mean|std|rank)$"),
colnames(.), value = TRUE)
mean_cols <- grep("_mean$", metric_cols, value = TRUE)
std_cols <- grep("_std$", metric_cols, value = TRUE)
rank_cols <- grep("_rank$", metric_cols, value = TRUE)
ordered_cols <- c(rbind(mean_cols, std_cols, rank_cols))
ordered_cols <- ordered_cols[!is.na(ordered_cols)]
select(., Dataset, Algorithm, all_of(ordered_cols))
} %>%
arrange(Dataset, Algorithm) %>%
mutate(across(where(is.numeric), round, digits = 2))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all results, showing mean, standard deviation and rank for each metric.
4.1.2 Internal cluster indices
Code
filtered_table <- expand.grid(Dataset = unique(uns_metrics_by_com_alg$Dataset),
Algorithm = unique(uns_metrics_by_com_alg$Algorithm),
Amputation_Mechanism = "Resampling") %>%
merge(uns_metrics_by_com_alg, all = TRUE) %>%
filter(Amputation_Mechanism == "Resampling") %>%
group_by(Dataset) %>%
mutate(
Silh = rank(Silhouette),
VRC = rank(VRC),
DB = rank(-DB),
DBCV = rank(-DBCV),
Dunn = rank(Dunn),
DHI = rank(-DHI),
SSEI = rank(-SSEI),
RSI = rank(RSI),
BHI = rank(-BHI)) %>%
ungroup() %>%
group_by(Algorithm) %>%
summarize_if(is.numeric, mean) %>%
mutate(mean_rank = rowMeans(select(., where(is.numeric)), na.rm = TRUE)) %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm[order(mean_rank, decreasing = TRUE)]))) %>%
relocate(DB, .after=RSI) %>%
relocate(DHI, .after=DBCV) %>%
select(-c(AMI_Stab, ARI_Stab, mean_rank, Missing_Percentage, Silhouette))
sorted_algs <- levels(filtered_table$Algorithm)
figure <- filtered_table %>%
ggRadar(aes(facet=Algorithm), colour = Algorithm, rescale = FALSE, interactive = FALSE, size = 1, plot.legend = FALSE, alpha = 0.5) +
theme_bw() +
theme(legend.position="none", axis.ticks.y=element_blank(), text = element_text(family = "Times")) +
scale_y_continuous(breaks = NULL) +
scale_fill_manual(values = algorithm_colors) +
scale_color_manual(values = algorithm_colors)
filename <- "cleval"
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, width = 8, height = 8, bg = 'white')
figure$data <- figure$data[figure$data$Algorithm %in% pull(results_summary[results_summary[,"# Datasets"] >= 15,], "Algorithm"),]
figure$data <- figure$data[figure$data$Algorithm %in% unique(figure$data$Algorithm)[1:6],]
figure <- figure +
theme(axis.text.x = element_text(size = 7), strip.text = element_text(size = 7), panel.spacing = unit(0.1, "lines"), plot.margin=grid::unit(c(0,0,-6,-5), "mm"), text = element_text(family = "Times")) +
scale_fill_manual(values = rep("#BCCACF", length(unique(figure$data$Algorithm)))) +
scale_color_manual(values = rep("#9EC9D9", length(unique(figure$data$Algorithm))))
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, width = 2.5, height = 2., bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 2.5, height = 2.)Cluster structure quality. Radar chart illustrating various unsupervised internal indices. Each metric was computed based on the algorithm’s rank across datasets, with the mean rank used for comparison. Algorithms are sorted by their mean rank.
Code
table_show <- filtered_table %>%
arrange(Algorithm) %>%
mutate(across(where(is.numeric), \(x) round(length(x)-x, digits = 1))) %>%
left_join(results_complete %>% count(Algorithm, name = "# Datasets"), by = "Algorithm") %>%
arrange(Algorithm)
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Internal metrics rank. Rank of the algorithms by dataset for each internal index.
Code
table_show <- uns_metrics_by_com_alg %>%
filter(Amputation_Mechanism == "Resampling") %>%
select(-c(Missing_Percentage, Amputation_Mechanism, Imputation, Strategy, all_of(metrics_stab))) %>%
arrange(Dataset, Algorithm) %>%
mutate(across(where(is.numeric), \(x) round(x, digits = 2)))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all results, showing the mean for each metric.
4.1.3 Stability
Code
stability_results <- uns_metrics_by_com_alg[uns_metrics_by_com_alg["Amputation_Mechanism"] == "Resampling",]
4.1.3.1 Aggregated
Code
metric <- metrics_stab[1]
results_summary <- stability_results %>%
select(Dataset, Algorithm, all_of(metric)) %>%
group_by(Dataset) %>%
mutate(rank = rank(-!!as.name(metric))) %>%
group_by(Algorithm) %>%
summarise(
"# in Top 3 Models" = sum(rank <= 3),
"# Best" = sum(rank == 1),
"Average Rank" = mean(rank),
"Sd Rank" = sd(rank),
) %>%
arrange(-`# in Top 3 Models`, -`# Best`, `Average Rank`) %>%
mutate_if(is.numeric, round, digits= 1) %>%
left_join(results_complete %>% count(Algorithm, name = "# Datasets"), by = "Algorithm")
sorted_algs <- results_summary$Algorithm
reactable::reactable(results_summary, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Code
# print(xtable(results_summary[1:10,] %>% mutate("Average Rank" = paste0(round(`Average Rank`, 1), "\u00B1", round(`Sd Rank`, 1))) %>% select(-c(`Sd Rank`))), include.rownames=FALSE)
Cluster stability summary. Summary of performance, showing the number of times each method was within the top-3 methods, the number of times they achieved the best score, the average rank (mean ± standard deviation), and the number of datasets evaluated.
Code
ba <- expand.grid(Dataset = unique(stability_results$Dataset),
Algorithm = unique(stability_results$Algorithm),
Amputation_Mechanism = "Resampling") %>%
merge(stability_results, all = TRUE) %>%
select(Dataset, Algorithm, all_of(metrics_stab)) %>%
group_by(Dataset) %>%
mutate(across(where(is.numeric), ~ if_else(is.na(.), .[Algorithm == baseline], .))) %>%
mutate(across(where(is.numeric), ~ if_else(is.na(.), 0, .))) %>%
ungroup() %>%
droplevels() %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
rename(task_id = Dataset, learner_id = Algorithm) %>%
as_benchmark_aggr()
global_significance <- ba$friedman_test(p.adjust.method = "fdr")$p.value
y_height <- -0.5
minimize <- FALSE
for (metric in metrics_stab) {
cd <- ba$.__enclos_env__$private$.crit_differences(meas = metric, minimize = minimize, baseline= baseline, test = "bd", friedman_global = F)
cdx <- as.numeric(unlist(subset(cd$data, baseline == 1, "mean_rank")))
figure <- autoplot(ba, type = "cd", meas = metric, minimize = minimize, style= 2, friedman_global = F) +
scale_color_manual(values = algorithm_colors) +
annotate("segment", x = c(cdx - cd$cd, cdx + cd$cd), xend = cdx, y = y_height, yend = y_height, alpha = 0.9, color = "black", linewidth = 1.3) +
annotate("segment", x = c(cdx - cd$cd, cdx + cd$cd), xend = c(cdx - cd$cd, cdx + cd$cd), y = y_height+0.25, yend = y_height-0.25, alpha = 0.9, color = "black", linewidth = 1.3) +
annotate("text", label = paste("Critical Difference (baseline) =", round(cd$cd, 2), sep = " "),
y = y_height - 0.5, x = cdx) +
theme(text = element_text(family = "Times"))
figure$layers[[5]]$aes_params$label <- paste0(figure$layers[[5]]$aes_params$label, "; ", "Friedman test,p=", format(global_significance[[metric]], digits = 2))
figure$layers[[5]]$mapping$y <- figure$layers[[6]]$data$y + 0.4
figure$layers[[5]]$mapping$x <- mean(figure$data$mean_rank)
figure$layers[[4]]$aes_params$size <- 3
figure$data$xend <- pmin(pmax(figure$data$xend, 2), dim(figure$data)[1]-1)
figure$layers[[4]]$aes_params$colour <- "black"
figure$layers[[4]]$aes_params$hjust <- ifelse(figure$data$right, 1, 0)
filename <- paste0("rank_stab_", gsub(extraw_metrics_stab, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, width = 20, height = 13.5, units = "cm", dpi = 300, bg = 'white')
if (metric == metrics_stab[1]) {
figure <- figure +
theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 18))
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure, bg = 'white')
}
}
Critical difference plot. Critical difference plot showing the average rank of the methods. Missing results were estimated with the baseline (K-means), which may have introduced artifacts by making performance appear closer to the baseline than it actually is. The global Friedman test indicates a significant difference in the rankings of the methods across all datasets.
Code
for (metric in metrics_stab) {
figure <- stability_results %>%
select(Dataset, Algorithm, metric, Strategy) %>%
filter(!(is.na(get(metric)))) %>%
group_by(Dataset) %>%
mutate("{metric}" := !!as.name(metric) / max(!!as.name(metric))) %>%
ungroup() %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
arrange(desc(.data[[paste0("mean_", metric)]])) %>%
ungroup() %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm))) %>%
ggplot(aes(x=Algorithm, y=get(metric), fill=Strategy)) +
geom_boxplot(outliers = FALSE, varwidth = TRUE) +
ylim(0, 1) +
xlab("") + ylab(paste("Normalized", ifelse(metric == "AMI_Stab", "adjusted mutual information", "adjusted rand index"))) +
theme_cowplot() +
scale_x_discrete(guide = guide_axis(angle = 60)) +
scale_fill_manual(values = c(
"Deep learning" = scales::hue_pal()(6)[1],
"Early-fusion" = scales::hue_pal()(6)[2],
"Ensemble" = scales::hue_pal()(6)[3],
"Graph" = scales::hue_pal()(6)[4],
"Kernel" = scales::hue_pal()(6)[5],
"Subspace" = scales::hue_pal()(6)[6]
)) +
geom_point(aes(x=Algorithm, y=.data[[paste0("mean_", metric)]]), size = 2, shape=2, color= "green", show.legend = FALSE) +
theme(legend.position = c(0.01, 0.14), legend.direction = "horizontal", legend.box.background = element_rect(color = "black"), legend.box.margin = margin(t = 3, r = 3, l=2), text = element_text(family = "Times")) +
guides(fill = guide_legend(nrow = 3, title.position="top"))
filename <- paste0("boxplots_stab_", gsub(extraw_metrics_stab, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, width = 10, height = 8, bg = 'white')
figure <- figure +
theme(legend.position = c(0.03, 0.16), legend.text=element_text(size=12), axis.text.x = element_text(size = 15), axis.title.y = element_text(size = 15))
if (metric == metrics_stab[1]) {
figure$data <- figure$data[figure$data$Algorithm %in% unique(figure$data$Algorithm)[1:10],]
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure)
}
}
Soft-ranking: Normalized metrics across datasets. Boxplots show the 25th, 50th, 75th, and 100th percentiles, with the width indicating the number of datasets on which each algorithm was computed. The green triangle marks the mean score for each method. For the normalization step, each score was divided by the result of the best-performing algorithm on that dataset, so the best solution always received a normalized score of 1. Although MONET emerged as the best-performing algorithm under this soft-ranking, the result may be influenced by the fact that the method was only evaluated on the smallest (and easiest) datasets.
4.1.3.2 Individual
Code
for (metric in metrics_stab) {
figure <- stability_results %>%
select(Dataset, Algorithm, metric) %>%
group_by(Dataset) %>%
mutate(Best = (get(metric) == max(get(metric), na.rm = TRUE))) %>%
ungroup() %>%
mutate(
Algorithm = factor(Algorithm, levels = sorted_algs),
Dataset = factor(Dataset, levels = results_complete %>%
group_by(Dataset) %>% summarize(n = n()) %>%
ungroup() %>% arrange(n) %>% pull(Dataset)),
PointType = ifelse(Best & !is.na(Best), "Best", "Other")
) %>%
ggplot(aes(x = Algorithm, y = Dataset, size = get(metric), color = PointType)) +
geom_point(alpha = 0.75) +
scale_color_manual(values = c("Other" = "lightblue", "Best" = "purple"), name = "") +
xlab("") + ylab("") +
scale_size(name = gsub(extraw_metrics_stab, "", metric), range = c(1.5, 10), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
theme_cowplot(font_family = "Times") +
scale_x_discrete(guide = guide_axis(angle = 60))
filename <- paste0("bubble_stab_", gsub(extraw_metrics_stab, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, width = 10, height = 6, bg = 'white')
figure <- figure +
theme(axis.text.x = element_text(size = 18), axis.text.y = element_text(size = 18))
if (metric == metrics_stab[1]) {
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, width = 10, height = 6, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure, width = 12, height = 6)
}
}
Individual performance of the algorithms. Bubble plot where the size of each bubble represents the average performance of the algorithm on each dataset.
Code
for (metric in metrics_stab) {
normalized_results_complete <- stability_results %>%
select(Dataset, Algorithm, all_of(metric)) %>%
group_by(Dataset) %>%
mutate("{metric}" := !!as.name(metric) / max(!!as.name(metric))) %>%
ungroup() %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
arrange(desc(.data[[paste0("mean_", metric)]])) %>%
ungroup()
alg_coms <- combs(sorted_algs, 2) %>%
as.data.frame() %>%
mutate(
Alg1 = V1,
Alg2 = V2,
p_value = NA
) %>%
ungroup() %>%
select(Alg1, Alg2, p_value)
for (pw_algs in 1:nrow(alg_coms)){
alg1 <- alg_coms[pw_algs,"Alg1"]
alg2 <- alg_coms[pw_algs,"Alg2"]
filtered_data <- normalized_results_complete %>%
filter(Algorithm %in% c(alg1, alg2)) %>%
group_by(Dataset) %>%
filter(n_distinct(Algorithm) == 2) %>%
ungroup()
if (nrow(filtered_data) > 0) {
kruskal_result <- kruskal.test(as.formula(paste(metric, "~ Algorithm")), data = filtered_data)
alg_coms[pw_algs,"p_value"] <- kruskal_result$p.value
}
}
figure <- alg_coms %>%
filter(!is.na(p_value)) %>%
mutate(
Alg1 = factor(Alg1, levels = sorted_algs[sorted_algs %in% Alg1]),
Alg2 = factor(Alg2, levels = rev(sorted_algs[sorted_algs %in% Alg2])),
p_value = round(p_value, digits = 2)
) %>%
arrange(Alg1, Alg2) %>%
ggplot(aes(x = Alg1, y = Alg2, fill = p_value)) +
geom_tile() +
geom_text(aes(label = ifelse(p_value < 0.05, ifelse(p_value <= 0.01, ifelse(p_value <= 0.001, "***", "**"), "*"), "")), color = "black", size=6) +
scale_fill_gradientn(
colors = c("red", "orange", "yellow", "white", "white"),
values = scales::rescale(c(0, 0.05, 0.1, 0.15, 1)),
limits = c(0, 1),
name = "p-value",
na.value = "grey",
guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
) +
xlab("") + ylab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust= 0.5), legend.position = c(0.8,0.8), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- paste0("diff_stab_", gsub(extraw_metrics_stab, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, width = 12, height = 9, bg = 'white')
if (metric == metrics_stab[1]) {
figure <- figure + theme(axis.text.x=element_text(angle=90, hjust=1, vjust= 0.5, size=13), axis.text.y=element_text(size=13), legend.position = c(0.8,0.8), plot.margin=grid::unit(c(0,0,-5,-5), "mm"))
# figure$layers[[2]]$aes_params$size <- 4.3
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, width = 12, height = 9, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 12, height = 9)
}
}
Statistical differences in scoring. Pairwise comparison of the methods with the Kruskal-Wallis test, computed only on the datasets evaluated by both methods (*: p < 0.05; **: p ≤ 0.01; ***: p ≤ 0.001).
Code
interesting_algs <- c("IMSR", "SNF", "NEMO", "SIMCADC", baseline)
for (metric in metrics_stab) {
figure <- stability_results %>%
select(Dataset, Algorithm, metric) %>%
filter(Algorithm %in% interesting_algs) %>%
group_by(Algorithm) %>%
mutate(across(where(is.numeric), ~ mean(.), .names = "mean_{col}")) %>%
arrange(desc(.data[[paste0("mean_", metric)]])) %>%
ungroup() %>%
mutate(Algorithm = factor(Algorithm, levels = interesting_algs)) %>%
ggplot(aes(x=Algorithm, y=get(metric), color=Dataset)) +
geom_point(aes(), size=4) +
geom_line(aes(group=Dataset), linewidth = 1.4) +
ylim(0, 1) +
xlab("") + ylab(ifelse(metric == "AMI_Stab", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot() +
scale_color_manual(values = dataset_colors) +
theme(legend.position = "None", text = element_text(family = "Times"))
filename <- paste0("top_stab_", gsub(extraw_metrics_stab, "", metric))
ggsave(file.path(paper_figures_folder, paste0(filename,".png")), figure, bg = 'white')
if (metric == metrics_stab[1]) {
figure <- figure + theme(axis.text.x = element_text(size = 18), axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 18), plot.margin=grid::unit(c(0,-3,-5,0), "mm"))
ggsave(file.path(paper_figures_folder, paste0(filename,".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename,".pdf")), figure)
}
}
Top methods comparison. Score for the top algorithms and the baseline method (K-means).
Code
table_show <- stability_results %>%
group_by(Dataset) %>%
mutate(across(all_of(metrics_stab), ~ rank(-.), .names ="{.col}_rank")) %>%
ungroup() %>%
rename_with(~ str_remove(., extraw_metrics_stab)) %>%
{
ordered_cols <- c(c(gsub(extraw_metrics_stab, "", metrics_stab), paste0(gsub(extraw_metrics_stab, "", metrics_stab), "_rank")))
ordered_cols <- sort(ordered_cols)
select(., Dataset, Algorithm, all_of(ordered_cols))
} %>%
arrange(Dataset, Algorithm) %>%
mutate(across(where(is.numeric), round, digits = 2))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all results, showing the score and rank for each metric.
4.1.4 Computational efficiency
4.1.4.1 Aggregated
Code
time_matrix <- resources_df %>%
select(Dataset, Algorithm, Time)
for (axis in c("All", "Samples", "Features")) {
for (imputing in c(TRUE, FALSE)) {
titlesuffix <- ifelse(axis=="All", "Cell", ifelse(axis=="Samples", "Sample", "Feature"))
figure <- time_matrix %>%
group_by(Dataset) %>%
mutate(Time = ifelse(imputing & is.na(Time), max(Time, na.rm = TRUE), Time)) %>%
ungroup() %>%
left_join(select(dataset_table, Dataset, Samples, Features = "Total features", All= Cells), by = "Dataset") %>%
mutate(Time = Time / !!as.name(axis)) %>%
group_by(Algorithm) %>%
summarise(
avg_time = mean(Time, na.rm = TRUE),
) %>%
arrange(avg_time) %>%
mutate(
Algorithm = factor(Algorithm, levels = unique(Algorithm)),
RelativeSpeed = avg_time / min(avg_time)
) %>%
left_join(algorithm_families, by = "Algorithm") %>%
ggplot(aes(x = reorder(Algorithm, avg_time), y = avg_time, fill=Strategy)) +
geom_bar(stat = "identity") +
geom_text(aes(label = ifelse(RelativeSpeed >= 1000,
paste0(round(RelativeSpeed / 1000), "K"),
ifelse(RelativeSpeed >= 10,
round(RelativeSpeed),
round(RelativeSpeed, 1)))), vjust = -0.3, size= 3) +
ylab(paste("Average time (s) per", tolower(titlesuffix))) + xlab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, hjust=1), plot.margin=grid::unit(c(1,5,-6,0), "mm"), legend.position= c(0.05, 0.7), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "time_barplot"
filename <- ifelse(imputing, paste0(filename, "_imputed"), filename)
filename <- paste0(filename, "_", tolower(axis))
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, bg = 'white')
if (!(imputing) & (axis == "Samples")) {
figure <- figure + theme(axis.text.x=element_text(angle=60, hjust=1, size=22), axis.text.y=element_text(size=22), axis.title.y=element_text(size=22), plot.margin=grid::unit(c(3,0,-7,0.5), "mm"), legend.text=element_text(size=22), legend.title=element_text(size=22))
figure$layers[[2]]$aes_params$size <- 7
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 13)
}
if (!(imputing) & (axis == "All")) {
avg_time <- figure$data[c("Algorithm", "avg_time")]
}
}
}
Average computing time. Average computing time (in seconds) across datasets, divided by the number of elements (samples, features, cells) in each dataset, with the relative speed with respect to the fastest algorithm shown on top.
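As a point of reference, a rough in-process timing of a single run can be obtained with base R, as in the sketch below on toy data; the benchmark itself monitored elapsed time, CPU and memory usage externally, so this is only an approximation.
Code
# Rough sketch of per-run timing in base R (toy data; resource monitoring in
# the benchmark was done externally).
x <- matrix(rnorm(2e5), ncol = 20)
elapsed <- unname(system.time(kmeans(x, centers = 5, nstart = 10))["elapsed"])
elapsed / nrow(x)  # average time (s) per sample, as reported above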
Code
cpu_matrix <- resources_df %>%
select(Dataset, Algorithm, CPU)
for (axis in c("All", "Samples", "Features")) {
for (imputing in c(TRUE, FALSE)) {
titlesuffix <- ifelse(axis=="All", "Cell", ifelse(axis=="Samples", "Sample", "Feature"))
figure <- cpu_matrix %>%
group_by(Dataset) %>%
mutate(CPU = ifelse(imputing & is.na(CPU), max(CPU, na.rm = TRUE), CPU)) %>%
ungroup() %>%
left_join(select(dataset_table, Dataset, Samples, Features = "Total features", All= Cells), by = "Dataset") %>%
mutate(CPU = CPU / !!as.name(axis)) %>%
group_by(Algorithm) %>%
summarise(
avg_cpu = mean(CPU, na.rm = TRUE),
) %>%
arrange(avg_cpu) %>%
mutate(
Algorithm = factor(Algorithm, levels = unique(Algorithm)),
RelativeSpeed = avg_cpu / min(avg_cpu)
) %>%
left_join(algorithm_families, by = "Algorithm") %>%
ggplot(aes(x = reorder(Algorithm, avg_cpu), y = avg_cpu, fill=Strategy)) +
geom_bar(stat = "identity") +
geom_text(aes(label = ifelse(RelativeSpeed >= 1000,
paste0(round(RelativeSpeed / 1000, 1), "K"),
ifelse(RelativeSpeed >= 10,
round(RelativeSpeed),
round(RelativeSpeed, 1)))), vjust = -0.3, size= 3) +
ylab(paste("(%) CPU usage per", tolower(titlesuffix))) + xlab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, hjust=1), plot.margin=grid::unit(c(1,5,-6,0), "mm"), legend.position= c(0.2, 0.7), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "cpu_barplot"
filename <- ifelse(imputing, paste0(filename, "_imputed"), filename)
filename <- paste0(filename, "_", tolower(axis))
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, bg = 'white')
if (!(imputing) & (axis == "Samples")) {
figure <- figure + theme(axis.text.x=element_text(angle=60, hjust=1, size=22), axis.text.y=element_text(size=22), axis.title.y=element_text(size=22), plot.margin=grid::unit(c(3,0,-7,0.5), "mm"))
figure$layers[[2]]$aes_params$size <- 7
figure$theme$legend.position <- "none"
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 13, height = 5.9)
}
if (!(imputing) & (axis == "All")) {
avg_cpu <- figure$data[c("Algorithm", "avg_cpu")]
}
}
}
Average CPU usage. Average (%) CPU usage across datasets, divided by the number of elements (samples, features, cells) in each dataset, with the relative usage with respect to the most efficient algorithm shown on top.
Code
peak_matrix <- resources_df %>%
select(Dataset, Algorithm, Peak)
for (axis in c("All", "Samples", "Features")) {
for (imputing in c(TRUE, FALSE)) {
titlesuffix <- ifelse(axis=="All", "Cell", ifelse(axis=="Samples", "Sample", "Feature"))
figure <- peak_matrix %>%
group_by(Dataset) %>%
mutate(Peak = ifelse(imputing & is.na(Peak), max(Peak, na.rm = TRUE), Peak)) %>%
ungroup() %>%
left_join(select(dataset_table, Dataset, Samples, Features = "Total features", All= Cells), by = "Dataset") %>%
mutate(Peak = Peak / !!as.name(axis)) %>%
group_by(Algorithm) %>%
summarise(
avg_peak = mean(Peak, na.rm = TRUE),
) %>%
arrange(avg_peak) %>%
mutate(
Algorithm = factor(Algorithm, levels = unique(Algorithm)),
RelativeSpeed = avg_peak / min(avg_peak)
) %>%
left_join(algorithm_families, by = "Algorithm") %>%
ggplot(aes(x = reorder(Algorithm, avg_peak), y = avg_peak, fill=Strategy)) +
geom_bar(stat = "identity") +
geom_text(aes(label = ifelse(RelativeSpeed >= 1000,
paste0(round(RelativeSpeed / 1000, 1), "K"),
ifelse(RelativeSpeed >= 10,
round(RelativeSpeed),
round(RelativeSpeed, 1)))), vjust = -0.3, size= 3) +
ylab(paste("Memory (Mb) usage per", tolower(titlesuffix))) + xlab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, hjust=1), plot.margin=grid::unit(c(1,5,-6,0), "mm"), legend.position= c(0.2, 0.7), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "peak_barplot"
filename <- ifelse(imputing, paste0(filename, "_imputed"), filename)
filename <- paste0(filename, "_", tolower(axis))
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, bg = 'white')
if (!(imputing) & (axis == "Samples")) {
figure <- figure + theme(axis.text.x=element_text(angle=60, hjust=1, size=22), axis.text.y=element_text(size=22), axis.title.y=element_text(size=22), plot.margin=grid::unit(c(3,0,-7,0.5), "mm"))
figure$layers[[2]]$aes_params$size <- 7
figure$theme$legend.position <- "none"
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 13, height = 5.9)
}
if (!(imputing) & (axis == "All")) {
avg_peak <- figure$data[c("Algorithm", "avg_peak")]
}
}
}
Average peak memory usage. Average peak memory usage (Mb) across datasets, divided by the number of elements (samples, features, cells) in each dataset, with the relative usage with respect to the most efficient algorithm shown on top.
Code
memory_matrix <- resources_df %>%
select(Dataset, Algorithm, Memory)
for (axis in c("All", "Samples", "Features")) {
for (imputing in c(TRUE, FALSE)) {
titlesuffix <- ifelse(axis=="All", "Cell", ifelse(axis=="Samples", "Sample", "Feature"))
figure <- memory_matrix %>%
group_by(Dataset) %>%
mutate(Memory = ifelse(imputing & is.na(Memory), max(Memory, na.rm = TRUE), Memory)) %>%
ungroup() %>%
left_join(select(dataset_table, Dataset, Samples, Features = "Total features", All= Cells), by = "Dataset") %>%
mutate(Memory = Memory / !!as.name(axis)) %>%
group_by(Algorithm) %>%
summarise(
avg_memory = mean(Memory, na.rm = TRUE),
) %>%
arrange(avg_memory) %>%
mutate(
Algorithm = factor(Algorithm, levels = unique(Algorithm)),
RelativeSpeed = avg_memory / min(avg_memory)
) %>%
left_join(algorithm_families, by = "Algorithm") %>%
ggplot(aes(x = reorder(Algorithm, avg_memory), y = avg_memory, fill=Strategy)) +
geom_bar(stat = "identity") +
geom_text(aes(label = ifelse(RelativeSpeed >= 1000,
paste0(round(RelativeSpeed / 1000, 1), "K"),
ifelse(RelativeSpeed >= 10,
round(RelativeSpeed),
round(RelativeSpeed, 1)))), vjust = -0.3, size= 3) +
ylab(paste("Memory (Mb) usage per", tolower(titlesuffix))) + xlab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, hjust=1), plot.margin=grid::unit(c(1,5,-6,0), "mm"), legend.position= c(0.2, 0.8), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "memory_barplot"
filename <- ifelse(imputing, paste0(filename, "_imputed"), filename)
filename <- paste0(filename, "_", tolower(axis))
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, bg = 'white')
if (!(imputing) & (axis == "Samples")) {
figure <- figure + theme(axis.text.x=element_text(angle=60, hjust=1, size=21), axis.text.y=element_text(size=21), axis.title.y=element_text(size=22), plot.margin=grid::unit(c(3,0,-7,0.5), "mm"))
figure$layers[[2]]$aes_params$size <- 7
figure$theme$legend.position <- "none"
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, width = 13, height = 5.9)
}
if (!(imputing) & (axis == "All")) {
avg_memory <- figure$data[c("Algorithm", "avg_memory")]
}
}
}
Average memory usage. Average memory usage (Mb) across datasets, divided by the number of elements (samples, features, cells) in each dataset, with the relative usage with respect to the most efficient algorithm shown on top.
4.1.4.2 Individual
Code
figure <- time_matrix %>%
right_join(avg_time, by="Algorithm") %>%
arrange(desc(avg_time)) %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm)),
Time = round(Time/60, 1)) %>%
ggplot(aes(x = Dataset, y = Algorithm, fill = Time, colour="")) +
geom_tile() +
geom_text(aes(label = Time), color = "black") +
scale_fill_gradient(low = "green", high = "red", limits = c(0, 60), oob = scales::squish, name = "Time (m)") +
scale_colour_manual(values=NA) +
guides(colour=guide_legend("Error", override.aes=list(colour="black"))) +
ylab("") + xlab("") +
scale_y_discrete(expand = c(0, 0)) +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, vjust= 0.5), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "time_matrix"
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, height = 6.5, width = 11)Computing time. Heatmap of the individual computing times (in minutes).
Code
figure <- cpu_matrix %>%
right_join(avg_cpu, by="Algorithm") %>%
arrange(desc(avg_cpu)) %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm)), CPU = round(CPU)) %>%
ggplot(aes(x = Dataset, y = Algorithm, fill = CPU, colour="")) +
geom_tile() +
geom_text(aes(label = CPU), color = "black") +
scale_fill_gradient(low = "green", high = "red", oob = scales::squish, name = "(%) CPU") +
scale_colour_manual(values=NA) +
guides(colour=guide_legend("Error", override.aes=list(colour="black"))) +
ylab("") + xlab("") +
scale_y_discrete(expand = c(0, 0)) +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, vjust= 0.5), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "cpu_matrix"
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, height = 6.5, width = 11)CPU usage. Heatmap of the individual % CPU usage.
Code
figure <- peak_matrix %>%
right_join(avg_peak, by="Algorithm") %>%
arrange(desc(avg_peak)) %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm)), Peak = round(Peak)) %>%
ggplot(aes(x = Dataset, y = Algorithm, fill = Peak, colour="")) +
geom_tile() +
geom_text(aes(label = round(Peak, 1)), color = "black") +
scale_fill_gradient(low = "green", high = "red", limits = c(0, 1000), oob = scales::squish, name = "Peak memory (Mb)") +
scale_colour_manual(values=NA) +
guides(colour=guide_legend("Error", override.aes=list(colour="black"))) +
ylab("") + xlab("") +
scale_y_discrete(expand = c(0, 0)) +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, vjust= 0.5), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "peak_matrix"
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, height = 6.5, width = 11)Peak memory usage. Heatmap of the individual peak memory usage (Mb).
Code
figure <- memory_matrix %>%
right_join(avg_memory, by="Algorithm") %>%
arrange(desc(avg_memory)) %>%
mutate(Algorithm = factor(Algorithm, levels = unique(Algorithm)), Memory = round(Memory/1000, 1)) %>%
ggplot(aes(x = Dataset, y = Algorithm, fill = Memory, colour="")) +
geom_tile() +
geom_text(aes(label = round(Memory, 1)), color = "black") +
scale_fill_gradient(low = "green", high = "red", oob = scales::squish, name = "Memory (Gb)") +
scale_colour_manual(values=NA) +
guides(colour=guide_legend("Error", override.aes=list(colour="black"))) +
ylab("") + xlab("") +
scale_y_discrete(expand = c(0, 0)) +
theme_cowplot() +
theme(axis.text.x=element_text(angle=60, vjust= 0.5), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
filename <- "memory_matrix"
ggsave(file.path(paper_figures_folder, paste0(filename, ".png")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".svg")), figure, height = 6.5, width = 11, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0(filename, ".pdf")), figure, height = 6.5, width = 11)Memory usage. Heatmap of the individual memory usage (Gb).
Code
table_show <- time_matrix %>%
right_join(cpu_matrix, by= c("Dataset", "Algorithm")) %>%
right_join(memory_matrix, by= c("Dataset", "Algorithm")) %>%
right_join(peak_matrix, by= c("Dataset", "Algorithm")) %>%
mutate("Time (s)" = round(Time, digits= 2), "CPU (%)" = round(CPU, digits= 2), "Memory (Gb)" = round(Memory, digits= 2), "Peak memory (Mb)" = round(Peak, digits= 2)) %>%
arrange(Dataset, Algorithm) %>%
select(-c(Time, Memory, Peak, CPU))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all computing-resource measurements.
4.1.5 Algorithm comparison
Code
filtered_table <- alg_comparisons %>%
mutate_if(is.numeric, round, digits= 2) %>%
mutate(alg1 = factor(alg1)) %>%
mutate(alg2 = factor(alg2, levels = rev(levels(alg1)))) %>%
arrange(alg1, alg2) %>%
distinct(Comparison, .keep_all = TRUE)
metrics_comp <- names(which(sapply(filtered_table, is.numeric)))
for (metric in metrics_comp) {
figure <- filtered_table %>%
ggplot(aes(x = alg1, y = alg2, fill = get(metric))) +
geom_tile() +
geom_text(aes(label = get(metric)), color = "black") +
scale_fill_gradient(low = "white", high = "red", name = metric, limits=c(0, 1)) +
xlab("") + ylab("") +
theme_cowplot() +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust= 0.5), legend.position = c(0.8,0.8), text = element_text(family = "Times")) +
coord_cartesian(clip = "off")
ggsave(file.path(paper_figures_folder, paste0("agreement_", metric, ".png")), figure, width = 12, height = 9, bg = 'white')
if (metric == "AMI") {
ggsave(file.path(paper_figures_folder, paste0("agreement_", metric, ".svg")), figure, width = 12, height = 9, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("agreement_", metric, ".pdf")), figure, width = 12, height = 9)
}
}
Pairwise algorithm similarity: Similarity between clustering solutions across algorithms.
Code
for (metric in metrics_comp) {
filtered_table <- alg_comparisons %>%
arrange(alg1, alg2) %>%
dcast(alg1 ~ alg2, value.var = metric) %>%
as.matrix()
row.names(filtered_table) <- filtered_table[,1]
filtered_table <- filtered_table[,-1]
diag(filtered_table) <- 1
cnames = colnames(filtered_table)
rnames = rownames(filtered_table)
filtered_table = as.data.frame(filtered_table)
rownames(filtered_table) = rnames
colnames(filtered_table) = cnames
set.seed(0)
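# t-SNE on the dissimilarity matrix (1 - pairwise similarity); is_distance=TRUE
# skips recomputing distances, and perplexity is capped at floor((n - 1) / 3),
# the largest value Rtsne accepts.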
vis <- Rtsne(1-apply(as.matrix(filtered_table), 2, as.numeric), is_distance=TRUE, perplexity = floor((nrow(filtered_table) - 1) / 3), normalize=FALSE)
figure <- data.frame(TSNE1 = vis$Y[,1], TSNE2 = vis$Y[,2], Algorithm = rownames(filtered_table)) %>%
left_join(algorithm_families, by = "Algorithm") %>%
ggplot(aes(x = TSNE1, y = TSNE2, label = Algorithm, colour=Strategy)) +
geom_point(size=0) +
geom_text_repel(size =6, seed=0) +
theme_cowplot() +
theme(legend.position="none", text = element_text(family = "Times"))
ggsave(file.path(paper_figures_folder, paste0("network_", metric, ".png")), figure, bg = 'white')
if (metric == "AMI") {
figure <- figure +
theme(legend.position = c(0.575,0.15), legend.box.background = element_rect(color = "black"), legend.box.margin = margin(t = 3, r = 3, l=2)) +
guides(colour = guide_legend(nrow = 3, override.aes=aes(size=4, label="")))
ggsave(file.path(paper_figures_folder, paste0("network_", metric, ".png")), figure, bg = 'white')
figure <- figure +
theme(legend.text=element_text(size=16), axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20))
ggsave(file.path(paper_figures_folder, paste0("network_", metric, ".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("network_", metric, ".pdf")), figure)
}
}
Algorithm similarity: Similarity between clustering solutions across algorithms, measured by pairwise comparisons and visualized with t-SNE.
4.2 Evaluation on incomplete data
4.2.1 Comparison with ground-truth labels
Code
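# Duplicate the colour palette for the "imp_" (mean-imputation) variants of
# algorithms that cannot handle missing data natively.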
idx <- !(names(algorithm_colors) %in% incomplete_algorithms)
alg_colors2 <- algorithm_colors[idx]
names(alg_colors2) <- paste0("imp_", names(alg_colors2))
algorithm_colors <- c(algorithm_colors, alg_colors2)
basic_com_results <- com_results %>%
filter(Amputation_Mechanism != "Resampling") %>%
mutate(Methodology = ifelse(Imputation, "Mean imputation", "Only complete"))
basic_com_results <- basic_com_results %>%
rbind(
basic_com_results %>%
filter(Amputation_Mechanism == "No") %>%
mutate(Methodology = "Mean imputation")
) %>%
rbind(
basic_com_results %>%
filter(Amputation_Mechanism == "No") %>%
mutate(Methodology = "Whole dataset")
)
results_incomplete <- incom_results %>%
mutate("Real_Alg" = Algorithm, "Methodology" = "Whole dataset") %>%
rbind(
basic_com_results %>%
select(-Strategy) %>%
mutate("Real_Alg" = Algorithm) %>%
mutate(Algorithm = ifelse(Methodology == "Mean imputation", paste0("imp_", Algorithm), ifelse(Methodology == "Only complete", paste0("com_", Algorithm), Algorithm)))
) %>%
mutate("dataset_missing_pattern" = paste0(Dataset, Missing_Percentage, Amputation_Mechanism)) %>%
arrange(Dataset, Missing_Percentage, Algorithm, Amputation_Mechanism, Imputation)
4.2.1.1 Aggregated
Code
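# Rank all algorithms within each dataset x missing-percentage x mechanism
# condition, then summarise per algorithm how often it ranks first or in the
# top three, together with its average rank across conditions.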
metric <- metrics_performance[1]
results_summary_input <- results_incomplete %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation)) %>%
filter(Methodology != "Only complete", Missing_Percentage > 0)
results_summary <- results_summary_input %>%
select(dataset_missing_pattern, Dataset, Algorithm, metric, Real_Alg) %>%
group_by(dataset_missing_pattern) %>%
mutate(rank = rank(-!!as.name(metric))) %>%
group_by(Algorithm, Real_Alg) %>%
summarise(
"# in Top 3 Models" = sum(rank <= 3),
"# Best" = sum(rank == 1),
"Average Rank" = mean(rank),
"Sd Rank" = sd(rank),
"# Datasets Best" = n_distinct(Dataset[rank == 1]),
"# Datasets Top 3" = n_distinct(Dataset[rank <= 3]),
) %>%
arrange(-`# in Top 3 Models`, -`# Best`, `Average Rank`) %>%
mutate_if(is.numeric, round, digits= 1) %>%
left_join(results_summary_input %>% filter((!(Real_Alg %in% incomplete_algorithms) & (Imputation == TRUE)) | ((Algorithm %in% incomplete_algorithms) & (Imputation == FALSE))) %>% count(Algorithm, name = "# Conditions"), by = "Algorithm") %>%
select(-c("Real_Alg"))
minimum_conditions <- 500
minimum_conditions <- filter(results_summary, `# Conditions` >= minimum_conditions)$Algorithm
reactable::reactable(results_summary, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Code
# print(xtable(results_summary[1:10,] %>% mutate("Average Rank" = paste0(round(`Average Rank`, 1), "\u00B1", round(`Sd Rank`, 1)), "# Best" = paste0(`# Best`, "(", `# Datasets Best`, ")"), "# in Top 3 Models" = paste0(`# in Top 3 Models`, "(", `# Datasets Top 3`, ")"), "Algorithm" = gsub("imp_", "(imp)", Algorithm)) %>% select(-c(`Sd Rank`, "# Datasets Best", "# Datasets Top 3"))), include.rownames=FALSE)
External validation summary. Summary of performance, showing the number of times each method was among the top three methods (with the number of distinct datasets indicated in parentheses), the number of times it achieved the best score (also with dataset count), the average rank (mean\(\pm\)standard deviation), and the total number of conditions in which it was evaluated.
Code
interesting_algs <- c("NEMO", "IMSR", "PIMVC")
for (metric in metrics_performance) {
baseline_values <- results_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_incomplete %>%
filter(Algorithm %in% minimum_conditions) %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation), Methodology != "Only complete") %>%
filter(Methodology != "Only complete") %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Whole dataset") & (Real_Alg %in% complete_algorithms))) %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Mean imputation") & (Real_Alg %in% incomplete_algorithms))) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Real_Alg, metric) %>%
left_join(baseline_values, by = "Dataset") %>%
mutate("{metric}" := pmax(0, pmin(!!as.name(metric) / Baseline, 1))) %>%
group_by(Algorithm, Missing_Percentage) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group=Algorithm, colour=Algorithm)) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(paste("Normalized", ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "adjusted mutual information", "adjusted rand index")))) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
gghighlight(Algorithm %in% interesting_algs, use_direct_label = TRUE, keep_scales = TRUE, unhighlighted_params = list(linewidth = .6))
ggsave(file.path(paper_figures_folder, paste0("inc_lines_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, bg = 'white')
if (metric == metrics_performance[1]) {
figure$layers[[4]]$aes_params$size <- 7
figure <- figure + ylab("Matthews correlation coefficient")
figure <- figure + theme(text = element_text(size=22), axis.text.x = element_text(size=18), axis.text.y = element_text(size=18))
ggsave(file.path(paper_figures_folder, paste0("inc_lines_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure + theme(element_text(size=18)), bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_lines_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure + theme(element_text(size=18)))
}
}
}
Aggregated performance. Average performance of each method across datasets for varying levels of incomplete samples and different missingness patterns. To ensure consistent aggregation across datasets, all scores were normalized to a common scale prior to averaging.
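The normalization referred to above can be summarised as a small helper (a sketch distilled from the pipeline code; `normalize_score` is a hypothetical name, not a function defined elsewhere in this notebook): each score is divided by the dataset's baseline, i.e. the best score obtained on the fully observed complete cases, and then clipped to the [0, 1] interval.
Code
# Sketch of the per-dataset normalization applied before averaging across
# datasets: divide by the baseline (best complete-case score) and clip to [0, 1].
normalize_score <- function(score, baseline) {
  pmax(0, pmin(score / baseline, 1))
}
normalize_score(c(0.42, 0.80, -0.10), baseline = 0.80)  # 0.525 1.000 0.000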
Code
for (metric in metrics_performance) {
baseline_values <- results_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, metric) %>%
left_join(baseline_values, by = "Dataset") %>%
mutate("{metric}" := pmax(0, pmin(!!as.name(metric) / Baseline, 1))) %>%
group_by(Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
arrange(Missing_Percentage, Algorithm, Methodology) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group = interaction(Real_Alg, Methodology), colour=Real_Alg, linetype = Methodology)) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_linetype_manual(values=c("solid", "dotted", "twodash")) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(paste("Normalized", ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "adjusted mutual information", "adjusted rand index")))) +
theme_cowplot() +
# scale_color_manual(values = algorithm_colors) +
theme(legend.position = c(0.05, 0.13), legend.box = "horizontal", text = element_text(family = "Times")) +
guides(colour= guide_legend(title="Algorithm"))
ggsave(file.path(paper_figures_folder, paste0("inc_basic_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_performance[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_basic_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_basic_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure)
}
}
}
Performance vs common simple approaches. Average performance across datasets of the top methods able to handle incomplete multi-modal data, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns. To ensure consistent aggregation across datasets, all scores were normalized to a common scale prior to averaging.
4.2.1.2 Individual
Code
for (metric in metrics_performance) {
baseline_values <- results_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_incomplete %>%
filter(Algorithm %in% minimum_conditions) %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation), Methodology != "Only complete") %>%
filter(Methodology != "Only complete") %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Whole dataset") & (Real_Alg %in% complete_algorithms))) %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Mean imputation") & (Real_Alg %in% incomplete_algorithms))) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Real_Alg, metric) %>%
group_by(Dataset, Algorithm, Missing_Percentage) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group=Algorithm, colour=Algorithm)) +
facet_wrap(~ Dataset) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index"))) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
gghighlight(Algorithm %in% interesting_algs, use_direct_label = TRUE, keep_scales = TRUE, unhighlighted_params = list(linewidth = .6), calculate_per_facet = TRUE)
ggsave(file.path(paper_figures_folder, paste0("inc_indlines_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, width=12, height = 10, bg = 'white')
if ((metric == metrics_performance[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_indlines_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, width=12, height = 10, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_indlines_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure, width=12, height = 10)
}
}
}
Individual performance. Performance of each method on each dataset for varying levels of incomplete samples and different missingness patterns.
Code
for (metric in metrics_performance) {
baseline_values <- results_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, "mean" = metric, "sd"= paste0(gsub(extraw_metrics_performance, "", metric), "_std")) %>%
group_by(Dataset, Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("sd" = mean(sd), "mean" := mean(mean)) %>%
arrange(Dataset, Missing_Percentage, Algorithm, Methodology) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=mean, group = interaction(Real_Alg, Methodology), colour=Real_Alg, linetype = Methodology)) +
facet_wrap(~ Dataset) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_linetype_manual(values=c("solid", "dotted", "twodash")) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index"))) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
guides(colour= guide_legend(title="Algorithm"))
ggsave(file.path(paper_figures_folder, paste0("inc_indbasic_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, width=12, height = 10, bg = 'white')
if ((metric == metrics_performance[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_indbasic_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, width=12, height = 10, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_indbasic_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure, width=12, height = 10)
}
}
}
Individual performance vs common simple approaches. Performance of the top methods able to handle incomplete multi-modal data, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns.
Code
interesting_algs <- "IMSR"
interesting_datasets <- "BBCSport"
for (metric in metrics_performance) {
for (ampt_mech in amputation_mechanisms) {
figure <- results_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, "mean" = metric, "sd"= paste0(gsub(extraw_metrics_performance, "", metric), "_std")) %>%
filter((Dataset == interesting_datasets) & (Real_Alg == interesting_algs)) %>%
group_by(Dataset, Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("sd" = mean(sd), "mean" := mean(mean)) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=mean, group = Methodology, colour=Methodology)) +
geom_line() +
ylim(0, 1) +
geom_ribbon(aes(ymax = mean + sd, ymin= mean - sd, fill= Methodology, x=Missing_Percentage), alpha = 0.3) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "MCC_mean", "Matthews correlation coefficient", ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index"))) +
theme_cowplot(font_family = "Times") +
theme(legend.position = c(0.1, 0.2), legend.text = element_text(size=18), legend.title = element_blank())
ggsave(file.path(paper_figures_folder, paste0("conf_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_performance[1]) & (ampt_mech == "PM")) {
figure <- figure + theme(text = element_text(size=22), axis.text.x = element_text(size=18), axis.text.y = element_text(size=18))
ggsave(file.path(paper_figures_folder, paste0("conf_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("conf_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure)
}
}
}
Confidence interval. Confidence interval (mean\(\pm\)standard deviation) for IMSR on the BBCSport dataset, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns.
Code
table_show <- results_incomplete %>%
filter(Missing_Percentage > 0) %>%
group_by(dataset_missing_pattern) %>%
mutate(across(all_of(metrics_performance), ~ rank(-.), .names ="{.col}_rank")) %>%
ungroup() %>%
rename_with(~ str_remove(., extraw_metrics_performance), ends_with("_rank")) %>%
{
metric_cols <- grep(paste0("^(", paste(gsub(extraw_metrics_performance, "", metrics_performance), collapse = "|"), ")_(mean|std|rank)$"),
colnames(.), value = TRUE)
mean_cols <- grep("_mean$", metric_cols, value = TRUE)
std_cols <- grep("_std$", metric_cols, value = TRUE)
rank_cols <- grep("_rank$", metric_cols, value = TRUE)
ordered_cols <- c(rbind(mean_cols, std_cols, rank_cols))
ordered_cols <- ordered_cols[!is.na(ordered_cols)]
select(., Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, all_of(ordered_cols))
} %>%
arrange(Dataset, Missing_Percentage, Algorithm, Amputation_Mechanism) %>%
filter(Missing_Percentage > 0) %>%
mutate(across(where(is.numeric), \(x) round(x, digits = 2)))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all results, showing mean, standard deviation and rank for each metric.
4.2.2 Stability
Code
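# Assemble the stability results analogously to the external-validation tables:
# complete-data algorithms appear as "imp_" (mean imputation) or "com_"
# (complete cases only) variants alongside the native incomplete-data methods.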
basic_com_results <- uns_metrics_by_com_alg %>%
filter(Amputation_Mechanism != "Resampling") %>%
mutate(Methodology = ifelse(Imputation, "Mean imputation", "Only complete"))
basic_com_results <- basic_com_results %>%
rbind(
basic_com_results %>%
filter(Amputation_Mechanism == "No") %>%
mutate(Methodology = "Mean imputation")
) %>%
rbind(
basic_com_results %>%
filter(Amputation_Mechanism == "No") %>%
mutate(Methodology = "Whole dataset")
)
stab_incomplete <- uns_metrics_by_incom_alg %>%
mutate("Real_Alg" = Algorithm, "Methodology" = "Whole dataset") %>%
rbind(
basic_com_results %>%
select(-Strategy) %>%
mutate("Real_Alg" = Algorithm) %>%
mutate(Algorithm = ifelse(Methodology == "Mean imputation", paste0("imp_", Algorithm), ifelse(Methodology == "Only complete", paste0("com_", Algorithm), Algorithm)))
) %>%
mutate("dataset_missing_pattern" = paste0(Dataset, Missing_Percentage, Amputation_Mechanism)) %>%
arrange(Dataset, Missing_Percentage, Algorithm, Amputation_Mechanism, Imputation)
4.2.2.1 Aggregated
Code
metric <- metrics_stab[1]
results_summary_input <- stab_incomplete %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation)) %>%
filter(Methodology != "Only complete", Missing_Percentage > 0)
results_summary <- results_summary_input %>%
select(dataset_missing_pattern, Dataset, Algorithm, metric, Real_Alg) %>%
group_by(dataset_missing_pattern) %>%
mutate(rank = rank(-!!as.name(metric))) %>%
group_by(Algorithm, Real_Alg) %>%
summarise(
"# in Top 3 Models" = sum(rank <= 3),
"# Best" = sum(rank == 1),
"Average Rank" = mean(rank),
"Sd Rank" = sd(rank),
"# Datasets Best" = n_distinct(Dataset[rank == 1]),
"# Datasets Top 3" = n_distinct(Dataset[rank <= 3]),
) %>%
arrange(-`# in Top 3 Models`, -`# Best`, `Average Rank`) %>%
mutate_if(is.numeric, round, digits= 1) %>%
left_join(results_summary_input %>% filter((!(Real_Alg %in% incomplete_algorithms) & (Imputation == TRUE)) | ((Algorithm %in% incomplete_algorithms) & (Imputation == FALSE))) %>% count(Algorithm, name = "# Conditions"), by = "Algorithm") %>%
select(-c("Real_Alg"))
minimum_conditions <- 500
minimum_conditions <- filter(results_summary, `# Conditions` >= minimum_conditions)$Algorithm
reactable::reactable(results_summary, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Code
# print(xtable(results_summary[1:10,] %>% mutate("Average Rank" = paste0(round(`Average Rank`, 1), "\u00B1", round(`Sd Rank`, 1)), "# Best" = paste0(`# Best`, "(", `# Datasets Best`, ")"), "# in Top 3 Models" = paste0(`# in Top 3 Models`, "(", `# Datasets Top 3`, ")"), "Algorithm" = gsub("imp_", "(imp)", Algorithm)) %>% select(-c(`Sd Rank`, "# Datasets Best", "# Datasets Top 3"))), include.rownames=FALSE)
Cluster stability summary. Summary of performance, showing the number of times each method was among the top three methods (with the number of distinct datasets indicated in parentheses), the number of times it achieved the best score (also with dataset count), the average rank (mean\(\pm\)standard deviation), and the total number of conditions in which it was evaluated.
Code
interesting_algs <- c("NEMO", "IMSR", "PIMVC")
for (metric in metrics_stab) {
baseline_values <- stab_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- stab_incomplete %>%
filter(Algorithm %in% minimum_conditions) %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation), Methodology != "Only complete") %>%
filter(Methodology != "Only complete") %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Whole dataset") & (Real_Alg %in% complete_algorithms))) %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Mean imputation") & (Real_Alg %in% incomplete_algorithms))) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Real_Alg, metric) %>%
left_join(baseline_values, by = "Dataset") %>%
mutate("{metric}" := pmax(0, pmin(!!as.name(metric) / Baseline, 1))) %>%
group_by(Algorithm, Missing_Percentage) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group=Algorithm, colour=Algorithm)) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(paste("Normalized", ifelse(metric == "AMI_Stab", "adjusted mutual information", "adjusted rand index"))) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
gghighlight(Algorithm %in% interesting_algs, use_direct_label = TRUE, keep_scales = TRUE, unhighlighted_params = list(linewidth = .6))
ggsave(file.path(paper_figures_folder, paste0("inc_lines_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_stab[1]) & (ampt_mech == "All")) {
figure$layers[[4]]$aes_params$size <- 7
figure <- figure + ylab("Adjusted mutual information")
figure <- figure + theme(text = element_text(size=22), axis.text.x = element_text(size=18), axis.text.y = element_text(size=18))
ggsave(file.path(paper_figures_folder, paste0("inc_lines_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_lines_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".pdf")), figure)
}
}
}
Aggregated performance. Average performance of each method across datasets for varying levels of incomplete samples and different missingness patterns. To ensure consistent aggregation across datasets, all scores were normalized to a common scale prior to averaging.
Code
for (metric in metrics_stab) {
baseline_values <- stab_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- stab_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, metric) %>%
left_join(baseline_values, by = "Dataset") %>%
mutate("{metric}" := pmax(0, pmin(!!as.name(metric) / Baseline, 1))) %>%
group_by(Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
arrange(Missing_Percentage, Algorithm, Methodology) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group = interaction(Real_Alg, Methodology), colour=Real_Alg, linetype = Methodology)) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_linetype_manual(values=c("solid", "dotted", "twodash")) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(paste("Normalized", ifelse(metric == "AMI_Stab", "adjusted mutual information", "adjusted rand index"))) +
theme_cowplot() +
# scale_color_manual(values = algorithm_colors) +
theme(legend.position = c(0.05, 0.13), legend.box = "horizontal", text = element_text(family = "Times")) +
guides(colour= guide_legend(title="Algorithm"))
ggsave(file.path(paper_figures_folder, paste0("inc_basic_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_stab[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_basic_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_basic_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".pdf")), figure)
}
}
}
Comparison with common simple approaches. Average performance across datasets of the top methods able to handle incomplete multi-modal data, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns. To ensure consistent aggregation across datasets, all scores were normalized to a common scale prior to averaging.
4.2.2.2 Individual
Code
for (metric in metrics_stab) {
baseline_values <- stab_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- stab_incomplete %>%
filter(Algorithm %in% minimum_conditions) %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation), Methodology != "Only complete") %>%
filter(Methodology != "Only complete") %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Whole dataset") & (Real_Alg %in% complete_algorithms))) %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Mean imputation") & (Real_Alg %in% incomplete_algorithms))) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Real_Alg, metric) %>%
group_by(Dataset, Algorithm, Missing_Percentage) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group=Algorithm, colour=Algorithm)) +
facet_wrap(~ Dataset) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "AMI_Stab", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
gghighlight(Algorithm %in% interesting_algs, use_direct_label = TRUE, keep_scales = TRUE, unhighlighted_params = list(linewidth = .6), calculate_per_facet = TRUE)
ggsave(file.path(paper_figures_folder, paste0("inc_lines_indstab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".png")), figure, width=12, height = 10, bg = 'white')
if ((metric == metrics_stab[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_lines_indstab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".svg")), figure, width=12, height = 10, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_lines_indstab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".pdf")), figure, width=12, height = 10)
}
}
}
Individual performance. Performance of each method on each dataset for varying levels of incomplete samples and different missingness patterns.
Code
for (metric in metrics_stab) {
baseline_values <- stab_incomplete %>%
filter(Amputation_Mechanism == "No", Methodology == "Only complete") %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- stab_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, "mean" = metric) %>%
group_by(Dataset, Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("mean" := mean(mean)) %>%
arrange(Dataset, Missing_Percentage, Algorithm, Methodology) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=mean, group = interaction(Real_Alg, Methodology), colour=Real_Alg, linetype = Methodology)) +
facet_wrap(~ Dataset) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_linetype_manual(values=c("solid", "dotted", "twodash")) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "AMI_Stab", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
guides(colour= guide_legend(title="Algorithm"))
ggsave(file.path(paper_figures_folder, paste0("inc_indbasic_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".png")), figure, width=12, height = 10, bg = 'white')
if ((metric == metrics_stab[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_indbasic_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".svg")), figure, width=12, height = 10, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_indbasic_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".pdf")), figure, width=12, height = 10)
}
}
}
Comparison with common simple approaches. Performance of the top methods able to handle incomplete multi-modal data, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns.
Code
interesting_algs <- "IMSR"
interesting_datasets <- "BBCSport"
for (metric in metrics_stab) {
for (ampt_mech in amputation_mechanisms) {
figure <- stab_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
filter((Dataset == interesting_datasets) & (Real_Alg == interesting_algs)) %>%
group_by(Dataset, Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("mean" := mean(!!sym(metric))) %>%
filter(Missing_Percentage > 0) %>%
ggplot(aes(x=Missing_Percentage, y=mean, group = Methodology, colour=Methodology)) +
geom_line() +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "AMI_Stab", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot(font_family = "Times") +
theme(legend.position = c(0.1, 0.2), legend.text = element_text(size=18), legend.title = element_blank())
ggsave(file.path(paper_figures_folder, paste0("conf_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_stab[1]) & (ampt_mech == "PM")) {
figure <- figure + theme(text = element_text(size=22), axis.text.x = element_text(size=18), axis.text.y = element_text(size=18))
ggsave(file.path(paper_figures_folder, paste0("conf_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("conf_stab_", ampt_mech, "_", gsub(extraw_metrics_stab, "", metric),".pdf")), figure)
}
}
}
Stability comparison. Mean stability for IMSR on the BBCSport dataset, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns.
Code
table_show <- stab_incomplete %>%
filter(Missing_Percentage > 0) %>%
group_by(dataset_missing_pattern) %>%
mutate(across(all_of(metrics_stab), ~ rank(-.), .names ="{.col}_rank")) %>%
ungroup() %>%
rename_with(~ str_remove(., extraw_metrics_stab), ends_with("_rank")) %>%
{
metric_cols <- grep(paste0("^(", paste(gsub(extraw_metrics_stab, "", metrics_stab), collapse = "|"), ")_(Stab|rank)$"),
colnames(.), value = TRUE)
mean_cols <- grep("_Stab$", metric_cols, value = TRUE)
rank_cols <- grep("_rank$", metric_cols, value = TRUE)
ordered_cols <- c(rbind(mean_cols, rank_cols))
ordered_cols <- ordered_cols[!is.na(ordered_cols)]
select(., Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, all_of(ordered_cols))
} %>%
arrange(Dataset, Missing_Percentage, Algorithm, Amputation_Mechanism) %>%
mutate(across(where(is.numeric), \(x) round(x, digits = 2)))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all results, showing the stability score and rank for each metric.
4.2.3 Robustness
Code
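# Assemble the robustness results; as above, complete-data algorithms are
# prefixed with "imp_" or "com_" according to how missing data were handled.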
idx <- !(names(algorithm_colors) %in% incomplete_algorithms)
alg_colors2 <- algorithm_colors[idx]
names(alg_colors2) <- paste0("imp_", names(alg_colors2))
algorithm_colors <- c(algorithm_colors, alg_colors2)
basic_com_results <- com_rbresults %>%
mutate(Methodology = ifelse(Imputation, "Mean imputation", "Only complete"))
results_rb_incomplete <- incom_rbresults %>%
mutate("Real_Alg" = Algorithm, "Methodology" = "Whole dataset") %>%
rbind(
basic_com_results %>%
select(-Strategy) %>%
mutate("Real_Alg" = Algorithm) %>%
mutate(Algorithm = ifelse(Methodology == "Mean imputation", paste0("imp_", Algorithm), ifelse(Methodology == "Only complete", paste0("com_", Algorithm), Algorithm)))
) %>%
mutate("dataset_missing_pattern" = paste0(Dataset, Missing_Percentage, Amputation_Mechanism)) %>%
arrange(Dataset, Missing_Percentage, Algorithm, Amputation_Mechanism, Imputation)
4.2.3.1 Aggregated
Code
metric <- metrics_rob[1]
results_summary_input <- results_rb_incomplete %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation)) %>%
filter(Methodology != "Only complete")
results_summary <- results_summary_input %>%
select(dataset_missing_pattern, Dataset, Algorithm, all_of(metric), Real_Alg) %>%
group_by(dataset_missing_pattern) %>%
mutate(rank = rank(-!!as.name(metric))) %>%
group_by(Algorithm, Real_Alg) %>%
summarise(
"# in Top 3 Models" = sum(rank <= 3),
"# Best" = sum(rank == 1),
"Average Rank" = mean(rank),
"Sd Rank" = sd(rank),
"# Datasets Best" = n_distinct(Dataset[rank == 1]),
"# Datasets Top 3" = n_distinct(Dataset[rank <= 3]),
) %>%
arrange(-`# in Top 3 Models`, -`# Best`, `Average Rank`) %>%
mutate_if(is.numeric, round, digits= 1) %>%
left_join(results_summary_input %>% filter((!(Real_Alg %in% incomplete_algorithms) & (Imputation == TRUE)) | ((Algorithm %in% incomplete_algorithms) & (Imputation == FALSE))) %>% count(Algorithm, name = "# Conditions"), by = "Algorithm") %>%
select(-c("Real_Alg"))
minimum_conditions <- 500
minimum_conditions <- filter(results_summary, `# Conditions` >= minimum_conditions)$Algorithm
reactable::reactable(results_summary, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Code
# print(xtable(results_summary[1:10,] %>% mutate("Average Rank" = paste0(round(`Average Rank`, 1), "\u00B1", round(`Sd Rank`, 1)), "# Best" = paste0(`# Best`, "(", `# Datasets Best`, ")"), "# in Top 3 Models" = paste0(`# in Top 3 Models`, "(", `# Datasets Top 3`, ")"), "Algorithm" = gsub("imp_", "(imp)", Algorithm)) %>% select(-c(`Sd Rank`, "# Datasets Best", "# Datasets Top 3"))), include.rownames=FALSE)
Robustness summary. Summary of performance, showing the number of times each method was among the top three methods (with the number of distinct datasets indicated in parentheses), the number of times it achieved the best score (also with dataset count), the average rank (mean\(\pm\)standard deviation), and the total number of conditions in which it was evaluated.
Code
interesting_algs <- c("NEMO", "IMSR", "PIMVC")
for (metric in metrics_rob) {
baseline_values <- results_rb_incomplete %>%
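# Robustness baseline: the best score per dataset at the lowest simulated
# missingness level (10%), rather than the fully observed data used above.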
filter(Missing_Percentage == 10) %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_rb_incomplete %>%
filter(Algorithm %in% minimum_conditions) %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation), Methodology != "Only complete") %>%
filter(Methodology != "Only complete") %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Whole dataset") & (Real_Alg %in% complete_algorithms))) %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Mean imputation") & (Real_Alg %in% incomplete_algorithms))) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Real_Alg, metric) %>%
left_join(baseline_values, by = "Dataset") %>%
mutate("{metric}" := pmax(0, pmin(!!as.name(metric) / Baseline, 1))) %>%
group_by(Algorithm, Missing_Percentage) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group=Algorithm, colour=Algorithm)) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(paste("Normalized", ifelse(metric == "AMI_mean", "adjusted mutual information", "adjusted rand index"))) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
gghighlight(Algorithm %in% interesting_algs, use_direct_label = TRUE, keep_scales = TRUE, unhighlighted_params = list(linewidth = .6))
ggsave(file.path(paper_figures_folder, paste0("inc_lines_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_rob[1]) & (ampt_mech == "All")) {
figure$layers[[4]]$aes_params$size <- 7
figure <- figure + ylab("Adjusted mutual information")
figure <- figure + theme(text = element_text(size=22), axis.text.x = element_text(size=18), axis.text.y = element_text(size=18))
ggsave(file.path(paper_figures_folder, paste0("inc_lines_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_lines_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure)
}
}
}
Aggregated performance. Average performance of each method across datasets for varying levels of incomplete samples and different missingness patterns. To ensure consistent aggregation across datasets, all scores were normalized to a common scale prior to averaging.
Code
for (metric in metrics_rob) {
baseline_values <- results_rb_incomplete %>%
filter(Missing_Percentage == 10) %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_rb_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, metric) %>%
left_join(baseline_values, by = "Dataset") %>%
mutate("{metric}" := pmax(0, pmin(!!as.name(metric) / Baseline, 1))) %>%
group_by(Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
arrange(Missing_Percentage, Algorithm, Methodology) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group = interaction(Real_Alg, Methodology), colour=Real_Alg, linetype = Methodology)) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_linetype_manual(values=c("solid", "dotted", "twodash")) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(paste("Normalized", ifelse(metric == "AMI_mean", "adjusted mutual information", "adjusted rand index"))) +
theme_cowplot() +
# scale_color_manual(values = algorithm_colors) +
theme(legend.position = c(0.05, 0.13), legend.box = "horizontal", text = element_text(family = "Times")) +
guides(colour= guide_legend(title="Algorithm"))
ggsave(file.path(paper_figures_folder, paste0("inc_basic_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_rob[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_basic_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_basic_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure)
}
}
}
Comparison with common simple approaches. Average performance across datasets of the top methods able to handle incomplete multi-modal data, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns. To ensure consistent aggregation across datasets, all scores were normalized to a common scale prior to averaging.
4.2.3.2 Individual
Code
for (metric in metrics_rob) {
baseline_values <- results_rb_incomplete %>%
filter(Missing_Percentage == 10) %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_rb_incomplete %>%
filter(Algorithm %in% minimum_conditions) %>%
filter(!((Real_Alg %in% incomplete_algorithms) & Imputation), Methodology != "Only complete") %>%
filter(Methodology != "Only complete") %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Whole dataset") & (Real_Alg %in% complete_algorithms))) %>%
filter(!((Missing_Percentage == 0) & (Methodology == "Mean imputation") & (Real_Alg %in% incomplete_algorithms))) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Real_Alg, metric) %>%
group_by(Dataset, Algorithm, Missing_Percentage) %>%
summarise("{metric}" := mean(!!as.name(metric))) %>%
ggplot(aes(x=Missing_Percentage, y=get(metric), group=Algorithm, colour=Algorithm)) +
facet_wrap(~ Dataset) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
gghighlight(Algorithm %in% interesting_algs, use_direct_label = TRUE, keep_scales = TRUE, unhighlighted_params = list(linewidth = .6), calculate_per_facet = TRUE)
ggsave(file.path(paper_figures_folder, paste0("inc_lines_indrb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, width=12, height = 10, bg = 'white')
if ((metric == metrics_rob[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_lines_indrb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, width=12, height = 10, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_lines_indrb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure, width=12, height = 10)
}
}
}
Individual performance. Performance of each method on each dataset for varying levels of incomplete samples and different missingness patterns.
Code
for (metric in metrics_rob) {
baseline_values <- results_rb_incomplete %>%
filter(Missing_Percentage == 10) %>%
group_by(Dataset) %>%
summarise("{metric}" := max(!!as.name(metric))) %>%
select(Dataset, Baseline = metric)
for (ampt_mech in amputation_mechanisms) {
figure <- results_rb_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, "mean" = metric, "sd"= paste0(gsub(extraw_metrics_performance, "", metric), "_std")) %>%
group_by(Dataset, Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("sd" = mean(sd), "mean" := mean(mean)) %>%
arrange(Dataset, Missing_Percentage, Algorithm, Methodology) %>%
ggplot(aes(x=Missing_Percentage, y=mean, group = interaction(Real_Alg, Methodology), colour=Real_Alg, linetype = Methodology)) +
facet_wrap(~ Dataset) +
geom_line(linewidth = 1.2) +
ylim(0, 1) +
scale_linetype_manual(values=c("solid", "dotted", "twodash")) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot(font_family = "Times") +
# scale_color_manual(values = algorithm_colors) +
guides(colour= guide_legend(title="Algorithm"))
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, width=12, height = 10, bg = 'white')
if ((metric == metrics_rob[1]) & (ampt_mech == "All")) {
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, width=12, height = 10, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure, width=12, height = 10)
}
if ((metric == metrics_rob[1]) & (ampt_mech == "PM")) {
figure$data <- figure$data[(figure$data$Real_Alg %in% c("NEMO", "PIMVC")) & (figure$data$Methodology %in% c("Whole dataset", "Only complete")) & (figure$data$Missing_Percentage <= 80),]
figure2 <- figure +
scale_linetype_manual(values=c("solid", "twodash")) +
theme(strip.text.x = element_blank())
figure2$data <- figure2$data[figure$data$Dataset == "BUAA",]
figure2 <- figure2 +
theme(legend.position = c(0.7, 0.3), legend.text = element_text(size=12), legend.title = element_text(size=12))
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_exp_", figure2$data$Dataset[1], "_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure2, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_exp_", figure2$data$Dataset[1], "_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure2)
figure2 <- figure +
scale_linetype_manual(values=c("solid", "twodash")) +
theme(strip.text.x = element_blank())
figure2$data <- figure2$data[figure$data$Dataset == "TCGA",]
figure2 <- figure2 +
theme(legend.position = "none")
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_exp_", figure2$data$Dataset[1], "_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure2, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("inc_basic_indrb_exp_", figure2$data$Dataset[1], "_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure2)
}
}
}
Comparison with common simple approaches. Performance of the top methods able to handle incomplete multi-modal data, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns.
Code
interesting_algs <- "IMSR"
interesting_datasets <- "BBCSport"
for (metric in metrics_rob) {
for (ampt_mech in amputation_mechanisms) {
figure <- results_rb_incomplete %>%
mutate(Methodology = factor(Methodology, levels=c("Whole dataset", "Mean imputation", "Only complete"))) %>%
filter(Real_Alg %in% interesting_algs) %>%
filter(Amputation_Mechanism == "No" | ampt_mech == "All" | Amputation_Mechanism == ampt_mech) %>%
select(Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, Methodology, Real_Alg, "mean" = metric, "sd"= paste0(gsub(extraw_metrics_performance, "", metric), "_std")) %>%
filter((Dataset == interesting_datasets) & (Real_Alg == interesting_algs)) %>%
group_by(Dataset, Algorithm, Missing_Percentage, Methodology, Real_Alg) %>%
summarise("sd" = mean(sd), "mean" := mean(mean)) %>%
ggplot(aes(x=Missing_Percentage, y=mean, group = Methodology, colour=Methodology)) +
geom_line() +
ylim(0, 1) +
geom_ribbon(aes(ymax = mean + sd, ymin= mean - sd, fill= Methodology, x=Missing_Percentage), alpha = 0.3) +
scale_x_continuous(breaks=c(10, 25, 50, 75, 90)) +
xlab("Incomplete samples (%)") + ylab(ifelse(metric == "AMI_mean", "Adjusted mutual information", "Adjusted rand index")) +
theme_cowplot(font_family = "Times") +
theme(legend.position = c(0.1, 0.2), legend.text = element_text(size=18), legend.title = element_blank())
ggsave(file.path(paper_figures_folder, paste0("conf_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".png")), figure, bg = 'white')
if ((metric == metrics_rob[1]) & (ampt_mech == "PM")) {
figure <- figure + theme(text = element_text(size=22), axis.text.x = element_text(size=18), axis.text.y = element_text(size=18))
ggsave(file.path(paper_figures_folder, paste0("conf_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".svg")), figure, bg = 'white')
ggsave(file.path(paper_figures_folder, paste0("conf_rb_", ampt_mech, "_", gsub(extraw_metrics_performance, "", metric),".pdf")), figure)
}
}
}
Confidence interval. Confidence interval (mean\(\pm\)standard deviation) for IMSR on the BBCSport dataset, compared with mean imputation and with excluding incomplete samples, for varying levels of incomplete samples and different missingness patterns.
Code
table_show <- results_rb_incomplete %>%
group_by(dataset_missing_pattern) %>%
mutate(across(all_of(metrics_rob), ~ rank(-.), .names ="{.col}_rank")) %>%
ungroup() %>%
rename_with(~ str_remove(., extraw_metrics_performance), ends_with("_rank")) %>%
{
metric_cols <- grep(paste0("^(", paste(gsub(extraw_metrics_performance, "", metrics_rob), collapse = "|"), ")_(mean|std|rank)$"),
colnames(.), value = TRUE)
mean_cols <- grep("_mean$", metric_cols, value = TRUE)
std_cols <- grep("_std$", metric_cols, value = TRUE)
rank_cols <- grep("_rank$", metric_cols, value = TRUE)
ordered_cols <- c(rbind(mean_cols, std_cols, rank_cols))
ordered_cols <- ordered_cols[!is.na(ordered_cols)]
select(., Dataset, Algorithm, Missing_Percentage, Amputation_Mechanism, all_of(ordered_cols))
} %>%
arrange(Dataset, Missing_Percentage, Algorithm, Amputation_Mechanism) %>%
mutate(across(where(is.numeric), \(x) round(x, digits = 2)))
reactable::reactable(table_show, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
All results. Table with all results, showing mean, standard deviation and rank for each metric.
5 Experiments
We first used fully observed data, performing 25 independent runs per dataset, each on a random 80% subsample of the samples, with different random seeds (for both the sample selection and the algorithm initialization). For the incomplete-data scenario, we simulated modality-wise missing data using four different missingness patterns: missing completely at random (MCAR, modalities missing at random), missing not at random (MNAR, specific samples shared a similar missing-modality pattern), partial missing (PM, some modalities were fully observed while others were partially missing), and mutually exclusive missing (MEM, incomplete samples retained only one modality). Each algorithm was tested on each dataset across the different missingness patterns and missingness levels ranging from 10% to 90% in 10% intervals, with 25 independent runs per condition using different random seeds. Methods not capable of handling missing data directly were applied after mean imputation.
Experiments. Experimental workflow designed to address the research questions explored in this project, in which we conducted a systematic and empirical benchmarking study on both fully observed and modality-wise missing multi-modal datasets. Over 1M clustering models were generated and evaluated from multiple perspectives (ground-truth label matching, cluster stability, robustness to missing data, cluster structures and computational efficiency) using diverse robust metrics.
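As an illustration of the modality-wise amputation, the sketch below simulates the MCAR pattern under simplifying assumptions (this is not the benchmark's actual amputation code; `views` is assumed to be a list of sample-by-feature matrices with aligned rows, and `simulate_mcar` is a hypothetical helper): for a randomly chosen fraction of samples, one randomly chosen modality is blanked out entirely.
Code
# Hypothetical sketch: modality-wise MCAR amputation. For missing_pct % of the
# samples, one randomly selected modality is set entirely to NA; the other
# patterns (MNAR, PM, MEM) would instead constrain which modalities can be
# blanked for which samples.
simulate_mcar <- function(views, missing_pct, seed = 1) {
  set.seed(seed)
  n <- nrow(views[[1]])
  incomplete <- sample(n, size = round(n * missing_pct / 100))
  for (i in incomplete) {
    v <- sample(length(views), 1)  # modality to blank for this sample
    views[[v]][i, ] <- NA
  }
  views
}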
5.1 Datasets
Code
reactable::reactable(dataset_table, pagination = TRUE, sortable = TRUE, defaultPageSize = 10)
Code
# print(xtable(dataset_table), include.rownames=FALSE)
6 Abbreviations
Matthews correlation coefficient: MCC
Adjusted mutual information: AMI
Adjusted Rand index: ARI
Variance ratio criterion: VRC
Density-based clustering validation index: DBCVI
Duda-Hart index: DHI
Sum of squared error index: SSEI
R-squared index: RSI
Davies-Bouldin index: DB
Ball-Hall index: BHI
Missing completely at random: MCAR
Missing not at random: MNAR
Partial missing: PM
Mutually exclusive missing: MEM
Angle-based Joint and Individual Variation Explained: AJIVE
Co-Regularized Multiview Spectral Clustering: MVCRSC
Cluster-Of-Clusters Analysis: COCA
Data Fusion by Matrix Factorization: DFMF
Doubly Aligned Incomplete Multi-view Clustering: DAIMC
Efficient and Effective Incomplete Multi-view Clustering: EEIMVC
Group Principal Component Analysis: GPCA
Incomplete Multiview Spectral Clustering With Adaptive Graph Learning: IMSCAGL
Integrative Non-negative Matrix Factorization: iNMF
Self-representation Subspace Clustering for Incomplete Multi-view Data: IMSR
Joint Non-negative Matrix Factorization Algorithms: jNMF
Late Fusion Incomplete Multi-View Clustering: LFIMVC
Multiple Kernel k-Means with Incomplete Kernels: MKKMIK
Multi-Omics Factor Analysis: MOFA
Multi Omic clustering by Non-Exhaustive Types: MONET
Multi-Reconstruction Graph Convolutional Network: MRGCN
Multiple Similarity Network Embedding: MSNE
Multiview Spectral Clustering: MVSC
NEighborhood based Multi-Omics clustering: NEMO
Non-Negative Matrix Factorization: NMF
Online multi-view clustering: OMVC
One-Pass Incomplete Multi-View Clustering: OPIMC
One-Stage Incomplete Multi-view Clustering via Late Fusion: OSLFIMVC
Projective Incomplete Multi-View Clustering: PIMVC
Scalable Incomplete Multiview Clustering with Adaptive Data Completion: SIMCADC
Similarity Network Fusion: SNF
Subtyping Tool for Multi-Omic data: SUMO
7 Environment
Code
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=nb_NO.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=nb_NO.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=nb_NO.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=nb_NO.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] stringr_1.5.1 rlang_1.1.3 reshape2_1.4.4
[4] ggrepel_0.9.6 tibble_3.2.1 Rtsne_0.17
[7] caTools_1.18.3 xtable_1.8-4 ggraph_2.2.1
[10] igraph_2.1.4 PMCMRplus_1.9.12 svglite_2.1.3
[13] reactable_0.4.4 htmlwidgets_1.6.4 plotly_4.10.4
[16] corrplot_0.95 ggiraphExtra_0.3.0 cowplot_1.1.3
[19] readr_2.1.5 tidyr_1.3.1 ggdist_3.3.2
[22] pals_1.9 mlr3benchmark_0.1.7 gghighlight_0.4.1
[25] dplyr_1.1.4 ggplot2_3.5.1
loaded via a namespace (and not attached):
[1] nlme_3.1-155 bitops_1.0-9 bit64_4.6.0-1
[4] insight_1.0.1 RColorBrewer_1.1-3 httr_1.4.7
[7] tools_4.1.2 backports_1.5.0 utf8_1.2.4
[10] R6_2.5.1 sjlabelled_1.2.0 lazyeval_0.2.2
[13] mgcv_1.8-39 colorspace_2.1-0 withr_3.0.0
[16] mlr3misc_0.16.0 gridExtra_2.3 tidyselect_1.2.1
[19] extrafontdb_1.0 bit_4.5.0.1 compiler_4.1.2
[22] cli_3.6.2 labeling_0.4.3 BWStest_0.2.3
[25] scales_1.3.0 checkmate_2.3.2 mvtnorm_1.3-3
[28] multcompView_0.1-10 systemfonts_1.2.1 digest_0.6.35
[31] rmarkdown_2.29 dichromat_2.0-0.1 pkgconfig_2.0.3
[34] htmltools_0.5.8.1 extrafont_0.19 fastmap_1.2.0
[37] maps_3.4.2 SuppDists_1.1-9.8 generics_0.1.3
[40] farver_2.1.1 jsonlite_1.8.8 crosstalk_1.2.1
[43] vroom_1.6.5 distributional_0.5.0 magrittr_2.0.3
[46] Matrix_1.4-0 Rcpp_1.0.12 munsell_0.5.0
[49] fansi_1.0.6 viridis_0.6.5 lifecycle_1.0.4
[52] stringi_1.8.3 yaml_2.3.10 MASS_7.3-55
[55] plyr_1.8.9 parallel_4.1.2 crayon_1.5.3
[58] sjmisc_2.8.10 ppcor_1.1 lattice_0.20-45
[61] graphlayouts_1.2.2 splines_4.1.2 mapproj_1.2.11
[64] hms_1.1.3 knitr_1.49 pillar_1.9.0
[67] uuid_1.2-1 kSamples_1.2-10 glue_1.7.0
[70] ggiraph_0.8.12 evaluate_1.0.3 mycor_0.1.1
[73] data.table_1.15.4 vctrs_0.6.5 tzdb_0.4.0
[76] tweenr_2.0.3 Rttf2pt1_1.3.12 gtable_0.3.4
[79] purrr_1.0.2 polyclip_1.10-7 reactR_0.6.1
[82] cachem_1.1.0 xfun_0.50 ggforce_0.4.2
[85] tidygraph_1.3.1 Rmpfr_1.0-0 viridisLite_0.4.2
[88] memoise_2.0.1 gmp_0.7-5