R visualization: Raincloud plot by ggplot2

Raincloud plot

Introduction

A raincloud plot visualizes the raw data, the distribution of the data, and the key summary statistics at the same time.

Loading required packages

knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(ggpubr)
library(ggdist)
library(gghalves)
library(tidyverse)

# rm(list = ls())
# R >= 4.0 already defaults to FALSE; set explicitly for older versions.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
options(stringsAsFactors = FALSE)

# group & color
# Species to plot, in x-axis order (passed as `group_names` below).
sp_names <- c("setosa", "versicolor", "virginica")
# One colour per entry of `sp_names` (passed as `group_colors` below).
sp_colors <- c("#0073C2FF", "#EFC000FF", "#CD534CFF")

Data preparation

  • Data object iris
# Make the built-in iris data set available in the workspace.
utils::data("iris")

# Work on a separate object so `iris` itself is never modified.
data_pre <- iris

# Preview the first six rows.
head(data_pre, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Plot function

#' Draw a raincloud plot (half-density, boxplot and raw points) per group
#'
#' @param dat Data frame holding the grouping column and the measure columns.
#' @param group Name (single string) of the grouping column in `dat`.
#' @param group_names Character vector of the groups to keep, in x-axis order.
#'   Pass `"all"` to use every group found in `dat[[group]]` (factor-level
#'   order for factors, sorted unique values otherwise; `NA` groups dropped).
#' @param group_colors Colours passed as `values` to `scale_fill_manual()` and
#'   `scale_color_manual()`.
#' @param measures Character vector with one or more column names of `dat` to
#'   plot on the y axis. One measure gives a single panel titled with the
#'   measure name; several measures give one free-scale facet per measure.
#' @return A ggplot object. Pairwise Wilcoxon comparisons between groups are
#'   added whenever at least two of the requested groups are present.
get_raincloud <- function(
    dat,
    group,
    group_names,
    group_colors,
    measures) {

  # Fail early with a readable message instead of an obscure dplyr error.
  stopifnot(
    is.data.frame(dat),
    is.character(group), length(group) == 1L,
    is.character(measures), length(measures) >= 1L
  )
  missing_cols <- setdiff(c(group, measures), colnames(dat))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `dat`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # "all" is a sentinel, not a group label: expand it to the real groups so
  # that the factor levels and the pairwise comparisons are built from them.
  if (isTRUE(as.character(group_names[1]) == "all")) {
    group_values <- dat[[group]]
    group_names <- if (is.factor(group_values)) {
      levels(droplevels(group_values))
    } else {
      sort(unique(as.character(group_values)))
    }
  } else {
    group_names <- as.character(group_names)
  }

  # group for plot x-label
  dat_cln2 <- dat
  colnames(dat_cln2)[colnames(dat_cln2) == group] <- "Group_new"
  tempdata <- dat_cln2 %>%
    dplyr::filter(Group_new %in% group_names)
  tempdata$Group_new <- factor(tempdata$Group_new, levels = group_names)

  # Long format (one row per group/measure/value) only when faceting.
  multi_measure <- length(measures) > 1
  plotdata <- tempdata %>%
    dplyr::select(dplyr::all_of(c("Group_new", measures)))
  if (multi_measure) {
    plotdata <- plotdata %>%
      tidyr::pivot_longer(
        cols = dplyr::all_of(measures),
        names_to = "Index",
        values_to = "Values"
      ) %>%
      dplyr::mutate(Index = factor(Index, levels = measures))
  } else {
    colnames(plotdata)[2] <- "Values"
  }

  # Pairwise comparisons by group name, limited to groups that have data;
  # skipped entirely when fewer than two groups remain.
  present_groups <- group_names[
    group_names %in% as.character(tempdata$Group_new)
  ]
  cmp <- if (length(present_groups) >= 2) {
    utils::combn(present_groups, 2, simplify = FALSE)
  } else {
    NULL
  }

  # Collapses the crossbar to a single line at the median.
  median_bar <- function(x) {
    med <- median(x)
    c(y = med, ymin = med, ymax = med)
  }
  y_title <- if (multi_measure) "Value" else measures

  pl <- ggplot(plotdata, aes(x = Group_new, y = Values, fill = Group_new)) +
    # cloud: half density, shifted to the right of the box
    ggdist::stat_halfeye(adjust = 0.5, width = 0.3,
                         .width = 0, justification = -0.3, point_colour = NA) +
    # whisker caps + box without outlier points (raw points are drawn below)
    stat_boxplot(aes(color = Group_new), geom = "errorbar", width = 0.1) +
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    # rain: raw observations on the left side
    gghalves::geom_half_point(side = "l", range_scale = 0.4, alpha = 0.5) +
    # white median line drawn over the filled box
    stat_summary(geom = "crossbar", width = 0.08, fatten = 0, color = "white",
                 fun.data = median_bar) +
    labs(x = "", y = y_title) +
    scale_fill_manual(values = group_colors) +
    scale_color_manual(values = group_colors) +
    guides(fill = "none", color = "none") +
    scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)))

  if (!is.null(cmp)) {
    pl <- pl +
      ggpubr::stat_compare_means(method = "wilcox.test",
                                 comparisons = cmp)
  }
  if (multi_measure) {
    pl <- pl + facet_wrap(. ~ Index, scales = "free")
  }

  pl +
    theme_bw() +
    theme(axis.title.y = element_text(size = 10, face = "bold"),
          axis.text.y = element_text(size = 9),
          axis.text.x = element_text(size = 10, hjust = .5, vjust = .5,
                                     angle = 30),
          strip.text = element_text(size = 12, face = "bold",
                                    color = "black"))
}

Raincloud plot with single measure

# One measure: Sepal.Length compared across the three species.
get_raincloud(
  dat          = data_pre,
  measures     = "Sepal.Length",
  group        = "Species",
  group_names  = sp_names,
  group_colors = sp_colors
)

Results:

  • The raincloud plot shows, for each of the three groups, the distribution of the data together with its boxplot.

Raincloud plot with multiple measures

# Two measures: one facet each for Sepal.Length and Sepal.Width.
get_raincloud(
  dat          = data_pre,
  measures     = c("Sepal.Length", "Sepal.Width"),
  group        = "Species",
  group_names  = sp_names,
  group_colors = sp_colors
)

Conclusion

Raincloud plots combine a half-violin plot, a boxplot, and jittered raw data points to show the distribution of the data.

System information

# Print the R version, platform settings and package versions of this session.
devtools::session_info()
##  Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.1 (2023-06-16)
##  os       macOS Monterey 12.2.1
##  system   x86_64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Asia/Shanghai
##  date     2024-03-14
##  pandoc   3.1.3 @ /Users/zouhua/opt/anaconda3/bin/ (via rmarkdown)
## 
##  Packages ───────────────────────────────────────────────────────────────────
##  package        * version date (UTC) lib source
##  abind            1.4-5   2016-07-21 [1] CRAN (R 4.3.0)
##  backports        1.4.1   2021-12-13 [1] CRAN (R 4.3.0)
##  blogdown         1.19    2024-02-01 [1] CRAN (R 4.3.2)
##  bookdown         0.37    2023-12-01 [1] CRAN (R 4.3.0)
##  broom            1.0.5   2023-06-09 [1] CRAN (R 4.3.0)
##  bslib            0.6.1   2023-11-28 [1] CRAN (R 4.3.0)
##  cachem           1.0.8   2023-05-01 [1] CRAN (R 4.3.0)
##  car              3.1-2   2023-03-30 [1] CRAN (R 4.3.0)
##  carData          3.0-5   2022-01-06 [1] CRAN (R 4.3.0)
##  cli              3.6.2   2023-12-11 [1] CRAN (R 4.3.0)
##  colorspace       2.1-0   2023-01-23 [1] CRAN (R 4.3.0)
##  devtools         2.4.5   2022-10-11 [1] CRAN (R 4.3.0)
##  digest           0.6.34  2024-01-11 [1] CRAN (R 4.3.0)
##  distributional   0.3.2   2023-03-22 [1] CRAN (R 4.3.0)
##  dplyr          * 1.1.4   2023-11-17 [1] CRAN (R 4.3.0)
##  ellipsis         0.3.2   2021-04-29 [1] CRAN (R 4.3.0)
##  evaluate         0.23    2023-11-01 [1] CRAN (R 4.3.0)
##  fansi            1.0.6   2023-12-08 [1] CRAN (R 4.3.0)
##  farver           2.1.1   2022-07-06 [1] CRAN (R 4.3.0)
##  fastmap          1.1.1   2023-02-24 [1] CRAN (R 4.3.0)
##  forcats        * 1.0.0   2023-01-29 [1] CRAN (R 4.3.0)
##  fs               1.6.3   2023-07-20 [1] CRAN (R 4.3.0)
##  generics         0.1.3   2022-07-05 [1] CRAN (R 4.3.0)
##  ggdist         * 3.3.1   2023-11-27 [1] CRAN (R 4.3.0)
##  gghalves       * 0.1.4   2022-11-20 [1] CRAN (R 4.3.0)
##  ggplot2        * 3.4.4   2023-10-12 [1] CRAN (R 4.3.0)
##  ggpubr         * 0.6.0   2023-02-10 [1] CRAN (R 4.3.0)
##  ggsignif         0.6.4   2022-10-13 [1] CRAN (R 4.3.0)
##  glue             1.7.0   2024-01-09 [1] CRAN (R 4.3.0)
##  gtable           0.3.4   2023-08-21 [1] CRAN (R 4.3.0)
##  highr            0.10    2022-12-22 [1] CRAN (R 4.3.0)
##  hms              1.1.3   2023-03-21 [1] CRAN (R 4.3.0)
##  htmltools        0.5.7   2023-11-03 [1] CRAN (R 4.3.0)
##  htmlwidgets      1.6.4   2023-12-06 [1] CRAN (R 4.3.0)
##  httpuv           1.6.14  2024-01-26 [1] CRAN (R 4.3.2)
##  jquerylib        0.1.4   2021-04-26 [1] CRAN (R 4.3.0)
##  jsonlite         1.8.8   2023-12-04 [1] CRAN (R 4.3.0)
##  knitr            1.45    2023-10-30 [1] CRAN (R 4.3.0)
##  labeling         0.4.3   2023-08-29 [1] CRAN (R 4.3.0)
##  later            1.3.2   2023-12-06 [1] CRAN (R 4.3.0)
##  lifecycle        1.0.4   2023-11-07 [1] CRAN (R 4.3.0)
##  lubridate      * 1.9.3   2023-09-27 [1] CRAN (R 4.3.0)
##  magrittr         2.0.3   2022-03-30 [1] CRAN (R 4.3.0)
##  memoise          2.0.1   2021-11-26 [1] CRAN (R 4.3.0)
##  mime             0.12    2021-09-28 [1] CRAN (R 4.3.0)
##  miniUI           0.1.1.1 2018-05-18 [1] CRAN (R 4.3.0)
##  munsell          0.5.0   2018-06-12 [1] CRAN (R 4.3.0)
##  pillar           1.9.0   2023-03-22 [1] CRAN (R 4.3.0)
##  pkgbuild         1.4.3   2023-12-10 [1] CRAN (R 4.3.0)
##  pkgconfig        2.0.3   2019-09-22 [1] CRAN (R 4.3.0)
##  pkgload          1.3.4   2024-01-16 [1] CRAN (R 4.3.0)
##  profvis          0.3.8   2023-05-02 [1] CRAN (R 4.3.0)
##  promises         1.2.1   2023-08-10 [1] CRAN (R 4.3.0)
##  purrr          * 1.0.2   2023-08-10 [1] CRAN (R 4.3.0)
##  R6               2.5.1   2021-08-19 [1] CRAN (R 4.3.0)
##  Rcpp             1.0.12  2024-01-09 [1] CRAN (R 4.3.0)
##  readr          * 2.1.5   2024-01-10 [1] CRAN (R 4.3.0)
##  remotes          2.4.2.1 2023-07-18 [1] CRAN (R 4.3.0)
##  rlang            1.1.3   2024-01-10 [1] CRAN (R 4.3.0)
##  rmarkdown        2.25    2023-09-18 [1] CRAN (R 4.3.0)
##  rstatix          0.7.2   2023-02-01 [1] CRAN (R 4.3.0)
##  rstudioapi       0.15.0  2023-07-07 [1] CRAN (R 4.3.0)
##  sass             0.4.8   2023-12-06 [1] CRAN (R 4.3.0)
##  scales           1.3.0   2023-11-28 [1] CRAN (R 4.3.0)
##  sessioninfo      1.2.2   2021-12-06 [1] CRAN (R 4.3.0)
##  shiny            1.8.0   2023-11-17 [1] CRAN (R 4.3.0)
##  stringi          1.8.3   2023-12-11 [1] CRAN (R 4.3.0)
##  stringr        * 1.5.1   2023-11-14 [1] CRAN (R 4.3.0)
##  tibble         * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
##  tidyr          * 1.3.1   2024-01-24 [1] CRAN (R 4.3.2)
##  tidyselect       1.2.0   2022-10-10 [1] CRAN (R 4.3.0)
##  tidyverse      * 2.0.0   2023-02-22 [1] CRAN (R 4.3.0)
##  timechange       0.3.0   2024-01-18 [1] CRAN (R 4.3.0)
##  tzdb             0.4.0   2023-05-12 [1] CRAN (R 4.3.0)
##  urlchecker       1.0.1   2021-11-30 [1] CRAN (R 4.3.0)
##  usethis          2.2.2   2023-07-06 [1] CRAN (R 4.3.0)
##  utf8             1.2.4   2023-10-22 [1] CRAN (R 4.3.0)
##  vctrs            0.6.5   2023-12-01 [1] CRAN (R 4.3.0)
##  withr            3.0.0   2024-01-16 [1] CRAN (R 4.3.0)
##  xfun             0.41    2023-11-01 [1] CRAN (R 4.3.0)
##  xtable           1.8-4   2019-04-21 [1] CRAN (R 4.3.0)
##  yaml             2.3.8   2023-12-11 [1] CRAN (R 4.3.0)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────
Hua Zou
Hua Zou
Senior Bioinformatic Analyst

My research interests include host-microbiota intersection, machine learning and multi-omics data integration.