R visualization: donut chart by ggplot2

donut chart shows the number or proportion per group

Introduction

Compared to pie chart, donut chart has one more hole inside. In addition, multiple circles plot shows different traits of dataset.

Loading required packages

knitr::opts_chunk$set(message = FALSE, warning = FALSE)

library(ggplot2) 
library(dplyr)
library(tidyverse)

# group & color
group_names <- c("setosa", "versicolor", "virginica")
group_colors <- c("#0073C2FF", "#EFC000FF", "#CD534CFF")

Data preparation

  • Loading iris dataset

  • Summarizing the total number per species

  • Calculating proportion per species

  • Computing position of text label

data("iris")

plotdata <- iris |>
  dplyr::group_by(Species) |>
  dplyr::summarise(count = length(Species)) |>
  dplyr::mutate(prop = round(count / sum(count), 2)) |>
  dplyr::arrange(desc(Species)) |>
  dplyr::mutate(lab.ypos = cumsum(prop) - 0.5 * prop) |>
  dplyr::mutate(num_prop = paste0("n = ", count, "\n", 
                           paste0(prop, "%"))) |>
  dplyr::mutate(Species = factor(Species, levels = group_names))

head(plotdata)
## # A tibble: 3 × 5
##   Species    count  prop lab.ypos num_prop       
##   <fct>      <int> <dbl>    <dbl> <chr>          
## 1 virginica     50  0.33    0.165 "n = 50\n0.33%"
## 2 versicolor    50  0.33    0.495 "n = 50\n0.33%"
## 3 setosa        50  0.33    0.825 "n = 50\n0.33%"

Pie

pie <- ggplot(plotdata, aes(x = "", y = prop, fill = Species)) +
  geom_bar(stat = "identity", color = "white", width = 1) +
  coord_polar(theta = "y", start = 0) +
  geom_text(aes(y = lab.ypos, label = num_prop), color = "white") +
  scale_fill_manual(breaks = group_names,
                    values = group_colors) +
  theme_void()

pie

Donut

donut <- ggplot(plotdata, aes(x = 2, y = prop, fill = Species)) +
  geom_bar(stat = "identity", color = "white") +
  coord_polar(theta = "y", start = 0) +
  geom_text(aes(y = lab.ypos, label = num_prop), color = "white") +
  scale_fill_manual(breaks = group_names,
                    values = group_colors) +
  annotate("text", x = 1, y = 0, label = "donut chart") +
  theme_void() +
  xlim(0.5, 2.5) +
  theme(legend.position = c(.5, .45))

donut

Mulitple circles chart

One circle represents one traits of dataset, should we using multiple circles to characterize the different traits?

Traits of mtcars: cyl, gear and carb

  • Data preparation
data_select <- mtcars |>
  dplyr::select(cyl, gear, carb) |>
  mutate_if(is.numeric, as.character) |>
  mutate_if(is.character, as.factor) |>
  mutate(cyl = paste0("cyl", cyl),
         gear = paste0("gear", gear),
         carb = paste0("carb", carb))

str(data_select)
## 'data.frame':	32 obs. of  3 variables:
##  $ cyl : chr  "cyl6" "cyl6" "cyl4" "cyl6" ...
##  $ gear: chr  "gear4" "gear4" "gear4" "gear3" ...
##  $ carb: chr  "carb4" "carb4" "carb1" "carb1" ...
  • Calculating the Proportion per group
plotdata2 <- data_select |>
  dplyr::group_by(cyl, gear, carb) |>
  dplyr::summarise(count = length(cyl)) 

head(plotdata2)
## # A tibble: 6 × 4
## # Groups:   cyl, gear [5]
##   cyl   gear  carb  count
##   <chr> <chr> <chr> <int>
## 1 cyl4  gear3 carb1     1
## 2 cyl4  gear4 carb1     4
## 3 cyl4  gear4 carb2     4
## 4 cyl4  gear5 carb2     2
## 5 cyl6  gear3 carb1     2
## 6 cyl6  gear4 carb4     4
  • Function for drawing chart

    • single or multiple traits for plotting

    • values and position of label

    • plotting

get_circle <- function(
    inputdata,
    variables = c("cyl", "gear", "carb"),
    label_size = 2.5,
    annotate_size = 4,
    col_number = 5,
    theme_legend_size = 8,
    theme_strip_size = 8,
    axis_size = 10,
    cyl_names = c("cyl4", "cyl6","cyl8"),
    cyl_cols = c("#D51F26", "#272E6A", "#208A42"),
    gear_names = c("gear3", "gear4", "gear5"),
    gear_cols = c("#7DD06F", "#844081", "#688EC1"),
    carb_names = c("carb1", "carb2", "carb3", "carb4", "carb6", "carb8"),
    carb_cols = c("#faa818", "#41a30d", "#fbdf72", "#367d7d", "#d33502", "#6ebcbc")) {
  
  dat_list <- list()
  for (i in 1:length(variables)) {
    temp_var <- rlang::sym(variables[i])
    temp_dat <- inputdata %>%
      dplyr::select(!!temp_var, count) %>%
      dplyr::group_by(!!temp_var) %>%
      dplyr::summarise(count = sum(count), .groups = "drop")      
    dat_list[[i]] <- temp_dat
  }
  
  names(dat_list) <- variables
  
  get_plotdata <- function(dat) {
    
    dat_cln_list <- list()
    names_new_list <- list()
    colors_new_list <- list()
    
    for (i in 1:length(dat)) {
      if (names(dat)[i] == "cyl") {
        cyl_cln <- dat[[i]] %>%
          dplyr::select(cyl, count) %>%
          dplyr::group_by(cyl) %>%
          dplyr::summarise(count = sum(count)) %>%
          dplyr::mutate(prop = round(count / sum(count), 3) * 100) %>%
          dplyr::ungroup() %>%
          dplyr::arrange(desc(cyl)) %>%
          dplyr::mutate(lab.ypos = cumsum(prop) - 0.5*prop) %>%
          dplyr::mutate(num_prop = paste0("n = ", count, "\n", 
                                          paste0(prop, "%")))
        cyl_cln$level <- i 
        cyl_cln$type <- "cyl"
        match_order_index1 <- sort(pmatch(unique(cyl_cln$cyl), cyl_names), decreasing = F)
        cyl_names_new <- cyl_names[match_order_index1]
        cyl_cols_new <- cyl_cols[match_order_index1]     
        cyl_cln$fill <- factor(cyl_cln$cyl, levels = cyl_names_new, labels = cyl_cols_new)
        
        dat_cln_list[[i]] <- cyl_cln
        names_new_list[[i]] <- cyl_names_new
        colors_new_list[[i]] <- cyl_cols_new
        
      } else if (names(dat)[i] == "gear") {
        gear_cln <- dat[[i]] %>%
          dplyr::select(gear, count) %>%
          dplyr::group_by(gear) %>%
          dplyr::summarise(count = sum(count)) %>%
          dplyr::mutate(prop = round(count / sum(count), 3) * 100) %>%
          dplyr::ungroup() %>%
          dplyr::arrange(desc(gear)) %>%
          dplyr::mutate(lab.ypos = cumsum(prop) - 0.5*prop) %>%
          dplyr::mutate(num_prop = paste0("n = ", count, "\n", 
                                          paste0(prop, "%")))
        gear_cln$level <- i
        gear_cln$type <- "gear"
        match_order_index2 <- sort(pmatch(unique(gear_cln$gear), gear_names), decreasing = F)    
        gear_names_new <- gear_names[match_order_index2]
        gear_cols_new <- gear_cols[match_order_index2]     
        gear_cln$fill <- factor(gear_cln$gear, levels = gear_names_new, labels = gear_cols_new)
        
        dat_cln_list[[i]] <- gear_cln
        names_new_list[[i]] <- gear_names_new
        colors_new_list[[i]] <- gear_cols_new
        
      } else if (names(dat)[i] == "carb") {
        carb_cln <- dat[[i]] %>%
          dplyr::select(carb, count) %>%
          dplyr::group_by(carb) %>%
          dplyr::summarise(count = sum(count)) %>%
          dplyr::mutate(prop = round(count / sum(count), 3) * 100) %>%
          dplyr::ungroup() %>%
          dplyr::arrange(desc(carb)) %>%
          dplyr::mutate(lab.ypos = cumsum(prop) - 0.5*prop) %>%
          dplyr::mutate(num_prop = paste0("n = ", count, "\n", 
                                          paste0(prop, "%"))) 
        carb_cln$level <- i
        carb_cln$type <- "carb"
        match_order_index3 <- sort(pmatch(unique(carb_cln$carb), carb_names), decreasing = F)    
        carb_names_new <- carb_names[match_order_index3]
        carb_cols_new <- carb_cols[match_order_index3]     
        carb_cln$fill <- factor(carb_cln$carb, levels = carb_names_new, labels = carb_cols_new)
        
        dat_cln_list[[i]] <- carb_cln
        names_new_list[[i]] <- carb_names_new
        colors_new_list[[i]] <- carb_cols_new
        
      }
    }
    
    names(dat_cln_list) <- names(dat)
    names(names_new_list) <- names(dat)
    names(colors_new_list) <- names(dat)
    
    temp_name <- c()
    temp_value <- c()
    temp_level <- c()
    temp_type <- c()
    temp_num_prop <- c()
    for (j in 1:length(dat_cln_list)) {
      temp_name <- c(temp_name, dat_cln_list[[j]][, 1, T])
      temp_value <- c(temp_value, dat_cln_list[[j]][, 2, T])
      temp_level <- c(temp_level, dat_cln_list[[j]][, 6, T])
      temp_type <- c(temp_type, dat_cln_list[[j]][, 7, T])
      temp_num_prop <- c(temp_num_prop, dat_cln_list[[j]][, 5, T])
    }
    
    data_temp <- data.frame(name = temp_name,
                            value = temp_value,
                            level = temp_level,
                            type = temp_type,
                            num_prop = temp_num_prop)
    data_parent <- data.frame(name = "Parent", value = 0, level = 0, type = NA, num_prop = NA)
    dat_res <- rbind(data_parent, data_temp)
    res <- list(plotdata = dat_res,
                names_new = names_new_list,
                colors_new = colors_new_list)
    
    return(res)
  }
  
  # discrete variables
  res_list <- get_plotdata(dat = dat_list)
  plotdata <- res_list$plotdata
  
  name_levels <- c()
  name_colors <- c()
  for (k in 1:length(res_list$names_new)) {
    temp_name <- c(paste(k, "circle", names(res_list$names_new)[k]), res_list$names_new[[k]])
    temp_color <- c("white", res_list$colors_new[[k]])
    
    name_levels <- c(name_levels, temp_name)
    name_colors <- c(name_colors, temp_color)
  }
  
  names(name_colors) <- name_levels
  
  plotdata$name <- factor(plotdata$name, levels = name_levels)
  total_num <- plotdata %>%
    dplyr::pull(value) %>%
    sum() / length(variables)
  
  pl <- ggplot(plotdata, aes(x = level, y = value, fill = name)) + 
    geom_col(width = 1, color = "gray90", size = 0.5, position = position_stack()) +
    geom_text(aes(label = num_prop), size = label_size, position = position_stack(vjust = 0.5)) +
    coord_polar(theta = "y", start = 0) +
    scale_x_discrete(breaks = NULL) +
    scale_y_continuous(breaks = NULL) +
    labs(x = NULL, y = NULL) +  
    scale_fill_manual(values = name_colors, drop = FALSE) +
    guides(fill = guide_legend(title = NULL, ncol = col_number)) +
    annotate("text", x = -0.4, y = 0, 
             label = paste("No. of samples", "\n n = ", total_num), 
             size = annotate_size) +
    theme_void() +
    theme(axis.title = element_text(face = "bold", size = axis_size),
          axis.text = element_text(size = axis_size - 1),
          text = element_text(size = axis_size - 2),
          legend.position = "bottom",
          legend.text = element_text(size = theme_legend_size, color = "black"),
          strip.text.x = element_text(size = theme_strip_size, color = "black", face = "bold"))
  
  return(pl)
}
  • multiple circles
pl_circle <- get_circle(
    inputdata = plotdata2,
    variables = c("cyl", "gear", "carb"),
    label_size = 2.5,
    annotate_size = 4,
    col_number = 5,
    theme_legend_size = 8,
    theme_strip_size = 8,
    axis_size = 10)

pl_circle

Conclusion

Pie and donut only are fit for proportion of single variable, while multiple circles are suitable for ratio of multiple traits in dataset.

Systemic information

devtools::session_info()
##  Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.1.3 (2022-03-10)
##  os       macOS Big Sur/Monterey 10.16
##  system   x86_64, darwin17.0
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Asia/Shanghai
##  date     2023-07-24
##  pandoc   3.1.3 @ /Users/zouhua/opt/anaconda3/bin/ (via rmarkdown)
## 
##  Packages ───────────────────────────────────────────────────────────────────
##  package     * version date (UTC) lib source
##  blogdown      1.18    2023-06-19 [2] CRAN (R 4.1.3)
##  bookdown      0.34    2023-05-09 [2] CRAN (R 4.1.2)
##  bslib         0.5.0   2023-06-09 [2] CRAN (R 4.1.3)
##  cachem        1.0.8   2023-05-01 [2] CRAN (R 4.1.2)
##  callr         3.7.3   2022-11-02 [2] CRAN (R 4.1.2)
##  cli           3.6.1   2023-03-23 [2] CRAN (R 4.1.2)
##  colorspace    2.1-0   2023-01-23 [2] CRAN (R 4.1.2)
##  crayon        1.5.2   2022-09-29 [2] CRAN (R 4.1.2)
##  devtools      2.4.5   2022-10-11 [2] CRAN (R 4.1.2)
##  digest        0.6.33  2023-07-07 [1] CRAN (R 4.1.3)
##  dplyr       * 1.1.2   2023-04-20 [2] CRAN (R 4.1.2)
##  ellipsis      0.3.2   2021-04-29 [2] CRAN (R 4.1.0)
##  evaluate      0.21    2023-05-05 [2] CRAN (R 4.1.2)
##  fansi         1.0.4   2023-01-22 [2] CRAN (R 4.1.2)
##  farver        2.1.1   2022-07-06 [2] CRAN (R 4.1.2)
##  fastmap       1.1.1   2023-02-24 [2] CRAN (R 4.1.2)
##  forcats     * 1.0.0   2023-01-29 [2] CRAN (R 4.1.2)
##  fs            1.6.2   2023-04-25 [2] CRAN (R 4.1.2)
##  generics      0.1.3   2022-07-05 [2] CRAN (R 4.1.2)
##  ggplot2     * 3.4.2   2023-04-03 [2] CRAN (R 4.1.2)
##  glue          1.6.2   2022-02-24 [2] CRAN (R 4.1.2)
##  gtable        0.3.3   2023-03-21 [2] CRAN (R 4.1.2)
##  highr         0.10    2022-12-22 [2] CRAN (R 4.1.2)
##  hms           1.1.3   2023-03-21 [2] CRAN (R 4.1.2)
##  htmltools     0.5.5   2023-03-23 [2] CRAN (R 4.1.2)
##  htmlwidgets   1.6.2   2023-03-17 [2] CRAN (R 4.1.2)
##  httpuv        1.6.11  2023-05-11 [2] CRAN (R 4.1.3)
##  jquerylib     0.1.4   2021-04-26 [2] CRAN (R 4.1.0)
##  jsonlite      1.8.7   2023-06-29 [2] CRAN (R 4.1.3)
##  knitr         1.43    2023-05-25 [2] CRAN (R 4.1.3)
##  labeling      0.4.2   2020-10-20 [2] CRAN (R 4.1.0)
##  later         1.3.1   2023-05-02 [2] CRAN (R 4.1.2)
##  lifecycle     1.0.3   2022-10-07 [2] CRAN (R 4.1.2)
##  lubridate   * 1.9.2   2023-02-10 [2] CRAN (R 4.1.2)
##  magrittr      2.0.3   2022-03-30 [2] CRAN (R 4.1.2)
##  memoise       2.0.1   2021-11-26 [2] CRAN (R 4.1.0)
##  mime          0.12    2021-09-28 [2] CRAN (R 4.1.0)
##  miniUI        0.1.1.1 2018-05-18 [2] CRAN (R 4.1.0)
##  munsell       0.5.0   2018-06-12 [2] CRAN (R 4.1.0)
##  pillar        1.9.0   2023-03-22 [2] CRAN (R 4.1.2)
##  pkgbuild      1.4.2   2023-06-26 [2] CRAN (R 4.1.3)
##  pkgconfig     2.0.3   2019-09-22 [2] CRAN (R 4.1.0)
##  pkgload       1.3.2.1 2023-07-08 [2] CRAN (R 4.1.3)
##  prettyunits   1.1.1   2020-01-24 [2] CRAN (R 4.1.0)
##  processx      3.8.2   2023-06-30 [2] CRAN (R 4.1.3)
##  profvis       0.3.8   2023-05-02 [2] CRAN (R 4.1.2)
##  promises      1.2.0.1 2021-02-11 [2] CRAN (R 4.1.0)
##  ps            1.7.5   2023-04-18 [2] CRAN (R 4.1.2)
##  purrr       * 1.0.1   2023-01-10 [2] CRAN (R 4.1.2)
##  R6            2.5.1   2021-08-19 [2] CRAN (R 4.1.0)
##  Rcpp          1.0.11  2023-07-06 [1] CRAN (R 4.1.3)
##  readr       * 2.1.4   2023-02-10 [2] CRAN (R 4.1.2)
##  remotes       2.4.2   2021-11-30 [2] CRAN (R 4.1.0)
##  rlang         1.1.1   2023-04-28 [2] CRAN (R 4.1.2)
##  rmarkdown     2.23    2023-07-01 [2] CRAN (R 4.1.3)
##  rstudioapi    0.15.0  2023-07-07 [2] CRAN (R 4.1.3)
##  sass          0.4.6   2023-05-03 [2] CRAN (R 4.1.2)
##  scales        1.2.1   2022-08-20 [2] CRAN (R 4.1.2)
##  sessioninfo   1.2.2   2021-12-06 [2] CRAN (R 4.1.0)
##  shiny         1.7.4.1 2023-07-06 [2] CRAN (R 4.1.3)
##  stringi       1.7.12  2023-01-11 [2] CRAN (R 4.1.2)
##  stringr     * 1.5.0   2022-12-02 [2] CRAN (R 4.1.2)
##  tibble      * 3.2.1   2023-03-20 [2] CRAN (R 4.1.2)
##  tidyr       * 1.3.0   2023-01-24 [2] CRAN (R 4.1.2)
##  tidyselect    1.2.0   2022-10-10 [2] CRAN (R 4.1.2)
##  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.1.2)
##  timechange    0.2.0   2023-01-11 [2] CRAN (R 4.1.2)
##  tzdb          0.4.0   2023-05-12 [2] CRAN (R 4.1.3)
##  urlchecker    1.0.1   2021-11-30 [2] CRAN (R 4.1.0)
##  usethis       2.2.2   2023-07-06 [2] CRAN (R 4.1.3)
##  utf8          1.2.3   2023-01-31 [2] CRAN (R 4.1.2)
##  vctrs         0.6.3   2023-06-14 [1] CRAN (R 4.1.3)
##  withr         2.5.0   2022-03-03 [2] CRAN (R 4.1.2)
##  xfun          0.39    2023-04-20 [2] CRAN (R 4.1.2)
##  xtable        1.8-4   2019-04-21 [2] CRAN (R 4.1.0)
##  yaml          2.3.7   2023-01-23 [2] CRAN (R 4.1.2)
## 
##  [1] /Users/zouhua/Library/R/x86_64/4.1/library
##  [2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────

Reference

Hua Zou
Hua Zou
Senior Bioinformatic Analyst

My research interests include host-microbiota intersection, machine learning and multi-omics data integration.