R visualization: Violin by ggplot2

Violin with boxplot and point


A violin plot uses the density of each group's distribution to show the differences among groups, and it is one of the most useful tools for data visualization.

Loading required packages

# Global knitr chunk options: turn off messages and warnings for all chunks.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)


# install.packages("gghalves")
library(ggplot2)   # ggplot(), geom_violin(), scales, themes
library(ggpubr)    # stat_compare_means()
library(gghalves)  # geom_half_violin()

# NOTE: the workspace is deliberately not wiped here. `rm(list = ls())`
# silently deletes the reader's own objects when this code is run
# interactively, and nothing below depends on an empty workspace.

# Keep strings as character vectors when data frames are built.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
options(stringsAsFactors = FALSE)

# group & color
# Species levels in plotting order and one color per level; the unnamed color
# vector is paired with the levels by position in the manual scales below.
group_names <- c("setosa", "versicolor", "virginica")
group_colors <- c("#0073C2FF", "#EFC000FF", "#CD534CFF")

Data preparation

  • Loading iris and ToothGrowth datasets

  • Factorizing Species


# Build the plotting table from iris: keep sepal length, species and sepal
# width, renamed to the generic Index / Group / Index2 used by the plots.
# Group is a factor whose level order follows group_names.
plotdata <- dplyr::mutate(
  dplyr::select(
    iris,
    Index = Sepal.Length,
    Group = Species,
    Index2 = Sepal.Width
  ),
  Group = factor(Group, levels = group_names)
)

##   Index  Group Index2
## 1   5.1 setosa    3.5
## 2   4.9 setosa    3.0
## 3   4.7 setosa    3.2
## 4   4.6 setosa    3.1
## 5   5.0 setosa    3.6
## 6   5.4 setosa    3.9
# other dataset

Single violin

  • create violin by geom_violin

  • apply stat_boxplot and geom_boxplot for boxplot

  • add point by geom_point

# Single violin plot of Index per Group. Layers are added bottom to top:
# violin, whisker caps, notched boxplot, jittered points, significance labels.
pl_violin <- ggplot(plotdata, aes(x = Group, y = Index, shape = Group)) +
  # geometry layers
  geom_violin(aes(fill = Group), alpha = 0.5) +
  stat_boxplot(geom = "errorbar", width = 0.2) +
  geom_boxplot(width = 0.1, notch = TRUE) +
  geom_point(
    aes(color = Group),
    position = position_jitter(width = 0.1),
    size = 1
  ) +
  # t-test of setosa against each of the other two species
  stat_compare_means(
    comparisons = list(
      c("setosa", "versicolor"),
      c("setosa", "virginica")
    ),
    method = "t.test",
    label = "p.signif"
  ) +
  # scales: one color / fill / point shape per group, 10% padding on y
  scale_fill_manual(values = group_colors) +
  scale_color_manual(values = group_colors) +
  scale_shape_manual(values = c(15, 16, 17)) +
  scale_y_continuous(expand = expansion(mult = c(0.1, 0.1))) +
  # empty x-axis title, and no legends for any of the mapped aesthetics
  labs(x = "") +
  guides(color = "none", shape = "none", fill = "none") +
  # appearance
  theme_bw() +
  theme(
    text = element_text(size = 9, color = "black"),
    axis.text = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 12, color = "black", face = "bold")
  )


Violin with two different groups

The left and right halves of each violin show two different groups, so their shapes can differ completely.

  • Using ToothGrowth dataset with groups dose and supp
# Split violin of tooth length (len) per dose, with the two halves taken from
# the two supp groups. supp is releveled so that "VC" comes before "OJ".
pl_violin2 <- ggplot(
  data = dplyr::mutate(
    ToothGrowth,
    supp = factor(supp, levels = c("VC", "OJ"))
  ),
  mapping = aes(x = as.factor(dose), y = len)
) +
  geom_half_violin(
    aes(fill = supp, color = supp, split = supp),
    position = "identity"
  ) +
  # same two colors for outline and fill, in supp level order
  scale_fill_manual(values = c("#AF0F11", "#3372A6")) +
  scale_color_manual(values = c("#AF0F11", "#3372A6")) +
  scale_y_continuous(expand = expansion(mult = c(0.1, 0.1))) +
  labs(x = "Dose") +
  theme_bw() +
  theme(
    text = element_text(size = 9, color = "black"),
    axis.text = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 12, color = "black", face = "bold"),
    # legend positioned by relative coordinates (x = 0.9, y = 0.2)
    legend.position = c(0.9, 0.2)
  )

  • add significance between groups by stat_compare_means
# pl_violin2 plus significance labels comparing the supp groups (group = supp).
# The labels sit 10% above the largest observed tooth length.
pl_violin2_1 <- pl_violin2 +
  stat_compare_means(
    mapping = aes(group = supp),
    label = "p.signif",
    label.y = with(ToothGrowth, max(len) + max(len) * 0.1)
  )

  • add significance between groups by ggsignif::geom_signif

# Number of distinct doses in ToothGrowth.
nlength <- length(unique(ToothGrowth$dose))

# t-test of len ~ supp within each dose, followed by p-value adjustment and
# significance symbols for the adjusted p-values. Three columns are appended
# for drawing the brackets:
#   x / xend : 0.125 left / right of the integer positions 1..nlength
#   y        : 10% above the largest observed tooth length
test_res <- ToothGrowth |>
  dplyr::mutate(dose = factor(dose)) |>
  dplyr::group_by(dose) |>
  rstatix::t_test(len ~ supp) |>
  rstatix::adjust_pvalue() |>
  rstatix::add_significance("p.adj") |>
  dplyr::mutate(
    x = seq_len(nlength) - 0.125,
    xend = seq_len(nlength) + 0.125,
    y = max(ToothGrowth$len) + max(ToothGrowth$len) * 0.1
  )

# pl_violin2 plus the brackets pre-computed in test_res.
# With stat = "identity" the layer takes its coordinates (x, xend, y, yend) and
# its label (annotation) directly from the supplied data.
# geom_signif is namespace-qualified so the call does not depend on ggsignif
# being attached, matching the dplyr:: / rstatix:: qualification used to build
# test_res.
pl_violin2_2 <- pl_violin2 +
  ggsignif::geom_signif(
    data = test_res,
    mapping = aes(
      x = x,
      xend = xend,
      y = y,
      yend = y,
      annotation = p.adj.signif
    ),
    stat = "identity"
  )


Violin with two different indices

# Half-violin comparison of two measurements per group:
# left half (blue) = Index, right half (red) = Index2.
pl_violin3 <- ggplot(plotdata, aes(x = Group, y = Index)) +
  geom_half_violin(
    side = "l",
    fill = "blue",
    alpha = 0.5,
    adjust = 0.5
  ) +
  geom_half_violin(
    aes(y = Index2),
    side = "r",
    fill = "red",
    alpha = 0.5,
    adjust = 0.5
  ) +
  # hand-made color key, written as text at x = 0.5
  annotate(
    "text",
    x = 0.5, y = 8, label = "Index",
    color = "blue", size = 4, fontface = "bold", hjust = 0
  ) +
  annotate(
    "text",
    x = 0.5, y = 7.5, label = "Index2",
    color = "red", size = 4, fontface = "bold", hjust = 0
  ) +
  scale_y_continuous(expand = expansion(mult = c(0.1, 0.1))) +
  labs(x = "") +
  theme_bw() +
  theme(
    text = element_text(size = 9, color = "black"),
    axis.text = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 12, color = "black", face = "bold")
  )



Compared with a plain boxplot, a violin plot not only shows the differences among groups but also displays the distribution of the values within each group, providing more information.

System information

