STAMP的扩展error bar图

extended error bar plot

看到某篇公众号的文章很好地实现了我曾经特别想实现的图,特记录下来,方便以后自己查阅改代码。该图形的目的是展现两组数据之间的差异性检验,通常由两部分组成:左侧条形图和右侧散点图。

对公众号提供的代码进行了部分符合我自身个性的修改,现直接上代码:

step1: library R packages

1
2
3
4
library(tidyverse)
library(patchwork)
library(tibble)
library(ggplot2)

step2: load data

所需数据已经上传至百度网盘: https://pan.baidu.com/s/1uqFM1y9UpgLv0tv8W9yqtA 提取码:请邮件 zouhua1@outlook.com

1
2
prof <- read.table("KEGG_L2.txt", header = TRUE, row.names = 1, sep = "\t")
phen <- read.table("group.txt", header = FALSE, sep = "\t")

step3: curate data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
mdat <- prof  %>% rownames_to_column("Type") %>% 
filter(apply(dplyr::select(., -one_of("Type")), 1,
function(x){mean(x)}) > 0.01) %>%
data.frame(.) %>%
column_to_rownames("Type") %>%
t() %>% data.frame() %>%
rownames_to_column("SampleID") %>%
inner_join(phen %>% setNames(c("SampleID", "Group")),
by = "SampleID") %>%
mutate(Group=factor(Group)) %>%
column_to_rownames("SampleID")

diff_t <- mdat %>%
select_if(is.numeric) %>%
map_df(~ broom::tidy(t.test(. ~ Group, data = mdat)), .id = 'var') %>%
mutate(p.adjust=p.adjust(as.numeric(p.value), "bonferroni")) %>%
filter(p.adjust < 0.05)

abun.bar <- mdat %>% select(c(diff_t$var, "Group")) %>%
gather(key = "variable", value = "value", -Group) %>%
group_by(variable, Group) %>%
summarise(Mean = mean(value)) %>%
ungroup()

diff_t_mean <- diff_t %>% select(c("var", "estimate",
"conf.low", "conf.high",
"p.adjust")) %>%
mutate(Group=ifelse(estimate >0, levels(mdat$Group)[1],
levels(mdat$Group)[2])) %>%
arrange(desc(estimate))

step4: draw three figures

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# left barplot
cbbPalette <- c("#E69F00", "#56B4E9")
abun.bar$variable <- factor(abun.bar$variable, levels = rev(diff_t_mean$var))
p1 <- ggplot(abun.bar, aes(x=variable, y=Mean, fill=Group)) +
scale_x_discrete(limits = levels(diff_t_mean$var)) +
coord_flip() +
xlab("") +
ylab("Mean proportion (%)") +
theme(panel.background = element_rect(fill = 'transparent'),
panel.grid = element_blank(),
axis.ticks.length = unit(0.4,"lines"),
axis.ticks = element_line(color='black'),
axis.line = element_line(colour = "black"),
axis.title.x=element_text(colour='black', size=12,face = "bold"),
axis.text=element_text(colour='black',size=10,face = "bold"),
legend.title=element_blank(),
legend.text=element_text(size=12,face = "bold",colour = "black",
margin = margin(r = 20)),
legend.position = c(-1,-0.1),
legend.direction = "horizontal",
legend.key.width = unit(0.8,"cm"),
legend.key.height = unit(0.5,"cm"))

for (i in 1:(nrow(diff_t_mean) - 1)){
p1 <- p1 + annotate('rect', xmin = i+0.5, xmax = i+1.5, ymin = -Inf, ymax = Inf,
fill = ifelse(i %% 2 == 0, 'white', 'gray95'))
}

p1 <- p1 + geom_bar(stat = "identity", position = "dodge",width = 0.7,colour = "black") +
scale_fill_manual(values=cbbPalette)


## right scatterplot
diff_t_mean$var <- factor(diff_t_mean$var,levels = levels(abun.bar$variable))
diff_t_mean$p.adjust <- signif(diff_t_mean$p.adjust, 3)
diff_t_mean$p.adjust <- as.character(diff_t_mean$p.adjust)

p2 <- ggplot(diff_t_mean, aes(x=var, y=estimate, fill = Group)) +
theme(panel.background = element_rect(fill = 'transparent'),
panel.grid = element_blank(),
axis.ticks.length = unit(0.4,"lines"),
axis.ticks = element_line(color='black'),
axis.line = element_line(colour = "black"),
axis.title.x=element_text(colour='black', size=12,face = "bold"),
axis.text=element_text(colour='black',size=10,face = "bold"),
axis.text.y = element_blank(),
legend.position = "none",
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 15,face = "bold",colour = "black",hjust = 0.5)) +
scale_x_discrete(limits = levels(diff_t_mean$var)) +
coord_flip() +
xlab("") +
ylab("Difference in mean proportions (%)") +
labs(title="95% confidence intervals")

for (i in 1:(nrow(diff_t_mean) - 1)){
p2 <- p2 + annotate('rect', xmin = i+0.5, xmax = i+1.5, ymin = -Inf, ymax = Inf,
fill = ifelse(i %% 2 == 0, 'white', 'gray95'))
}


p2 <- p2 +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
position = position_dodge(0.8), width = 0.5, size = 0.5) +
geom_point(shape = 21,size = 3) +
scale_fill_manual(values=cbbPalette) +
geom_hline(aes(yintercept = 0), linetype = 'dashed', color = 'black')


p3 <- ggplot(diff_t_mean, aes(x=var, y=estimate, fill = Group)) +
geom_text(aes(y = 0,x = var),label = diff_t_mean$p.adjust,
hjust = 0,fontface = "bold",inherit.aes = FALSE,size = 3) +
geom_text(aes(x = nrow(diff.mean)/2 +0.5,y = 0.85),label = "P-value (corrected)",
srt = 90,fontface = "bold",size = 5) +
coord_flip() +
ylim(c(0,1)) +
theme(panel.background = element_blank(),
panel.grid = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank())


p <- p1 + p2 + p3 + plot_layout(widths = c(4, 6, 2))
#ggsave("stamp.pdf", p, width = 10,height = 4)

引用

  1. R-ggplot: 完美重现STAMP的结果图

参考文章如引起任何侵权问题,可以与我联系,谢谢。


------------- The End Thanks for reading --------