质谱数据组间差异结果展示

前言

对质谱数据做单变量的组间差异检验后,我们获得了检验的结果。常用展示结果的方法有图和表的方式,我们在文献中常用图来展示,而表可作为附表补充图。更多知识分享请到 https://zouhua.top/

本文讲使用火山图,韦恩图和扩展火山图展示组间差异结果,最后再结合热图展示所有的组间差异蛋白的丰度分布情况。

Importing data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
library(dplyr)
library(tibble)
library(convert)
library(ggplot2)
library(ggrepel)
library(data.table)

phen <- read.csv("phenotype_20210625.csv")
NC_PC_DEP <- fread("NC_PC_limma_Mass_LOG2Impute.csv")
NC_PT_DEP <- fread("NC_PT_limma_Mass_LOG2Impute.csv")
ExprSet <- readRDS("Mass_Proteins_filtered_Normal_LOG2Impute.RDS")

subgrp <- c("NC", "PC", "PT")
grp.col <- c("#568875", "#73FAFC", "#EE853D")

Volcano Function

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# function
volcanofun <- function(datset=NC_PC_DEP,
genelist=c("FIBB","FIBA","FA5","EGF","OIT3","IBP2","GANAB","RELN","LYSC","CADH1"),
group_name=subgrp[1:2],
group_col=grp.col[1:2],
pval=0.05,
fc=1){

# datset=NC_PC_DEP
# genelist=c("FIBB","FIBA","FA5","EGF","OIT3","IBP2","GANAB","RELN","LYSC","CADH1")
# group_name=subgrp[1:2]
# group_col=grp.col[1:2]
# pval=0.05
# fc=1

dat <- datset %>%
mutate(color=factor(Enrichment,
levels = c(group_name, "Nonsignif")))
# print(table(dat$color))
dat_status <- table(dat$color)
dat_status_number <- as.numeric(dat_status)
dat_status_name <- names(dat_status)
legend_label <- c(paste0(dat_status_name[1], " (", dat_status_number[1], ")"),
paste0(dat_status_name[2], " (", dat_status_number[2], ")"),
paste0("Nonsignif", " (", dat_status_number[3], ")"))

dat.signif <- subset(dat, adj.P.Val < pval & abs(logFC) > fc) %>%
filter(GeneID%in%genelist)
print(table(dat.signif$color))

group_col_new <- c(group_col, "#979797")
group_name_new <- levels(dat$color)

xlabel <- paste0("log2(", paste(group_name, collapse="/"), ")")

# Make a basic ggplot2 object with x-y values
pl <- ggplot(dat, aes(x = -logFC, y = -log10(adj.P.Val), color = color))+
geom_point(size = 2, alpha = 1, stroke = 1)+
scale_color_manual(name = NULL,
values = group_col_new,
labels = c(legend_label, "Nonsignif"))+
xlab(xlabel) +
ylab(expression(-log[10]("adjusted p-value")))+
geom_hline(yintercept=-log10(pval), alpha=.8, linetype=2, size=.7)+
geom_vline(xintercept=fc, alpha=.8, linetype=2, size=.7)+
geom_vline(xintercept=-fc, alpha=.8, linetype=2, size=.7)+
geom_text_repel(data = dat.signif,
aes(label = GeneID),
size = 4,
max.overlaps = getOption("ggrepel.max.overlaps", default = 80),
segment.linetype = 1,
segment.curvature = -1e-20,
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"),
arrow = arrow(length = unit(0.005, "npc")),
color = "black", # text color
bg.color = "white", # shadow color
bg.r = 0.15)+
annotate("text", x=min(dat$logFC), y=-log10(pval), label=pval, size=6, color="red")+
annotate("text", x=fc, y=0, label=fc, size=6, color="red")+
annotate("text", x=-fc, y=0, label=-fc, size=6, color="red")+
scale_y_continuous(trans = "log1p")+
guides(color=guide_legend(override.aes = list(size = 3)))+
theme_bw()+
theme(axis.title = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
text = element_text(size = 8, color = "black", family="serif"),
panel.grid = element_blank(),
#legend.position = "right",
legend.position = c(.15, .1),
legend.key.height = unit(0.6,"cm"),
legend.text = element_text(face = "bold", color = "black", size = 8),
strip.text = element_text(face = "bold", size = 14))
return(pl)
}

Gene_boxplot <- function(datset=ExprSet,
genelist=c("FIBB","FIBA","FA5","EGF","OIT3","IBP2","GANAB","RELN","LYSC","CADH1"),
group_name=subgrp[1:2],
group_col=grp.col[1:2]){

# datset=ExprSet
# genelist=c("FIBB","FIBA","FA5","EGF","OIT3","IBP2","GANAB","RELN","LYSC","CADH1")
# group_name=subgrp[1:2]
# group_col=grp.col[1:2]

pheno <- pData(datset) %>%
filter(SubGroup%in%group_name)
pheno$SubGroup <- factor(as.character(pheno$SubGroup), levels = group_name)


edata <- data.frame(exprs(datset)) %>%
dplyr::select(rownames(pheno)) %>%
rownames_to_column("GeneID") %>%
filter(GeneID%in%genelist) %>%
column_to_rownames("GeneID")

mdat <- pheno %>% dplyr::select(SubGroup) %>%
rownames_to_column("SampleID") %>%
inner_join(t(edata) %>% data.frame() %>% rownames_to_column("SampleID"), by = "SampleID") %>%
column_to_rownames("SampleID")

plotdata <- mdat %>% tidyr::gather(key="geneID", value="value", -SubGroup) %>%
mutate(SubGroup=factor(SubGroup, levels = group_name))

plotdata$geneID <- factor(plotdata$geneID, levels = genelist)

pl <- ggplot(plotdata, aes(x = SubGroup, y = value, fill= SubGroup))+
stat_boxplot(geom = "errorbar", width = 0.15,
position = position_dodge(0.4)) +
geom_boxplot(width = 0.4,
outlier.colour = "black",
outlier.shape = 21,
outlier.size = .5)+
scale_fill_manual(values = group_col)+
#scale_y_continuous(labels = scales::scientific)+
facet_wrap(facets = "geneID", scales = "free_y", nrow = 2)+
labs(x="", y="Relative Abundance (Mass Spectrometry)")+
guides(fill=F)+
theme_classic()+
theme(axis.title = element_text(color = "black", size = 12),
axis.text.x = element_text(color = "black", size = 10, hjust = .5, vjust = .5),
text = element_text(size = 8, color = "black", family="serif"),
panel.grid = element_blank(),
strip.text = element_text(face = "bold", size = 12))

return(pl)
}

NC_PC_volcano <- volcanofun(datset=NC_PC_DEP,
genelist=c("FIBB","FIBA","FA5","OIT3","IBP2","GANAB","RELN","LYSC","CADH1","EGF"),
group_name=subgrp[1:2],
group_col=grp.col[1:2])

NC_PC_volcano
ggsave("NC_PC_Mass_volcano.pdf", NC_PC_volcano, width = 8, height = 6)
NC_PC_boxplot <- Gene_boxplot(datset=ExprSet,
genelist=c("FIBB","FIBA","FA5","OIT3","IBP2","GANAB","RELN","LYSC","CADH1","EGF"),
group_name=subgrp[1:2],
group_col=grp.col[1:2])
NC_PC_boxplot
ggsave("NC_PC_Mass_SpecificDEP_boxplot.pdf", NC_PC_boxplot, width = 8, height = 6)

Venn plot for Overlap DEGs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
get_DEG_List <- function(datset1=NC_PC_DEP,
datset2=NC_PT_DEP){
# datset1=NC_PC_DEP
# datset2=NC_PT_DEP

diff_gene1 <- datset1 %>% filter(Enrichment != "Nonsignif")
diff_gene2 <- datset2 %>% filter(Enrichment != "Nonsignif")
res <- list(diff1=diff_gene1$GeneID,
diff2=diff_gene2$GeneID)
return(res)
}

diff_gene_list <- get_DEG_List(datset1=NC_PC_DEP, datset2=NC_PT_DEP)

# eulerr
require(eulerr)
diff_venn <- euler(diff_gene_list, shape = "ellipse")

# pdf(file = "Mass_DEPs_Venn.pdf", width = 5, height = 4)
plot(diff_venn,
fills = alpha(grp.col[2:3], 0.5),
labels = c("NC vs PC", "NC vs PT"),
quantities = TRUE,
col = "black")
# dev.off()

quadrant plot

A quadrant plot shows the common or different Proteins between two comparisons.

  • 1st quadrant: the Proteins up-regulate in both PC and PT

  • 2nd quadrant: the Proteins up-regulate in PC but down-regulate in PT

  • 3rd quadrant: the Proteins down-regulate in both PC and PT

  • 4th quadrant: the Proteins down-regulate in PC but up-regulate in PT

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
quadrant_plot <- function(datset1=NC_PC_DEP, 
datset2=NC_PT_DEP,
thresholdFC=3,
group_name=subgrp,
axis_len=12){

# datset1=NC_PC_DEP
# datset2=NC_PT_DEP
# thresholdFC=1
# group_name=subgrp
# axis_len=5

overlap_gene <- union(datset1$GeneID, datset2$GeneID)
dat_x <- datset1 %>% filter(GeneID%in%overlap_gene)
dat_y <- datset2 %>% filter(GeneID%in%overlap_gene)

# the Differential Genes
dat_x_signif <- subset(dat_x, Enrichment != "Nonsignif")
dat_y_signif <- subset(dat_y, Enrichment != "Nonsignif")

# enriched in NC and Disease in both
common_signif <- intersect(dat_x_signif$GeneID, dat_y_signif$GeneID)

# enriched in NC and Disease in each
dat_x_sig_only <- setdiff(dat_x_signif$GeneID, common_signif)
dat_y_sig_only <- setdiff(dat_y_signif$GeneID, common_signif)
non_signif <- setdiff(overlap_gene, c(common_signif, dat_x_sig_only, dat_y_sig_only))


gene_type_name <- c(paste(group_name[2:3], collapse = "/"),
paste(group_name[2], "only"),
paste(group_name[3], "only"),
"Nonsignif")

gene_type_df <- rbind(data.frame(GeneID=common_signif, group=gene_type_name[1]),
data.frame(GeneID=dat_x_sig_only, group=gene_type_name[2]),
data.frame(GeneID=dat_y_sig_only, group=gene_type_name[3]),
data.frame(GeneID=non_signif, group=gene_type_name[4]))
mdat <- inner_join(dat_x %>% dplyr::select(GeneID, logFC) %>% dplyr::rename(xvalue=logFC),
dat_y %>% dplyr::select(GeneID, logFC) %>% dplyr::rename(yvalue=logFC),
by = "GeneID") %>%
inner_join(gene_type_df, by="GeneID") %>%
mutate(group=factor(group, levels = rev(gene_type_name)))

print(table(mdat$group))

common_signif_gene <- mdat %>% filter(GeneID%in%common_signif)

common_signif_gene <- mdat %>% filter(GeneID%in%common_signif) %>%
mutate(GeneID_v2=ifelse(abs(xvalue) > thresholdFC | abs(yvalue) > thresholdFC, GeneID, NA))

common_signif_gene_v2 <- na.omit(common_signif_gene)
print(table(common_signif_gene_v2$group))

require(magrittr)
# constants
axis_begin <- -axis_len
axis_end <- axis_len
total_ticks <- 8

# point to plot
my_point <- data.frame(x=1, y=1)
# chart junk data
tick_frame <- data.frame(ticks = seq(axis_begin, axis_end, by=2), zero=0) %>%
subset(ticks != 0)
tick_frame <- data.frame(ticks = seq(axis_begin, axis_end, by=2), zero=0) %>%
subset(ticks != 0)
lab_frame <- data.frame(lab = seq(axis_begin, axis_end, 2), zero = 0) %>%
subset(lab != 0)
tick_sz <- (tail(lab_frame$lab, 1) - lab_frame$lab[1]) / 128

x_title <- paste(group_name[1], "vs", group_name[2])
y_title <- paste(group_name[1], "vs", group_name[3])

pl <- ggplot(mdat)+
geom_segment(x = 0, xend = 0,
y = lab_frame$lab[1], yend = tail(lab_frame$lab, 1),
size = 0.5) +
geom_segment(y = 0, yend = 0,
x = lab_frame$lab[1], xend = tail(lab_frame$lab, 1),
size = 0.5) +
geom_segment(data = tick_frame,
aes(x = ticks, xend = ticks,
y = zero, yend = zero + tick_sz)) +
geom_segment(data = tick_frame,
aes(x = zero, xend = zero + tick_sz,
y = ticks, yend = ticks)) +
geom_text(data=lab_frame, aes(x=lab, y=zero, label=lab),
family = 'Times', vjust=1.5) +
geom_text(data=lab_frame, aes(x=zero, y=lab, label=lab),
family = 'Times', hjust=1.5) +
annotate("text", x = axis_len-1, y = -.7, color = "black", size=3,
label = paste0("log2(", x_title, ")"))+
annotate("text", x = -.7, y = axis_len-1, color = "black", size=3, angle=90,
label = paste0("log2(", y_title, ")"))+
geom_point(aes(x = xvalue, y = yvalue, color=group), size = 1.5)+
geom_text_repel(data = common_signif_gene,
aes(x=xvalue, y=yvalue, label = GeneID_v2),
size = 5,
max.overlaps = getOption("ggrepel.max.overlaps", default = 80),
segment.linetype = 1,
segment.curvature = -1e-20,
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"),
arrow = arrow(length = unit(0.005, "npc")),
# color = "white", # text color
# bg.color = "grey30", # shadow color
bg.r = 0.15)+
scale_color_manual(values = c("#A6A6A6", "#7BBDE0", "#B67FD0", "#FDC361"))+
guides(color=guide_legend(title = NULL, keywidth=.9, keyheight=.9, linetype=2))+
theme_void()+
theme(panel.grid = element_blank(),
text = element_text(size = 8, color = "black", family="serif"),
legend.text=element_text(size=11, color = "black"),
legend.position = c(.7, .2),
legend.justification = c(0, 0),
legend.background = element_rect(linetype=2, color = "black", fill="white"))

return(pl)
}

quadrantpl <- quadrant_plot(datset1=NC_PC_DEP,
datset2=NC_PT_DEP,
thresholdFC=3,
group_name=subgrp,
axis_len=8)
quadrantpl
ggsave("Mass_DEPs_quadrant.pdf", quadrantpl, width = 6, height = 6)

Heatmap Of DEGs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
heatFun <- function(datset1=NC_PC_DEP, 
datset2=NC_PT_DEP,
thresholdFC=1,
ExprSet=ExprSet,
group_name=subgrp){

# datset1=NC_PC_DEP
# datset2=NC_PT_DEP
# thresholdFC=1
# ExprSet=ExprSet
# group_name=subgrp


diff_gene1 <- datset1 %>% filter(Enrichment != "Nonsignif") %>%
filter(abs(logFC) > thresholdFC)
diff_gene2 <- datset2 %>% filter(Enrichment != "Nonsignif") %>%
filter(abs(logFC) > thresholdFC)

union_gene <- Reduce(union, list(diff_gene1$GeneID, diff_gene2$GeneID))

pheno <- pData(ExprSet) %>% data.frame() %>%
rownames_to_column("SampleID") %>%
filter(SubGroup%in%group_name) %>%
mutate(SubGroup=factor(SubGroup, levels = group_name)) %>%
arrange(SubGroup) %>%
column_to_rownames("SampleID")

edata <- exprs(ExprSet) %>% data.frame() %>%
rownames_to_column("geneid") %>%
filter(geneid%in%union_gene) %>%
dplyr::select(c("geneid", rownames(pheno))) %>%
column_to_rownames("geneid")

# scale data: z-score
scale_rows <- function (x) {
m = apply(x, 1, mean, na.rm = T)
s = apply(x, 1, sd, na.rm = T)
return((x - m)/s)
}
edata_scaled <- t(scale_rows(edata))
require(circlize)
col_fun <- colorRamp2(c(round(range(edata_scaled)[1]), 0,
round(range(edata_scaled)[2])),
c("blue", "white", "red"))
# row split
dat_status <- table(pheno$SubGroup)
dat_status_number <- as.numeric(dat_status)
dat_status_name <- names(dat_status)
row_split <- c()
for (i in 1:length(dat_status_number)) {
row_split <- c(row_split, rep(i, dat_status_number[i]))
}
require(ComplexHeatmap)
pl <- Heatmap(
edata_scaled,
#col = col_fun,
cluster_rows = FALSE,
row_order = rownames(pheno),
show_column_names = FALSE,
show_row_names = FALSE,
row_names_gp = gpar(fontsize = 12),
row_names_side = "right",
row_dend_side = "left",
column_title = NULL,
heatmap_legend_param = list(
title = "Relative Abundance\nZscore",
title_position = "topcenter",
border = "black",
legend_height = unit(10, "cm"),
direction = "horizontal"),
row_split = row_split,
left_annotation = rowAnnotation(foo = anno_block(gp = gpar(fill = 2:4),
labels = group_name,
labels_gp = gpar(col = "black", fontsize = 12))),
column_km = 3
)
return(pl)
}

heatFun(datset1=NC_PC_DEP,
datset2=NC_PT_DEP,
thresholdFC=1,
ExprSet=ExprSet,
group_name=subgrp)

结果图如下

systemic information

1
sessionInfo()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
R version 4.0.2 (2020-06-22)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: CentOS Linux 8 (Core)

Matrix products: default
BLAS/LAPACK: /disk/share/anaconda3/lib/libopenblasp-r0.3.10.so

locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] stats4 parallel stats graphics grDevices utils datasets methods base

other attached packages:
[1] ggrepel_0.9.1.9999 SummarizedExperiment_1.20.0 GenomicRanges_1.42.0 GenomeInfoDb_1.26.4
[5] IRanges_2.24.1 S4Vectors_0.28.1 MatrixGenerics_1.2.1 matrixStats_0.58.0
[9] data.table_1.14.0 umap_0.2.7.0 vegan_2.5-6 lattice_0.20-41
[13] permute_0.9-5 Rtsne_0.15 convert_1.64.0 marray_1.68.0
[17] limma_3.46.0 Biobase_2.50.0 BiocGenerics_0.36.0 ggplot2_3.3.3
[21] tibble_3.1.0 dplyr_1.0.5

loaded via a namespace (and not attached):
[1] nlme_3.1-150 bitops_1.0-6 tools_4.0.2 backports_1.2.1 utf8_1.2.1
[6] R6_2.5.0 DBI_1.1.1 mgcv_1.8-34 colorspace_2.0-0 withr_2.4.1
[11] tidyselect_1.1.0 curl_4.3 compiler_4.0.2 DelayedArray_0.16.3 scales_1.1.1
[16] askpass_1.1 digest_0.6.27 foreign_0.8-81 rmarkdown_2.7 rio_0.5.16
[21] XVector_0.30.0 pkgconfig_2.0.3 htmltools_0.5.1.1 rlang_0.4.10 readxl_1.3.1
[26] generics_0.1.0 jsonlite_1.7.2 zip_2.1.1 car_3.0-10 RCurl_1.98-1.3
[31] magrittr_2.0.1 GenomeInfoDbData_1.2.4 Matrix_1.3-2 Rcpp_1.0.6 munsell_0.5.0
[36] fansi_0.4.2 abind_1.4-5 reticulate_1.18 lifecycle_1.0.0 stringi_1.4.6
[41] yaml_2.2.1 edgeR_3.32.1 carData_3.0-4 MASS_7.3-53.1 zlibbioc_1.36.0
[46] grid_4.0.2 forcats_0.5.0 crayon_1.4.1 haven_2.3.1 splines_4.0.2
[51] hms_1.0.0 locfit_1.5-9.4 knitr_1.31 pillar_1.6.0 ggpubr_0.4.0
[56] ggsignif_0.6.0 glue_1.4.2 evaluate_0.14 vctrs_0.3.7 cellranger_1.1.0
[61] gtable_0.3.0 openssl_1.4.3 purrr_0.3.4 tidyr_1.1.3 assertthat_0.2.1
[66] xfun_0.20 openxlsx_4.2.3 broom_0.7.6 RSpectra_0.16-0 rstatix_0.7.0

Reference

  1. Core functional nodes and sex-specific pathways in human ischaemic and dilated cardiomyopathy

参考文章如引起任何侵权问题,可以与我联系,谢谢。


------------- The End Thanks for reading --------