1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
| get_DiffProtein_limma <- function(dataset=ExprSet_LOG, group_name=subgrp[1:2], pair=FALSE, scale=FALSE, fc=1, Pval=0.05){
pheno <- pData(dataset) %>% filter(SubGroup%in%group_name) pheno$SubGroup <- factor(as.character(pheno$SubGroup), levels = group_name) pheno$PID <- factor(as.character(pheno$PID)) if(pair){ design <- model.matrix(~ pheno$SubGroup + pheno$PID) rownames(design) <- rownames(pheno) colnames(design) <- c("Intercept", paste(group_col, collapse = "-"), as.character(unique(pheno$PID)[-1])) }else{ design <- model.matrix( ~ 0 + pheno$SubGroup) rownames(design) <- rownames(pheno) colnames(design) <- group_name }
edata <- as.matrix(exprs(dataset)) exprSet <- edata[, colnames(edata)%in%rownames(pheno)] boxplot(exprSet) plotDensities(exprSet) if(scale){ require(edgeR) DGEList <- edgeR::DGEList( counts = exprSet, group = pheno$SubGroup) exprSet_norm <- edgeR::calcNormFactors(DGEList, method = "TMM") plotMDS(exprSet_norm, col=as.numeric(pheno$SubGroup)) }else{ exprSet_norm <- exprSet }
fit <- lmFit(exprSet_norm, design) if(pair){ fit2 <- eBayes(fit) qqt(fit2$t, df = fit2$df.prior+fit2$df.residual, pch = 16, cex = 0.2) abline(0,1) diff_gene <- topTable(fit2, number = Inf, adjust.method = 'BH', coef = 2) %>% rownames_to_column("GeneID") require(future.apply) plan(multiprocess, workers = 10) delta_value <- future_apply(exprSet, 1, function(x, y){ dat <- data.frame(value=x, y) %>% arrange(PID, SubGroup) %>% dplyr::select(PID, SubGroup, value) dat$SubGroup <- factor(dat$SubGroup, levels = group_name) dat_delta <- dat %>% group_by(PID, SubGroup) %>% summarise(mean_value=mean(value)) %>% mutate(delta=dplyr::first(mean_value) - dplyr::last(mean_value)) %>% ungroup() delta <- mean(dat_delta$delta) return(delta) }, pheno) %>% data.frame() %>% setNames("Delta") %>% rownames_to_column("GeneID") diff_gene_delta <- inner_join(diff_gene, delta_value, by = "GeneID") }else{ group <- paste(group_name, collapse = "-") if(group%in%"NC-PC"){ contrast <- makeContrasts(contrasts = "NC-PC", levels = design) }else if(group%in%"NC-PT"){ contrast <- makeContrasts(contrasts = "NC-PT", levels = design) }else if(group%in%"PC-PT"){ contrast <- makeContrasts(contrasts = "PC-PT", levels = design) } print(contrast) fit2 <- contrasts.fit(fit, contrast) fit2 <- eBayes(fit2) qqt(fit2$t, df = fit2$df.prior+fit2$df.residual, pch = 16, cex = 0.2) abline(0,1) diff_gene <- topTable(fit2, number = Inf, adjust.method = 'BH', coef = 1) %>% rownames_to_column("GeneID") require(future.apply) plan(multiprocess, workers = 10) delta_value <- future_apply(exprSet, 1, function(x, y){ dat <- data.frame(value=x, y) %>% arrange(SubGroup) %>% dplyr::select(SubGroup, value) dat$Type <- factor(dat$SubGroup, levels = group_name) dat_delta <- dat %>% group_by(SubGroup) %>% summarise(mean_value=mean(value)) %>% mutate(delta=dplyr::first(mean_value) - dplyr::last(mean_value)) %>% ungroup() delta <- mean(dat_delta$delta) return(delta) }, pheno) %>% data.frame() %>% setNames("Delta") %>% rownames_to_column("GeneID") diff_gene_delta <- inner_join(diff_gene, delta_value, by = "GeneID") } pl <- data.frame(edata)[rownames(data.frame(edata))%in%diff_gene_delta$GeneID[1], , F] %>% t() %>% data.frame() %>% setNames("Gene") %>% rownames_to_column("SampleID") %>% inner_join(pheno%>%rownames_to_column("SampleID"), by = "SampleID") %>% ggplot(aes(x=SubGroup, y=Gene))+ geom_boxplot()+ labs(y=diff_gene$GeneID[1], x="")+ ggpubr::stat_compare_means(method = "wilcox.test", paired = pair, comparisons = list(group_name))+ theme_bw() print(pl) for(i in 1:5){ datCoe <- fit$coefficients[diff_gene_delta$GeneID[i], ] deltaMean <- as.numeric(datCoe[group_name[2]] - datCoe[group_name[1]]) logFC <- diff_gene_delta[diff_gene_delta$GeneID%in%diff_gene_delta$GeneID[i], "logFC"] cat(paste0(diff_gene_delta$GeneID[i], ": ", paste(rev(group_name), collapse = "-"), " = ", signif(deltaMean, 3))) cat("\n") cat(paste0(diff_gene_delta$GeneID[i], ": ", "logFC = ", signif(logFC, 3))) cat("\n") }
if((deltaMean > 0 & logFC > 0) | (deltaMean < 0 & logFC < 0)){ diff_gene_delta[which(diff_gene_delta$logFC >= fc & diff_gene_delta$adj.P.Val < Pval), "Enrichment"] <- group_name[2] diff_gene_delta[which(diff_gene_delta$logFC <= -fc & diff_gene_delta$adj.P.Val < Pval), "Enrichment"] <- group_name[1] diff_gene_delta[which(abs(diff_gene_delta$logFC) < fc | diff_gene_delta$adj.P.Val >= Pval), "Enrichment"] <- "Nonsignif" }else if((deltaMean > 0 & logFC < 0) | (deltaMean < 0 & logFC > 0)){ diff_gene_delta[which(diff_gene_delta$logFC >= fc & diff_gene_delta$adj.P.Val < Pval), "Enrichment"] <- group_name[1] diff_gene_delta[which(diff_gene_delta$logFC <= -fc & diff_gene_delta$adj.P.Val < Pval), "Enrichment"] <- group_name[2] diff_gene_delta[which(abs(diff_gene_delta$logFC) < fc | diff_gene_delta$adj.P.Val >= Pval), "Enrichment"] <- "Nonsignif" } dat_status <- table(pheno$SubGroup) dat_status_number <- as.numeric(dat_status) dat_status_name <- names(dat_status) diff_gene_delta$Block <- paste(paste(dat_status_number[1], dat_status_name[1], sep = "_"), "vs", paste(dat_status_number[2], dat_status_name[2], sep = "_")) res <- diff_gene_delta %>% dplyr::select(GeneID, Block, logFC, adj.P.Val, Enrichment, everything()) %>% arrange(adj.P.Val, logFC) print(dim(res %>% filter(Enrichment != "Nonsignif"))) return(res) }
NC_PC_LOG2Impute <- get_DiffProtein_limma( dataset = ExprSet_LOG2Impute, group_name = subgrp[1:2], pair = FALSE, scale = FALSE, fc = 1, Pval = 0.05) write.csv(NC_PC_LOG2Impute, "NC_PC_limma_Mass_LOG2Impute.csv", row.names = F)
NC_PT_LOG2Impute <- get_DiffProtein_limma( dataset = ExprSet_LOG2Impute, group_name = subgrp[c(1,3)], pair = FALSE, scale = FALSE, fc = 1, Pval = 0.05) write.csv(NC_PT_LOG2Impute, "NC_PT_limma_Mass_LOG2Impute.csv", row.names = F)
PC_PT_LOG2Impute <- get_DiffProtein_limma( dataset = ExprSet_LOG2Impute, group_name = subgrp[c(2, 3)], pair = FALSE, scale = FALSE, fc = 1, Pval = 0.05) write.csv(PC_PT_LOG2Impute, "PC_PT_limma_Mass_LOG2Impute.csv", row.names = F)
|