生信技能树学习笔记
DEG 差异基因
rm(list = ls())load(file = "step2output.Rdata")#差异分析,用limma包来做#需要表达矩阵和Group,不需要改library(limma)design=model.matrix(~Group)#构建模型矩阵fit=lmFit(exp,design)#线性拟合fit=eBayes(fit)#贝叶斯检验deg=topTable(fit,coef=2,number = Inf)#提取贝叶斯检验结果 #为deg数据框添加几列#1.加probe_id列,把行名变成一列library(dplyr)deg <- mutate(deg,probe_id=rownames(deg))#新增一列,名字叫probe_id#2.加上探针注释ids = ids[!duplicated(ids$symbol),]#或者用ids=distinct(ids,symbol,.keep_all=T)#按照symbol这一列去重#其他去重方式在zz.去重方式.Rdeg <- inner_join(deg,ids,by="probe_id")#添加nrow(deg) #3.加change列,标记上下调基因logFC_t=1p_t = 0.05k1 = (deg$adj.P.Val < p_t)&(deg$logFC < -logFC_t)k2 = (deg$adj.P.Val < p_t)&(deg$logFC > logFC_t)deg <- mutate(deg,change = ifelse(k1,"down",ifelse(k2,"up","stable")))table(deg$change)#4.加ENTREZID列,用于富集分析(symbol转entrezid,然后inner_join)library(clusterProfiler)library(org.Hs.eg.db)s2e <- bitr(deg$symbol, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)#人类数据库#其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDbdeg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))save(Group,deg,logFC_t,p_t,gse_number,file = "step4output.Rdata") |
---|