#Create test matrix
test = matrix(rnorm(200), 20, 10) test1:10, seq(1, 10, 2) = test1:10, seq(1, 10, 2) + 3 test11:20, seq(2, 10, 2) = test11:20, seq(2, 10, 2) + 2 test15:20, seq(2, 10, 2) = test15:20, seq(2, 10, 2) + 4 colnames(test) = paste("Test", 1:10, sep = "") rownames(test) = paste("Gene", 1:20, sep = "")
#Draw heatmaps
pheatmap(test) pheatmap(test, kmeans_k = 2) pheatmap(test, scale = "row", clustering_distance_rows = "correlation") pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50)) pheatmap(test, cluster_row = FALSE) pheatmap(test, legend = FALSE)
#Show text within cells
pheatmap(test, display_numbers = TRUE) pheatmap(test, display_numbers = TRUE, number_format = "%.1e")#以指数形式显示数字,若设置为“%.2f”则显示小数点后两位 pheatmap(test, display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test))) pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", "1e-4", "1e-3", "1e-2", "1e-1", "1"))
#Fix cell sizes and save to file with correct size
pheatmap(test, cellwidth = 15, cellheight = 12, main = "Example heatmap") pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf")
#Generate annotations for rows and columns
annotation_col = data.frame( CellType = factor(rep(c("CT1", "CT2"), 5)), Time = 1:5 ) rownames(annotation_col) = paste("Test", 1:10, sep = "")
annotation_row = data.frame( GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6))) ) rownames(annotation_row) = paste("Gene", 1:20, sep = "")
#Display row and color annotations
pheatmap(test, annotation_col = annotation_col) pheatmap(test, annotation_col = annotation_col, annotation_legend = FALSE) pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row)
#Change angle of text in the columns
pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row, angle_col = "45") pheatmap(test, annotation_col = annotation_col, angle_col = "0")
#Specify colors
ann_colors = list(
Time = c("white", "firebrick"), CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"), GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E") +)
pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors, main = "Title") pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row, annotation_colors = ann_colors) pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors2)
#Gaps in heatmaps
pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14)) pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14), cutree_col = 2)
#Show custom strings as row/col names
labels_row = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "Il10", "Il15", "Il1b")
pheatmap(test, annotation_col = annotation_col, labels_row = labels_row)
#Specifying clustering from distance matrix
drows = dist(test, method = "minkowski") dcols = dist(t(test), method = "minkowski") pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols)
#Modify ordering of the clusters using clustering callback option
callback = function(hc, mat){ sv = svd(t(mat))$v,1 dend = reorder(as.dendrogram(hc), wts = sv) as.hclust(dend) }
pheatmap(test, clustering_callback = callback)
library(limma) design = model.matrix(~Group) fit = lmFit(exp,design)#线性拟合 fit = eBayes(fit)#贝叶斯检验 deg = topTable(fit,coef = 2,number = Inf)#coef=2指对design的第二列进行分析 #topTable()默认参数为10,可输入参数number= Inf显示全部结果
identical(a,b)
为避免行名丢失,建议将行名作为一列加入数据框
tibble::column_to_rownames() tibble::rownames_to_column()#将行名变为一列
将差异分析结果和探针注释合在一起
merge()#顺序会发生变更 inner_join()#仍然保留按照P.Value排序的表格顺序
最终呈现结果:
limma输出结果--探针名(行名)--基因名(ids)--上下调(ifelse)--ENTREZID(bitr)
for_lable <- test %>% filter(abs(logFC) > 4&
-log10(P.value)
> -log10(0.000001)) #通过自定义阈值设置显示基因的数量 p + geom_point(size = 3,shape = 1,data = for_lable) + ggrepel::geom_lable_repel( aes(label = symbol), data = for_label, color = "black" )
library(tinyarray) draw_heatmap(n,Group) draw_pca(exp,Group,Style = "3D") draw_volcano(deg,pkg = 4,style = "ggplot2")#pkg = 4指单纯用limma作图 draw_boxplot(nsample(1:nrow(n),10),,Group)
输入数据:差异基因的ENTREZID
并非和SYMBOL一一对应,存在损失均为正常现象
Kyoto Encyclopedia of Genes and Genomes
是系统分析基因功能、基因组信息数据库,有助于研究者把基因及表达信息作为一个整体网络进行研究,以理解生物系统的高级功能和实用程序资源库著称
Gene Ontology
是一个在生物信息学领域中广泛使用的本体,提供了一个可具代表性的规范化的基因和基因产物特性的属于描绘或词义解释的工作平台
rm(list = ls())undefinedload(file = 'step4output.Rdata') library(clusterProfiler) library(ggthemes) library(org.Hs.eg.db) library(enrichplot)
gene_diff = deg$ENTREZIDdeg$change != "stable"
#⭐下面的三句都要注意物种
ekk <- enrichKEGG(gene = gene_diff,organism = 'hsa') #其他物种https://www.genome.jp/kegg/catalog/org_list.html ekk <- setReadable(ekk,OrgDb = org.Hs.eg.db,keyType = "ENTREZID") #如果ekk是空的,这句就会报错,因为没富集到任何通路,即无pvalue<0.05。#将ENTREZID更换为对应的基因。 ego <- enrichGO(gene = gene_diff,OrgDb= org.Hs.eg.db, ont = "ALL",readable = TRUE) #setReadable和readable = TRUE都是把富集结果表格里的基因名称转为symbol class(ekk)
#barplot可以换成dotplot
barplot(ego, split = "ONTOLOGY") + facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y") #将结果按照CC, MF,BP进行分类 barplot(ekk) #如果ekk中没有padj<0.05的通路,就会报错,因为只画padj<0.05,没有参数
#pvalue/qvalue:表征富集是否显著的参数,代表通路而非某一特定基因
##衡量每个通路中的基因在差异基因里是否足够多
#geneID:差异基因中哪些基因属于该通路
#GeneRatio:差异基因中属于该通路的数量/差异基因中被数据库收录的数量
#BgRatio:该通路中总计基因数/数据库中全部通路总计基因数
1* 调整logFC, pvalue和阈值,以改动差异基因数量
2* 不适用默认的padj(富集的),而是使用原始p值,在文章中写清即可
3* 更换富集方法,GSEA也可以作KEGG富集
4* 调整参数maxGSSize = 500,为默认参数,表示500个基因以上的通路不考虑,可以调高该值
#若基因symbol中多了空格,尝试使用trimws(deg$symbol)对空格进行去除
引用自生信技能树
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。