前些天我們的《生信菜鳥團》公眾號的一個筆記:一起畫個圈圈看差異基因,吸引了大家的注意,有評論說其實沒有必要把不同染色體差異基因使用圈圈圖展示,簡簡單單火山圖更好。那我們就比較一下吧:
我們仍然是以airway為例子
加載airway數(shù)據集并轉換為表達矩陣,代碼如下所示:
# 1.構建表達矩陣 ----------------------------------------------------------------
dir.create("./data")
# 魔幻操作,一鍵清空
rm(list = ls())
options(stringsAsFactors = F)
# BiocManager::install('airway')
# 加載airway數(shù)據集并轉換為表達矩陣
library(airway,quietly = T)
data(airway)
class(airway)
rawcount <- assay(airway)
colnames(rawcount)
# 查看表達譜
rawcount[1:4,1:4]
# 去除前的基因表達矩陣情況
dim(rawcount)
# 獲取分組信息
group_list <- colData(airway)$dex
group_list
# 過濾在至少在75%的樣本中都有表達的基因
keep <- rowSums(rawcount>0) >= floor(0.75*ncol(rawcount))
table(keep)
filter_count <- rawcount[keep,]
filter_count[1:4,1:4]
dim(filter_count)
接下來就可以對這個轉錄組測序表達量矩陣變量 filter_count 和其分組信息變量 group_list 走DESeq2差異分析流程啦。
簡簡單單的DESeq2差異分析
# 加載包
library(DESeq2)
# 第一步,構建DESeq2的DESeq對象
colData <- data.frame(row.names=colnames(exprSet),group_list=group_list)
dds <- DESeqDataSetFromMatrix(countData = exprSet,colData = colData,design = ~ group_list)
# 第二步,進行差異表達分析
dds2 <- DESeq(dds)
# 提取差異分析結果,trt組對untrt組的差異分析結果
tmp <- results(dds2,contrast=c("group_list","trt","untrt"))
DEG_DESeq2 <- as.data.frame(tmp[order(tmp$padj),])
head(DEG_DESeq2)
# 去除差異分析結果中包含NA值的行
DEG_DESeq2 = na.omit(DEG_DESeq2)
差異分析很簡單的, 但是需要注釋一下上下調基因的屬性,以及基因的染色體坐標:
# 篩選上下調,設定閾值
fc_cutoff <- 1
fdr <- 0.05
DEG_DESeq2$regulated <- "normal"
loc_up <- intersect(which(DEG_DESeq2$log2FoldChange>log2(fc_cutoff)),
which(DEG_DESeq2$padj<fdr))
loc_down <- intersect(which(DEG_DESeq2$log2FoldChange< (-log2(fc_cutoff))),
which(DEG_DESeq2$padj<fdr))
DEG_DESeq2$regulated[loc_up] <- "up"
DEG_DESeq2$regulated[loc_down] <- "down"
table(DEG_DESeq2$regulated)
head(DEG_DESeq2)
library(AnnoProbe)
ag=annoGene(rownames(DEG_DESeq2),
ID_type = 'ENSEMBL',species = 'human'
)
head(ag)
DEG_DESeq2$ENSEMBL=rownames(DEG_DESeq2)
deg_anno=merge(ag,DEG_DESeq2,by='ENSEMBL')
head(deg_anno)
# 保存
write.table(deg_anno,"./data/DEG_DESeq2_all.xls",
row.names = F,sep="\t",quote = F)
save(deg_anno, file = "./data/Step03-DESeq2_nrDEG.Rdata")
畫圈圈圖
直接參考《生信菜鳥團》公眾號的筆記:一起畫個圈圈看差異基因,代碼如下所示:
rm(list = ls())
options(stringsAsFactors = F)
library(circlize)
load(file = "./data/Step03-DESeq2_nrDEG.Rdata")
load(file = "./data/Step01-airwayData.Rdata")
head(deg_anno)
table(deg_anno$chr)
neworder=c(paste0('chr',1:22),'chrX','chrY','chrM')
deg_anno$chr=factor(deg_anno$chr,neworder,ordered = T)
table(deg_anno$chr)
table(deg_anno$regulated)
bed1=deg_anno[deg_anno$regulated=='up',c('chr','start','end','log2FoldChange')]
colnames(bed1) = c("chr","start","end","value1")
bed2=deg_anno[deg_anno$regulated=='down',c('chr','start','end','log2FoldChange')]
colnames(bed2) = c("chr","start","end","value1")
####3.初始化基因組圈圖####
circos.initializeWithIdeogram(species = "hg38",plotType = NULL)
circos.track(ylim = c(0, 1), panel.fun = function(x, y) {
chr = CELL_META$sector.index
xlim = CELL_META$xlim
ylim = CELL_META$ylim
circos.rect(xlim[1], 0, xlim[2], 1, col = rand_color(1))
circos.text(mean(xlim), mean(ylim), chr, cex = 0.7, col = "white",
facing = "inside", niceFacing = TRUE)
}, track.height = 0.15, bg.border = NA)
####4.添加基因基因的圈圈####
#黑色下調,紅上調
bed_list = list(bed2,bed1)
circos.genomicTrack(bed_list,
panel.fun = function(region, value, ...) {
i = getI(...)
circos.genomicPoints(region, value, pch = 16, cex = 0.5, col = i, ...)
})
#清除
circos.clear()
如下所示:

圈圈圖如果是火山圖
代碼超級簡單:
library(EnhancedVolcano)
colnames(deg_anno)
EnhancedVolcano(deg_anno,
lab = deg_anno$SYMBOL,
x = 'log2FoldChange',
y = 'padj')
出圖如下所示:

火山圖文末小調查
你喜歡哪種可視化方式呢?火山圖還是圈圈圖?