1 下載cellphonedb官網(wǎng)測試數(shù)據(jù),并運行軟件cellphonedb官網(wǎng)下載測試數(shù)據(jù) curl https://raw./Teichlab/cellphonedb/master/in/example_data/test_counts.txt --output test_counts.txt curl https://raw./Teichlab/cellphonedb/master/in/example_data/test_meta.txt --output test_meta.txt
Python運行軟件cellphonedb cd cpdb-venv source cpdb-venv/bin/activate cellphonedb method statistical_analysis test_meta.txt test_counts.txt --iterations=10 --threads=20
查看測試文件 data_path <- 'data' test_meta <- read.delim(file.path(data_path, 'test_meta.txt'), header=T, sep='\t') table(test_meta$cell_type) ## ## Myeloid NKcells_0 NKcells_1 Tcells ## 1 5 3 1
2 處理cellphonedb結(jié)果文件# Offer downloadable file function library(tidyverse) library(DT) create_dt <- function(x){ DT::datatable(x, extensions = 'Buttons', options = list(dom = 'Blfrtip', buttons = c('csv', 'excel', 'pdf'), lengthMenu = list(c(10,25,50,-1), c(10,25,50,"All")))) }
2-1 讀取cellphonedb結(jié)果文件new_path <- 'out' mypvals <- read.delim(file.path(new_path,"pvalues.txt"), check.names = FALSE) mymeans <- read.delim(file.path(new_path, "means.txt"), check.names = FALSE) mysigmeans <- read.delim(file.path(new_path, "significant_means.txt"), check.names = FALSE) dim(mymeans) ## [1] 190 27 mymeans[1:4, 1:4] ## id_cp_interaction interacting_pair partner_a partner_b ## 1 CPI-SS028784FC6 HLA-DPA1_TNFSF9 simple:HLADPA1 simple:P41273 ## 2 CPI-SS00A8596B5 PVR_TNFSF9 simple:P15151 simple:P41273 ## 3 CPI-SS0B84DAE3D PVR_CD96 simple:P15151 simple:P40200 ## 4 CPI-SS0A8627ED6 PVR_CD226 simple:P15151 simple:Q15762
2-2 調(diào)整受體配體對順序,要求配體在前,受體在后# Rearrange data column sequence library(dplyr) order_sequence <- function(df){ da <- data.frame() for(i in 1:length(df$gene_a)){ sub_data <- df[i, ] if(sub_data$receptor_b=='False'){ if(sub_data$receptor_a=='True'){ old_names <- colnames(sub_data) my_list <- strsplit(old_names[-c(1:11)], split="\\|") my_character <- paste(sapply(my_list, '[[', 2L), sapply(my_list, '[[', 1L), sep='|') new_names <- c(names(sub_data)[1:4], 'gene_b', 'gene_a', 'secreted', 'receptor_b', 'receptor_a', "annotation_strategy", "is_integrin", my_character) sub_data = dplyr::select(sub_data, new_names) # print('Change sequence!!!') names(sub_data) <- old_names da = rbind(da, sub_data) } }else{ da = rbind(da, sub_data) } } return(da) }
# 受體配體對,要求一個為受體,一個為配體 df <- subset(mymeans, receptor_a == 'True' & receptor_b == 'False' | receptor_a == 'False' & receptor_b == 'True')
# 篩選單基因的受體配體對,待商榷! df <- df %>% dplyr::mutate(na_count = rowSums(is.na(df) | df == "")) %>% subset(na_count == 0) %>% dplyr::select(-na_count) dim(df) ## [1] 72 27
# 運行函數(shù),重排受體配體順序,并生成新列 means_order <- order_sequence(df) %>% tidyr::unite(Pairs, gene_a, gene_b) pvals_order <- order_sequence(mypvals) %>% tidyr::unite(Pairs, gene_a, gene_b)
# 保存文件 # write.table(means_order %>% dplyr::select(-interacting_pair), file.path(new_path,"means_order.txt"), sep = '\t', quote=F, row.names=F) # write.table(pvals_order %>% dplyr::select(-interacting_pair), file.path(new_path,"pvalues_order.txt"), sep = '\t', quote=F, row.names=F)
2-3 合并表達量文件和pvalue文件means_sub <- means_order[, c('Pairs', colnames(mymeans)[-c(1:11)])] pvals_sub <- pvals_order[, c('Pairs', colnames(mymeans)[-c(1:11)])] means_gather <- tidyr::gather(means_sub, celltype, mean_expression, names(means_sub)[-1]) pvals_gather <- tidyr::gather(pvals_sub, celltype, pval, names(pvals_sub)[-1]) mean_pval <- dplyr::left_join(means_gather, pvals_gather, by = c('Pairs', 'celltype')) create_dt(mean_pval)
# write.table(mean_pval, file.path(new_path,"mean_pval_order.txt"), sep = '\t', quote=F, row.names=F)
 2-4 提取顯著性表達的受體配體對# 至少在一組細胞類型兩兩組合中,pvalue顯著的受體配體對,認為是顯著性表達的受體配體對 a <- mean_pval %>% dplyr::select(Pairs, celltype, pval) %>% tidyr::spread(key=celltype, value=pval) sig_pairs <- a[which(rowSums(a<=0.05)!=0), ] dim(sig_pairs) ## [1] 72 17
# 保存顯著性表達的受體配體對 mean_pval_sub <- subset(mean_pval, Pairs %in% sig_pairs$Pairs) # write.table(mean_pval_sub, file.path(new_path,"mean_pval_sig.txt"), sep = '\t', quote=F, row.names=F)
3 可視化顯著性表達的受體配體對library(RColorBrewer) library(scales) library(ggplot2) library(cowplot)
# 比較均值和P值數(shù)據(jù)變換前后的分布 p1 <- mean_pval_sub %>% ggplot(aes(x=mean_expression)) + geom_density(alpha=0.2, color='red') p2 <- mean_pval_sub %>% ggplot(aes(x=log2(mean_expression))) + geom_density(alpha=0.2, color='red') mean_distribution <- plot_grid(p1, p2, labels = "AUTO", nrow=1)
p1 <- mean_pval_sub %>% ggplot(aes(x=pval)) + geom_density(alpha=0.2, color='red') p2 <- mean_pval_sub %>% ggplot(aes(x=-log10(pval+1*10^-3))) + geom_density(alpha=0.2, color='red') pval_distribution <- plot_grid(p1, p2, labels = "AUTO", nrow=1) plot_grid(mean_distribution, pval_distribution, labels = "AUTO", ncol=1)
 # 展示部分(10對)顯著性表達的受體配體對 p <- mean_pval_sub %>% dplyr::arrange(Pairs) %>% head(16*10) %>% ggplot(aes(x=Pairs, y=celltype)) + # geom_point(aes(color=log2(mean_expression), size=pval)) + # scale_size(trans = 'reverse') + geom_point(aes(color=log2(mean_expression), size=-log10(pval+1*10^-3)) ) + guides(colour = guide_colourbar(order = 1),size = guide_legend(order = 2)) + labs(x='', y='') + scale_color_gradientn(name='Expression level \n(log2 mean expression \nmolecule1, molecule2)', colours = terrain.colors(100)) + # scale_color_gradient2('Expression level \n(log2 mean expression \nmolecule1, molecule2)', low = 'blue', mid = 'yellow', high = 'red') + theme(axis.text.x= element_text(angle=45, hjust=1)) + # coord_flip() + theme( panel.border = element_rect(color = 'black', fill = NA), panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(), panel.background = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.ticks = element_blank() # plot.title = element_text(hjust = 0.5), # legend.position = 'bottom' # guides(fill = guide_legend(label.position = "bottom")) # legend.position = "bottom" # axis.text.y.right = element_text(angle=270, hjust=0.5) ) + theme(legend.key.size = unit(0.4, 'cm'), #change legend key size # legend.key.height = unit(1, 'cm'), #change legend key height # legend.key.width = unit(1, 'cm'), #change legend key width legend.title = element_text(size=9), #change legend title font size legend.text = element_text(size=8)) #change legend text font size
p
 save_plot(paste0(new_path,"/Ligands_Receptors_dotplot.png"), p, base_height = 4, base_aspect_ratio = 3, base_width = NULL, dpi=600) save_plot(paste0(new_path,"/Ligands_Receptors_dotplot.pdf"), p, base_height = 4, base_aspect_ratio = 3, base_width = NULL)
|