在生信分析中,|log2FC| >1,p<0.05就是我们做分析想要的结果。不管是用GEO数据、TCGA数据等做分析,甚至是自己花钱做单细胞、多组学,没有差异也是枉然。在实验中,表达差异和表型验证,也要看到基因表达对表型的影响具有统计学差异。我们总结说过, 生信分析的数据可视化的方式要根据待展示差异基因的数目而定 。
如果待展示的基因较少,柱状图、箱线图、散点图、小提琴图等都可以(10^1数量级);如果待展示的差异基因数为几十个甚至上百个,首选热图(10^2数量级);如果待展示的基因数目已经几百,甚至几千个(10^3数量级及以上),那么火山图绝对是首选。
在铜死亡论文中,图A(左)筛选流程,比较两种铜离子载体的共同耐药基因。图A(右)是筛选结果,蓝色标记是保护性基因(敲除后细胞存活),红色标记是敏感基因(敲除后细胞更易死亡)。FDX1是最高置信度的基因,表明其在铜死亡中起核心作用。图B和图C火山图展示筛选结果,横轴是基因敲除效应(log fold change),纵轴是统计显著性。FDX1、LIAS、LIPT1、DLD等基因位于右上角,表明它们对铜死亡至关重要。
差异基因火山图。关键基因:下调基因(胰腺癌 vs 健康对照):Robo1(轴突导向)、Mir6236(非编码RNA)、Lin28b(干细胞调控)。上调基因:Gal(神经肽)、Erbb3(生长因子受体)、Mif(免疫调节因子)。这篇Nature文章是以横轴火山图来展示的。
好看的火山图真的很多。这里,上次我们用DeepSeek对GEO芯片数据处理,并进行差异分析和初步火山图绘制。这次,我们绘制了多种火山图,供果友们参考。需要注意的是,代码在实际运行过程中,是需要微调的,而不是直接拿来就能用的。
library(dplyr)
library(ggplot2)
library(ggprism)
library(ggrepel)
library(patchwork) # 用于图形排版
library(tibble)
library(tidyverse)
library(ggfun)
library(grid)
data = DM1_vs_CTRL
# 数据预处理函数
data = rownames_to_column(data, var = "genesymbol")
logFC_threshold = 1.5
pval_threshold = 0.05
# 标记上下调基因
# 第一步:只创建分组
data <- data %>%
mutate(group = case_when(
adj.P.Val < pval_threshold & logFC > logFC_threshold ~ "Up",
adj.P.Val < pval_threshold & logFC < -logFC_threshold ~ "Down",
TRUE ~ "NS"
))
# 第二步:转换为因子
data$group <- factor(data$group, levels = c("Up", "NS", "Down"))
# 基础火山图函数
basic_volcano <- function(data, logFC_threshold = 1.5, pval_threshold = 0.05,
title = "Volcano Plot") {
ggplot(data, aes(x = logFC, y = -log10(adj.P.Val), color = group)) +
geom_point(aes(color = group), alpha = 0.85, size = 1.5) +
scale_color_manual(values = c('red', 'gray', 'blue')) +
geom_vline(xintercept = c(-logFC_threshold, logFC_threshold),
linetype = "dashed", color = "black", linewidth = 0.8) +
geom_hline(yintercept = -log10(pval_threshold),
linetype = "dashed", color = "black", linewidth = 0.8) +
labs(x = "log2 (fold change)", y = "-log10 (adj.P.Val)", title = title) +
theme_prism(border = TRUE) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "right",
legend.title = element_blank()
)
}
# 带标签的火山图函数
labeled_volcano <- function(data, genes_to_label = NULL,
logFC_threshold = 1.5, pval_threshold = 0.05,
title = "Volcano Plot with Labels") {
# 添加标签列
if (!is.null(genes_to_label)) {
data <- data %>%
mutate(label = ifelse(genesymbol %in% genes_to_label, genesymbol, ""))
} else {
data$label <- ifelse(data$adj.P.Val < pval_threshold &
abs(data$logFC) > logFC_threshold,
data$genesymbol, "")
}
basic_volcano(data, logFC_threshold, pval_threshold, title) +
geom_label_repel(
data = filter(data, label != ""),
aes(label = label),
size = 3,
box.padding = 0.5,
point.padding = 0.8,
segment.color = "black",
show.legend = FALSE,
max.overlaps = Inf
)
}
# 美化火山图函数
enhanced_volcano <- function(data, top_n = 15,
logFC_threshold = 1.5, pval_threshold = 0.05) {
# 移除p值为0的基因
data <- data %>% filter(adj.P.Val != 0)
# 获取上下调top基因
top_up <- data %>%
filter(group == "Up") %>%
arrange(desc(-log10(adj.P.Val))) %>%
slice(1:top_n)
top_down <- data %>%
filter(group == "Down") %>%
arrange(desc(-log10(adj.P.Val))) %>%
slice(1:top_n)
# 绘图
ggplot(data, aes(x = logFC, y = -log10(adj.P.Val))) +
# 背景点
geom_point(aes(color = logFC, size = -log10(adj.P.Val)), alpha = 0.7) +
# 突出显示top基因
geom_point(
data = bind_rows(top_up, top_down),
aes(fill = logFC, size = -log10(adj.P.Val)),
shape = 21, color = "black", show.legend = FALSE
) +
# 标签
geom_text_repel(
data = top_up,
aes(label = genesymbol),
box.padding = 0.5,
nudge_x = 0.5,
nudge_y = 0.2,
segment.curvature = -0.1,
segment.ncp = 3,
direction = "y",
hjust = "left"
) +
geom_text_repel(
data = top_down,
aes(label = genesymbol),
box.padding = 0.5,
nudge_x = -0.2,
nudge_y = 0.2,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20,
direction = "y",
hjust = "right"
) +
# 颜色和大小比例尺
scale_color_gradientn(
colours = c("#3288bd", "#66c2a5", "#ffffbf", "#f46d43", "#9e0142"),
values = scales::rescale(c(min(data$logFC), -1, 0, 1, max(data$logFC)))
) +
scale_fill_gradientn(
colours = c("#3288bd", "#66c2a5", "#ffffbf", "#f46d43", "#9e0142"),
values = scales::rescale(c(min(data$logFC), -1, 0, 1, max(data$logFC)))
) +
scale_size(range = c(1, 7)) +
# 参考线
geom_vline(xintercept = c(-logFC_threshold, logFC_threshold), linetype = "dashed") +
geom_hline(yintercept = -log10(pval_threshold), linetype = "dashed") +
# 坐标轴限制
xlim(c(-max(abs(data$logFC)), max(abs(data$logFC)))) +
ylim(c(0, max(-log10(data$adj.P.Val)) + 2)) +
# 注释和主题
labs(
x = "log2 (fold change)",
y = "-log10 (adj.P.Val)",
title = "Enhanced Volcano Plot",
subtitle = paste("Top", top_n, "up/down regulated genes")
) +
theme_bw() +
theme(
panel.grid = element_blank(),
legend.background = element_roundrect(color = "#808080", linetype = 1),
axis.text = element_text(size = 13, color = "black"),
axis.title = element_text(size = 15),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
) +
# 添加方向箭头
annotation_custom(
grob = segmentsGrob(
y0 = unit(-10, "pt"),
y1 = unit(-10, "pt"),
arrow = arrow(angle = 45, length = unit(0.2, "cm"), ends = "first"),
gp = gpar(lwd = 3, col = "#74add1")
),
xmin = -max(abs(data$logFC)),
xmax = -1,
ymin = max(-log10(data$adj.P.Val)) + 1,
ymax = max(-log10(data$adj.P.Val)) + 1
) +
annotation_custom(
grob = textGrob(label = "Down", gp = gpar(col = "#74add1")),
xmin = -max(abs(data$logFC)),
xmax = -1,
ymin = max(-log10(data$adj.P.Val)) + 1.5,
ymax = max(-log10(data$adj.P.Val)) + 1.5
) +
annotation_custom(
grob = segmentsGrob(
y0 = unit(-10, "pt"),
y1 = unit(-10, "pt"),
arrow = arrow(angle = 45, length = unit(0.2, "cm"), ends = "last"),
gp = gpar(lwd = 3, col = "#d73027")
),
xmin = max(abs(data$logFC)),
xmax = 1,
ymin = max(-log10(data$adj.P.Val)) + 1,
ymax = max(-log10(data$adj.P.Val)) + 1
) +
annotation_custom(
grob = textGrob(label = "Up", gp = gpar(col = "#d73027")),
xmin = max(abs(data$logFC)),
xmax = 1,
ymin = max(-log10(data$adj.P.Val)) + 1.5,
ymax = max(-log10(data$adj.P.Val)) + 1.5
)
}
# MA图函数
ma_plot <- function(data, logFC_threshold = 1.5, pval_threshold = 0.05,
highlight_genes = NULL) {
# 如果没有AveExpr列,使用平均表达量
if (!"AveExpr" %in% colnames(data)) {
warning("No 'AveExpr' column found. Using row means if available.")
if (any(grepl("^GSM", colnames(data)))) {
expr_cols <- grep("^GSM", colnames(data), value = TRUE)
data$AveExpr <- rowMeans(data[, expr_cols], na.rm = TRUE)
} else {
stop("Cannot calculate average expression. Please provide 'AveExpr' column.")
}
}
# 标记显著基因
significant <- data %>%
filter(adj.P.Val < pval_threshold & abs(logFC) > logFC_threshold)
ggplot(data, aes(x = log2(AveExpr), y = logFC, color = group)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("red", "grey50", "blue")) +
geom_point(
data = significant,
aes(fill = group),
shape = 21, color = "black", size = 2, alpha = 0.8
) +
geom_hline(yintercept = c(-logFC_threshold, 0, logFC_threshold),
linetype = c("dashed", "solid", "dashed"),
linewidth = c(0.8, 1.2, 0.8)) +
labs(
x = "log2 (Average Expression)",
y = "log2 (Fold Change)",
title = "MA Plot"
) +
theme_bw() +
theme(
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
legend.position = "top"
) +
{
if (!is.null(highlight_genes)) {
geom_text_repel(
data = filter(data, genesymbol %in% highlight_genes),
aes(label = genesymbol),
box.padding = 0.5,
max.overlaps = Inf
)
}
}
}
# 使用示例
# 1. 准备数据
data_prepared <- data
# 2. 创建各种图形
p1 <- basic_volcano(data_prepared, title = "Basic Volcano Plot")
p2 <- labeled_volcano(data_prepared, genes_to_label = c("CD36", "CX3CR1", "MAFF", "ACTN3"))
p3 <- enhanced_volcano(data_prepared)
p4 <- ma_plot(data_prepared, highlight_genes = c("CD36", "CX3CR1", "MAFF", "ACTN3"))
# 3. 组合图形
(p1 + p2)
(p3 + p4)参考资料:https://mp.weixin.qq.com/s/RJhEX2GiF720_GQmjwcIlQ
特别声明:以上内容(如有图片或视频亦包括在内)为自媒体平台“网易号”用户上传并发布,本平台仅提供信息存储服务。
Notice: The content above (including the pictures and videos if any) is uploaded and posted by a user of NetEase Hao, which is a social media platform and only provides information storage services.