Loading [MathJax]/jax/output/CommonHTML/config.js
首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >专栏 >双向孟德尔随机化研究

双向孟德尔随机化研究

原创
作者头像
sheldor没耳朵
发布于 2025-03-10 07:00:23
发布于 2025-03-10 07:00:23
1790
举报
文章被收录于专栏:数据挖掘数据挖掘

双向孟德尔随机化研究

暴露和结局的双向孟德尔随机化研究,重要的是前期的数据整理

1 正向

例1:暴露ADHD(PGC数据库),结局TMD(芬兰数据库)

代码语言:r
AI代码解释
复制
rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)
dir.create("Rdata")
dir.create("data/ADHD")
source("function.R")
#暴露ADHD----
ADHD_exposure_finn = vroom("../rawData/ADHD2022_iPSYCH_deCODE_PGC.meta.gz")
head(ADHD_exposure_finn)
colnames(ADHD_exposure_finn)
ADHD_exposure_finn <- subset(ADHD_exposure_finn,P< 5e-08)
dim(ADHD_exposure_finn)#[1] 1428   14
#
ADHD_exposure_finn$Beta <- log(ADHD_exposure_finn$OR)
ADHD_exposure_finn$EAF <- (ADHD_exposure_finn$FRQ_A_38691 * ADHD_exposure_finn$Nca + 
                             ADHD_exposure_finn$FRQ_U_186843 * ADHD_exposure_finn$Nco) / 
  (ADHD_exposure_finn$Nca + ADHD_exposure_finn$Nco)

#选择列名
ADHD_exposure_finn <- ADHD_exposure_finn[,c("SNP","A1","A2",
                                            "EAF","Beta",
                                            "OR","SE","P","Nca","Nco")]

write.csv(ADHD_exposure_finn,file = "data/ADHD/ADHD_exposure_PGC.csv")
#修改列名
ADHD_exposure_dat <- read_exposure_data(file = "data/ADHD/ADHD_exposure_PGC.csv",sep=",",
                                        snp_col = "SNP",
                                        beta_col = "Beta",se_col = "SE",
                                        effect_allele_col = "A1",
                                        other_allele_col = "A2",
                                        eaf_col = "EAF",pval_col = "P")
#去除连锁不平衡
ADHD_exposure_dat_clumped = ieugwasr::ld_clump(dplyr::tibble(rsid = ADHD_exposure_dat$SNP,
                                                             pval = ADHD_exposure_dat$pval.exposure),
                                               clump_kb = 10000,
                                               clump_r2 = 0.001,
                                               clump_p = 1,
                                               bfile = "/home/data/t110558/project/MR/weiwenbing/sup/EUR",
                                               plink_bin = plinkbinr::get_plink_exe(),
                                               pop = "EUR")
ADHD_exposure_dat_clumped$id.exposure = "ADHD"
ADHD_exposure_dat_clumped = ADHD_exposure_dat[ADHD_exposure_dat$SNP %in% ADHD_exposure_dat_clumped$rsid,]
#保留F值>10
ADHD_exposure_dat_clumped <- calculate_f_statistic(ADHD_exposure_dat_clumped,
                                                   sample_size = 225534)

dim(ADHD_exposure_dat_clumped)#26 14
write.csv(ADHD_exposure_dat_clumped,file = "data/ADHD/ADHD_exposure_dat_clumped.csv")
save(ADHD_exposure_dat_clumped,file = "Rdata/step1.ADHD.Rdata")

#结局----
outcome_finn = vroom("../rawData/finngen_R12_TEMPOROMANDIB_TMD.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% ADHD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 25 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",
                               beta_col = "beta",
                               pval_col = "pval",
                               se_col = "sebeta",
                               eaf_col = "af_alt",
                               effect_allele_col = "alt",
                               other_allele_col = "ref")

outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-08)
dim(outcome_finn_dat)#[1] 25 12

#修改暴露和结局的名称
ADHD_exposure_dat_clumped$id.exposure = "ADHD"
outcome_finn_dat$id.outcome = "TMD"

mr_data_finn = harmonise_data(exposure_dat = ADHD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action= 2)   #去除回文序列,2去除,1不去除
write.csv(mr_data_finn, file = "data/ADHD/mr_ADHD_data_PGC.csv")
View(mr_data_finn)
table(mr_data_finn$mr_keep)#25
save(mr_data_finn,file = "Rdata/step1.mr.Rdata")
load("Rdata/step1.mr.Rdata")
#MR----
#MR分析
res = mr(mr_data_finn)
res

#异质性检验
het = mr_heterogeneity(mr_data_finn)
#异质性可视化
# sin = mr_singlesnp(mr_data_finn)
# pdf(file = "data/ADHD/yizhixing.pdf", width=10, heigh=8)
# mr_funnel_plot(singlesnp_results = sin)
# dev.off()
#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome ="se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,
                   SignifThreshold = 0.05)

presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices` #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP

het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/ADHD/het_clean.csv")
# id.exposure id.outcome outcome exposure                    method        Q Q_df    Q_pval
# 1        ADHD        TMD outcome exposure                  MR Egger 25.61954   23 0.3191527
# 2        ADHD        TMD outcome exposure Inverse variance weighted 28.06430   24 0.2573371
#异质性检验
egger_result1 <- mr_egger(mr_input(bx = mr_data_clean$beta.exposure, 
                                   bxse = mr_data_clean$se.exposure, 
                                   by = mr_data_clean$beta.outcome, 
                                   byse = mr_data_clean$se.outcome))

write.csv(egger_result1,file = "data/ADHD/egger_result1.csv")
#异质性可视化
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/ADHD/yizhixing.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin)
dev.off()
#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/ADHD/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/ADHD/MRresult.csv")
#MR分析结果可视化
pdf(file = "data/ADHD/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()
#MR分析结果森林图
col = c("id.exposure", "id.outcome", "nsnp", "method", "or", "pval", "or_lci95", "or_uci95")
OR = OR_clean
OR = OR[,col]
OR$`OR (95% CI)` = sprintf("%.2f (%.2f to %.2f)", OR$or, OR$or_lci95, OR$or_uci95)
OR$pval = signif(OR$pval, 3)
#write.csv(OR, "OR.csv", row.names = F)
#OR = read.csv("OR.csv", header = T, check.names = F)
#OR$nsnp = ifelse(is.na(OR$nsnp), "", OR$nsnp)
OR$` ` = paste(rep(" ", 40), collapse = " ")
#绘制
library(forestploter)
library(grid)
pdf("data/ADHD/MRresult_eQTLGen.pdf", width = 11, height = 7, onefile = FALSE)
tm = forest_theme(base_size = 10,   # 文本的大小
                  ci_pch = 16,      # 可信区间点的形状
                  ci_col = "black", # CI的颜色
                  ci_fill = "red",  # CI中se点的颜色填充
                  ci_alpha = 0.8,   # CI透明度
                  ci_lty = 1,       # CI的线型
                  ci_lwd = 1.5,     # CI的线宽
                  ci_Theight = 0.2, # CI的高度,默认是NULL
                  #参考线默认的参数,中间的竖的虚线
                  refline_lwd = 1,  # 中间的竖的虚线
                  refline_lty = "dashed",
                  refline_col = "grey20")
p = forest(OR[,c(1:4,10,6,9)],
           est = OR$or,         #效应值
           lower = OR$or_lci95, #置信区间下限
           upper = OR$or_uci95, #置信区间上限
           ci_column = 5,       #在哪一列画森林图,选空的那一列
           ref_line = 1,        #参考线位置
           xlim = c(-1, 3),      #设置轴范围
           ticks_at = c(0, 1, 2, 3),#设置刻度
           theme = tm)
edit_plot(p,
          which = "background",
          row = c(3),
          gp = gpar(fill = "lightsteelblue1"))
dev.off()

#SNP森林图
res_single = mr_singlesnp(mr_data_clean)
#绘制SNP森林图
pdf(file = "data/ADHD/森林图.pdf", width = 9, heigh = 6)
mr_forest_plot(res_single)
dev.off()

#leave-one-out analysis(留一法分析)
pdf(file = "data/ADHD/leave-one-out.pdf", width = 9, heigh = 4)
mr_leaveoneout_plot(leaveoneout_results = mr_leaveoneout(mr_data_clean))
dev.off()

例2:暴露AD(芬兰数据库),结局TMD(芬兰数据库)

代码语言:r
AI代码解释
复制
rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)

source("function.R")
dir.create("data/AD")
#焦虑症----
AN_exposure_finn = vroom("../rawData/finngen_R12_KRA_PSY_ANXIETY_EXMORE.gz")
head(AN_exposure_finn)
colnames(AN_exposure_finn)
AN_exposure_finn <- subset(AN_exposure_finn,pval< 5e-08)
dim(AN_exposure_finn)#[1] 826  13
AN_exposure_finn = format_data(dat = AN_exposure_finn,
                               type = "exposure",
                               snp_col = "rsids",
                               beta_col = "beta",
                               se_col = "sebeta",
                               effect_allele_col = "alt",
                               other_allele_col = "ref",
                               eaf_col = "af_alt",
                               pval_col = "pval")

write.csv(AN_exposure_finn,file = "data/AD/AN_exposure_finn.csv")
AN_exposure_dat <- AN_exposure_finn
#修改列名
#去除连锁不平衡
AN_exposure_dat_clumped = ieugwasr::ld_clump(dplyr::tibble(rsid = AN_exposure_dat$SNP,
                                                           pval = AN_exposure_dat$pval.exposure),
                                             clump_kb = 10000,
                                             clump_r2 = 0.001,
                                             clump_p = 1,
                                             bfile = "/home/data/t110558/project/MR/weiwenbing/sup/EUR",
                                             plink_bin = plinkbinr::get_plink_exe(),
                                             pop = "EUR")
AN_exposure_dat_clumped$id.exposure = "AN"
AN_exposure_dat_clumped = AN_exposure_dat[AN_exposure_dat$SNP %in% AN_exposure_dat_clumped$rsid,]
#保留F值>10
AN_exposure_dat_clumped <- calculate_f_statistic(AN_exposure_dat_clumped,
                                                 sample_size = 418856)

dim(AN_exposure_dat_clumped)#24 14
write.csv(AN_exposure_dat_clumped,file = "data/AD/AN_exposure_dat_clumped.csv")
save(AN_exposure_dat_clumped,file = "Rdata/step2.AN_.Rdata")

#结局----
outcome_finn = vroom("../rawData/finngen_R12_TEMPOROMANDIB_TMD.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% AN_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 24 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",
                               beta_col = "beta",
                               pval_col = "pval",
                               se_col = "sebeta",
                               eaf_col = "af_alt",
                               effect_allele_col = "alt",
                               other_allele_col = "ref")

outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-08)
dim(outcome_finn_dat)#[1] 24 12

#修改暴露和结局的名称
AN_exposure_dat_clumped$id.exposure = "AN"
outcome_finn_dat$id.outcome = "TMD"

mr_data_finn = harmonise_data(exposure_dat = AN_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action= 2)   #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 23
write.csv(mr_data_finn, file = "data/AD/mr_AN_data_PGC.csv")
View(mr_data_finn)
mr_data_finn <- data.table::fread("data/AD/mr_AN_data_PGC.csv",data.table = F)

#MR----
#MR分析
res = mr(mr_data_finn)
res

#异质性检验
het = mr_heterogeneity(mr_data_finn)
#异质性可视化
# sin = mr_singlesnp(mr_data_finn)
# pdf(file = "data/AD/yizhixing.pdf", width=10, heigh=8)
# mr_funnel_plot(singlesnp_results = sin)
# dev.off()

#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome ="se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 3000,
                   SignifThreshold = 0.05)
presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices` #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP

het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/AD/het_clean.csv")
#异质性可视化
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/AD/yizhixing.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()


#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/AD/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/AD/MRresult.csv")

#MR分析结果可视化
pdf(file = "data/AD/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

#MR分析结果森林图
col = c("id.exposure", "id.outcome", "nsnp", "method", "or", "pval", "or_lci95", "or_uci95")
OR = OR_clean
OR = OR[,col]
OR$`OR (95% CI)` = sprintf("%.2f (%.2f to %.2f)", OR$or, OR$or_lci95, OR$or_uci95)
OR$pval = signif(OR$pval, 3)
#write.csv(OR, "OR.csv", row.names = F)
#OR = read.csv("OR.csv", header = T, check.names = F)
#OR$nsnp = ifelse(is.na(OR$nsnp), "", OR$nsnp)
OR$` ` = paste(rep(" ", 40), collapse = " ")
#绘制
library(forestploter)
library(grid)
pdf("data/AD/MRresult_eQTLGen.pdf", width = 11, height = 7, onefile = FALSE)
tm = forest_theme(base_size = 10,   # 文本的大小
                  ci_pch = 16,      # 可信区间点的形状
                  ci_col = "black", # CI的颜色
                  ci_fill = "red",  # CI中se点的颜色填充
                  ci_alpha = 0.8,   # CI透明度
                  ci_lty = 1,       # CI的线型
                  ci_lwd = 1.5,     # CI的线宽
                  ci_Theight = 0.2, # CI的高度,默认是NULL
                  #参考线默认的参数,中间的竖的虚线
                  refline_lwd = 1,  # 中间的竖的虚线
                  refline_lty = "dashed",
                  refline_col = "grey20")
p = forest(OR[,c(1:4,10,6,9)],
           est = OR$or,         #效应值
           lower = OR$or_lci95, #置信区间下限
           upper = OR$or_uci95, #置信区间上限
           ci_column = 5,       #在哪一列画森林图,选空的那一列
           ref_line = 1,        #参考线位置
           xlim = c(-1, 3),      #设置轴范围
           ticks_at = c(0, 1, 2, 3),#设置刻度
           theme = tm)
edit_plot(p,
          which = "background",
          row = c(3),
          gp = gpar(fill = "lightsteelblue1"))
dev.off()

#SNP森林图
res_single = mr_singlesnp(mr_data_clean)
#绘制SNP森林图
pdf(file = "data/AD/森林图.pdf", width = 9, heigh = 5)
mr_forest_plot(res_single)
dev.off()

#leave-one-out analysis(留一法分析)
pdf(file = "data/AD/leave-one-out.pdf", width = 9, heigh = 4)
mr_leaveoneout_plot(leaveoneout_results = mr_leaveoneout(mr_data_clean))
dev.off()

2 反向

例1:暴露TMD(芬兰数据库),结局ADHD(PGC数据库)

代码语言:r
AI代码解释
复制
rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)

dir.create("data/reADHD")

source("function.R")

#TMD
TMD_exposure_finn = vroom("../rawData/finngen_R12_TEMPOROMANDIB_TMD.gz")
head(TMD_exposure_finn)
colnames(TMD_exposure_finn)
TMD_exposure_finn <- subset(TMD_exposure_finn,pval< 5e-06)
dim(TMD_exposure_finn)#[1] 281  13
TMD_exposure_finn = format_data(dat = TMD_exposure_finn,
                               type = "exposure",
                               snp_col = "rsids",
                               beta_col = "beta",
                               se_col = "sebeta",
                               effect_allele_col = "alt",
                               other_allele_col = "ref",
                               eaf_col = "af_alt",
                               pval_col = "pval")

write.csv(TMD_exposure_finn,file = "data/reADHD/TMD_exposure_finn.csv")
TMD_exposure_dat <- TMD_exposure_finn
#修改列名
#去除连锁不平衡
TMD_exposure_dat_clumped = ieugwasr::ld_clump(dplyr::tibble(rsid = TMD_exposure_dat$SNP,
                                                           pval = TMD_exposure_dat$pval.exposure),
                                             clump_kb = 10000,
                                             clump_r2 = 0.001,
                                             clump_p = 1,
                                             bfile = "/home/data/t110558/project/MR/weiwenbing/sup/EUR",
                                             plink_bin = plinkbinr::get_plink_exe(),
                                             pop = "EUR")
TMD_exposure_dat_clumped$id.exposure = "TMD"
TMD_exposure_dat_clumped = TMD_exposure_dat[TMD_exposure_dat$SNP %in% TMD_exposure_dat_clumped$rsid,]
#保留F值>10
TMD_exposure_dat_clumped <- calculate_f_statistic(TMD_exposure_dat_clumped,
                                                 sample_size = 270370)

dim(TMD_exposure_dat_clumped)#20 14
write.csv(TMD_exposure_dat_clumped,file = "data/reADHD/TMD_exposure_dat_clumped.csv")
save(TMD_exposure_dat_clumped,file = "Rdata/step1.1.TMD_.Rdata")


#结局----
outcome_finn = vroom("../rawData/ADHD2022_iPSYCH_deCODE_PGC.meta.gz")
outcome_finn_dat = subset(outcome_finn, outcome_finn$SNP %in% TMD_exposure_dat_clumped$SNP)
outcome_finn_dat$Beta <- log(outcome_finn_dat$OR)
outcome_finn_dat$EAF <- (outcome_finn_dat$FRQ_A_38691 * outcome_finn_dat$Nca + 
                             outcome_finn_dat$FRQ_U_186843 * outcome_finn_dat$Nco) / 
  (outcome_finn_dat$Nca + outcome_finn_dat$Nco)
colnames(outcome_finn_dat)

outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "SNP",
                               beta_col = "Beta",
                               pval_col = "P",
                               se_col = "SE",
                               eaf_col = "EAF",
                               effect_allele_col = "A1",
                               other_allele_col = "A2")

outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-06)
dim(outcome_finn_dat)#[1] 17 11


#修改暴露和结局的名称
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "ADHD"

mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action= 2)   #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 16
write.csv(mr_data_finn, file = "data/reADHD/mr_TMD_data_PGC.csv")
View(mr_data_finn)
mr_data_finn <- data.table::fread("data/reADHD/mr_TMD_data_PGC.csv",data.table = F)
#MR----
#MR分析
res = mr(mr_data_finn)
res

#异质性检验
het = mr_heterogeneity(mr_data_finn)
#异质性可视化
# sin = mr_singlesnp(mr_data_finn)
# pdf(file = "data/reADHD/yizhixing.pdf", width=10, heigh=8)
# mr_funnel_plot(singlesnp_results = sin)
# dev.off()

#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome ="se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,
                   SignifThreshold = 0.05)
presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices` #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP

het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/reADHD/het_clean.csv")

sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reADHD/yizhixing.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()

#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/reADHD/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reADHD/MRresult.csv")

#MR分析结果可视化
pdf(file = "data/reADHD/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

#MR分析结果森林图
col = c("id.exposure", "id.outcome", "nsnp", "method", "or", "pval", "or_lci95", "or_uci95")
OR = OR_clean
OR = OR[,col]
OR$`OR (95% CI)` = sprintf("%.2f (%.2f to %.2f)", OR$or, OR$or_lci95, OR$or_uci95)
OR$pval = signif(OR$pval, 3)
#write.csv(OR, "OR.csv", row.names = F)
#OR = read.csv("OR.csv", header = T, check.names = F)
#OR$nsnp = ifelse(is.na(OR$nsnp), "", OR$nsnp)
OR$` ` = paste(rep(" ", 40), collapse = " ")
#绘制
library(forestploter)
library(grid)
pdf("data/reADHD/MRresult_eQTLGen.pdf", width = 11, height = 7, onefile = FALSE)
tm = forest_theme(base_size = 10,   # 文本的大小
                  ci_pch = 16,      # 可信区间点的形状
                  ci_col = "black", # CI的颜色
                  ci_fill = "red",  # CI中se点的颜色填充
                  ci_alpha = 0.8,   # CI透明度
                  ci_lty = 1,       # CI的线型
                  ci_lwd = 1.5,     # CI的线宽
                  ci_Theight = 0.2, # CI的高度,默认是NULL
                  #参考线默认的参数,中间的竖的虚线
                  refline_lwd = 1,  # 中间的竖的虚线
                  refline_lty = "dashed",
                  refline_col = "grey20")
p = forest(OR[,c(1:4,10,6,9)],
           est = OR$or,         #效应值
           lower = OR$or_lci95, #置信区间下限
           upper = OR$or_uci95, #置信区间上限
           ci_column = 5,       #在哪一列画森林图,选空的那一列
           ref_line = 1,        #参考线位置
           xlim = c(-1, 3),      #设置轴范围
           ticks_at = c(0, 1, 2, 3),#设置刻度
           theme = tm)
edit_plot(p,
          which = "background",
          row = c(3),
          gp = gpar(fill = "lightsteelblue1"))
dev.off()

#SNP森林图
res_single = mr_singlesnp(mr_data_clean)
#绘制SNP森林图
pdf(file = "data/reADHD/森林图.pdf", width = 9, heigh = 6)
mr_forest_plot(res_single)
dev.off()

#leave-one-out analysis(留一法分析)
pdf(file = "data/reADHD/leave-one-out.pdf", width = 9, heigh = 4)
mr_leaveoneout_plot(leaveoneout_results = mr_leaveoneout(mr_data_clean))
dev.off()

例2:暴露TMD(芬兰数据库),结局AD(芬兰数据库)

代码语言:r
AI代码解释
复制
rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)

dir.create("data/reAD")

source("function.R")

#TMD
load("Rdata/step1.1.TMD_.Rdata")


#结局----
outcome_finn = vroom("../rawData/finngen_R12_KRA_PSY_ANXIETY_EXMORE.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 20 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",
                               beta_col = "beta",
                               pval_col = "pval",
                               se_col = "sebeta",
                               eaf_col = "af_alt",
                               effect_allele_col = "alt",
                               other_allele_col = "ref")

outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-06)
dim(outcome_finn_dat)#[1] 19 11

#修改暴露和结局的名称
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AD"

mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action= 2)   #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 18
write.csv(mr_data_finn, file = "data/reAD/mr_TMD_data_PGC.csv")
View(mr_data_finn)
mr_data_finn <- data.table::fread("data/reAD/mr_TMD_data_PGC.csv",data.table = F)
#MR----
#MR分析
res = mr(mr_data_finn)
res

#异质性检验
het = mr_heterogeneity(mr_data_finn)
#异质性可视化
# sin = mr_singlesnp(mr_data_finn)
# pdf(file = "data/reAD/yizhixing.pdf", width=10, heigh=8)
# mr_funnel_plot(singlesnp_results = sin)
# dev.off()

#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome ="se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,
                   SignifThreshold = 0.05)
presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices` #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP

het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/reAD/het_clean.csv")
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAD/yizhixing.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/reAD/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAD/MRresult.csv")

#MR分析结果可视化
pdf(file = "data/reAD/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

#MR分析结果森林图
col = c("id.exposure", "id.outcome", "nsnp", "method", "or", "pval", "or_lci95", "or_uci95")
OR = OR_clean
OR = OR[,col]
OR$`OR (95% CI)` = sprintf("%.2f (%.2f to %.2f)", OR$or, OR$or_lci95, OR$or_uci95)
OR$pval = signif(OR$pval, 3)
#write.csv(OR, "OR.csv", row.names = F)
#OR = read.csv("OR.csv", header = T, check.names = F)
#OR$nsnp = ifelse(is.na(OR$nsnp), "", OR$nsnp)
OR$` ` = paste(rep(" ", 40), collapse = " ")
#绘制
library(forestploter)
library(grid)
pdf("data/reAD/MRresult_eQTLGen.pdf", width = 11, height = 7, onefile = FALSE)
tm = forest_theme(base_size = 10,   # 文本的大小
                  ci_pch = 16,      # 可信区间点的形状
                  ci_col = "black", # CI的颜色
                  ci_fill = "red",  # CI中se点的颜色填充
                  ci_alpha = 0.8,   # CI透明度
                  ci_lty = 1,       # CI的线型
                  ci_lwd = 1.5,     # CI的线宽
                  ci_Theight = 0.2, # CI的高度,默认是NULL
                  #参考线默认的参数,中间的竖的虚线
                  refline_lwd = 1,  # 中间的竖的虚线
                  refline_lty = "dashed",
                  refline_col = "grey20")
p = forest(OR[,c(1:4,10,6,9)],
           est = OR$or,         #效应值
           lower = OR$or_lci95, #置信区间下限
           upper = OR$or_uci95, #置信区间上限
           ci_column = 5,       #在哪一列画森林图,选空的那一列
           ref_line = 1,        #参考线位置
           xlim = c(-1, 3),      #设置轴范围
           ticks_at = c(0, 1, 2, 3),#设置刻度
           theme = tm)
edit_plot(p,
          which = "background",
          row = c(3),
          gp = gpar(fill = "lightsteelblue1"))
dev.off()

#SNP森林图
res_single = mr_singlesnp(mr_data_clean)
#绘制SNP森林图
pdf(file = "data/reAD/森林图.pdf", width = 9, heigh = 6)
mr_forest_plot(res_single)
dev.off()

#leave-one-out analysis(留一法分析)
pdf(file = "data/reAD/leave-one-out.pdf", width = 9, heigh = 4)
mr_leaveoneout_plot(leaveoneout_results = mr_leaveoneout(mr_data_clean))
dev.off()

3 问题

参考链接:

https://www.bilibili.com/video/BV1LV4y1b7v5/?spm_id_from=333.1391.0.0&vd_source=7e83cb2510516bdff59ccf808d022aa0

https://www.bilibili.com/video/BV1TcHXezEU7/?spm_id_from=333.1391.0.0&vd_source=7e83cb2510516bdff59ccf808d022aa0

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
暂无评论
推荐阅读
加入讨论
的问答专区 >
1Java工程师擅长4个领域
    领券
    问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档