R包:APAlyzer从RNA-seq数据计算APA表达丰度

embedded/2024/10/11 4:09:53/

在这里插入图片描述

文章目录

    • 介绍
    • 教程
    • 实战案例
      • 数据
      • 脚本
      • 运行

介绍

今天安利APAlyzer工具,它是通过RNA-seq数据获取3′UTR APA, intronic APA等表达谱的R包。

APAlyzer借助PolyA_DB数据库中已注释的polyA位点(PAS)构建参考区域,并统计bam文件中落在这些区域内的reads,从而识别和定量APA。

Most eukaryotic genes produce alternative polyadenylation (APA) isoforms. APA is dynamically regulated under different growth and differentiation conditions. Here, we present a bioinformatics package, named APAlyzer, for examining 3′UTR APA, intronic APA and gene expression changes using RNA-seq data and annotated polyadenylation sites in the PolyA_DB database. Using APAlyzer and data from the GTEx database, we present APA profiles across human tissues.

在这里插入图片描述

教程

# APAlyzer walkthrough: build PAS references, quantify 3'UTR / intronic APA
# from the TBX20BamSubset example BAMs, then test and plot APA regulation.

# ---- Packages and example data ----
library(APAlyzer)
library(TBX20BamSubset)
library(Rsamtools)
library(repmis)

# RNA-seq BAM files
flsall <- getBamFileList()

# ---- Genomic reference (pre-built RData) ----
URL <- "https://github.com/RJWANGbioinfo/PAS_reference_RData/blob/master/"
file <- "mm9_REF.RData"
# The code below expects this call to provide refUTRraw, dfIPA and dfLE.
source_data(paste0(URL, file, "?raw=True"))

# ---- Building 3'UTR and intronic PAS reference regions at once ----
# Only chr19 is kept.
refUTRraw <- refUTRraw[which(refUTRraw$Chrom == "chr19"), ]
dfIPAraw <- dfIPA[which(dfIPA$Chrom == "chr19"), ]
dfLEraw <- dfLE[which(dfLE$Chrom == "chr19"), ]
PASREF <- REF4PAS(refUTRraw, dfIPAraw, dfLEraw)
UTRdbraw <- PASREF$UTRdbraw
dfIPA <- PASREF$dfIPA
dfLE <- PASREF$dfLE

# ---- Building 3'UTR PAS and IPA reference using GTF files ----
download.file(
  url = "ftp://ftp.ensembl.org/pub/release-99/gtf/mus_musculus/Mus_musculus.GRCm38.99.gtf.gz",
  destfile = "Mus_musculus.GRCm38.99.gtf.gz"
)
GTFfile <- "Mus_musculus.GRCm38.99.gtf.gz"
PASREFraw <- PAS2GEF(GTFfile)
refUTRraw <- PASREFraw$refUTRraw
dfIPAraw <- PASREFraw$dfIPA
dfLEraw <- PASREFraw$dfLE
PASREF <- REF4PAS(refUTRraw, dfIPAraw, dfLEraw)

# ---- Building aUTR and cUTR references ----
# NOTE(review): at this point refUTRraw is the GTF-derived (GRCm38) table from
# the step above, not the mm9 RData table loaded earlier -- confirm this is the
# intended reference for the example BAM files and that its Chrom values use
# the "chr19" naming.
refUTRraw <- refUTRraw[which(refUTRraw$Chrom == "chr19"), ]
UTRdbraw <- REF3UTR(refUTRraw)

# ---- Calculation of relative expression (3'UTR APA) ----
DFUTRraw <- PASEXP_3UTR(UTRdbraw, flsall, Strandtype = "forward")

# ---- Building intronic polyA references ----
# Reload the pre-built reference so dfIPA / dfLE are the unfiltered tables.
URL <- "https://github.com/RJWANGbioinfo/PAS_reference_RData/blob/master/"
file <- "mm9_REF.RData"
source_data(paste0(URL, file, "?raw=True"))

# ---- Calculation of relative expression (intronic APA) ----
dfIPA <- dfIPA[which(dfIPA$Chrom == "chr19"), ]
dfLE <- dfLE[which(dfLE$Chrom == "chr19"), ]
IPA_OUTraw <- PASEXP_IPA(dfIPA, dfLE, flsall, Strandtype = "forward", nts = 1)

# ---- Significance analysis of APA events ----
# Replicate design: three NT samples followed by three KD samples.
sampleTable1 <- data.frame(
  samplename = c(names(flsall)),
  condition = c(rep("NT", 3), rep("KD", 3))
)
# Non-replicate design: one NT and one KD sample. This table was referenced by
# the APAdiff() call below but never defined, which made the call fail.
sampleTable2 <- data.frame(
  samplename = c("SRR316184", "SRR316187"),
  condition = c("NT", "KD")
)

# Significantly regulated APA in 3'UTRs
test_3UTRsing <- APAdiff(
  sampleTable2, DFUTRraw,
  conKET = "NT", trtKEY = "KD", PAS = "3UTR",
  CUTreads = 0, p_adjust_methods = "fdr"
)

# ---- Visualization of analysis results ----
APAVolcano(test_3UTRsing, PAS = "3UTR", Pcol = "pvalue", top = 5, main = "3UTR APA")

实战案例

数据

下列样本存成bam_file.tsv

SampleID	BamPath
SRR316184	/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316184.bam
SRR316185	/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316185.bam
SRR316186	/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316186.bam
SRR316187	/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316187.bam
SRR316188	/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316188.bam
SRR316189	/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316189.bam

脚本

下列代码存成APAlyzer_Expression.R

# APAlyzer_Expression.R
#
# Command-line wrapper around APAlyzer that computes APA expression from a set
# of BAM files.
#
# Options:
#   -b/--bam         table with columns SampleID and BamPath (header required)
#   -r/--reference   reference type: "RData" (pre-built) or "gtf"
#   -g/--genome      reference file: "mm9_REF.RData" / "hg19_REF.RData" for
#                    RData, or the path to a GTF file for gtf
#   -c/--chromosome  "all" (default) or one or more names joined by ":"
#   -e/--expression  "all" (default), "3UTR" or "IPA"
#   -o/--out         output directory (created if missing)
#
# Output: <out>/APA_Expr_<expression>.tsv when the result is a data frame,
# otherwise <out>/APA_Expr_<expression>.RDS.

suppressPackageStartupMessages({
  library(dplyr)
  library(tibble)
  library(optparse)
  library(data.table)
  library(APAlyzer)
  library(TBX20BamSubset)
  library(Rsamtools)
})

option_list <- list(
  make_option(c("-b", "--bam"),
    type = "character",
    help = "bam table file, tab-separated (1st column: sampleID; 2nd: bam path)",
    metavar = "character"
  ),
  make_option(c("-r", "--reference"),
    type = "character", # RData/gtf
    help = "genomic reference type", metavar = "character"
  ),
  make_option(c("-g", "--genome"),
    type = "character",
    help = "genomic reference file", metavar = "character"
  ),
  make_option(c("-c", "--chromosome"),
    type = "character", default = "all", # chr19
    help = "chromosome to be selected", metavar = "character"
  ),
  make_option(c("-e", "--expression"),
    type = "character", default = "all", # 3UTR/IPA
    help = "APA expression: 3UTR and intronic APA", metavar = "character"
  ),
  make_option(c("-o", "--out"),
    type = "character",
    help = "output file path", metavar = "character"
  )
)
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# input parameters
bam_path <- opt$bam
ref_type <- opt$reference
ref_path <- opt$genome
chrom <- opt$chromosome
expr_type <- opt$expression
out_dir <- opt$out # named out_dir so base::dir() is not shadowed

# Example values for interactive debugging:
# bam_path <- "bam_file.tsv"
# ref_type <- "RData"
# ref_path <- "mm9_REF.RData"
# chrom <- "chr19"
# expr_type <- "3UTR"
# out_dir <- "result"

# Fail early with a readable message instead of an "argument is of length
# zero" / "object not found" error deep inside the pipeline.
missing_opts <- names(Filter(
  is.null,
  list(bam = bam_path, reference = ref_type, genome = ref_path, out = out_dir)
))
if (length(missing_opts) > 0) {
  print_help(opt_parser)
  stop(
    "Missing required option(s): ",
    paste0("--", missing_opts, collapse = ", "),
    call. = FALSE
  )
}
if (!ref_type %in% c("RData", "gtf")) {
  stop("--reference must be 'RData' or 'gtf', got: ", ref_type, call. = FALSE)
}
if (!expr_type %in% c("all", "3UTR", "IPA")) {
  stop("--expression must be 'all', '3UTR' or 'IPA', got: ", expr_type,
    call. = FALSE
  )
}

# step1: bam file
# Read the table given by --bam (previously the path was hard-coded to
# "bam_file.tsv" and the option was ignored).
bam_table <- read.table(bam_path, header = TRUE, stringsAsFactors = FALSE)
if (!all(c("SampleID", "BamPath") %in% names(bam_table))) {
  stop("The bam table must contain the columns 'SampleID' and 'BamPath'",
    call. = FALSE
  )
}
bam_file <- bam_table$BamPath
names(bam_file) <- bam_table$SampleID

# step2: genomic reference
if (ref_type == "RData") {
  # data from built reference
  if (!ref_path %in% c("mm9_REF.RData", "hg19_REF.RData")) {
    stop("Unsupported pre-built reference: ", ref_path,
      " (expected 'mm9_REF.RData' or 'hg19_REF.RData')",
      call. = FALSE
    )
  }
  library(repmis)
  URL <- "https://github.com/RJWANGbioinfo/PAS_reference_RData/blob/master/"
  source_data(paste0(URL, ref_path, "?raw=True"))
  if (ref_path == "mm9_REF.RData") {
    refUTRraw_temp <- refUTRraw
    dfIPAraw_temp <- dfIPA
    dfLEraw_temp <- dfLE
  } else {
    refUTRraw_temp <- refUTRraw_hg19
    dfIPAraw_temp <- dfIPA_hg19
    dfLEraw_temp <- dfLE_hg19
  }
} else {
  # building reference from gtf file
  PASREFraw <- PAS2GEF(ref_path)
  refUTRraw_temp <- PASREFraw$refUTRraw
  dfIPAraw_temp <- PASREFraw$dfIPA
  dfLEraw_temp <- PASREFraw$dfLE
}

# step3: whether to choose chromosome
if (chrom == "all") {
  UTRdbraw <- refUTRraw_temp
  dfIPAraw <- dfIPAraw_temp
  dfLEraw <- dfLEraw_temp
} else {
  # One or several chromosomes separated by ":"; splitting a value without
  # ":" simply yields that single name.
  chroms <- unlist(strsplit(chrom, ":", fixed = TRUE))
  UTRdbraw <- refUTRraw_temp[refUTRraw_temp$Chrom %in% chroms, ]
  dfIPAraw <- dfIPAraw_temp[dfIPAraw_temp$Chrom %in% chroms, ]
  dfLEraw <- dfLEraw_temp[dfLEraw_temp$Chrom %in% chroms, ]
}

## aUTR cUTR
PASREF_temp <- REF4PAS(UTRdbraw, dfIPAraw, dfLEraw)
UTRdb <- PASREF_temp$UTRdbraw
dfIPA <- PASREF_temp$dfIPA
dfLE <- PASREF_temp$dfLE

# step4: APA expression (3UTR and IPA)
# NOTE(review): the 3'UTR call uses Strandtype = "forward" while the IPA call
# uses "invert" on the same BAM files -- confirm both match the library
# strandedness.
if (expr_type == "all") {
  # 3UTR
  UTR_APA_OUT <- PASEXP_3UTR(UTRdb, bam_file, Strandtype = "forward")
  # IPA
  IPA_OUT <- PASEXP_IPA(dfIPA, dfLE, bam_file, Strandtype = "invert", nts = 4)
  final_OUT <- list(UTR = UTR_APA_OUT, IPA = IPA_OUT)
} else if (expr_type == "3UTR") {
  final_OUT <- PASEXP_3UTR(UTRdb, bam_file, Strandtype = "forward")
} else {
  final_OUT <- PASEXP_IPA(dfIPA, dfLE, bam_file, Strandtype = "invert", nts = 4)
}

# step5: output
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}
if (!is.data.frame(final_OUT)) {
  # A list of results (expression == "all") is stored as RDS.
  file_name <- file.path(out_dir, paste0("APA_Expr_", expr_type, ".RDS"))
  saveRDS(final_OUT, file_name, compress = TRUE)
} else {
  file_name <- file.path(out_dir, paste0("APA_Expr_", expr_type, ".tsv"))
  write.table(final_OUT, file_name, quote = FALSE, row.names = FALSE, sep = "\t")
}

print("Program Ended without Problems")

运行

在命令行模式下运行该命令

# 3'UTR APA expression on chr19 with the pre-built mm9 reference;
# results are written to ./result
Rscript APAlyzer_Expression.R \
  -b bam_file.tsv \
  -r RData \
  -g mm9_REF.RData \
  -c chr19 \
  -e 3UTR \
  -o result

http://www.ppmy.cn/embedded/125694.html

相关文章

YOLOv10改进,YOLOv10添加CA注意力机制,二次创新C2f结构,助力涨点

改进前训练结果: 二次创新C2f结构训练结果: 摘要 在本文中,提出了一种新的移动网络注意力机制,将位置信息嵌入到通道注意力中,称之为“坐标注意力”。通道注意力通过 2D 全局池化将特征张量转换为单个特征向量,与之不同,坐标注意力将通道注意力分解为两个 1D 特征编码过程…

OpenAI .NET 库稳定版发布,支持 GPT-4o 并改进 API 功能

OpenAI 在6月推出其官方 .NET 库的 beta 版之后,如今终于发布了稳定版。该库已在 NuGet 上作为包发布,支持最新的模型,如 GPT-4o 和 GPT-4o mini,并且提供完整的 OpenAI REST API。这次发布包括同步和异步 API,以及流式…

基于SpringBoot+Vue+MySQL的校园招聘管理系统

系统展示 用户前台界面 管理员后台界面 公司后台界面 系统背景 随着高等教育的普及和就业市场的竞争加剧,校园招聘成为了连接学生与企业的关键桥梁。然而,传统的校园招聘流程繁琐、效率低下,且信息更新不及时,给企业和求职者带来了…

【数字图像处理】第一章 数字图像处理概论,图像的分类。主要内容

上理考研周导师的哔哩哔哩频道 我在频道里讲课哦 目录 1.1 图像处理的产生 1.2 图像的基本概念 图像的分类 图像的表示方法 1.3 数字图像处理系统 1.4 数字图像处理的应用与发展 一. 数字图像处理及其特点 2. 数字图像处理 二. 图像处理的主要内容 2. 数字图像处理…

使用 Go 语言与 Redis 构建高效缓存与消息队列系统

什么是 Redis? Redis 是一个开源的内存数据库,支持多种数据结构,包括字符串、列表、集合、哈希和有序集合。由于 Redis 运行在内存中,读写速度极快,常被用于构建缓存系统、实时排行榜、会话存储和消息队列等高并发场景…

js中各种时间日期格式之间的转换

前言:近几天在做百度地图时,需要转换时间格式并做显示,下面这篇文章主要给大家介绍了关于js中各种时间格式的转换方法的相关资料,文中通过实例代码介绍的非常详细,需要的朋友可以参考下 🌈🌈文章目录 先来认识 js 的时间格式有哪些…

如何用深度神经网络预测潜在消费者

1. 模型架构 本项目采用的是DeepFM模型,其结构结合了FM(因子分解机)与深度神经网络(DNN),实现了低阶与高阶特征交互的有效建模。模型分为以下几层: 1.1 FM部分(因子分解机层…

ElementUI 2.x 输入框回车后在调用接口进行远程搜索功能

输入框回车后在调用接口进行远程搜索功能 主要思路 默认的远程搜索会在输入框聚焦的时候就展示下拉弹窗,而我们需要的是在回车之后才展示下拉弹窗。 具体代码 <div v-for="(domain, index) in formData.domains" class="dynamic-input" :key…