数据来源:https://ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE226291
下载数据(3小时)
#!/bin/bash
# Download the raw data and reference files for the pipeline:
#   - SRA runs SRR23641871 .. SRR23641876 via `prefetch`
#   - GENCODE mouse release M25 genome FASTA and GTF annotation via `wget`
for n in 1 2 3 4 5 6
do
  prefetch "SRR2364187${n}"
done
# `done` must terminate the loop on its own line; it was previously fused with
# the first wget ("donewget"), which left the loop unterminated and dropped
# the genome download.
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/GRCm38.p6.genome.fa.gz
# NOTE(review): the counting steps reference gencode.vM25.annotation.gtf
# without the .gz suffix, so this file presumably has to be decompressed
# before use - confirm.
wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.annotation.gtf.gz
格式转换(5-6小时)
#!/bin/bash
# Convert each prefetched SRA run into gzip-compressed FASTQ files, one file
# per mate (--split-files). The run accession is printed first so progress
# can be followed in the log.
for n in {1..6}; do
  run="SRR2364187${n}"
  echo "${run}"
  fastq-dump --gzip --split-files "${run}"
done
数据质控(0.5小时)
#!/bin/bash
# Quality-control and trim each paired-end run with fastp.
#   input : <data_dir>/<run>_{1,2}.fastq.gz
#   output: <run>_{1,2}_clean.fastq.gz plus one HTML/JSON report per run,
#           all written to the current working directory.
# NOTE(review): the alignment step reads the *_clean.fastq.gz files from
# data_dir, so this script is presumably meant to be run from inside that
# directory - confirm.
data_dir=/home/hemiaomiao/NDNFrnaseq/data
for n in 1 2 3 4 5 6
do
  run="SRR2364187${n}"
  # Reports are named per run. A fixed report.html/report.json would be
  # overwritten on every iteration, keeping only the last sample's QC.
  fastp \
    -i "${data_dir}/${run}_1.fastq.gz" \
    -I "${data_dir}/${run}_2.fastq.gz" \
    -o "${run}_1_clean.fastq.gz" \
    -O "${run}_2_clean.fastq.gz" \
    -h "${run}_fastp.html" \
    -j "${run}_fastp.json"
done
hisat2序列比对(5.5小时)
索引文件可以自己建,也可以下载,STAR需要自己建索引,hisat2可以直接用官网的索引。
#!/bin/bash
# Align the cleaned paired-end reads of every run against the mm10 HISAT2
# index and write one SAM file per run into the data directory.
data_dir=/home/hemiaomiao/NDNFrnaseq/data
index_prefix=/home/hemiaomiao/NDNFrnaseq/genome/mm10/genome
for n in {1..6}; do
  run="SRR2364187${n}"
  hisat2 -t -x "${index_prefix}" \
    -1 "${data_dir}/${run}_1_clean.fastq.gz" \
    -2 "${data_dir}/${run}_2_clean.fastq.gz" \
    -S "${data_dir}/${run}.sam"
done
samtools文件格式转换(1.5-2小时)
#!/bin/bash
# For every run in the current directory: convert the SAM alignment to BAM,
# sort it, and build an index for the sorted file.
for n in {1..6}; do
  run="SRR2364187${n}"
  # SAM -> BAM
  samtools view -S -b "${run}.sam" > "${run}.bam"
  # Sort by chromosomal position (the samtools sort default).
  samtools sort "${run}.bam" -o "${run}_sorted.bam"
  samtools index "${run}_sorted.bam"
done
htseq计数(8-9小时)
#!/bin/bash
# Count reads per gene with htseq-count, one .count file per run.
# NOTE(review): the name-sorted BAM is written relative to the current
# directory but read back from data_dir, so this is presumably run from
# inside data_dir - confirm.
data_dir=/home/hemiaomiao/NDNFrnaseq/data
annotation=/home/hemiaomiao/NDNFrnaseq/genome/gencode.vM25.annotation.gtf
matrix_dir=/home/hemiaomiao/NDNFrnaseq/matrix
for n in {1..6}; do
  run="SRR2364187${n}"
  # The previous step sorted by chromosomal position; htseq-count is invoked
  # with `-r name`, so the alignments are re-sorted by read name first.
  samtools sort -n "${run}.bam" -o "${run}_nsorted.bam"
  htseq-count -r name -f bam \
    "${data_dir}/${run}_nsorted.bam" \
    "${annotation}" \
    > "${matrix_dir}/${run}.count"
done
featureCounts计数
# Summarise all six name-sorted BAM files into a single gene-level count
# table (gene_exp.txt in the current directory), grouping features by the
# GTF gene_id attribute.
# -p declares the input as paired-end; the libraries were dumped with
# --split-files and aligned as _1/_2 mates. Without it featureCounts treats
# the data as single-end.
# NOTE(review): on Subread >= 2.0.2, -p no longer implies fragment counting;
# add --countReadPairs there to count read pairs instead of individual reads.
featureCounts -p -g gene_id \
  -a /home/hemiaomiao/NDNFrnaseq/genome/gencode.vM25.annotation.gtf \
  -o gene_exp.txt \
  /home/hemiaomiao/NDNFrnaseq/data/SRR23641871_nsorted.bam \
  /home/hemiaomiao/NDNFrnaseq/data/SRR23641872_nsorted.bam \
  /home/hemiaomiao/NDNFrnaseq/data/SRR23641873_nsorted.bam \
  /home/hemiaomiao/NDNFrnaseq/data/SRR23641874_nsorted.bam \
  /home/hemiaomiao/NDNFrnaseq/data/SRR23641875_nsorted.bam \
  /home/hemiaomiao/NDNFrnaseq/data/SRR23641876_nsorted.bam