- 了解生物的RNA种类,真核生物mRNA的特点及如何分离纯化
- 了解RNA、DNA比对的区别
- 掌握DESeq2分析DE基因方法
转录组测序(RNA-Seq)应用非常广泛,目前测序市场有超过一半做的是转录组。
转录组没序是对生物体内所有的RNA进行测序,一般可以按测序鞋标不同而分为
mRNA,microRNA, total RNA测序,传统意义上的RNA-Seq指的是mRNA测序。一般
RNA-Seq项目分析包括以下几个步骤:
- 序列质控
- 将得到的clean reads 回帖到参考基因组
- 计算每个基因的reads数
- 比较不同组之间的DE基因
- DE基因的功能挖掘
本实验主要介绍DESeq2的使用,DESeq2是用来做差异基因分析的一套R软件包,
有两个水稻品种材料,品种BR-IRGA 409耐热,品种IRGA 428不耐热,拟比较这两个品种经热处理后基因表达差异。
RUN | Class | Cultivar | Status | Group | Description |
---|---|---|---|---|---|
SRR7760302 | IRGA409_T | BR-IRGA 409 | tolerant to heat stress | BR-IRGA 409 treated_spikelet | high temperature treatment (38C) for 7 hrs |
SRR7760301 | IRGA409_T | BR-IRGA 409 | tolerant to heat stress | BR-IRGA 409 treated_spikelet | high temperature treatment (38C) for 7 hrs |
SRR7760300 | IRGA409_T | BR-IRGA 409 | tolerant to heat stress | BR-IRGA 409 treated_spikelet | high temperature treatment (38C) for 7 hrs |
SRR7760299 | IRGA409_C | BR-IRGA 409 | tolerant to heat stress | BR-IRGA 409 control_spikelet | control condition (29C) for 7 hrs |
SRR7760298 | IRGA409_C | BR-IRGA 409 | tolerant to heat stress | BR-IRGA 409 control_spikelet | control condition (29C) for 7 hrs |
SRR7760297 | IRGA409_C | BR-IRGA 409 | tolerant to heat stress | BR-IRGA 409 control_spikelet | control condition (29C) for 7 hrs |
SRR7760296 | IRGA428_T | IRGA 428 | sensitive to heat stress | IRGA 428 treated_spikelet | high temperature treatment (38C) for 7 hrs |
SRR7760295 | IRGA428_T | IRGA 428 | sensitive to heat stress | IRGA 428 treated_spikelet | high temperature treatment (38C) for 7 hrs |
SRR7760294 | IRGA428_T | IRGA 428 | sensitive to heat stress | IRGA 428 treated_spikelet | high temperature treatment (38C) for 7 hrs |
SRR7760293 | IRGA428_C | IRGA 428 | sensitive to heat stress | IRGA 428 control_spikelet | control condition (29C) for 7 hrs |
SRR7760292 | IRGA428_C | IRGA 428 | sensitive to heat stress | IRGA 428 control_spikelet | control condition (29C) for 7 hrs |
SRR7760291 | IRGA428_C | IRGA 428 | sensitive to heat stress | IRGA 428 control_spikelet | control condition (29C) for 7 hrs |
work_mapping.sh
#!/bin/bash
#$ -S /bin/bash
#$ -N hisat2
#$ -j y
#$ -cwd
for i in $(seq 291 302);
do
hisat2 -p 1 \
-x /data/lab/genomic/lab05/ref/index/osa \
-q /data/lab/genomic/lab05/data/SRR7760${i}.fastq | \
samtools view -b - | \
samtools sort -o SRR7760${i}.sort.bam - >SRR7760${i}.log;
done
work_count.sh
#!/bin/bash
#$ -S /bin/bash
#$ -N count
#$ -j y
#$ -cwd
TPMCalculator -g /data/lab/genomic/lab05/ref/Oryza_sativa.IRGSP-1.0.41.gtf -d ./ -a
paste <(cut -f 1,6 SRR7760291.sort_genes.out) \
<(cut -f 6 SRR7760292.sort_genes.out) \
<(cut -f 6 SRR7760293.sort_genes.out) \
<(cut -f 6 SRR7760294.sort_genes.out) \
<(cut -f 6 SRR7760295.sort_genes.out) \
<(cut -f 6 SRR7760296.sort_genes.out) \
<(cut -f 6 SRR7760297.sort_genes.out) \
<(cut -f 6 SRR7760298.sort_genes.out) \
<(cut -f 6 SRR7760299.sort_genes.out) \
<(cut -f 6 SRR7760300.sort_genes.out) \
<(cut -f 6 SRR7760301.sort_genes.out) \
<(cut -f 6 SRR7760302.sort_genes.out) > counts.tsv
# Replace the header line
sed -i '1c\GeneID\tSRR7760291\tSRR7760292\tSRR7760293\tSRR7760294\tSRR7760295\tSRR7760296\tSRR7760297\tSRR7760298\tSRR7760299\tSRR7760300\tSRR7760301\tSRR7760302' counts.tsv
require(tidyverse)
require(DESeq2)
cts <- read_tsv("counts.tsv") %>% as.data.frame()
rownames(cts) <- cts$GeneID
cts <- cts[,-1]
colData <- data.frame(SampleID=c("SRR7760291","SRR7760292","SRR7760293",
"SRR7760294","SRR7760295","SRR7760296",
"SRR7760297","SRR7760298","SRR7760299",
"SRR7760300","SRR7760301","SRR7760302"),
Group=factor(c(rep("IRGA428_C",3),
rep("IRGA428_T",3),
rep("IRGA409_C",3),
rep("IRGA409_T",3))))
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = colData,
design = ~ Group)
dds <- DESeq(dds)
res.IRGA428 <- results(dds, contrast=c("Group","IRGA428_C","IRGA428_T"))
res.IRGA409 <- results(dds, contrast=c("Group","IRGA409_C","IRGA409_T"))
write.csv(res.IRGA428[order(res.IRGA428$pvalue),],"Results_428.csv")
write.csv(res.IRGA409[order(res.IRGA409$pvalue),],"Results_409.csv")
比较两种水稻品种经热处理后基因表达变化情况有什么差异。