inputdir="/fast3/ranaseq/results/1/FASTQ" analysisname="/fast3/ranaseq/results/1/analysis_340" reportsdir="/fast3/ranaseq/scripts/reports" methodsdir="/fast3/ranaseq/scripts/analysis" inputquant="quant.genes.sf" alignparams=rbind(c("1","A","","","0","0")) files=data.frame(id=c(100007,100005,100003,100001),name=c("SRR5615379_1","SRR5615378_1","SRR5615377_1","SRR5615376_1"),samplename=c("Nrf2_KO_2","Nrf2_KO_1","WT_2","WT_1")) samples=c("Nrf2_KO_1","Nrf2_KO_2","WT_1","WT_2") groups=c(0,0,1,1) groupa="Samples A" groupb="Samples B" paired=FALSE method="DESeq2" test="Wald" fitType="parametric" cutoff=0.05 tabgen_file="/fast3/ranaseq/genomes/Mus_musculus.GRCm38.cdna.all.fa.tabgen" fdbdir="/fast3/ranaseq/genomes/functional/" taxon_id="10090" source(paste0(methodsdir,'/functions.R')) load(file=paste0(analysisname,"/counts.RData")) load(file=paste0(analysisname,"/tpms.RData")) if(nrow(counts)==1) stop("Only one gene is expressed. Please use the raw counts data for your analyses. RaNA-Seq is conceived for the analysis of whole transcriptome.") setwd(analysisname) samples=data.frame(samples=samples,type=as.factor(groups)) rownames(samples)<-samples$samples tabgen <- read.delim(tabgen_file,FALSE) Symbol <- as.character(tabgen[,3]) names(Symbol) <- as.character(tabgen[,2]) Symbol <- Symbol[Symbol!=""] geneid <- sort(rownames(tpms)) names(geneid) <- geneid commonsymbols <- intersect(names(Symbol),names(geneid)) geneid[commonsymbols] <- Symbol[commonsymbols] Symbol <- geneid ordTPMs <- tpms[order(rownames(tpms)),] Amean <- ordTPMs[,as.character(samples[samples[,2]==0,1])] if(!is.null(dim(Amean))) Amean <- rowMeans(Amean) Bmean <- ordTPMs[,as.character(samples[samples[,2]==1,1])] if(!is.null(dim(Bmean))) Bmean <- rowMeans(Bmean) ExpMean <- rowMeans(ordTPMs) log2FC <- log2(Bmean+1) - log2(Amean+1) ## Perform DESeq2 study suppressMessages(library("DESeq2")) suppressMessages(library("BiocParallel")) register(MulticoreParam(5)) parallel = TRUE if(length(samples$samples)<=2) parallel = FALSE if(paired){ ind=as.factor(c(1:sum(samples$type==0),1:sum(samples$type==0))) samples <- cbind(samples,ind) design = ~ ind + type }else{ design = ~ type } countData <- round(counts) if(sum(apply(counts,1,min))==0) countData <- countData+1 ddsMat <- DESeqDataSetFromMatrix(countData = countData,colData = samples,design = design) if(!(test %in% c("Wald","LRT"))) test <- "Wald" if(!(fitType %in% c("parametric","local","mean"))) fitType <- "parametric" if(test=="LRT"){ fitType <- "parametric" dds <- DESeq(ddsMat, parallel=parallel,fitType=fitType,test=test,reduced=~ 1) }else{ dds <- DESeq(ddsMat, parallel=parallel,fitType=fitType,test=test) } DE <- results(dds,independentFiltering=FALSE) DE <- as.data.frame(DE)[,c(-1,-2)] DE$pvalue[is.na(DE$pvalue)] <- 1 DE$padj[is.na(DE$padj)] <- 1 colnames(DE)[match('stat',colnames(DE))] <- "Stat" colnames(DE)[match('pvalue',colnames(DE))] <- "p-value" DE <- DE[order(rownames(DE)),] DE <- cbind(Symbol,ExpMean,Amean,Bmean,log2FC,DE) colnames(DE)[3:4] <- c(paste("Exp",groupa),paste("Exp",groupb)) DE <- DE[order(DE[['p-value']]),] save(DE,file=paste0(analysisname,"/DE.RData")) write.table(DE,file=paste0(analysisname,"/DE.tsv"),quote=FALSE,sep="\t") suppressMessages(library(openxlsx)) write.xlsx(DE,file=paste0(analysisname,"/DE.xlsx")) # PDF Report create_report('DEreport')