inputdir="/fast3/ranaseq/results/1/FASTQ" analysisname="/fast3/ranaseq/results/1/analysis_340" reportsdir="/fast3/ranaseq/scripts/reports" methodsdir="/fast3/ranaseq/scripts/analysis" inputquant="quant.genes.sf" alignparams=rbind(c("1","A","","","0","0")) files=data.frame(id=c(100007,100005,100003,100001),name=c("SRR5615379_1","SRR5615378_1","SRR5615377_1","SRR5615376_1"),samplename=c("Nrf2_KO_2","Nrf2_KO_1","WT_2","WT_1")) samples=c("Nrf2_KO_1","Nrf2_KO_2","WT_1","WT_2") groups=c(0,0,1,1) groupa="Samples A" groupb="Samples B" paired=FALSE method="DESeq2" test="Wald" fitType="parametric" cutoff=0.05 tabgen_file="/fast3/ranaseq/genomes/Mus_musculus.GRCm38.cdna.all.fa.tabgen" fdbdir="/fast3/ranaseq/genomes/functional/" taxon_id="10090" source(paste0(methodsdir,'/functions.R')) load(file=paste0(analysisname,"/DE.RData")) load(file=paste0(analysisname,"/genesLength.RData")) setwd(analysisname) ## Load Libraries suppressMessages(library(goseq)) suppressMessages(library(openxlsx)) suppressMessages(library(dplyr)) ## GOSEQ Analysis genome <- data.frame(Symbol=DE$Symbol,bases=genesLength[rownames(DE)]) aggdata <-aggregate(genome$bases, by=list(genome$Symbol),FUN=max, na.rm=TRUE) colnames(aggdata) <- c("Symbol","bases") lengthData <- aggdata$bases names(lengthData) <- aggdata$symbol DE2 <-aggregate(DE$padj, by=list(DE$Symbol),FUN=min, na.rm=TRUE) colnames(DE2) <- c("Symbol","padj") genevector <- as.numeric(DE2$padj1,] if(nrow(goseqres)){ ## Add Go Terms and Ontology load(paste0(fdbdir,"allgos.RData")) goseqres2 <- merge(goseqres,allgos,by.x="category",by.y="GO_ID",all.x=TRUE) if(nrow(goseqres2)){ ## Get and Add number of DE genes annotated and number of Whole genes annotated idsde <- rownames(pwf[pwf[,1]==1,]) AnotGenes <- length(intersect(go[,1],rownames(pwf))) AnotDEGenes <- length(intersect(go[,1],idsde)) goseqres2$numDEAnot <- AnotDEGenes goseqres2$AnotGenes <- AnotGenes ## Get DE genes annotated to each category gosubset <- semi_join(go,data.frame(symbol=idsde,stringsAsFactors=FALSE),by="symbol") gosubset <- unique(gosubset) goannot <- gosubset %>% group_by(goid) %>% summarise(symbols = paste(symbol, collapse = ",")) goannot <- as.data.frame(goannot) goseqres2 <- merge(goseqres2,goannot,by.x="category",by.y="goid",all.x=TRUE) if(nrow(goseqres2)){ goseqres2$DEPercent <- (goseqres2$numDEInCat / goseqres2$numDEAnot) * 100 goseqres2$AnotPercent <- (goseqres2$numInCat / goseqres2$AnotGenes) * 100 goseqres2 <- goseqres2[order(goseqres2$over_represented_pvalue),] goseqres3 <- goseqres2[,c("category","GO_term","Category","numDEInCat","numDEAnot","DEPercent","numInCat","AnotGenes","AnotPercent","over_represented_pvalue","symbols")] colnames(goseqres3) <- c("GO_ID","GO_TERM","Category","NumDEInCat","NumDEAnot","DEPercent","NumInCat","NumGenesAnot","AnotPercent","p-value","GenesAnot") ## Get links for(cat in c("Process","Function","Component")){ get_links(goseqres3[goseqres3[,'Category']==cat,],gosubset,paste0("GO_",cat)) } write.table(goseqres3,file=paste0(analysisname,"/GOSEQ_GOResults.tsv"),quote=FALSE,sep="\t") write.xlsx(goseqres3,file=paste0(analysisname,"/GOSEQ_GOResults.xlsx")) } } } } } PATHDB <- paste0(fdbdir,taxon_id,"_Path.RData") if(file.exists(PATHDB)){ ## PATHWAYS load(PATHDB) if(length(intersect(rownames(pwf),path$symbol))){ pathseqres=goseq(pwf,gene2cat=path) pathseqres <- pathseqres[pathseqres$numDEInCat>1,] if(nrow(pathseqres)){ ## Add Pathway Terms and Databases load(paste0(fdbdir,"allpath.RData")) pathseqres2 <- merge(pathseqres,allpath,by.x="category",by.y="V2",all.x=TRUE) if(nrow(pathseqres2)){ ## Get and Add number of DE genes annotated and number of Whole genes annotated idsde <- rownames(pwf[pwf[,1]==1,]) AnotGenes <- length(intersect(path[,1],rownames(pwf))) AnotDEGenes <- length(intersect(path[,1],idsde)) pathseqres2$numDEAnot <- AnotDEGenes pathseqres2$AnotGenes <- AnotGenes ## Get DE genes annotated to each category pathsubset <- semi_join(path,data.frame(symbol=idsde,stringsAsFactors=FALSE),by="symbol") pathannot <- pathsubset %>% group_by(sourceacc) %>% summarise(symbols = paste(symbol, collapse = ",")) pathannot <- as.data.frame(pathannot) pathseqres2 <- merge(pathseqres2,pathannot,by.x="category",by.y="sourceacc",all.x=TRUE) if(nrow(pathseqres2)){ pathseqres2$DEPercent <- (pathseqres2$numDEInCat / pathseqres2$numDEAnot) * 100 pathseqres2$AnotPercent <- (pathseqres2$numInCat / pathseqres2$AnotGenes) * 100 pathseqres2 <- pathseqres2[order(pathseqres2$over_represented_pvalue),] pathseqres3 <- pathseqres2[,c("category","V3","V1","numDEInCat","numDEAnot","DEPercent","numInCat","AnotGenes","AnotPercent","over_represented_pvalue","symbols")] colnames(pathseqres3) <- c("Pathway_ID","Pathway","Database","NumDEInCat","NumDEAnot","DEPercent","NumInCat","NumGenesAnot","AnotPercent","p-value","GenesAnot") ## Get links get_links(pathseqres3,pathsubset,'Pathway') write.table(pathseqres3,file=paste0(analysisname,"/GOSEQ_PathResults.tsv"),quote=FALSE,sep="\t") write.xlsx(pathseqres3,file=paste0(analysisname,"/GOSEQ_PathResults.xlsx")) } } } } } # PDF Report create_report('GOSEQreport')