inputdir="/fast3/ranaseq/results/1/FASTQ"
analysisname="/fast3/ranaseq/results/1/analysis_340"
reportsdir="/fast3/ranaseq/scripts/reports"
methodsdir="/fast3/ranaseq/scripts/analysis"
inputquant="quant.genes.sf"
alignparams=rbind(c("1","A","","","0","0"))
files=data.frame(id=c(100007,100005,100003,100001),name=c("SRR5615379_1","SRR5615378_1","SRR5615377_1","SRR5615376_1"),samplename=c("Nrf2_KO_2","Nrf2_KO_1","WT_2","WT_1"))
samples=c("Nrf2_KO_1","Nrf2_KO_2","WT_1","WT_2")
groups=c(0,0,1,1)
groupa="Samples A"
groupb="Samples B"
paired=FALSE
method="DESeq2"
test="Wald"
fitType="parametric"
cutoff=0.05
tabgen_file="/fast3/ranaseq/genomes/Mus_musculus.GRCm38.cdna.all.fa.tabgen"
fdbdir="/fast3/ranaseq/genomes/functional/"
taxon_id="10090"

source(paste0(methodsdir,'/functions.R'))
load(file=paste0(analysisname,"/DE.RData"))
load(file=paste0(analysisname,"/genesLength.RData"))

setwd(analysisname)


## Load Libraries
suppressMessages(library(goseq))
suppressMessages(library(openxlsx))
suppressMessages(library(dplyr))

## GOSEQ Analysis
genome <- data.frame(Symbol=DE$Symbol,bases=genesLength[rownames(DE)])
aggdata <-aggregate(genome$bases, by=list(genome$Symbol),FUN=max, na.rm=TRUE)
colnames(aggdata) <- c("Symbol","bases")
lengthData <- aggdata$bases
names(lengthData) <- aggdata$symbol


DE2 <-aggregate(DE$padj, by=list(DE$Symbol),FUN=min, na.rm=TRUE)
colnames(DE2) <- c("Symbol","padj")
genevector <- as.numeric(DE2$padj<cutoff)
if(sum(genevector)==0){
  DE2 <-aggregate(DE[['p-value']], by=list(DE$Symbol),FUN=min, na.rm=TRUE)
  colnames(DE2) <- c("Symbol","p-value")
  genevector <- as.numeric(order(DE2[['p-value']])<=100)
}
names(genevector) <- DE2$Symbol
genevector[is.na(genevector)] <- 0

# Perform GoSeq analysis
pwf=nullp(genevector,bias.data=lengthData,plot.fit=FALSE)

GODB <- paste0(fdbdir,taxon_id,"_GO.RData")
if(file.exists(GODB)){

## GOS
load(GODB)
if(length(intersect(rownames(pwf),go$symbol))){
goseqres=goseq(pwf,gene2cat=go)

goseqres <- goseqres[goseqres$numDEInCat>1,]

if(nrow(goseqres)){

## Add Go Terms and Ontology
load(paste0(fdbdir,"allgos.RData"))
goseqres2 <- merge(goseqres,allgos,by.x="category",by.y="GO_ID",all.x=TRUE)

if(nrow(goseqres2)){

## Get and Add number of DE genes annotated and number of Whole genes annotated
idsde <- rownames(pwf[pwf[,1]==1,])
AnotGenes <- length(intersect(go[,1],rownames(pwf)))
AnotDEGenes <- length(intersect(go[,1],idsde))

goseqres2$numDEAnot <- AnotDEGenes
goseqres2$AnotGenes <- AnotGenes

## Get DE genes annotated to each category
gosubset <- semi_join(go,data.frame(symbol=idsde,stringsAsFactors=FALSE),by="symbol")
gosubset <- unique(gosubset)

goannot <- gosubset %>%
  group_by(goid) %>%
  summarise(symbols = paste(symbol, collapse = ","))
goannot <- as.data.frame(goannot)

goseqres2 <- merge(goseqres2,goannot,by.x="category",by.y="goid",all.x=TRUE)

if(nrow(goseqres2)){
goseqres2$DEPercent <- (goseqres2$numDEInCat / goseqres2$numDEAnot) * 100
goseqres2$AnotPercent <- (goseqres2$numInCat / goseqres2$AnotGenes) * 100
goseqres2 <- goseqres2[order(goseqres2$over_represented_pvalue),]
goseqres3 <- goseqres2[,c("category","GO_term","Category","numDEInCat","numDEAnot","DEPercent","numInCat","AnotGenes","AnotPercent","over_represented_pvalue","symbols")]
colnames(goseqres3) <- c("GO_ID","GO_TERM","Category","NumDEInCat","NumDEAnot","DEPercent","NumInCat","NumGenesAnot","AnotPercent","p-value","GenesAnot")

## Get links
for(cat in c("Process","Function","Component")){
  get_links(goseqres3[goseqres3[,'Category']==cat,],gosubset,paste0("GO_",cat))
}

write.table(goseqres3,file=paste0(analysisname,"/GOSEQ_GOResults.tsv"),quote=FALSE,sep="\t")
write.xlsx(goseqres3,file=paste0(analysisname,"/GOSEQ_GOResults.xlsx"))
}
}
}
}
}

PATHDB <- paste0(fdbdir,taxon_id,"_Path.RData")
if(file.exists(PATHDB)){

## PATHWAYS
load(PATHDB)
if(length(intersect(rownames(pwf),path$symbol))){
pathseqres=goseq(pwf,gene2cat=path)

pathseqres <- pathseqres[pathseqres$numDEInCat>1,]

if(nrow(pathseqres)){

## Add Pathway Terms and Databases
load(paste0(fdbdir,"allpath.RData"))
pathseqres2 <- merge(pathseqres,allpath,by.x="category",by.y="V2",all.x=TRUE)

if(nrow(pathseqres2)){

## Get and Add number of DE genes annotated and number of Whole genes annotated
idsde <- rownames(pwf[pwf[,1]==1,])
AnotGenes <- length(intersect(path[,1],rownames(pwf)))
AnotDEGenes <- length(intersect(path[,1],idsde))

pathseqres2$numDEAnot <- AnotDEGenes
pathseqres2$AnotGenes <- AnotGenes

## Get DE genes annotated to each category
pathsubset <- semi_join(path,data.frame(symbol=idsde,stringsAsFactors=FALSE),by="symbol")

pathannot <- pathsubset %>%
  group_by(sourceacc) %>%
  summarise(symbols = paste(symbol, collapse = ","))
pathannot <- as.data.frame(pathannot)

pathseqres2 <- merge(pathseqres2,pathannot,by.x="category",by.y="sourceacc",all.x=TRUE)

if(nrow(pathseqres2)){
pathseqres2$DEPercent <- (pathseqres2$numDEInCat / pathseqres2$numDEAnot) * 100
pathseqres2$AnotPercent <- (pathseqres2$numInCat / pathseqres2$AnotGenes) * 100
pathseqres2 <- pathseqres2[order(pathseqres2$over_represented_pvalue),]
pathseqres3 <- pathseqres2[,c("category","V3","V1","numDEInCat","numDEAnot","DEPercent","numInCat","AnotGenes","AnotPercent","over_represented_pvalue","symbols")]
colnames(pathseqres3) <- c("Pathway_ID","Pathway","Database","NumDEInCat","NumDEAnot","DEPercent","NumInCat","NumGenesAnot","AnotPercent","p-value","GenesAnot")

## Get links
get_links(pathseqres3,pathsubset,'Pathway')

write.table(pathseqres3,file=paste0(analysisname,"/GOSEQ_PathResults.tsv"),quote=FALSE,sep="\t")
write.xlsx(pathseqres3,file=paste0(analysisname,"/GOSEQ_PathResults.xlsx"))
}
}
}
}
}

# PDF Report
create_report('GOSEQreport')