view GO_enrichment/TopGO_pipeline_new.txt @ 8:dd918931548f draft

Deleted selected files
author nanettec
date Fri, 18 Mar 2016 03:58:15 -0400
parents bde64415f03b
children
line wrap: on
line source

#setwd("/Users/nanettecoetzer/Documents/Bioinformatics/MAIZE_project/eQTL_pipeline/April_2012_pipeline_scripts/July_2012/output_July")


library(topGO)
library(ALL)
data(ALL)

adjust = "fdr"

# gene universe (infile 1)
gene_universe <- read.table(file="gene_universe.txt",header=TRUE)
#gene_universe <- read.table(file="gene_pos_v2.txt")
u_gene_universe <- as.vector(unique(gene_universe[,1]))

# hotspot (internal infile 2)
myInterestedGenes <- read.table(file="interesting_genes.txt")
u_myInterestedGenes <- as.vector(unique(myInterestedGenes[,1]))
geneList <- factor(as.integer(u_gene_universe %in% u_myInterestedGenes))
names(geneList) <- u_gene_universe

# gene2GO mapping (user infile 3)
Zm2GO <- readMappings(file="gene2GO_mapping.map")
#Zm2GO <- readMappings(file="maize_agilent_reference.map.txt")

# BP
# create topGO object
GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList, annot = annFUN.gene2GO, gene2GO = Zm2GO)

# run the test
resultFis <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
pvalFis <- score(resultFis)
adj.pvalFis <- p.adjust(pvalFis,adjust)

# write output table
allRes <- GenTable(GOdata, classic = resultFis, topNodes = 20)
p.vals <- as.vector(allRes[,6])

classicFisher.adj_p <- as.vector(adj.pvalFis[allRes[,1]])
newRes <- cbind(allRes, classicFisher.adj_p)
names(newRes)[6] = "classicFisher.p.val"
names(newRes)[7] = "classicFisher.adj.p.val"

sign <- dim(allRes[as.numeric(p.vals)<0.05,])[1]
s <- rep('*',sign)
ns <- rep('',20-sign)
newRes <- cbind(newRes,c(s,ns))
names(newRes)[8] = "p.val<0.05"

sign1 <- dim(allRes[as.numeric(p.vals)<0.01,])[1]
s1 <- rep('*',sign1)
ns1 <- rep('',20-sign1)
newRes <- cbind(newRes,c(s1,ns1))
names(newRes)[9] = "p.val<0.01"

sign2 <- dim(allRes[as.numeric(classicFisher.adj_p)<0.05,])[1]
s2 <- rep('*',sign2)
ns2 <- rep('',20-sign2)
newRes <- cbind(newRes,c(s2,ns2))
names(newRes)[10] = "adj.p.val<0.05"

##############
sel.terms <- as.vector(newRes[,1]);
#sel.terms;
num.ann.genes <- countGenesInTerm(GOdata, sel.terms);
ann.genes <- genesInTerm(GOdata, sel.terms);
ans = c("GO","geneID");

for (i in 1:length(ann.genes)) {for (j in ann.genes[i]) {for (k in j) {if (k %in% as.vector(u_myInterestedGenes)) ans=rbind(ans,c(names(ann.genes[i]),(k))) }}};

if (length(ans)>2) {
   ans2 = data.frame(ans[2:dim(ans)[1],]);
   ans3 = aggregate(ans2[,2], by=list(ans2$X1), paste, collapse=";");
   ans4 = ans3[order(match(ans3[,1],newRes[,1])),][2];
   extra_num = 20-dim(ans4)[1];
   geneIDs=as.vector(ans4[,1]);
} else {
	extra_num = 19;
	geneIDs = "";
}
GeneIDs_significant = c(geneIDs,rep("",extra_num))
newRes2 <- cbind(newRes, GeneIDs_significant);
##############

write.table(newRes2,file="GO_table_BP.txt",sep="\t")


# visualize GO if 1 or more p.val < 0.01
#if (sign1 > 0) {
#nr_nodes = sign1
nr_nodes = 5
showSigOfNodes(GOdata, score(resultFis), firstSigNodes = nr_nodes, useInfo = "all")
printGraph(GOdata, resultFis, firstSigNodes = nr_nodes, fn.prefix = "tGO_BP", useInfo = "all", pdfSW = TRUE)
#}


# MF
# create topGO object
GOdata <- new("topGOdata", ontology = "MF", allGenes = geneList, annot = annFUN.gene2GO, gene2GO = Zm2GO)

# run the test
resultFis <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
pvalFis <- score(resultFis)
adj.pvalFis <- p.adjust(pvalFis,adjust)

# write output table
allRes <- GenTable(GOdata, classic = resultFis, topNodes = 20)
p.vals <- as.vector(allRes[,6])

classicFisher.adj_p <- as.vector(adj.pvalFis[allRes[,1]])
newRes <- cbind(allRes, classicFisher.adj_p)
names(newRes)[6] = "classicFisher.p.val"
names(newRes)[7] = "classicFisher.adj.p.val"

sign <- dim(allRes[as.numeric(p.vals)<0.05,])[1]
s <- rep('*',sign)
ns <- rep('',20-sign)
newRes <- cbind(newRes,c(s,ns))
names(newRes)[8] = "p.val<0.05"

sign1 <- dim(allRes[as.numeric(p.vals)<0.01,])[1]
s1 <- rep('*',sign1)
ns1 <- rep('',20-sign1)
newRes <- cbind(newRes,c(s1,ns1))
names(newRes)[9] = "p.val<0.01"

sign2 <- dim(allRes[as.numeric(classicFisher.adj_p)<0.05,])[1]
s2 <- rep('*',sign2)
ns2 <- rep('',20-sign2)
newRes <- cbind(newRes,c(s2,ns2))
names(newRes)[10] = "adj.p.val<0.05"

##############
sel.terms <- as.vector(newRes[,1]);
#sel.terms;
num.ann.genes <- countGenesInTerm(GOdata, sel.terms);
ann.genes <- genesInTerm(GOdata, sel.terms);
ans = c("GO","geneID");

for (i in 1:length(ann.genes)) {for (j in ann.genes[i]) {for (k in j) {if (k %in% as.vector(u_myInterestedGenes)) ans=rbind(ans,c(names(ann.genes[i]),(k))) }}};

if (length(ans)>2) {
   ans2 = data.frame(ans[2:dim(ans)[1],]);
   ans3 = aggregate(ans2[,2], by=list(ans2$X1), paste, collapse=";");
   ans4 = ans3[order(match(ans3[,1],newRes[,1])),][2];
   extra_num = 20-dim(ans4)[1];
   geneIDs=as.vector(ans4[,1]);
} else {
	extra_num = 19;
	geneIDs = "";
}
GeneIDs_significant = c(geneIDs,rep("",extra_num))
newRes2 <- cbind(newRes, GeneIDs_significant);
##############

write.table(newRes2,file="GO_table_MF.txt",sep="\t")


# visualize GO if 1 or more p.val < 0.01
#if (sign1 > 0) {
nr_nodes = 5
showSigOfNodes(GOdata, score(resultFis), firstSigNodes = nr_nodes, useInfo = "all")
printGraph(GOdata, resultFis, firstSigNodes = nr_nodes, fn.prefix = "tGO_MF", useInfo = "all", pdfSW = TRUE)
#}


# CC
# create topGO object
GOdata <- new("topGOdata", ontology = "CC", allGenes = geneList, annot = annFUN.gene2GO, gene2GO = Zm2GO)

# run the test
resultFis <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
pvalFis <- score(resultFis)
adj.pvalFis <- p.adjust(pvalFis,adjust)

# write output table
allRes <- GenTable(GOdata, classic = resultFis, topNodes = 20)
p.vals <- as.vector(allRes[,6])

classicFisher.adj_p <- as.vector(adj.pvalFis[allRes[,1]])
newRes <- cbind(allRes, classicFisher.adj_p)
names(newRes)[6] = "classicFisher.p.val"
names(newRes)[7] = "classicFisher.adj.p.val"

sign <- dim(allRes[as.numeric(p.vals)<0.05,])[1]
s <- rep('*',sign)
ns <- rep('',20-sign)
newRes <- cbind(newRes,c(s,ns))
names(newRes)[8] = "p.val<0.05"

sign1 <- dim(allRes[as.numeric(p.vals)<0.01,])[1]
s1 <- rep('*',sign1)
ns1 <- rep('',20-sign1)
newRes <- cbind(newRes,c(s1,ns1))
names(newRes)[9] = "p.val<0.01"

sign2 <- dim(allRes[as.numeric(classicFisher.adj_p)<0.05,])[1]
s2 <- rep('*',sign2)
ns2 <- rep('',20-sign2)
newRes <- cbind(newRes,c(s2,ns2))
names(newRes)[10] = "adj.p.val<0.05"

##############
sel.terms <- as.vector(newRes[,1]);
#sel.terms;
num.ann.genes <- countGenesInTerm(GOdata, sel.terms);
ann.genes <- genesInTerm(GOdata, sel.terms);
ans = c("GO","geneID");

for (i in 1:length(ann.genes)) {for (j in ann.genes[i]) {for (k in j) {if (k %in% as.vector(u_myInterestedGenes)) ans=rbind(ans,c(names(ann.genes[i]),(k))) }}};

if (length(ans)>2) {
   ans2 = data.frame(ans[2:dim(ans)[1],]);
   ans3 = aggregate(ans2[,2], by=list(ans2$X1), paste, collapse=";");
   ans4 = ans3[order(match(ans3[,1],newRes[,1])),][2];
   extra_num = 20-dim(ans4)[1];
   geneIDs=as.vector(ans4[,1]);
} else {
	extra_num = 19;
	geneIDs = "";
}
GeneIDs_significant = c(geneIDs,rep("",extra_num))
newRes2 <- cbind(newRes, GeneIDs_significant);
##############

write.table(newRes2,file="GO_table_CC.txt",sep="\t")


# visualize GO if 1 or more p.val < 0.01
#if (sign1 > 0) {
nr_nodes = 5
showSigOfNodes(GOdata, score(resultFis), firstSigNodes = nr_nodes, useInfo = "all")
printGraph(GOdata, resultFis, firstSigNodes = nr_nodes, fn.prefix = "tGO_CC", useInfo = "all", pdfSW = TRUE)
#}