diff GO_enrichment/GO_enrichment.xml @ 0:bde64415f03b draft

Uploaded
author nanettec
date Wed, 16 Mar 2016 10:15:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GO_enrichment/GO_enrichment.xml	Wed Mar 16 10:15:05 2016 -0400
@@ -0,0 +1,150 @@
+<tool id="GO_enrichment5" name="GO enrichment" version="5.0.0">
+	<description>of eQTL hotspot gene lists</description>
+	<command interpreter="python">
+		GO_enrichment.py --rscript "\$R_SCRIPT_PATH/GO_enrichment/TopGO_pipeline_new.txt" --input1 '$input1' --input2 '$input2' --input3 '$input3' --input4 '$input4' --input5 '$input5' --output1 '$output1' --output2 '$output2' --output3 '$output3' --output4 '$output4'
+	</command> <!-- every path argument is quoted so the shell hands it to GO_enrichment.py as a single word; \$ leaves R_SCRIPT_PATH for the shell to expand -->
+        <inputs> <!-- five tabular datasets, passed to the script as input1 through input5 in this order -->
+            <param name="input1" type="data" format="tabular" label="Gene Universe file" help="A tabular file with the full set of genes from the study (gene universe)"/>
+            <param name="input2" type="data" format="tabular" label="Genes of interest file, all eQTLs" help="A tabular file with gene lists of interest (genes linked to all eQTL hotspots)"/>
+            <param name="input3" type="data" format="tabular" label="Genes of interest file, cis eQTLs" help="A tabular file with gene lists of interest (genes linked to cis eQTL hotspots)"/>
+            <param name="input4" type="data" format="tabular" label="Genes of interest file, trans eQTLs" help="A tabular file with gene lists of interest (genes linked to trans eQTL hotspots)"/>
+            <param name="input5" type="data" format="tabular" label="Gene2GO mapping file" help="A tabular file with a mapping between gene identifiers and GO terms"/>
+        </inputs>
+	<outputs> <!-- three zip datasets and one tabular dataset, passed to the script as output1 through output4 -->
+		<data name="output1" format="zip"/>
+		<data name="output2" format="zip"/>
+		<data name="output3" format="zip"/>
+		<data name="output4" format="tabular"/>
+	</outputs>
+	<requirements> <!-- R_SCRIPT_PATH is the environment variable the command line uses to locate TopGO_pipeline_new.txt -->
+		<requirement type="set_environment">R_SCRIPT_PATH</requirement>
+	</requirements>
+	<tests> <!-- NOTE(review): the only test case below is empty (no params, no expected outputs), so it checks nothing; TODO add test data -->
+          <test>
+          </test>
+	</tests>
+	<help>
+		
+**What it does**
+
+This tool uses the topGO R package to determine the enriched GO terms, for one or more gene lists simultaneously. 
+
+-------
+
+**Example input files**
+
+The full list of gene identifiers from the array/study (gene universe) (only a part of the file is shown)::
+
+ geneA
+ geneB
+ geneC
+ geneD
+ geneE
+ geneF
+ geneG
+ geneH
+ geneI
+ geneJ
+ geneK
+ geneL
+ geneM
+ geneN
+ geneO
+
+
+
+Predefined lists of interesting genes, separated by header lines starting with a '='. Only the gene identifiers in the first column will be used.
+
+For example: Gene lists extracted for significant eQTL hotspots (only the first 5 eQTLs linked to hotspots 1 - 2 are shown).
+Three input files like this are required, for all, cis and trans eQTLs respectively::
+
+ = = = = =   Hotspot 1    chr 1    sliding.ids: [17, 18]    int.ids: [19, 20, 21, 22]    nr.eQTL: 257   = = = = =												
+ geneA    639     1       3       0.2878  4       0.3872  3       0.3478  13.7958496      0.110934        0.487661	 parentB      no_result	  19      NA
+ geneL   800     1       3       0.2478  4       0.4072  3       0.3478  24.2128991      0.2848178       0.4639009       parentB      no_result   19      NA
+ geneB   382     1       3       0.2878  4       0.3872  3       0.3478  13.7048724      0.1522281       0.3023807       parentB      trans   	  19      757
+ geneD   457     1       3       0.2678  4       0.4072  3       0.3478  16.2210425      0.1537186       0.3527068       parentA      trans   	  19      722
+ geneE   381     1       3       0.2678  4       0.4272  3       0.3478  19.2398655      0.1747831       0.4636225       parentA      cis    	  19      16
+
+ …												
+
+ = = = = =   Hotspot 2    chr 1    sliding.ids: [36, 37]    int.ids: [43, 44, 45, 46]    nr.eQTL: 268   = = = = =												
+ geneW    146     1       8       0.6998  9       0.9588  8       0.7798  17.6058658      0.168243        0.3517602       parentA      cis        43      41
+ geneP    510     1       8       0.6998  8       0.7998  8       0.7798  48.9321454      0.6530789       0.7453719       parentB      trans      43      566
+ geneF    231     1       8       0.7598  8       0.7998  8       0.7798  13.2268263      0.1715268       0.4169803       parentB      trans      43      491
+ geneY    480     1       7       0.6922  8       0.7998  8       0.7798  71.8820179      0.7463132       0.8353116       parentB      no_result  43      NA
+ geneG    652     1       8       0.7798  8       0.7798  8       0.7798  11.5596194      0.1168083       0.3429812       parentB      trans      43      753
+
+ …												
+
+ 
+
+
+Gene-to-GOs mappings, for each gene identifier the corresponding annotated GO terms are listed (only a part of the file is shown)::
+
+ geneA	GO:0016491
+ geneB	GO:0003824, GO:0008152, GO:0008415, GO:0009058
+ geneC	GO:0004553, GO:0005975
+ geneD	GO:0004175, GO:0004298, GO:0005839, GO:0006511, GO:0019773, GO:0051603
+ geneE	GO:0005622, GO:0008270
+ geneF	GO:0004221, GO:0005622, GO:0006511, GO:0008152, GO:0008270, GO:0016491, GO:0055114
+ geneG	GO:0004672, GO:0004674, GO:0005524, GO:0006468
+ geneH	GO:0003924, GO:0005515, GO:0005525, GO:0005622, GO:0006886, GO:0006913, GO:0007165, GO:0007264, GO:0015031
+ geneI	GO:0007049, GO:0016538
+ geneJ	GO:0019787, GO:0043687, GO:0051246
+ geneK	GO:0005622, GO:0008270
+ geneL	GO:0004175, GO:0004298, GO:0005839, GO:0006511, GO:0019773, GO:0051603
+ geneM	GO:0005643, GO:0006810
+ geneN	GO:0005634, GO:0030528, GO:0045449
+ geneO	GO:0005515
+
+-------
+
+**Example output files**
+
+
+For each hotspot, the top 20 GO terms in the Biological Process (BP), Molecular Function (MF) and Cellular Component (CC) ontologies are given::
+
+ BP_GO.ID	Term					Annotated	Significant	Expected	classicFisher.p	classicFisher.adj_p	p.val cutoff 0.05 	p.val cutoff 0.01	adj.p.val cutoff 0.05
+ GO:0010498	proteasomal protein catabolic process		7	2		0.09		0.0031		1			"*"			"*"			""
+ GO:0043161	proteasomal ubiquitin-dependent protein ...	7	2		0.09		0.0031		1			"*"			"*"			""
+ GO:0044238	primary metabolic process			4986	75		62.52		0.0094		1			"*"			"*"			""
+ GO:0006857	oligopeptide transport				36	3		0.45		0.0101		1			"*"			""			""
+ GO:0006259	DNA metabolic process				349	10		4.38		0.012		1			"*"			""			""
+ ... 
+ 
+ MF_GO.ID	Term					Annotated	Significant	Expected	classicFisher.p	classicFisher.adj_p	p.val cutoff 0.05 	p.val cutoff 0.01	adj.p.val cutoff 0.05
+ GO:0004181	metallocarboxypeptidase activity		10	2		0.11		0.0053		1			"*"			"*"			""
+ GO:0003684	damaged DNA binding				20	2		0.22		0.0208		1			"*"			""			""
+ GO:0008235	metalloexopeptidase activity			20	2		0.22		0.0208		1			"*"			""			""
+ GO:0008553	hydrogen-exporting ATPase activity, phos...	2	1		0.02		0.0223		1			"*"			""			""
+ GO:0003697	single-stranded DNA binding			21	2		0.24		0.0228		1			"*"			""			""
+ ...
+
+ CC_GO.ID	Term					Annotated	Significant	Expected	classicFisher.p	classicFisher.adj_p	p.val cutoff 0.05 	p.val cutoff 0.01	adj.p.val cutoff 0.05
+ GO:0005743	mitochondrial inner membrane			84	5		1.05		0.0037		0.437168496		"*"			"*"			""
+ GO:0044429	mitochondrial part				122	6		1.53		0.0039		0.437168496		"*"			"*"			""
+ GO:0019866	organelle inner membrane			85	5		1.06		0.0039		0.437168496		"*"			"*"			""
+ GO:0005739	mitochondrion					138	6		1.73		0.0071		0.500734933		"*"			"*"			""
+ GO:0031966	mitochondrial membrane				99	5		1.24		0.0075		0.500734933		"*"			"*"			""
+ ...
+
+
+For each hotspot, for each ontology, a visual representation of the GO structure is also given, i.e. the subgraph induced by the top 5 GO terms identified.
+
+
+Hotspot summary file (14 columns):: 
+
+ Type	Number	Chr	Sliding_IDs	Interval_IDs	    Num_eQTLs	Parent SC up-reg %	Parent CML up-reg %	BP_pVal cutoff 0.01   BP_adj_pVal cutoff 0.05	MF_pVal cutoff 0.01	MF_adj_pVal cutoff 0.05	CC_pVal cutoff 0.01	CC_adj_pVal cutoff 0.05
+ All	1	1	[17, 18]	[19, 20, 21, 22]	257	54.9			41.0			1			0			4			0			0			0
+ All	2	1	[36, 37]	[43, 44, 45, 46]	268	90.7			9.3			16			0		 	6			0			0			0
+ ... 
+ Cis	1	2	[198]		[231, 232]		73	67.3			65.5			1			0		 	5			0			0			0
+ Cis	2	3	[260]		[307, 308, 309]		55	61.8			38.2			4			0			2			0			2			0
+  ...
+ Trans	1	1	[17, 18]	[19, 20, 21, 22]	192	54.5			40.6			1			0			2			0			0			0
+ Trans	2	1	[36, 37]	[43, 44, 45, 46]	202	93.6			6.4			19			0			5			0			0			0
+ ...
+ 
+
+        </help>
+</tool>