Mercurial > repos > nanettec > go_enrichment
diff GO_enrichment/GO_enrichment.xml @ 0:bde64415f03b draft
Uploaded
author | nanettec |
---|---|
date | Wed, 16 Mar 2016 10:15:05 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  Galaxy tool wrapper: GO enrichment of eQTL hotspot gene lists.
  Provenance: GO_enrichment/GO_enrichment.xml, changeset 0:bde64415f03b
  (Wed Mar 16 10:15:05 2016 -0400), initial upload.
-->
<tool id="GO_enrichment5" name="GO enrichment" version="5.0.0">
    <description>of eQTL hotspot gene lists</description>
    <!--
      Runs GO_enrichment.py with the Python interpreter. The R script location is
      resolved at job run time from the R_SCRIPT_PATH environment variable (the
      backslash keeps the dollar sign away from the template engine). Dataset
      paths are single-quoted so that whitespace or shell metacharacters in a
      path cannot split or alter the command line.
    -->
    <command interpreter="python">
        GO_enrichment.py --rscript \$R_SCRIPT_PATH/GO_enrichment/TopGO_pipeline_new.txt --input1 '$input1' --input2 '$input2' --input3 '$input3' --input4 '$input4' --input5 '$input5' --output1 '$output1' --output2 '$output2' --output3 '$output3' --output4 '$output4'
    </command>
    <inputs>
        <param label="Gene Universe file" name="input1" type="data" format="tabular" help="A tabular file with the full set of genes from the study (gene universe)" />
        <param label="Genes of interest file, all eQTLs" name="input2" type="data" format="tabular" help="A tabular file with gene lists of interest (genes linked to all eQTL hotspots)" />
        <param label="Genes of interest file, cis eQTLs" name="input3" type="data" format="tabular" help="A tabular file with gene lists of interest (genes linked to cis eQTL hotspots)" />
        <param label="Genes of interest file, trans eQTLs" name="input4" type="data" format="tabular" help="A tabular file with gene lists of interest (genes linked to trans eQTL hotspots)" />
        <param label="Gene2GO mapping file" name="input5" type="data" format="tabular" help="A tabular file with a mapping between gene identifiers and GO terms" />
    </inputs>
    <!-- Three zip archives and one tabular file; NOTE(review): presumably the
         archives correspond to the all / cis / trans inputs and the tabular file
         is the hotspot summary described in the help. Confirm in GO_enrichment.py. -->
    <outputs>
        <data format="zip" name="output1" />
        <data format="zip" name="output2" />
        <data format="zip" name="output3" />
        <data format="tabular" name="output4" />
    </outputs>
    <!-- R_SCRIPT_PATH must be provided by the Galaxy environment; the command
         above reads it to locate TopGO_pipeline_new.txt. -->
    <requirements>
        <requirement type="set_environment">R_SCRIPT_PATH</requirement>
    </requirements>
    <!-- Placeholder: no test cases are defined yet. -->
    <tests>
        <test>
        </test>
    </tests>
    <help>

**What it does**

This tool uses the topGO R package to determine the enriched GO terms, for one or more gene lists simultaneously.

-------

**Example input files**

The full list of gene identifiers from the array/study (gene universe) (only a part of the file is shown)::

    geneA
    geneB
    geneC
    geneD
    geneE
    geneF
    geneG
    geneH
    geneI
    geneJ
    geneK
    geneL
    geneM
    geneN
    geneO



Predefined lists of interesting genes, separated by header lines starting with a '='. Only the gene identifiers in the first column will be used.

For example: Gene lists extracted for significant eQTL hotspots (only the first 5 eQTLs linked to hotspots 1 - 2 are shown).
Three input files like this are required, for all, cis and trans eQTLs respectively::

    = = = = = Hotspot 1 chr 1 sliding.ids: [17, 18] int.ids: [19, 20, 21, 22] nr.eQTL: 257 = = = = =
    geneA 639 1 3 0.2878 4 0.3872 3 0.3478 13.7958496 0.110934 0.487661 parentB no_result 19 NA
    geneL 800 1 3 0.2478 4 0.4072 3 0.3478 24.2128991 0.2848178 0.4639009 parentB no_result 19 NA
    geneB 382 1 3 0.2878 4 0.3872 3 0.3478 13.7048724 0.1522281 0.3023807 parentB trans 19 757
    geneD 457 1 3 0.2678 4 0.4072 3 0.3478 16.2210425 0.1537186 0.3527068 parentA trans 19 722
    geneE 381 1 3 0.2678 4 0.4272 3 0.3478 19.2398655 0.1747831 0.4636225 parentA cis 19 16

    …

    = = = = = Hotspot 2 chr 1 sliding.ids: [36, 37] int.ids: [43, 44, 45, 46] nr.eQTL: 268 = = = = =
    geneW 146 1 8 0.6998 9 0.9588 8 0.7798 17.6058658 0.168243 0.3517602 parentA cis 43 41
    geneP 510 1 8 0.6998 8 0.7998 8 0.7798 48.9321454 0.6530789 0.7453719 parentB trans 43 566
    geneF 231 1 8 0.7598 8 0.7998 8 0.7798 13.2268263 0.1715268 0.4169803 parentB trans 43 491
    geneY 480 1 7 0.6922 8 0.7998 8 0.7798 71.8820179 0.7463132 0.8353116 parentB no_result 43 NA
    geneG 652 1 8 0.7798 8 0.7798 8 0.7798 11.5596194 0.1168083 0.3429812 parentB trans 43 753

    …




Gene-to-GOs mappings, for each gene identifier the corresponding annotated GO terms are listed (only a part of the file is shown)::

    geneA GO:0016491
    geneB GO:0003824, GO:0008152, GO:0008415, GO:0009058
    geneC GO:0004553, GO:0005975
    geneD GO:0004175, GO:0004298, GO:0005839, GO:0006511, GO:0019773, GO:0051603
    geneE GO:0005622, GO:0008270
    geneF GO:0004221, GO:0005622, GO:0006511, GO:0008152, GO:0008270, GO:0016491, GO:0055114
    geneG GO:0004672, GO:0004674, GO:0005524, GO:0006468
    geneH GO:0003924, GO:0005515, GO:0005525, GO:0005622, GO:0006886, GO:0006913, GO:0007165, GO:0007264, GO:0015031
    geneI GO:0007049, GO:0016538
    geneJ GO:0019787, GO:0043687, GO:0051246
    geneK GO:0005622, GO:0008270
    geneL GO:0004175, GO:0004298, GO:0005839, GO:0006511, GO:0019773, GO:0051603
    geneM GO:0005643, GO:0006810
    geneN GO:0005634, GO:0030528, GO:0045449
    geneO GO:0005515

-------

**Example output files**


For each hotspot, the top 20 GO terms in the Biological Process (BP), Molecular Function (MF) and Cellular Component (CC) ontologies are given::

    BP_GO.ID Term Annotated Significant Expected classicFisher.p classicFisher.adj_p p.val cutoff 0.05 p.val cutoff 0.01 adj.p.val cutoff 0.05
    GO:0010498 proteasomal protein catabolic process 7 2 0.09 0.0031 1 "*" "*" ""
    GO:0043161 proteasomal ubiquitin-dependent protein ... 7 2 0.09 0.0031 1 "*" "*" ""
    GO:0044238 primary metabolic process 4986 75 62.52 0.0094 1 "*" "*" ""
    GO:0006857 oligopeptide transport 36 3 0.45 0.0101 1 "*" "" ""
    GO:0006259 DNA metabolic process 349 10 4.38 0.012 1 "*" "" ""
    ...

    MF_GO.ID Term Annotated Significant Expected classicFisher.p classicFisher.adj_p p.val cutoff 0.05 p.val cutoff 0.01 adj.p.val cutoff 0.05
    GO:0004181 metallocarboxypeptidase activity 10 2 0.11 0.0053 1 "*" "*" ""
    GO:0003684 damaged DNA binding 20 2 0.22 0.0208 1 "*" "" ""
    GO:0008235 metalloexopeptidase activity 20 2 0.22 0.0208 1 "*" "" ""
    GO:0008553 hydrogen-exporting ATPase activity, phos... 2 1 0.02 0.0223 1 "*" "" ""
    GO:0003697 single-stranded DNA binding 21 2 0.24 0.0228 1 "*" "" ""
    ...

    CC_GO.ID Term Annotated Significant Expected classicFisher.p classicFisher.adj_p p.val cutoff 0.05 p.val cutoff 0.01 adj.p.val cutoff 0.05
    GO:0005743 mitochondrial inner membrane 84 5 1.05 0.0037 0.437168496 "*" "*" ""
    GO:0044429 mitochondrial part 122 6 1.53 0.0039 0.437168496 "*" "*" ""
    GO:0019866 organelle inner membrane 85 5 1.06 0.0039 0.437168496 "*" "*" ""
    GO:0005739 mitochondrion 138 6 1.73 0.0071 0.500734933 "*" "*" ""
    GO:0031966 mitochondrial membrane 99 5 1.24 0.0075 0.500734933 "*" "*" ""
    ...


For each hotspot, for each ontology, a visual representation of the GO structure is also given, i.e. the subgraph induced by the top 5 GO terms identified.


Hotspot summary file (14 columns)::

    Type Number Chr Sliding_IDs Interval_IDs Num_eQTLs Parent SC up-reg % Parent CML up-reg % BP_pVal cutoff 0.01 BP_adj_pVal cutoff 0.05 MF_pVal cutoff 0.01 MF_adj_pVal cutoff 0.05 CC_pVal cutoff 0.01 CC_adj_pVal cutoff 0.05
    All 1 1 [17, 18] [19, 20, 21, 22] 257 54.9 41.0 1 0 4 0 0 0
    All 2 1 [36, 37] [43, 44, 45, 46] 268 90.7 9.3 16 0 6 0 0 0
    ...
    Cis 1 2 [198] [231, 232] 73 67.3 65.5 1 0 5 0 0 0
    Cis 2 3 [260] [307, 308, 309] 55 61.8 38.2 4 0 2 0 2 0
    ...
    Trans 1 1 [17, 18] [19, 20, 21, 22] 192 54.5 40.6 1 0 2 0 0 0
    Trans 2 1 [36, 37] [43, 44, 45, 46] 202 93.6 6.4 19 0 5 0 0 0
    ...


    </help>
</tool>