changeset 0:d8a6a6d67727 draft

initial commit
author fubar
date Tue, 27 Aug 2013 23:16:38 -0400
parents
children 98db9665b15e
files rlGAT/rlGAT.xml rlGAT/tool_dependencies.xml
diffstat 2 files changed, 201 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rlGAT/rlGAT.xml	Tue Aug 27 23:16:38 2013 -0400
@@ -0,0 +1,166 @@
+<tool id="rlGAT" name="Genomic association tests" version="0.1">
+  <description>using the GAT package</description>
+  <stdio>
+    <regex level="warning" source="both" match=".*" description="stdout/err chatter from gat-run.py" /> <!-- all stdout/stderr text is classed as warning level -->
+  </stdio>
+  <requirements>
+      <requirement type="package" version="0.1">gat</requirement> <!-- name must match the package name in tool_dependencies.xml exactly (case-sensitive) -->
+      <requirement type="package" version="1.2.1">matplotliblite</requirement>
+  </requirements>
+  <command>
+    gat-run.py ${ignore_segments} --log="$logfile" --num-samples=$numsamp
+    #for $s in $segmentfiles:
+      #if $s.sf.ext != 'data':
+        --segment-file "${s.sf}"
+      #end if
+    #end for
+    #for $s in $workspacefiles:
+      #if $s.wsf.ext != 'data':
+        --workspace-file "${s.wsf}"
+      #end if
+    #end for
+    #for $s in $annotationfiles:
+      #if $s.anno.ext != 'data':
+        --annotation-file "${s.anno}"
+      #end if
+    #end for
+    #for $s in $isochorefiles:
+      #if $s.iso.ext != 'data':
+        --isochore-file "${s.iso}"
+      #end if
+    #end for
+    > "${outfile}"
+  </command> <!-- repeat entries whose dataset ext is 'data' are skipped (presumably unset inputs - confirm); gat-run.py stdout is redirected into outfile -->
+  <inputs>
+    <param name="title" label="Name for this job's output file" type="text" size="80" value="GAT"/>
+    <param name="numsamp" type="integer" value="1000" size="5" label="Number of simulations - longer takes longer - 1000 is usually a reasonable choice" />
+    <param name="counter"  type="select" label="Counting methods to use - multiple will generate multiple output files" multiple="True"
+        help="Use ctrl to select multiple counters. If in doubt, read the fine manual"> <!-- NOTE(review): $counter is not referenced in the command template, so this selection currently has no effect - confirm intent -->
+        <option value="nucleotide-overlap" selected="true">nucleotide-overlap: number of bases overlapping</option>
+        <option value="segment-overlap">segment-overlap: number of intervals in the segments of interest overlapping annotations. A single base-pair overlap is sufficient</option>
+        <option value="segment-mid-overlap">segment-mid-overlap: number of intervals in the segments of interest overlapping at their midpoint annotations</option>
+        <option value="annotations-overlap">annotations-overlap: number of intervals in the annotations overlapping segments of interest. A single base-pair overlap is sufficient.</option>
+        <option value="annotations-mid-overlap">annotations-mid-overlap: number of intervals in the annotations overlapping at their midpoint segments of interest</option>
+    </param>
+
+    <repeat name="workspacefiles" min="1" title="Bed files containing contigs to analyse as the workspace" help="Must have 1 or more. Multiples will be intersected to form the single workspace of regions for the analysis">
+        <param name="wsf" type="data" label="Workspace (BED) format file(s) from your history" format="bed,interval" size="100"
+             optional="false"/>
+    </repeat>
+    <repeat name="segmentfiles" min="1"
+      title="Bed files containing segments (features) to analyse" help="Must have 1 or more. Make these the shorter files when choosing between segment and annotation - please read the fine manual">
+        <param name="sf" type="data" label="Segment (BED) format file from your history" format="bed,interval" size="100"
+             optional="false"/>
+    </repeat>
+    <param name="ignore_segments" type="select" label="Ignore 4th BED column when determining segments (default)" help="Otherwise GAT treats each distinct 4th column name as a distinct segment for testing">
+        <option value="--ignore-segment-tracks" selected="true">Ignore segment tracks</option>
+        <option value="">Use 4th bed column to determine segments - may use lots'o'ram - see GAT documentation</option>
+    </param>
+    <repeat name="annotationfiles" min="1"
+      title="Bed files containing annotations to analyse for each segment" help="Make these the longer files when choosing between segments and annotations. Please read the fine manual">
+        <param name="anno" type="data" label="Annotation (BED) format file from your history" format="bed,interval" size="100"
+             optional="false"/>
+    </repeat>
+    <repeat name="isochorefiles"
+      title="Optional BED files containing isochores to split the workspace(s) during simulation" help="Read the fine manual">
+        <param name="iso" type="data" label="Isochore (BED) format file from your history" format="bed,interval" size="100"
+             optional="true"/>
+    </repeat>
+
+  </inputs>
+  <outputs>
+    <data name="outfile" format="tabular" label="${title}_gatout.tsv" /> <!-- result table: the command redirects gat-run.py stdout here -->
+    <data name="logfile" format="txt" label="${title}_gatrun.log" /> <!-- handed to gat-run.py through its log option -->
+  </outputs>
+  <tests> <!-- NOTE(review): apart from title, none of these params (feature_type, gfffile, firstsamf, ...) is declared in <inputs>; this test looks copied from another tool - confirm and replace with GAT test data -->
+    <test>
+      <param name="feature_type" value="exon" />
+      <param name="gfffile" value="rn4_chr20_100k.gtf" />
+      <param name="firstsamf" value="rn4chr20test1.bam" ftype="bam"/>
+      <param name="secondsamf" value="rn4chr20test2.bam" ftype="bam"/>
+      <param name="id_attr" value="gene_name" />
+      <param name="model" value="union" />
+      <param name="stranded" value="no" />
+      <param name="title" value="htseqtest" />
+      <param name="mapqMin" value="0" />
+
+      <output name="outfile" file="htseqsams2mx_test1_out.xls" lines_diff="1"/>
+    </test>
+  </tests>
+  <help>
+
+**What this tool does**
+
+This is a Galaxy wrapper for the GAT package
+
+**For complete GAT package documentation** 
+
+please read the fine manual at GAT_
+
+**Brief overview**
+
+For those too lazy to click on GAT_, here's a summary of notes from there:
+
+The gat tool requires the following input:
+
+  A set of intervals S with segments of interest to test
+ 
+  A set of intervals A with annotations to test against
+ 
+  A set of intervals W describing a workspace
+
+
+All of the options ``--segment-file``, ``--workspace-file``, ``--annotation-file`` can be used several times on the command line. What happens with multiple files depends on the file type:
+
+Multiple ``--segment-file`` entries are added to the list of segments of interest to test with.
+Multiple ``--annotation-file`` entries are added to the list of annotations to test against.
+Multiple ``--workspace-file`` entries are intersected to create a single workspace.
+Generally, gat will test m segments of interest lists against n annotations lists in all m * n combinations.
+
+Within a bed formatted file, different tracks can be separated using a UCSC formatted track line, such as this::
+
+
+	track name="segmentset1"
+	chr1  23     100
+	chr3  50     2000
+	track name="segmentset2"
+	chr1  1000   2000
+	chr3  4000   5000
+
+or alternatively, using the fourth column in a bed formatted file::
+
+	chr1 23      100     segmentset1
+	chr3 50      2000    segmentset1
+	chr1 1000    2000    segmentset2
+	chr3 4000    5000    segmentset2
+
+The latter takes precedence. The option ``--ignore-segment-tracks`` forces gat to ignore the fourth column and consider all intervals to be from a single interval set.
+
+Note: be careful with bed files where each interval gets a unique identifier. Gat will interpret each interval as a separate segment set to read.
+This is usually not intended and causes gat to require a very large amount of memory. 
+By default, tracks can not be split over multiple files. 
+
+**Adding isochores**
+
+Isochores are genomic segments with common properties that are potentially correlated with the segments of interest and the annotations, but the correlation is not of interest here. 
+For example, consider a ChIP-Seq experiment and testing whether ChIP-Seq intervals are close to genes.
+G+C rich regions in the genome are gene rich, while at the same time there is possibly a nucleotide composition bias in the ChIP-Seq protocol depleting A+T rich sequence.
+An association between genes and ChIP-Seq intervals might simply be due to the G+C effect. Using isochores can control for this effect to some extent.
+
+Isochores split the workspace into smaller workspaces of similar properties, so called isochore workspaces. 
+Simulations are performed for each isochore workspace separately. At the end, results for all isochore workspaces are aggregated.
+
+
+**Attribution**
+
+
+The GAT package itself is described in the documentation linked at GAT_.
+Otherwise, all code and documentation comprising this Galaxy wrapper
+was written by Ross Lazarus and is
+licensed to you under the LGPL_ like other rgenetics artefacts
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _GAT: http://www.cgat.org/~andreas/documentation/gat/contents.html
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rlGAT/tool_dependencies.xml	Tue Aug 27 23:16:38 2013 -0400
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package version="1.7.1" name="numpy"> <!-- provided by the tool shed repository referenced below -->
+        <repository toolshed="http://testtoolshed.g2.bx.psu.edu" owner="blankenberg" name="package_numpy_1_7" changeset_revision="af9633757cf0" prior_installation_required="True" />
+    </package>
+    <package version="1.2.1" name="matplotliblite"> <!-- provided by the tool shed repository referenced below -->
+        <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" owner="fubar" name="package_matplotlib_lite" changeset_revision="8df6bbf48c3a" prior_installation_required="True" />
+    </package>
+    <package name="gat" version="0.1"> <!-- builds GAT 0.1 from source into $INSTALL_DIR -->
+        <install version="1.0">
+            <actions>
+                <action type="make_directory">$INSTALL_DIR/lib/python</action> <!-- Created a priori: the install fails otherwise (presumably setup.py needs the target dir to exist already - confirm) -->
+                <action type="make_directory">$INSTALL_DIR/lib64/python</action> <!-- Created a priori for the same reason as lib/python above -->
+                <action type="set_environment_for_install"> <!-- environments of the numpy and matplotliblite repositories listed here are applied for the install steps -->
+                        <repository changeset_revision="af9633757cf0" name="package_numpy_1_7" owner="blankenberg" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu">
+                            <package name="numpy" version="1.7.1" />
+                        </repository>
+                        <repository changeset_revision="8df6bbf48c3a" name="package_matplotlib_lite" owner="fubar" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu/">
+                            <package name="matplotliblite" version="1.2.1" />
+                        </repository>
+                </action>
+                <action type="download_by_url">https://genomic-association-tester.googlecode.com/files/genomic-association-tester-0.1.tar.gz</action> <!-- source tarball; NOTE(review): hosted on Google Code - confirm the URL still resolves -->
+                <action type="shell_command">export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python:$INSTALL_DIR/lib64/python &amp;&amp; python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin</action> <!-- both new lib dirs are put on PYTHONPATH for the setup.py run; scripts go to $INSTALL_DIR/bin -->
+                <action type="set_environment"> <!-- environment entries recorded for this package: library dirs on PYTHONPATH, scripts dir on PATH -->
+                    <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python:$INSTALL_DIR/lib64/python</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            Installation of GAT requires the Numpy Python package. matplotlib is used for plots so added here 
+            Note this uses the matplotlib lite version dependent on the lite version of numpy - no atlas compilation 
+        </readme>
+    </package>
+</tool_dependency>