diff ceas.xml @ 0:4e52505adaa6

Imported from capsule None
author jjohnson
date Wed, 17 Sep 2014 15:03:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ceas.xml	Wed Sep 17 15:03:26 2014 -0400
@@ -0,0 +1,288 @@
+<tool name="CEAS: Enrichment on chromosome and annotation" id="ceas_ceas" version="0.1.0">
+  <description>Annotate the given intervals and scores with genome features such as gene body</description>
+  <macros>
+    <import>ceas_macros.xml</import>
+  </macros>
+  <expand macro="requirements_bx" /> <!-- NOTE(review): macro body is not in this file; presumably defined in the imported ceas_macros.xml - confirm -->
+  <command>
+#import os.path
+## Gene-group options stay empty unless gene lists are enabled; list files and labels are comma-joined and the idtype value is appended after the labels.
+#set $gngroupspara = ""
+#set $gnlabelspara = ""
+#if $genegroup.enable == "yes"
+#set $gngroups = str($genegroup.genelist)
+#set $gnlabels = str($genegroup.label)
+#for $m in $genegroup.more
+#set $gngroups = $gngroups+","+str($m.genelist)
+#set $gnlabels = $gnlabels+","+str($m.label)
+#end for
+#set $gngroupspara = "--gn-groups="+$gngroups
+#set $gnlabelspara = "--gn-group-names='"+$gnlabels+"'"+str($genegroup.idtype.value)
+#end if
+#set $sizes = str($lowersize) + "," + str($middlesize) + "," + str($uppersize)
+#set $bisizes = str($lowerbisize) + "," + str($upperbisize)
+## A bigwig signal file is run through ceasBW together with the dbkey-named .len file; a wig file or no signal file uses ceas.
+#if $wfile and $wfile.extension == "bigwig"
+    ceasBW -w $wfile
+    #set $dbkey = $wfile.metadata.dbkey
+    #set $length_file = os.path.join( os.path.abspath($__app__.config.len_file_path), $dbkey+".len" )
+    -l $length_file
+#elif $wfile
+    ceas -w $wfile
+#else
+    ceas
+#end if
+#if $bfile
+  -b $bfile
+#end if
+#include source=$gtpath_ceasdb_ref#
+--span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes
+## Send stdout and stderr to the log with the POSIX redirection form; the bash-only ampersand shorthand is parsed differently by plain sh.
+--rel-dist=$reldist $gngroupspara $gnlabelspara --name=$name > $log 2>&amp;1 &amp;&amp;
+Rscript ceas_out.R  &amp;&amp;
+#if $imagetype.__str__ == "PNG"
+convert ceas_out.pdf ceas_out.png &amp;&amp; convert ceas_out-*.png -append ceas_out_joint.png &amp;&amp;
+cp ceas_out_joint.png $output
+#else
+cp ceas_out.pdf $output
+#end if
+  </command>
+  <inputs>
+    <expand macro="ceasdb_ref" />
+    <!-- Fixed output prefix: the command section hard-codes ceas_out.R and ceas_out.pdf. -->
+    <param name="name" type="hidden" value="ceas_out"/>
+    <param format="wig,bigwig" name="wfile" type="data" label="wig / bigwig file" optional="true">
+      <validator type="unspecified_build" />
+    </param>
+    <param format="bed" name="bfile" type="data" label="BED file(maximum 100000 lines)" optional="true">
+      <validator type="unspecified_build" />
+    </param>
+    <param name="span" type="integer" label="Span" value="3000">
+      <validator type="in_range" max="1000000" min="100" message="The Span is out of range, the parameter has to be between 100 to 1000000" />
+    </param>
+    <param name="pfres" type="integer" label="Profiling resolution" value="50">
+      <validator type="in_range" max="1000" min="10" message="The Profiling Resolution is out of range, the parameter has to be between 10 to 1000" />
+    </param>
+    <!-- Interval sizes are integers (like span, pfres and reldist) so that non-integer text is rejected before it is joined into the sizes and bisizes options. -->
+    <param name="lowersize" type="integer" label="Promoter/downstream lower-interval" value="1000">
+      <validator type="in_range" max="10000" min="100" message="The lower-interval is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="middlesize" type="integer" label="Promoter/downstream middle-interval" value="2000">
+      <validator type="in_range" max="10000" min="100" message="The middle-interval is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="uppersize" type="integer" label="Promoter/downstream upper-interval" value="3000">
+      <validator type="in_range" max="10000" min="100" message="The upper-interval is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="lowerbisize" type="integer" label="Bi-Promoter lower range" value="2500">
+      <validator type="in_range" max="10000" min="100" message="The lower-range is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="upperbisize" type="integer" label="Bi-Promoter upper range" value="5000">
+      <validator type="in_range" max="10000" min="100" message="The upper-range is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="reldist" type="integer" label="Relative distance" value="3000">
+      <validator type="in_range" max="10000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param type="select" name="imagetype" display="radio" label="Image Type">
+      <option value="PNG">PNG format</option>
+      <option value="PDF">PDF format</option>
+    </param>
+
+    <conditional name="genegroup">
+      <param name="enable" type="select" label="Specify gene list in the signal profiling" force_select="true">
+        <option value="no">No</option>
+        <option value="yes">Yes</option>
+      </param>
+      <when value="no" />
+      <when value="yes">
+        <param format="txt" name="genelist" type="data" label="Gene List" optional="false"/>
+        <param name="label" type="text" label="Gene List Label" optional="false" />
+        <repeat name="more" title="Gene Lists">
+          <param format="txt" name="genelist" type="data" label="Gene List" optional="false"/>
+          <param name="label" type="text" label="Gene List Label" optional="false" />
+        </repeat>
+        <!-- The option value is appended verbatim after the quoted group names in the command, hence the leading space in the genesymbol value. -->
+        <param name="idtype" type="select" label="Are they ...">
+          <option value="">refseq</option>
+          <option value=" --gname2">genesymbol</option>
+        </param>
+      </when>
+    </conditional>
+
+  </inputs>
+
+  <outputs>
+    <data name="output" format="png"> <!-- png unless the imagetype input is PDF, in which case the format becomes pdf -->
+      <change_format>
+        <when input="imagetype" value="PDF" format="pdf"/>
+      </change_format>
+    </data>
+    <data name="log" format="txt" label="ceas job log"/>
+  </outputs>
+  <expand macro="stdio"/>
+  <tests>
+    <test> <!-- wig signal file, PDF output, gene groups disabled; only the log text is asserted -->
+      <param name="wfile" value="treatment.wig" />
+      <param name="bfile" value="peaks.bed" />
+      <param name="span" value="3000" />
+      <param name="pfres" value="50" />
+      <param name="lowersize" value="1000" />
+      <param name="middlesize" value="2000" />
+      <param name="uppersize" value="3000" />
+      <param name="lowerbisize" value="2500" />
+      <param name="upperbisize" value="5000" />
+      <param name="reldist" value="3000" />
+      <param name="refsrc" value="history"/>
+      <param name="gdb" ftype="ceasdb" value="mm9.refGene.ceasdb"/>
+      <param name="imagetype" value="PDF" />
+      <param name="enable" value="no" />
+      <output name="log">
+        <assert_contents>
+          <has_text_matching expression="See ceas_out.pdf for the graphical results of CEAS" />
+        </assert_contents>
+      </output>
+    </test>
+    <test> <!-- same settings with a bigwig signal file, which takes the ceasBW branch of the command -->
+      <param name="wfile" value="treatment.bigwig" />
+      <param name="bfile" value="peaks.bed" />
+      <param name="span" value="3000" />
+      <param name="pfres" value="50" />
+      <param name="lowersize" value="1000" />
+      <param name="middlesize" value="2000" />
+      <param name="uppersize" value="3000" />
+      <param name="lowerbisize" value="2500" />
+      <param name="upperbisize" value="5000" />
+      <param name="reldist" value="3000" />
+      <param name="refsrc" value="history"/>
+      <param name="gdb" ftype="ceasdb" value="mm9.refGene.ceasdb"/>
+      <param name="imagetype" value="PDF" />
+      <param name="enable" value="no" />
+      <output name="log">
+        <assert_contents>
+          <has_text_matching expression="See ceas_out.pdf for the graphical results of CEAS" />
+        </assert_contents>
+      </output>
+    </test>
+  </tests> 
+  <help>
+**CEAS**
+
+This tool annotates the given intervals and scores with genome features
+such as gene body. It is the main module of the CEAS package, written by
+Hyunjin Gene Shin and published in Bioinformatics (PubMed ID: 19689956).
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+
+.. class:: warningmark
+
+**NEED IMPROVEMENT**
+
+-----
+
+**Parameters**
+
+- **WIGGLE file** contains the scores for the experiment in a wiggle
+  format file. Normally, it's produced by the peak calling tool. It's
+  optional.
+- **BED file** contains the peak locations for the experiment in a BED
+  format file.
+- **Span** from TSS and TTS in the gene-centered annotation. ChIP
+  regions within this range from TSS and TTS are considered when
+  calculating the coverage rates in promoter and downstream.
+- **Profiling resolution** is the WIGGLE profiling resolution.
+- **Promoter/downstream intervals** for ChIP region annotation are
+  comma-separated three values or a single value can be given. If a
+  single value is given, it will be segmented into three equal
+  fractions (ie, 3000 is equivalent to 1000,2000,3000)
+- **BiPromoter ranges** is for ChIP region annotation. It's
+  comma-separated two values or a single value can be given. If a
+  single value is given, it will be segmented into two equal fractions
+  (ie, 5000 is equivalent to 2500,5000) 
+- **Relative distance** is the relative distance to TSS/TTS in WIGGLE file
+  profiling
+- **Genome Annotation Version** to specify the annotations according to
+  the data set. The annotations are downloaded from UCSC genome site.
+- **Image type** specifies the output image format, either PNG or
+  PDF.
+- If **Specify gene list in the signal profiling** is set, you can specify
+  different gene groups for CEAS to plot together in the profile
+  figure. Select one or more **Gene List** files from the history, each
+  containing one RefSeq ID or gene symbol per row, and give a
+  **Gene List Label** for each gene list file.
+
+-----
+
+**Outputs**
+
+- **PNG/PDF file** is the result for CEAS analysis, containing 5 pages.
+- **LOG file** is the job log. If you see errors, please attach it to
+  the bug report.
+
+-----
+
+**script parameter list of CEAS 0.9.8**
+
+Options:
+  --version             show program's version number and exit
+  -h, --help            Show this help message and exit.
+  -b BED, --bed=BED     BED file of ChIP regions.
+  -w WIG, --wig=WIG     WIG file for either wig profiling or genome background
+                        annotation. WARNING: --bg flag must be set for genome
+                        background re-annotation.
+  -e EBED, --ebed=EBED  BED file of extra regions of interest (eg, non-coding
+                        regions)
+  -g GDB, --gt=GDB      Gene annotation table (eg, a refGene table in sqlite3
+                        db format provided through the CEAS web,
+                        http://liulab.dfci.harvard.edu/CEAS/download.html).
+  --name=NAME           Experiment name. This will be used to name the output
+                        files. If an experiment name is not given, the stem of
+                        the input BED file name will be used instead (eg, if
+                        'peaks.bed', 'peaks' will be used as a name.)
+  --sizes=SIZES         Promoter (also dowsntream) sizes for ChIP region
+                        annotation. Comma-separated three values or a single
+                        value can be given. If a single value is given, it
+                        will be segmented into three equal fractions (ie, 3000
+                        is equivalent to 1000,2000,3000), DEFAULT:
+                        1000,2000,3000. WARNING: Values > 10000bp are
+                        automatically set to 10000bp.
+  --bisizes=BISIZES     Bidirectional-promoter sizes for ChIP region
+                        annotation Comma-separated two values or a single
+                        value can be given. If a single value is given, it
+                        will be segmented into two equal fractions (ie, 5000
+                        is equivalent to 2500,5000) DEFAULT: 2500,5000bp.
+                        WARNING: Values > 20000bp are automatically set to
+                        20000bp.
+  --bg                  Run genome BG annotation again. WARNING: This flag is
+                        effective only if a WIG file is given through -w
+                        (--wig). Otherwise, ignored.
+  --span=SPAN           Span from TSS and TTS in the gene-centered annotation.
+                        ChIP regions within this range from TSS and TTS are
+                        considered when calculating the coverage rates in
+                        promoter and downstream, DEFAULT=3000bp
+  --pf-res=PF_RES       Wig profiling resolution, DEFAULT: 50bp. WARNING:
+                        Value smaller than the wig interval (resolution) may
+                        cause aliasing error.
+  --rel-dist=REL_DIST   Relative distance to TSS/TTS in wig profiling,
+                        DEFAULT: 3000bp
+  --gn-groups=GN_GROUPS
+                        Gene-groups of particular interest in wig profiling.
+                        Each gene group file must have gene names in the 1st
+                        column. The file names are separated by commas w/ no
+                        space (eg, --gn-groups=top10.txt,bottom10.txt)
+  --gn-group-names=GN_NAMES
+                        The names of the gene groups in --gn-groups. The gene
+                        group names are separated by commas. (eg, --gn-group-
+                        names='top 10%,bottom 10%'). These group names appear
+                        in the legends of the wig profiling plots. If no group
+                        names given, the groups are represented as 'Group 1,
+                        Group2,...Group n'.
+  --gname2              Whether or not use the 'name2' column of the gene
+                        annotation table when reading the gene IDs in the
+                        files given through --gn-groups. This flag is
+                        meaningful only with --gn-groups.
+
+  </help>
+
+</tool>