changeset 2:9c5241259454 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author jjohnson
date Thu, 22 Mar 2018 08:33:55 -0400
parents 7f023a22da15
children 067573bac905
files beta_basic.xml beta_macros.xml beta_minus.xml beta_plus.xml tool_dependencies.xml
diffstat 5 files changed, 527 insertions(+), 555 deletions(-) [+]
line wrap: on
line diff
--- a/beta_basic.xml	Tue Sep 16 12:51:50 2014 -0500
+++ b/beta_basic.xml	Thu Mar 22 08:33:55 2018 -0400
@@ -1,61 +1,61 @@
 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0">
-  <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description>
-  <macros>
-    <import>beta_macros.xml</import>
-  </macros>
-  <expand macro="requirements" />
-  <command>
-  BETA basic 
-  #include source=$common_opts#
-  #include source=$genome_opts#
-  #include source=$extended_opts#
-  &amp;> $log
-  </command>
-  <inputs>
-    <expand macro="common_params" />
-    <expand macro="genome_params" />
-    <expand macro="extended_params" />
-  </inputs>
-  <expand macro="stdio" />
-  <outputs>
-    <data format="txt" name="log" label="Log of BETA basic"/>
-    <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
-    <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
-    <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
-    <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
-    <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
-      <param name="distance" value="100000"/>
-      <param name="peaknumber" value="10000"/>
-      <param name="genomeName" value="hg19"/>
-      <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
-      <param name="kind" value="LIM"/>
-      <param name="expreinfo" type="text" value="2,5,7"/>
-      <param name="gname2" value="Refseq"/>
-      <param name="diff_fdr" value="1.0"/>
-      <param name="diff_amount" value="0.5"/>
-      <param name="method" value="score"/>
-      <output name="log">
-        <assert_contents>
-            <has_text_matching expression="Finished" />
-        </assert_contents>
-      </output>
-      <output name="targetsoutput">
-        <assert_contents>
-            <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" />
-        </assert_contents>
-      </output>
-      <output name="targetpeaks">
-        <assert_contents>
-            <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" />
-        </assert_contents>
-      </output>
-    </test>
-  </tests>
- <help>
+    <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description>
+    <macros>
+        <import>beta_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        BETA basic
+        #include source=$common_opts#
+        #include source=$genome_opts#
+        #include source=$extended_opts#
+        > '$log' 2>&1 ## POSIX redirect: stdout and stderr both go to the quoted log dataset path
+    ]]></command>
+    <inputs>
+        <expand macro="common_params" />
+        <expand macro="genome_params" />
+        <expand macro="extended_params" />
+    </inputs>
+    <outputs>
+        <data format="txt" name="log" label="Log of BETA basic"/>
+        <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
+        <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
+        <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
+        <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
+        <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
+            <param name="distance" value="100000"/>
+            <param name="peaknumber" value="10000"/>
+            <param name="genomeName" value="hg19"/>
+            <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
+            <param name="kind" value="LIM"/>
+            <param name="expreinfo" value="2,5,7"/>
+            <param name="gname2" value="Refseq"/>
+            <param name="diff_fdr" value="1.0"/>
+            <param name="diff_amount" value="0.5"/>
+            <param name="method" value="score"/>
+            <output name="log">
+                <assert_contents>
+                    <has_text_matching expression="Finished" />
+                </assert_contents>
+            </output>
+            <output name="uptargetsoutput">
+                <assert_contents>
+                    <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" />
+                </assert_contents>
+            </output>
+            <output name="uptargetpeaks">
+                <assert_contents>
+                    <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+ <help><![CDATA[
 ** BETA basic **
 
 @EXTERNAL_DOCUMENTATION@
@@ -76,36 +76,36 @@
 **Parameters**
 
 - **PEAKFILE file** contains peaks for the experiment in a bed
-  format file. Normally, it's produced by the peak calling tool. It's
-  required.
+    format file. Normally, it's produced by the peak calling tool. It's
+    required.
 - **EXPREFILE file** contains the differentially expressed genes in a tab 
-  delimited text file. It's required.
+    delimited text file. It's required.
 - **Kind** The kind of your expression file format, LIM for LIMMA standard 
-  output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 
-  BSF for BETA specific format, and O for other formats.
+    output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 
+    BSF for BETA specific format, and O for other formats.
 - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter.
 - **gname2** If this switch is on, gene or transcript IDs in files given 
-  through -e will be considered as official gene symbols, DEFAULT=FALSE
+    through -e will be considered as official gene symbols, DEFAULT=FALSE
 - **EXPREINFO** is the columns info of the geneID, up/down status and statistcal
-  values column of your expression data,NOTE: use a comma as an connector. 
-  for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column 
-  and FDR in 7 column.
+    values column of your expression data. NOTE: use a comma as the separator.
+    For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
+    and FDR in the 7th column.
 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
-  It is a tab delimited text file with gene annotation with refseq and gene symbol.
-  Input this file only if your genome is neither hg19 nor mm9.
-  profiling
+    It is a tab delimited text file with gene annotation with refseq and gene symbol.
+    Input this file only if your genome is neither hg19 nor mm9.
+    profiling
 - **OUTPUT** to specify the output files directory
 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
-  is neither hg19 nor mm9.
+    is neither hg19 nor mm9.
 - **NAME** specify the name of the output files.
 - **DISTANCE** specify the distance wich peaks within it will be considered.
 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
-  via -e, genes with less than this value will be considered as the differentially
-  changed genes.
+    via -e, genes with less than this value will be considered as the differentially
+    changed genes.
 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
-  the 3rd column in file input via -e, genes ranked in the top # will be considered
-  as the differentially expressed genes.
+    the 3rd column in file input via -e, genes ranked in the top # will be considered
+    as the differentially expressed genes.
 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
 
 -----
@@ -114,75 +114,66 @@
 
 ::
 
-  -h, --help            show this help message and exit
-  -p PEAKFILE, --peakfile PEAKFILE
-                        The bed format of peaks binding sites. (BETA support 3
-                        or 5 columns bed format, CHROM, START, END (NAME,
-                        SCORE))
-  -e EXPREFILE, --diff_expr EXPREFILE
-                        The differential expression file get from limma for
-                        MicroArray ddata and cuffdiff for RNAseq data
-  -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
-                        The kind of your expression file,this is required,it
-                        can be LIM, CUF, BSF, O. LIM for LIMMA standard
-                        format. CUF for CUFDIFF standard format, BSF for BETA
-                        specific format and O for other formats, if is 'O',
-                        columns infor required via --info
-  -g {hg19,mm9}, --genome {hg19,mm9}
-                        Specify your species, hg19, mm9. For other genome
-                        assembily versions of human and mouse or other
-                        species, ignore this parameter.
-  --gname2              If this switch is on, gene or transcript IDs in files
-                        given through -e will be considered as official gene
-                        symbols, DEFAULT=FALSE
-  --info EXPREINFO      Specify the geneID, up/down status and statistcal
-                        values column of your expression data,NOTE: use a
-                        comma as an connector. for example: 2,5,7 means geneID
-                        in the 2nd column, Tscore in 5th column and FDR in 7
-                        column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
-                        and 1,2,3 for BETA specific format
-  -r REFERENCE, --reference REFERENCE
-                        The refgene info file downloaded from UCSC genome
-                        browser.input this file only if your genome is neither
-                        hg19 nor mm9
-  -o OUTPUT, --output OUTPUT
-                        The directory to store all the output files, if you
-                        don't set this, files will be output into the current
-                        directory
-  --bl                  Whether or not use CTCF boundary to filter peaks
-                        around a gene, DEFAULT=FALSE
-  --bf BOUNDARYFILE     CTCF conserved peaks bed file, use this only when you
-                        set --bl and the genome is neither hg19 nor mm9
-  --pn PEAKNUMBER       The number of peaks you want to consider,
-                        DEFAULT=10000
-  --method {score,distance}
-                        Define the method to do the TF/CR function prediction,
-                        score for regulatory potential, distance for the
-                        distance to the proximal binding peak. DEFAULT:SCORE
-  -n NAME, --name NAME  This argument is used to name the result file.If not
-                        set, the peakfile name will be used instead
-  -d DISTANCE, --distance DISTANCE
-                        Set a number which unit is 'base'. It will get peaks
-                        within this distance from gene TSS. default:100000
-                        (100kb)
-  --df DIFF_FDR         Input a number 0~1 as a threshold to pick out the most
-                        significant differential expressed genes by FDR,
-                        DEFAULT = 1, that is select all the genes
-  --da DIFF_AMOUNT      Get the most significant differential expressed genes
-                        by the percentage(0-1) or number(larger than 1)Input a
-                        number between 0-1, the rank based on fdr for example,
-                        2000, so that the script will only consider top 2000
-                        genes as the differentially expressed genes. DEFAULT =
-                        0.5, that is select top 50 percent genes of up and
-                        down seprately. NOTE: if you want to use diff_fdr,
-                        please set this parameter to 1, otherwise it will get
-                        the intersection of these two parameters
-  -c CUTOFF, --cutoff CUTOFF
-                        Input a number between 0~1 as a threshold to select
-                        the closer target gene list(up regulate or down
-                        regulate or both) with the p value was called by one
-                        side ks-test, DEFAULT = 0.001
+    -h, --help                                  show this help message and exit
+    -p PEAKFILE, --peakfile PEAKFILE            The bed format of peaks binding sites. (BETA support 3
+                                                or 5 columns bed format, CHROM, START, END (NAME,
+                                                SCORE))
+    -e EXPREFILE, --diff_expr EXPREFILE         The differential expression file get from limma for
+                                                MicroArray ddata and cuffdiff for RNAseq data
+    -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}  The kind of your expression file,this is required,it
+                                                can be LIM, CUF, BSF, O. LIM for LIMMA standard
+                                                format. CUF for CUFDIFF standard format, BSF for BETA
+                                                specific format and O for other formats, if is 'O',
+                                                columns infor required via --info
+    -g {hg19,mm9}, --genome {hg19,mm9}          Specify your species, hg19, mm9. For other genome
+                                                assembily versions of human and mouse or other
+                                                species, ignore this parameter.
+    --gname2                                    If this switch is on, gene or transcript IDs in files
+                                                given through -e will be considered as official gene
+                                                symbols, DEFAULT=FALSE
+    --info EXPREINFO                            Specify the geneID, up/down status and statistcal
+                                                values column of your expression data,NOTE: use a
+                                                comma as an connector. for example: 2,5,7 means geneID
+                                                in the 2nd column, Tscore in 5th column and FDR in 7
+                                                column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
+                                                and 1,2,3 for BETA specific format
+    -r REFERENCE, --reference REFERENCE         The refgene info file downloaded from UCSC genome
+                                                browser.input this file only if your genome is neither
+                                                hg19 nor mm9
+    -o OUTPUT, --output OUTPUT                  The directory to store all the output files, if you
+                                                don't set this, files will be output into the current
+                                                directory
+    --bl                                        Whether or not use CTCF boundary to filter peaks
+                                                around a gene, DEFAULT=FALSE
+    --bf BOUNDARYFILE                           CTCF conserved peaks bed file, use this only when you
+                                                set --bl and the genome is neither hg19 nor mm9
+    --pn PEAKNUMBER                             The number of peaks you want to consider,
+                                                DEFAULT=10000
+    --method {score,distance}                   Define the method to do the TF/CR function prediction,
+                                                score for regulatory potential, distance for the
+                                                distance to the proximal binding peak. DEFAULT:SCORE
+    -n NAME, --name NAME                        This argument is used to name the result file.If not
+                                                set, the peakfile name will be used instead
+    -d DISTANCE, --distance DISTANCE            Set a number which unit is 'base'. It will get peaks
+                                                within this distance from gene TSS. default:100000
+                                                (100kb)
+    --df DIFF_FDR                               Input a number 0~1 as a threshold to pick out the most
+                                                significant differential expressed genes by FDR,
+                                                DEFAULT = 1, that is select all the genes
+    --da DIFF_AMOUNT                            Get the most significant differential expressed genes
+                                                by the percentage(0-1) or number(larger than 1)Input a
+                                                number between 0-1, the rank based on fdr for example,
+                                                2000, so that the script will only consider top 2000
+                                                genes as the differentially expressed genes. DEFAULT =
+                                                0.5, that is select top 50 percent genes of up and
+                                                down seprately. NOTE: if you want to use diff_fdr,
+                                                please set this parameter to 1, otherwise it will get
+                                                the intersection of these two parameters
+    -c CUTOFF, --cutoff CUTOFF                  Input a number between 0~1 as a threshold to select
+                                                the closer target gene list(up regulate or down
+                                                regulate or both) with the p value was called by one
+                                                side ks-test, DEFAULT = 0.001
 
-  </help>
-
+    ]]></help>
+    <expand macro="citations" />
 </tool>
--- a/beta_macros.xml	Tue Sep 16 12:51:50 2014 -0500
+++ b/beta_macros.xml	Thu Mar 22 08:33:55 2018 -0400
@@ -1,185 +1,187 @@
 
 <macros>
-  <macro name="requirements">
-    <requirements>
-      <requirement type="package" version="1.7.1">numpy</requirement>
-      <requirement type="package" version="2.15.0">R</requirement>
-      <requirement type="package" version="1.0.6">beta</requirement>
-    </requirements>
-  </macro>
+    <macro name="requirements"> <!-- Declares the single package dependency (beta 1.0.7) for tools that expand this macro -->
+        <requirements>
+            <requirement type="package" version="1.0.7">beta</requirement>
+        </requirements>
+    </macro>
 
-  <macro name="stdio">
-    <stdio>
-        <exit_code range=":-1"  level="fatal" description="Error: Cannot open file" />
-        <exit_code range="1:"  level="fatal" description="Error" />
-    </stdio>
-  </macro>
+    <macro name="stdio"> <!-- Both negative and positive exit codes are declared fatal -->
+        <stdio>
+            <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </macro>
 
-  <macro name="common_params">
-    <param format="bed" name="peakfile" type="data" label="BED file for Peaks">
-      <validator type="unspecified_build" />
-    </param>
-    <param name="output_dir" type="hidden" label="Name for the output files" value="BETA_OUTPUT"/>
-    <param name="name" type="hidden" label="Name for the output files" value="NA"/>
-    <param name="distance" type="integer" label="the distance from gene TSS within which peaks will be selected" value="100000">
-        <validator type="in_range" max="20000000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000000" />
-    </param>
-    <param name="peaknumber" type="integer" label="Peaks considered to contribute to the genes" value="10000">
-        <validator type="in_range" max="200000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" />
-    </param>
-  </macro>
+    <macro name="common_params">
+        <param format="bed" name="peakfile" type="data" label="BED file for Peaks">
+            <validator type="unspecified_build" />
+        </param>
+        <param name="output_dir" type="hidden" label="Name for the output files" value="BETA_OUTPUT"/>
+        <param name="name" type="hidden" label="Name for the output files" value="NA"/>
+        <param name="distance" type="integer" label="the distance from gene TSS within which peaks will be selected" value="100000">
+            <validator type="in_range" max="20000000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000000" />
+        </param>
+        <param name="peaknumber" type="integer" label="Peaks considered to contribute to the genes" value="10000">
+            <validator type="in_range" max="200000" min="100" message="The number of peaks is out of range, the parameter has to be between 100 and 200000" /> <!-- keep the message in sync with min/max -->
+        </param>
+    </macro>
 
-  <macro name="boundary">
+    <macro name="boundary">
         <conditional name="boundary">
-          <param name="boundaryLimit" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use CTCF boundary to filter peaks around a gene"/>
-          <when value="no"/>
-          <when value="yes">
-            <yield />
-          </when>
+            <param name="boundaryLimit" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use CTCF boundary to filter peaks around a gene"/>
+            <when value="no"/>
+            <when value="yes">
+                <yield />
+            </when>
         </conditional>
-  </macro>
+    </macro>
 
-  <macro name="genome_params">
-    <conditional name="refGenome">
-      <param name="genomeName" type="select" label="genome reference">
-        <option value="hg19">hg19 (built-in)</option>
-        <option value="mm9">mm9 (built-in)</option>
-        <option value="other">other</option>
-      </param>
-      <when value="hg19">
-        <expand macro="boundary" />
-      </when>
-      <when value="mm9">
-        <expand macro="boundary" />
-      </when>
-      <when value="other">
-        <param name="refseq" type="data" format="tabular" label="UCSC Refseq Genes (From UCSC Table Browser)"
-         help="Columns: name,chrom,strand,txStart,txEnd,name"/>
-        <expand macro="boundary">
-            <param name="bl_bed" type="data" format="bed" label="BED format boundary file"/>
-        </expand>
-      </when>
-    </conditional>
-  </macro>
+    <macro name="genome_params"> <!-- Reference selection: built-in hg19/mm9, or "other" with a user-supplied RefSeq table -->
+        <conditional name="refGenome">
+            <param name="genomeName" type="select" label="genome reference">
+                <option value="hg19">hg19 (built-in)</option>
+                <option value="mm9">mm9 (built-in)</option>
+                <option value="other">other</option>
+            </param>
+            <when value="hg19">
+                <expand macro="boundary"/>
+            </when>
+            <when value="mm9">
+                <expand macro="boundary"/>
+            </when>
+            <when value="other">
+                <param name="refseq" type="data" format="tabular" label="UCSC Refseq Genes (From UCSC Table Browser)"
+                       help="Columns: name,chrom,strand,txStart,txEnd,name"/>
+                <expand macro="boundary">
+                    <param name="bl_bed" type="data" format="bed" label="BED format boundary file"/>
+                </expand>
+            </when>
+        </conditional>
+    </macro>
 
-  <macro name="refGenomeSourceConditional">
-    <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Use a built in reference genome or own from your history" help="Genome Reference Fasta sequence">
-        <option value="cached" selected="True">Use a built-in genome</option>
-        <option value="history">Use a genome from history</option>
-      </param>
-      <when value="cached">
-        <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
-            <options from_data_table="all_fasta"/>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
-      </when>  <!-- history -->
-    </conditional>  <!-- refGenomeSource -->
-  </macro>
+    <macro name="refGenomeSourceConditional"> <!-- Choose the genome FASTA: an entry from the all_fasta data table or a FASTA dataset from the history -->
+        <conditional name="refGenomeSource">
+            <param name="genomeSource" type="select" label="Use a built in reference genome or one from your history" help="Genome Reference Fasta sequence">
+                <option value="cached" selected="True">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <when value="cached">
+                <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ownFile" type="data" format="fasta" label="Select the reference genome" />
+            </when>    <!-- history -->
+        </conditional>    <!-- refGenomeSource -->
+    </macro>
 
-  <macro name="extended_params">
-    <param format="txt" name="exprefile" type="data" label="TEXT file for differential expression data">
-        <validator type="unspecified_build" />
-    </param>
-    <conditional name="expression">
-        <param name="kind" type="select" label="Expression FIle Type" help="Preset columns for Cuffdiff, LIMMA, and BETA Specific Format">
-            <option value="CUF">RNA-seq data with Cuffdiff default format</option>
-            <option value="LIM">MicroArray data with LIMMA default format</option>
-            <option value="BSF">BETA Specific Format</option>
-            <option value="O">Other tools processed data with BETA specific format</option>
+    <macro name="extended_params">
+        <param format="txt" name="exprefile" type="data" label="TEXT file for differential expression data">
+            <validator type="unspecified_build" />
         </param>
-        <when value="CUF">
-            <param name="expreinfo" type="text" value="2,10,13" label="Column number of the geneid, regulate status and statistics value"/>
-        </when>
-        <when value="LIM">
-            <param name="expreinfo" type="text" value="2,5,7" label="Column number of the geneid, regulate status and statistics value"/>
-        </when>
-        <when value="BSF">
-            <param name="expreinfo" type="text" value="1,2,3" label="Column number of the geneid, regulate status and statistics value"/>
-        </when>
-        <when value="O">
-            <param name="expreinfo" type="text" value="" label="Column number of the geneid, regulate status and statistics value is required">
-                <validator type="regex" message="Enter column numbers:geneid,test_stat,value">^\d+,\d+,\d+$</validator>
+        <conditional name="expression">
+            <param name="kind" type="select" label="Expression FIle Type" help="Preset columns for Cuffdiff, LIMMA, and BETA Specific Format">
+                <option value="CUF">RNA-seq data with Cuffdiff default format</option>
+                <option value="LIM">MicroArray data with LIMMA default format</option>
+                <option value="BSF">BETA Specific Format</option>
+                <option value="O">Other tools processed data with BETA specific format</option>
             </param>
-        </when>
-    </conditional>
-    <param name="gname2" type="select" label="TRUE if gene ID in expression file identified by official gene symbol">
-        <option value="Refseq">Refseq</option>
-        <option value="Gene_Symbol">Gene Symbol</option>
-    </param>
-    <param name="diff_fdr" type="float" label="get the most significant expression differentially changed genes by this cutoff based on fdr or pvalue" value="1.0">
-        <validator type="in_range" max="1.0" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 1.0" />
-    </param>
-    <param name="diff_amount" type="float" label="get the most significant expression differentially changed genes by amount" value="0.5">
-        <validator type="in_range" max="20000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000" />
-    </param>
-    <param name="method" type="select" label="method to do the TF/CR function prediction" optional="true">
-        <option value="score">regulatory potential</option>
-        <option value="distance">distance to the nearest peak</option>
-    </param>
-
-  </macro>
+            <when value="CUF">
+                <param name="expreinfo" type="text" value="2,10,13" label="Column number of the geneid, regulate status and statistics value"/>
+            </when>
+            <when value="LIM">
+                <param name="expreinfo" type="text" value="2,5,7" label="Column number of the geneid, regulate status and statistics value"/>
+            </when>
+            <when value="BSF">
+                <param name="expreinfo" type="text" value="1,2,3" label="Column number of the geneid, regulate status and statistics value"/>
+            </when>
+            <when value="O">
+                <param name="expreinfo" type="text" value="" label="Column number of the geneid, regulate status and statistics value is required">
+                    <validator type="regex" message="Enter column numbers:geneid,test_stat,value">^\d+,\d+,\d+$</validator>
+                </param>
+            </when>
+        </conditional>
+        <param name="gname2" type="select" label="TRUE if gene ID in expression file identified by official gene symbol">
+            <option value="Refseq">Refseq</option>
+            <option value="Gene_Symbol">Gene Symbol</option>
+        </param>
+        <param name="diff_fdr" type="float" label="get the most significant expression differentially changed genes by this cutoff based on fdr or pvalue" value="1.0">
+            <validator type="in_range" max="1.0" min="0" message="The FDR cutoff is out of range, the parameter has to be between 0 and 1.0" /> <!-- message names this parameter, not the distance one -->
+        </param>
+        <param name="diff_amount" type="float" label="get the most significant expression differentially changed genes by amount" value="0.5">
+            <validator type="in_range" max="20000" min="0" message="The amount of differential genes is out of range, the parameter has to be between 0 and 20000" /> <!-- message names this parameter, not the distance one -->
+        </param>
+        <param name="method" type="select" label="method to do the TF/CR function prediction" help="A method must be selected because --method is always passed to BETA; regulatory potential is BETA's own default">
+            <option value="score">regulatory potential</option>
+            <option value="distance">distance to the nearest peak</option>
+        </param>
+    </macro>
 
-  <template name="common_opts">
-  -p "$peakfile" 
-  -d $distance --pn $peaknumber -o $output_dir -n $name
-  </template>
+    <template name="common_opts">
+        -p "$peakfile" -d $distance
+        --pn $peaknumber -o $output_dir -n $name
+    </template>
 
-  <template name="genome_opts">
-#if $refGenome.genomeName == 'hg19':
-  -g $refGenome.genomeName
-  ## -r \$BETA_LIB_PATH/BETA/references/hg19.refseq
-  #if $refGenome.boundary.boundaryLimit: 
-    --bl
-    ## --bf \$BETA_LIB_PATH/BETA/references/hg19_CTCF_bound.bed
-  #end if
-#elif $refGenome.genomeName == 'mm9':
-  -g $refGenome.genomeName
-  ## -r \$BETA_LIB_PATH/BETA/references/mm9.refseq
-  #if $refGenome.boundary.boundaryLimit: 
-    --bl
-    ## --bf \$BETA_LIB_PATH/BETA/references/mm9_CTCF_bound.bed
-  #end if
-#else
-  -r  $refGenome.refseq
-  #if $refGenome.boundary.boundaryLimit: 
-    --bl 
-    --bf $refGenome.boundary.bl_bed
-  #end if
-#end if
-  </template>
-  <template name="ref_genome_seq_opts">
-#if $refGenomeSource.genomeSource == 'cached':
-  --gs $refGenomeSource.all_fasta_source.fields.path
-#else
-  --gs $refGenomeSource.ownFile
-#end if
-  </template>
+    <template name="genome_opts">
+        #if $refGenome.genomeName == 'hg19':
+            -g $refGenome.genomeName
+            ## only -g is passed here; the explicit reference stays disabled: -r \$BETA_LIB_PATH/BETA/references/hg19.refseq
+            #if $refGenome.boundary.boundaryLimit:
+                --bl
+                ## the explicit boundary file stays disabled: --bf \$BETA_LIB_PATH/BETA/references/hg19_CTCF_bound.bed
+            #end if
+        #elif $refGenome.genomeName == 'mm9':
+            -g $refGenome.genomeName
+            ## only -g is passed here; the explicit reference stays disabled: -r \$BETA_LIB_PATH/BETA/references/mm9.refseq
+            #if $refGenome.boundary.boundaryLimit:
+                --bl
+                ## the explicit boundary file stays disabled: --bf \$BETA_LIB_PATH/BETA/references/mm9_CTCF_bound.bed
+            #end if
+        #else
+            -r $refGenome.refseq
+            #if $refGenome.boundary.boundaryLimit:
+                --bl
+                --bf $refGenome.boundary.bl_bed
+            #end if
+        #end if
+    </template>
+    <template name="ref_genome_seq_opts">
+        #if $refGenomeSource.genomeSource == 'cached':
+            --gs $refGenomeSource.all_fasta_source.fields.path
+        #else
+            --gs $refGenomeSource.ownFile
+        #end if
+    </template>
 
-  <template name="extended_opts">
-  -e "$exprefile"
-  -k $expression.kind --info $expression.expreinfo --method $method
-  --da $diff_amount --df $diff_fdr -c 1
-#if $gname2 == "Gene_Symbol":
-  --gname2"
-#end if
-  </template>
+    <template name="extended_opts">
+        -e "$exprefile"
+        -k $expression.kind --info $expression.expreinfo --method $method
+        --da $diff_amount --df $diff_fdr -c 1
+        #if $gname2 == "Gene_Symbol":
+            --gname2
+        #end if
+    </template>
 
-  <token name="@EXTERNAL_DOCUMENTATION@">
+    <token name="@EXTERNAL_DOCUMENTATION@">
 
 For details about this application, please go to:
-        http://cistrome.org/BETA/index.html
+                http://cistrome.org/BETA/index.html
 
-  </token>
-  <token name="@CITATION_SECTION@">------
+    </token>
+    <token name="@CITATION_SECTION@">
 
 **Citation**
 
 For the underlying tool, please cite the following publication:
 Wang, S., Sun, H., Ma, J., Zang, C., Wang, C., Wang, J., Tang Q, Meyer CA, Zhang Y, Liu, X. S. (2013). Target analysis by integration of transcriptome and ChIP-seq data with BETA. Nature protocols, 8(12), 2502-2515. 
 PMID: 24263090
-  </token>
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/nprot.2013.150</citation>
+        </citations>
+    </xml>
 </macros>
 
--- a/beta_minus.xml	Tue Sep 16 12:51:50 2014 -0500
+++ b/beta_minus.xml	Thu Mar 22 08:33:55 2018 -0400
@@ -1,49 +1,49 @@
 <tool id="beta_minus" name="BETA-minus: Targets prediction with binding only" version="0.1.0">
-  <description>Predict the factors (TFs or CRs) direct target genes by only binding data</description>
-  <macros>
-    <import>beta_macros.xml</import>
-  </macros>
-  <expand macro="requirements" />
-  <command>
-  BETA minus 
-  #include source=$common_opts#
-  #include source=$genome_opts#
-  &amp;> $log
-  </command>
-  <inputs>
-    <expand macro="common_params" />
-    <expand macro="genome_params" />
-  </inputs>
-  <expand macro="stdio" />
-  <outputs>
-    <data format="txt" name="log" label="Log of BETA minus"/>
-    <data format="tabular" name="targetsoutput" label="BETA predicted Targets" from_work_dir="BETA_OUTPUT/NA_targets.txt"/>
-    <data format="tabular" name="targetpeaks" label="BETA Target gene's associated peaks" from_work_dir="BETA_OUTPUT/NA_targets_associated_peaks.txt"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
-      <param name="distance" value="100000"/>
-      <param name="peaknumber" value="10000"/>
-      <param name="genomeName" value="hg19"/>
-      <output name="log">
-        <assert_contents>
-            <has_text_matching expression="Finished" />
-        </assert_contents>
-      </output>
-      <output name="targetsoutput">
-        <assert_contents>
-            <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" />
-        </assert_contents>
-      </output>
-      <output name="targetpeaks">
-        <assert_contents>
-            <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" />
-        </assert_contents>
-      </output>
-    </test>
-  </tests>
- <help>
+    <description>Predict the factors (TFs or CRs) direct target genes by only binding data</description>
+    <macros>
+        <import>beta_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        BETA minus
+        #include source=$common_opts#
+        #include source=$genome_opts#
+        > '$log' 2>&1
+    ]]></command>
+    <inputs>
+        <expand macro="common_params" />
+        <expand macro="genome_params" />
+    </inputs>
+    <outputs>
+        <data name="log" format="txt" label="Log of BETA minus"/>
+        <data name="targetsoutput" format="tabular" from_work_dir="BETA_OUTPUT/NA_targets.txt" label="BETA predicted Targets"/>
+        <data name="targetpeaks" format="tabular" from_work_dir="BETA_OUTPUT/NA_targets_associated_peaks.txt" label="BETA Target gene's associated peaks"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="peakfile" value="peaks.bed" ftype="bed" dbkey="hg19"/>
+            <param name="distance" value="100000"/>
+            <param name="peaknumber" value="10000"/>
+            <param name="genomeName" value="hg19"/>
+            <output name="log">
+                <assert_contents>
+                    <has_text_matching expression="Finished"/>
+                </assert_contents>
+            </output>
+            <output name="targetsoutput">
+                <assert_contents>
+                    <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP"/>
+                </assert_contents>
+            </output>
+            <output name="targetpeaks">
+                <assert_contents>
+                    <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+ <help><![CDATA[
 ** BETA minus **
 
 @EXTERNAL_DOCUMENTATION@
@@ -64,13 +64,13 @@
 **Parameters**
 
 - **PEAKFILE file** contains peaks for the experiment in a bed
-  format file. Normally, it's produced by the peak calling tool. It's
-  required.
+  format file. Normally, it's produced by the peak calling tool. It's
+  required.
 - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter.
 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
-  It is a tab delimited text file with gene annotation with refseq and gene symbol.
-  Input this file only if your genome is neither hg19 nor mm9.
-  profiling
+  It is a tab delimited text file with gene annotation with refseq and gene symbol.
+  Input this file only if your genome is neither hg19 nor mm9.
+  profiling
 - **OUTPUT** to specify the output files directory
 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
 - **NAME** specify the name of the output files.
@@ -83,29 +83,29 @@
 
 ::
 
-  -h, --help            show this help message and exit
-  -p PEAKFILE, --peakfile PEAKFILE
-                        The bed format of peaks binding sites. 
-                        BETA supports 3 or 5 columns bed format: CHROM, START, END [NAME, SCORE]
-  -g {hg19,mm9}, --genome {hg19,mm9}
-                        Specify your species, {hg19, mm9}
-  -r REFERENCE, --reference REFERENCE
-                        the refgene info file downloaded from UCSC genome
-                        browser.input this file only if your genome is neither
-                        hg19 nor mm9
-  -o OUTPUT, --output OUTPUT
-                        the directory to store all the output files, if you
-                        don't set this, files will be output into the current
-                        directory
-  --bl                  whether or not use CTCF boundary to filter peaks
-                        around a gene, DEFAULT=FALSE
-  --pn PEAKNUMBER       the number of peaks you want to consider, DEFAULT=10000
-  -n NAME, --name NAME  this argument is used to name the result file.If not
-                        set, the peakfile name will be used instead
-  -d DISTANCE, --distance DISTANCE
-                        Set a number which unit is 'base'. It will get peaks
-                        within this distance from gene TSS. default:100000 (100kb)
+    -h, --help                                  show this help message and exit
+    -p PEAKFILE, --peakfile PEAKFILE
+                                                The bed format of peaks binding sites.
+                                                BETA supports 3 or 5 columns bed format: CHROM, START, END [NAME, SCORE]
+    -g {hg19,mm9}, --genome {hg19,mm9}
+                                                Specify your species, {hg19, mm9}
+    -r REFERENCE, --reference REFERENCE
+                                                the refgene info file downloaded from UCSC genome
+                                                browser. Input this file only if your genome is neither
+                                                hg19 nor mm9
+    -o OUTPUT, --output OUTPUT
+                                                the directory to store all the output files, if you
+                                                don't set this, files will be output into the current
+                                                directory
+    --bl                                        whether or not use CTCF boundary to filter peaks
+                                                around a gene, DEFAULT=FALSE
+    --pn PEAKNUMBER                             the number of peaks you want to consider, DEFAULT=10000
+    -n NAME, --name NAME                        this argument is used to name the result file. If not
+                                                set, the peakfile name will be used instead
+    -d DISTANCE, --distance DISTANCE
+                                                Set a number which unit is 'base'. It will get peaks
+                                                within this distance from gene TSS. default:100000 (100kb)
 
-  </help>
-
+    ]]></help>
+    <expand macro="citations" />
 </tool>
--- a/beta_plus.xml	Tue Sep 16 12:51:50 2014 -0500
+++ b/beta_plus.xml	Thu Mar 22 08:33:55 2018 -0400
@@ -1,85 +1,84 @@
 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0">
-  <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
-  <macros>
-    <import>beta_macros.xml</import>
-  </macros>
-  <expand macro="requirements" />
-  <command>
-  BETA plus 
-  #include source=$common_opts#
-  #include source=$genome_opts#
-  #include source=$ref_genome_seq_opts#
-  #include source=$extended_opts#
-  --mn $motifs
-  &amp;> $log &amp;&amp;
-  mkdir -p $motifresult.extra_files_path  &amp;&amp;
-  cp BETA_OUTPUT/motifresult/betamotif.html $motifresult  &amp;&amp;
-  cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path &amp;&amp;
-  cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path &amp;&amp;
-  cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path
-
-  </command>
-  <inputs>
-    <expand macro="common_params" />
-    <expand macro="genome_params" />
-    <expand macro="refGenomeSourceConditional" />
-    <expand macro="extended_params" />
-    <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" 
-           help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs">
-        <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" />
-    </param>
-  </inputs>
-  <expand macro="stdio" />
-  <outputs>
-    <data format="txt" name="log" label="Log of BETA plus"/>
-    <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
-    <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
-    <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
-    <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
-    <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
-    <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" />
-    <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" />
-    <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" />
-    <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" />
-    <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" />
-    <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
-      <param name="distance" value="100000"/>
-      <param name="peaknumber" value="10000"/>
-      <param name="genomeName" value="hg19"/>
-      <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
-      <param name="kind" value="LIM"/>
-      <param name="expreinfo" type="text" value="2,5,7"/>
-      <param name="gname2" value="Refseq"/>
-      <param name="diff_fdr" value="1.0"/>
-      <param name="diff_amount" value="0.5"/>
-      <param name="method" value="score"/>
-      <output name="log">
-        <assert_contents>
-            <has_text_matching expression="Finished" />
-        </assert_contents>
-      </output>
-      <output name="uptargetsoutput">
-        <assert_contents>
-            <has_text_matching expression="NM_001002231" />
-        </assert_contents>
-      </output>
-      <output name="downtargetsoutput">
-        <assert_contents>
-            <has_text_matching expression="NM_001280" />
-        </assert_contents>
-      </output>
-      <output name="differentialmotifs">
-        <assert_contents>
-            <has_text_matching expression="CDX1\tHomeodomain Family" />
-        </assert_contents>
-      </output>
-    </test>
-  </tests>
- <help>
+    <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
+    <macros>
+        <import>beta_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        BETA plus
+        #include source=$common_opts#
+        #include source=$genome_opts#
+        #include source=$ref_genome_seq_opts#
+        #include source=$extended_opts#
+        --mn $motifs
+        > '$log' 2>&1 &&
+        mkdir -p '$motifresult.extra_files_path' &&
+        cp BETA_OUTPUT/motifresult/betamotif.html '$motifresult' &&
+        cp BETA_OUTPUT/motifresult/*.js '$motifresult.extra_files_path' &&
+        cp BETA_OUTPUT/motifresult/*.css '$motifresult.extra_files_path' &&
+        cp -r BETA_OUTPUT/motifresult/img '$motifresult.extra_files_path'
+    ]]></command>
+    <inputs>
+        <expand macro="common_params" />
+        <expand macro="genome_params" />
+        <expand macro="refGenomeSourceConditional" />
+        <expand macro="extended_params" />
+        <param name="motifs" type="float" value="10" label="Motifs to retrieve"
+               help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs; required because it is always passed to BETA as --mn">
+            <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" />
+        </param>
+    </inputs>
+    <outputs>
+        <data name="log" format="txt" label="Log of BETA plus"/>
+        <data name="functionoutput" format="pdf" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf" label="BETA functional prediction on ${peakfile.name}"/>
+        <data name="uptargetsoutput" format="tabular" from_work_dir="BETA_OUTPUT/NA_uptarget.txt" label="BETA direct targets prediction on up regulated genes"/>
+        <data name="downtargetsoutput" format="tabular" from_work_dir="BETA_OUTPUT/NA_downtarget.txt" label="BETA direct targets prediction on down regulated genes"/>
+        <data name="uptargetpeaks" format="bed" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed" label="BETA Uptarget associated peaks"/>
+        <data name="downtargetpeaks" format="bed" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed" label="BETA Downtarget associated peaks"/>
+        <data name="upmotifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" label="BETA Motifs in up-target regions"/>
+        <data name="up_non_motifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" label="BETA Motifs in up-target regions versus non-target regions"/>
+        <data name="downmotifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" label="BETA Motifs in down-target regions"/>
+        <data name="down_non_motifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" label="BETA Motifs in down-target regions versus non-target regions"/>
+        <data name="differentialmotifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" label="BETA Motifs up-target regions versus down-target regions"/>
+        <data name="motifresult" format="html" label="BETA Motif analysis on target regions"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="peakfile" value="peaks.bed" ftype="bed" dbkey="hg19"/>
+            <param name="distance" value="100000"/>
+            <param name="peaknumber" value="10000"/>
+            <param name="genomeName" value="hg19"/>
+            <param name="exprefile" value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
+            <param name="kind" value="LIM"/>
+            <param name="expreinfo" value="2,5,7"/>
+            <param name="gname2" value="Refseq"/>
+            <param name="diff_fdr" value="1.0"/>
+            <param name="diff_amount" value="0.5"/>
+            <param name="method" value="score"/>
+            <output name="log">
+                <assert_contents>
+                    <has_text_matching expression="Finished"/>
+                </assert_contents>
+            </output>
+            <output name="uptargetsoutput">
+                <assert_contents>
+                    <has_text_matching expression="NM_001002231"/>
+                </assert_contents>
+            </output>
+            <output name="downtargetsoutput">
+                <assert_contents>
+                    <has_text_matching expression="NM_001280"/>
+                </assert_contents>
+            </output>
+            <output name="differentialmotifs">
+                <assert_contents>
+                    <has_text_matching expression="CDX1\tHomeodomain Family"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+ <help><![CDATA[
 ** BETA plus **
 
 @EXTERNAL_DOCUMENTATION@
@@ -89,7 +88,7 @@
 This tool annotates the given intervals and scores with genome
 features such as gene body. 
 Predicts Direct targets of TF and the active/repressive function
-prediction.  Does motif analysis at targets region as well. 
+prediction. Does motif analysis at targets region as well.
 It's the major module in CEAS package
 which is written by Hyunjin Gene Shin, published in Bioinformatics
 (pubmed id:19689956).
@@ -103,37 +102,37 @@
 **Parameters**
 
 - **PEAKFILE file** contains peaks for the experiment in a bed
-  format file. Normally, it's produced by the peak calling tool. It's
-  required.
+  format file. Normally, it's produced by the peak calling tool. It's
+  required.
 - **EXPREFILE file** contains the differentially expressed genes in a tab 
-  delimited text file. It's required.
+  delimited text file. It's required.
 - **Kind** The kind of your expression file format, LIM for LIMMA standard 
-  output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 
-  BSF for BETA specific format, and O for other formats.
+  output with Microarray, CUF for Cuffdiffs standard output with RNA-seq,
+  BSF for BETA specific format, and O for other formats.
 - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter.
 - **genomereference** Genome reference data with fasta format
 - **gname2** If this switch is on, gene or transcript IDs in files given 
-  through -e will be considered as official gene symbols, DEFAULT=FALSE
+  through -e will be considered as official gene symbols, DEFAULT=FALSE
 - **EXPREINFO** is the columns info of the geneID, up/down status and statistcal
-  values column of your expression data,NOTE: use a comma as an connector. 
-  for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column 
-  and FDR in 7 column.
+  values column of your expression data. NOTE: use a comma as a connector,
+  for example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
+  and FDR in the 7th column.
 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
-  It is a tab delimited text file with gene annotation with refseq and gene symbol.
-  Input this file only if your genome is neither hg19 nor mm9.
-  profiling
+  It is a tab delimited text file with gene annotation with refseq and gene symbol.
+  Input this file only if your genome is neither hg19 nor mm9.
+  profiling
 - **OUTPUT** to specify the output files directory
 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
-  is neither hg19 nor mm9.
+  is neither hg19 nor mm9.
 - **NAME** specify the name of the output files.
 - **DISTANCE** specify the distance wich peaks within it will be considered.
 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
-  via -e, genes with less than this value will be considered as the differentially
-  changed genes.
+  via -e, genes with less than this value will be considered as the differentially
+  changed genes.
 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
-  the 3rd column in file input via -e, genes ranked in the top # will be considered
-  as the differentially expressed genes.
+  the 3rd column in file input via -e, genes ranked in the top # will be considered
+  as the differentially expressed genes.
 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
 
 
@@ -143,73 +142,65 @@
 
 ::
 
-  -h, --help            show this help message and exit
-  -p PEAKFILE, --peakfile PEAKFILE
-                        The bed format of peaks binding sites. (BETA support 3
-                        or 5 columns bed format, CHROM, START, END (NAME,
-                        SCORE))
-  -e EXPREFILE, --diff_expr EXPREFILE
-                        The differential expression file get from limma for
-                        MicroArray ddata and cuffdiff for RNAseq data
-  -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
-                        The kind of your expression file,this is required,it
-                        can be LIM, CUF, BSF, O. LIM for LIMMA standard
-                        format. CUF for CUFDIFF standard format, BSF for BETA
-                        specific format and O for other formats, if is 'O',
-                        columns infor required via --info
-  -g {hg19,mm9}, --genome {hg19,mm9}
-                        Specify your species, hg19, mm9
-  --gs GENOMEREFERNCE	GenomeReference file with fasta format
-  --gname2              If this switch is on, gene or transcript IDs in files
-                        given through -e will be considered as official gene
-                        symbols, DEFAULT=FALSE
-  --info EXPREINFO      Specify the geneID, up/down status and statistcal
-                        values column of your expression data,NOTE: use a
-                        comma as an connector. for example: 2,5,7 means geneID
-                        in the 2nd column, Tscore in 5th column and FDR in 7
-                        column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
-                        and 1,2,3 for BETA specific format
-  -r REFERENCE, --reference REFERENCE
-                        The refgene info file downloaded from UCSC genome
-                        browser.input this file only if your genome is neither
-                        hg19 nor mm9
-  -o OUTPUT, --output OUTPUT
-                        The directory to store all the output files, if you
-                        don't set this, files will be output into the current
-                        directory
-  --bl                  Whether or not use CTCF boundary to filter peaks
-                        around a gene, DEFAULT=FALSE
-  --bf BOUNDARYFILE     CTCF conserved peaks bed file, use this only when you
-                        set --bl and the genome is neither hg19 nor mm9
-  --pn PEAKNUMBER       The number of peaks you want to consider,
-                        DEFAULT=10000
-  --method {score,distance}
-                        Define the method to do the TF/CR function prediction,
-                        score for regulatory potential, distance for the
-                        distance to the proximal binding peak. DEFAULT:SCORE
-  -n NAME, --name NAME  This argument is used to name the result file.If not
-                        set, the peakfile name will be used instead
-  -d DISTANCE, --distance DISTANCE
-                        Set a number which unit is 'base'. It will get peaks
-                        within this distance from gene TSS. default:100000
-                        (100kb)
-  --df DIFF_FDR         Input a number 0~1 as a threshold to pick out the most
-                        significant differential expressed genes by FDR,
-                        DEFAULT = 1, that is select all the genes
-  --da DIFF_AMOUNT      Get the most significant differential expressed genes
-                        by the percentage(0-1) or number(larger than 1)Input a
-                        number between 0-1, the rank based on fdr for example,
-                        2000, so that the script will only consider top 2000
-                        genes as the differentially expressed genes. DEFAULT =
-                        0.5, that is select top 50 percent genes of up and
-                        down seprately. NOTE: if you want to use diff_fdr,
-                        please set this parameter to 1, otherwise it will get
-                        the intersection of these two parameters
-  -c CUTOFF, --cutoff CUTOFF
-                        Input a number between 0~1 as a threshold to select
-                        the closer target gene list(up regulate or down
-                        regulate or both) with the p value was called by one
-                        side ks-test, DEFAULT = 0.001
+    -h, --help                                  show this help message and exit
+    -p PEAKFILE, --peakfile PEAKFILE            The bed format of peaks binding sites. (BETA support 3
+                                                or 5 columns bed format, CHROM, START, END (NAME,
+                                                SCORE))
+    -e EXPREFILE, --diff_expr EXPREFILE         The differential expression file obtained from limma
+                                                for microarray data or cuffdiff for RNA-seq data
+    -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}  The kind of your expression file; this is required. It
+                                                can be LIM, CUF, BSF, O. LIM for LIMMA standard
+                                                format, CUF for Cuffdiff standard format, BSF for BETA
+                                                specific format and O for other formats; if it is 'O',
+                                                column info is required via --info
+    -g {hg19,mm9}, --genome {hg19,mm9}          Specify your species, hg19, mm9
+    --gs GENOMEREFERENCE                        GenomeReference file with fasta format
+    --gname2                                    If this switch is on, gene or transcript IDs in files
+                                                given through -e will be considered as official gene
+                                                symbols, DEFAULT=FALSE
+    --info EXPREINFO                            Specify the geneID, up/down status and statistical
+                                                value columns of your expression data. NOTE: use a
+                                                comma as a connector, for example: 2,5,7 means geneID
+                                                in the 2nd column, Tscore in the 5th column and FDR in
+                                                the 7th column. DEFAULT: 2,5,7 for LIMMA; 2,10,13 for
+                                                Cuffdiff and 1,2,3 for BETA specific format
+    -r REFERENCE, --reference REFERENCE         The refgene info file downloaded from UCSC genome
+                                                browser.input this file only if your genome is neither
+                                                hg19 nor mm9
+    -o OUTPUT, --output OUTPUT                  The directory to store all the output files, if you
+                                                don't set this, files will be output into the current
+                                                directory
+    --bl                                        Whether or not use CTCF boundary to filter peaks
+                                                around a gene, DEFAULT=FALSE
+    --bf BOUNDARYFILE                           CTCF conserved peaks bed file, use this only when you
+                                                set --bl and the genome is neither hg19 nor mm9
+    --pn PEAKNUMBER                             The number of peaks you want to consider,
+                                                DEFAULT=10000
+    --method {score,distance}                   Define the method to do the TF/CR function prediction,
+                                                score for regulatory potential, distance for the
+                                                distance to the proximal binding peak. DEFAULT:SCORE
+    -n NAME, --name NAME                        This argument is used to name the result file.If not
+                                                set, the peakfile name will be used instead
+    -d DISTANCE, --distance DISTANCE            Set a number which unit is 'base'. It will get peaks
+                                                within this distance from gene TSS. default:100000
+                                                (100kb)
+    --df DIFF_FDR                               Input a number 0~1 as a threshold to pick out the most
+                                                significant differential expressed genes by FDR,
+                                                DEFAULT = 1, that is select all the genes
+    --da DIFF_AMOUNT                            Get the most significant differential expressed genes
+                                                by percentage (0-1) or number (larger than 1). Input a
+                                                number between 0-1, or a count ranked by FDR, for
+                                                example 2000, so that the script will only consider the
+                                                top 2000 genes as the differentially expressed genes.
+                                                DEFAULT = 0.5, that is select the top 50 percent of up
+                                                and down genes separately. NOTE: if you want to use
+                                                diff_fdr, please set this parameter to 1, otherwise it
+                                                will get the intersection of these two parameters
+    -c CUTOFF, --cutoff CUTOFF                  Input a number between 0~1 as a threshold to select
+                                                the closer target gene list(up regulate or down
+                                                regulate or both) with the p value was called by one
+                                                side ks-test, DEFAULT = 0.001
 
-  </help>
+    ]]></help>
+    <expand macro="citations" />
 </tool>
--- a/tool_dependencies.xml	Tue Sep 16 12:51:50 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="numpy" version="1.7.1">
-      <repository changeset_revision="55a7a5e9d63f" name="package_numpy_1_7" owner="devteam" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="R" version="2.15.0">
-      <repository changeset_revision="3a70cdc41d21" name="package_r_2_15_0" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="beta" version="1.0.6">
-      <repository changeset_revision="1874a464e5d6" name="package_beta_1_0_6" owner="jjohnson" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>