Mercurial > repos > jjohnson > cistrome_beta

diff beta_plus.xml @ 2:9c5241259454 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author: jjohnson
date: Thu, 22 Mar 2018 08:33:55 -0400
parents: 20453b656907
children: 067573bac905
--- a/beta_plus.xml	Tue Sep 16 12:51:50 2014 -0500
+++ b/beta_plus.xml	Thu Mar 22 08:33:55 2018 -0400
@@ -1,85 +1,84 @@
 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0">
-  <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
-  <macros>
-    <import>beta_macros.xml</import>
-  </macros>
-  <expand macro="requirements" />
-  <command>
-  BETA plus 
-  #include source=$common_opts#
-  #include source=$genome_opts#
-  #include source=$ref_genome_seq_opts#
-  #include source=$extended_opts#
-  --mn $motifs
-  &amp;> $log &amp;&amp;
-  mkdir -p $motifresult.extra_files_path  &amp;&amp;
-  cp BETA_OUTPUT/motifresult/betamotif.html $motifresult  &amp;&amp;
-  cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path &amp;&amp;
-  cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path &amp;&amp;
-  cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path
-
-  </command>
-  <inputs>
-    <expand macro="common_params" />
-    <expand macro="genome_params" />
-    <expand macro="refGenomeSourceConditional" />
-    <expand macro="extended_params" />
-    <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" 
-           help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs">
-        <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" />
-    </param>
-  </inputs>
-  <expand macro="stdio" />
-  <outputs>
-    <data format="txt" name="log" label="Log of BETA plus"/>
-    <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
-    <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
-    <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
-    <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
-    <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
-    <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" />
-    <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" />
-    <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" />
-    <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" />
-    <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" />
-    <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
-      <param name="distance" value="100000"/>
-      <param name="peaknumber" value="10000"/>
-      <param name="genomeName" value="hg19"/>
-      <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
-      <param name="kind" value="LIM"/>
-      <param name="expreinfo" type="text" value="2,5,7"/>
-      <param name="gname2" value="Refseq"/>
-      <param name="diff_fdr" value="1.0"/>
-      <param name="diff_amount" value="0.5"/>
-      <param name="method" value="score"/>
-      <output name="log">
-        <assert_contents>
-            <has_text_matching expression="Finished" />
-        </assert_contents>
-      </output>
-      <output name="uptargetsoutput">
-        <assert_contents>
-            <has_text_matching expression="NM_001002231" />
-        </assert_contents>
-      </output>
-      <output name="downtargetsoutput">
-        <assert_contents>
-            <has_text_matching expression="NM_001280" />
-        </assert_contents>
-      </output>
-      <output name="differentialmotifs">
-        <assert_contents>
-            <has_text_matching expression="CDX1\tHomeodomain Family" />
-        </assert_contents>
-      </output>
-    </test>
-  </tests>
- <help>
+    <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
+    <macros>
+        <import>beta_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        ## Run "BETA plus". The #include directives splice in option fragments
+        ## held in the named variables (defined outside this section of the file).
+        BETA plus
+        #include source=$common_opts#
+        #include source=$genome_opts#
+        #include source=$ref_genome_seq_opts#
+        #include source=$extended_opts#
+        ## 'motifs' is declared optional: only pass the flag when a value was
+        ## supplied, otherwise a bare option with no argument would be emitted.
+        #if str($motifs).strip() not in ('', 'None')
+            --mn $motifs
+        #end if
+        ## Both stdout and stderr of BETA are captured in the log dataset.
+        &> '$log' &&
+        ## Publish the HTML report as the dataset itself and copy the assets it
+        ## references (js, css, img) into the dataset's extra files directory.
+        mkdir -p '$motifresult.extra_files_path' &&
+        cp BETA_OUTPUT/motifresult/betamotif.html '$motifresult' &&
+        cp BETA_OUTPUT/motifresult/*.js '$motifresult.extra_files_path' &&
+        cp BETA_OUTPUT/motifresult/*.css '$motifresult.extra_files_path' &&
+        cp -r BETA_OUTPUT/motifresult/img '$motifresult.extra_files_path'
+    ]]></command>
+    <inputs>
+        <!-- Parameter groups expanded from macros (imported via beta_macros.xml). -->
+        <expand macro="common_params" />
+        <expand macro="genome_params" />
+        <expand macro="refGenomeSourceConditional" />
+        <expand macro="extended_params" />
+        <!-- Value handed to the mn option of the command; accepted range is 0 to 20000. -->
+        <param name="motifs" type="float" optional="true" value="10"
+               label="Motifs to retrieve"
+               help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs">
+            <validator type="in_range" min="0" max="20000"
+                       message="A float between 0 and 1 or an integer greater than 1" />
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Combined stdout/stderr of the run; written by the redirect in the command. -->
+        <data name="log" format="txt" label="Log of BETA plus"/>
+        <!-- Prediction results collected from the BETA_OUTPUT working directory. -->
+        <data name="functionoutput" format="pdf" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
+        <data name="uptargetsoutput" format="tabular" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
+        <data name="downtargetsoutput" format="tabular" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
+        <data name="uptargetpeaks" format="bed" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
+        <data name="downtargetpeaks" format="bed" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
+        <!-- Motif tables collected from BETA_OUTPUT/motifresult. -->
+        <data name="upmotifs" format="txt" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt"/>
+        <data name="up_non_motifs" format="txt" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt"/>
+        <data name="downmotifs" format="txt" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt"/>
+        <data name="down_non_motifs" format="txt" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt"/>
+        <data name="differentialmotifs" format="txt" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt"/>
+        <!-- HTML report; the command copies betamotif.html into this dataset and its js/css/img assets into the extra files directory. -->
+        <data name="motifresult" format="html" label="BETA Motif analysis on target regions"/>
+    </outputs>
+    <tests>
+        <!-- Single run on hg19 test data with a LIMMA-format expression file. -->
+        <test>
+            <!-- Input datasets -->
+            <param name="peakfile" value="peaks.bed" ftype="bed" dbkey="hg19"/>
+            <param name="exprefile" value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
+            <!-- Scalar settings -->
+            <param name="genomeName" value="hg19"/>
+            <param name="kind" value="LIM"/>
+            <param name="expreinfo" value="2,5,7"/>
+            <param name="gname2" value="Refseq"/>
+            <param name="distance" value="100000"/>
+            <param name="peaknumber" value="10000"/>
+            <param name="diff_fdr" value="1.0"/>
+            <param name="diff_amount" value="0.5"/>
+            <param name="method" value="score"/>
+            <!-- The log must report completion. -->
+            <output name="log">
+                <assert_contents>
+                    <has_text text="Finished" />
+                </assert_contents>
+            </output>
+            <!-- Each target table must contain a known transcript ID. -->
+            <output name="uptargetsoutput">
+                <assert_contents>
+                    <has_text text="NM_001002231" />
+                </assert_contents>
+            </output>
+            <output name="downtargetsoutput">
+                <assert_contents>
+                    <has_text text="NM_001280" />
+                </assert_contents>
+            </output>
+            <!-- Kept as a regular expression because the pattern relies on the \t escape. -->
+            <output name="differentialmotifs">
+                <assert_contents>
+                    <has_text_matching expression="CDX1\tHomeodomain Family" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+ <help><![CDATA[
 ** BETA plus **
 
 @EXTERNAL_DOCUMENTATION@
@@ -89,7 +88,7 @@
 This tool annotates the given intervals and scores with genome
 features such as gene body. 
 Predicts Direct targets of TF and the active/repressive function
-prediction.  Does motif analysis at targets region as well. 
+prediction.    Does motif analysis at targets region as well. 
 It's the major module in CEAS package
 which is written by Hyunjin Gene Shin, published in Bioinformatics
 (pubmed id:19689956).
@@ -103,37 +102,37 @@
 **Parameters**
 
 - **PEAKFILE file** contains peaks for the experiment in a bed
-  format file. Normally, it's produced by the peak calling tool. It's
-  required.
+    format file. Normally, it's produced by the peak calling tool. It's
+    required.
 - **EXPREFILE file** contains the differentially expressed genes in a tab 
-  delimited text file. It's required.
+    delimited text file. It's required.
 - **Kind** The kind of your expression file format, LIM for LIMMA standard 
-  output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 
-  BSF for BETA specific format, and O for other formats.
+    output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 
+    BSF for BETA specific format, and O for other formats.
 - **genome** hg19 for human and mm9 for mouse. For any other genome, leave this parameter unset.
 - **genomereference** Genome reference data with fasta format
 - **gname2** If this switch is on, gene or transcript IDs in files given 
-  through -e will be considered as official gene symbols, DEFAULT=FALSE
+    through -e will be considered as official gene symbols, DEFAULT=FALSE
 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical
-  values column of your expression data,NOTE: use a comma as an connector. 
-  for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column 
-  and FDR in 7 column.
+    values column of your expression data. NOTE: use a comma as the separator,
+    for example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
+    and FDR in the 7th column.
 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
-  It is a tab delimited text file with gene annotation with refseq and gene symbol.
-  Input this file only if your genome is neither hg19 nor mm9.
-  profiling
+    It is a tab-delimited text file containing gene annotation with RefSeq IDs
+    and gene symbols. Input this file only if your genome is neither hg19 nor
+    mm9.
 - **OUTPUT** to specify the output files directory
 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
-  is neither hg19 nor mm9.
+    is neither hg19 nor mm9.
 - **NAME** specify the name of the output files.
 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered.
 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
-  via -e, genes with less than this value will be considered as the differentially
-  changed genes.
+    via -e, genes with less than this value will be considered as the differentially
+    changed genes.
 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
-  the 3rd column in file input via -e, genes ranked in the top # will be considered
-  as the differentially expressed genes.
+    the 3rd column in file input via -e, genes ranked in the top # will be considered
+    as the differentially expressed genes.
 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
 
 
@@ -143,73 +142,65 @@
 
 ::
 
-  -h, --help            show this help message and exit
-  -p PEAKFILE, --peakfile PEAKFILE
-                        The bed format of peaks binding sites. (BETA support 3
-                        or 5 columns bed format, CHROM, START, END (NAME,
-                        SCORE))
-  -e EXPREFILE, --diff_expr EXPREFILE
-                        The differential expression file get from limma for
-                        MicroArray ddata and cuffdiff for RNAseq data
-  -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
-                        The kind of your expression file,this is required,it
-                        can be LIM, CUF, BSF, O. LIM for LIMMA standard
-                        format. CUF for CUFDIFF standard format, BSF for BETA
-                        specific format and O for other formats, if is 'O',
-                        columns infor required via --info
-  -g {hg19,mm9}, --genome {hg19,mm9}
-                        Specify your species, hg19, mm9
-  --gs GENOMEREFERNCE	GenomeReference file with fasta format
-  --gname2              If this switch is on, gene or transcript IDs in files
-                        given through -e will be considered as official gene
-                        symbols, DEFAULT=FALSE
-  --info EXPREINFO      Specify the geneID, up/down status and statistcal
-                        values column of your expression data,NOTE: use a
-                        comma as an connector. for example: 2,5,7 means geneID
-                        in the 2nd column, Tscore in 5th column and FDR in 7
-                        column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
-                        and 1,2,3 for BETA specific format
-  -r REFERENCE, --reference REFERENCE
-                        The refgene info file downloaded from UCSC genome
-                        browser.input this file only if your genome is neither
-                        hg19 nor mm9
-  -o OUTPUT, --output OUTPUT
-                        The directory to store all the output files, if you
-                        don't set this, files will be output into the current
-                        directory
-  --bl                  Whether or not use CTCF boundary to filter peaks
-                        around a gene, DEFAULT=FALSE
-  --bf BOUNDARYFILE     CTCF conserved peaks bed file, use this only when you
-                        set --bl and the genome is neither hg19 nor mm9
-  --pn PEAKNUMBER       The number of peaks you want to consider,
-                        DEFAULT=10000
-  --method {score,distance}
-                        Define the method to do the TF/CR function prediction,
-                        score for regulatory potential, distance for the
-                        distance to the proximal binding peak. DEFAULT:SCORE
-  -n NAME, --name NAME  This argument is used to name the result file.If not
-                        set, the peakfile name will be used instead
-  -d DISTANCE, --distance DISTANCE
-                        Set a number which unit is 'base'. It will get peaks
-                        within this distance from gene TSS. default:100000
-                        (100kb)
-  --df DIFF_FDR         Input a number 0~1 as a threshold to pick out the most
-                        significant differential expressed genes by FDR,
-                        DEFAULT = 1, that is select all the genes
-  --da DIFF_AMOUNT      Get the most significant differential expressed genes
-                        by the percentage(0-1) or number(larger than 1)Input a
-                        number between 0-1, the rank based on fdr for example,
-                        2000, so that the script will only consider top 2000
-                        genes as the differentially expressed genes. DEFAULT =
-                        0.5, that is select top 50 percent genes of up and
-                        down seprately. NOTE: if you want to use diff_fdr,
-                        please set this parameter to 1, otherwise it will get
-                        the intersection of these two parameters
-  -c CUTOFF, --cutoff CUTOFF
-                        Input a number between 0~1 as a threshold to select
-                        the closer target gene list(up regulate or down
-                        regulate or both) with the p value was called by one
-                        side ks-test, DEFAULT = 0.001
+    -h, --help                                  show this help message and exit
+    -p PEAKFILE, --peakfile PEAKFILE            The bed format of peaks binding sites. (BETA support 3
+                                                or 5 columns bed format, CHROM, START, END (NAME,
+                                                SCORE))
+    -e EXPREFILE, --diff_expr EXPREFILE         The differential expression file get from limma for
+                                                MicroArray ddata and cuffdiff for RNAseq data
+    -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}  The kind of your expression file,this is required,it
+                                                can be LIM, CUF, BSF, O. LIM for LIMMA standard
+                                                format. CUF for CUFDIFF standard format, BSF for BETA
+                                                specific format and O for other formats, if is 'O',
+                                                columns infor required via --info
+    -g {hg19,mm9}, --genome {hg19,mm9}          Specify your species, hg19, mm9
+    --gs GENOMEREFERENCE                        GenomeReference file with fasta format
+    --gname2                                    If this switch is on, gene or transcript IDs in files
+                                                given through -e will be considered as official gene
+                                                symbols, DEFAULT=FALSE
+    --info EXPREINFO                            Specify the geneID, up/down status and statistcal
+                                                values column of your expression data,NOTE: use a
+                                                comma as an connector. for example: 2,5,7 means geneID
+                                                in the 2nd column, Tscore in 5th column and FDR in 7
+                                                column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
+                                                and 1,2,3 for BETA specific format
+    -r REFERENCE, --reference REFERENCE         The refgene info file downloaded from UCSC genome
+                                                browser.input this file only if your genome is neither
+                                                hg19 nor mm9
+    -o OUTPUT, --output OUTPUT                  The directory to store all the output files, if you
+                                                don't set this, files will be output into the current
+                                                directory
+    --bl                                        Whether or not use CTCF boundary to filter peaks
+                                                around a gene, DEFAULT=FALSE
+    --bf BOUNDARYFILE                           CTCF conserved peaks bed file, use this only when you
+                                                set --bl and the genome is neither hg19 nor mm9
+    --pn PEAKNUMBER                             The number of peaks you want to consider,
+                                                DEFAULT=10000
+    --method {score,distance}                   Define the method to do the TF/CR function prediction,
+                                                score for regulatory potential, distance for the
+                                                distance to the proximal binding peak. DEFAULT:SCORE
+    -n NAME, --name NAME                        This argument is used to name the result file.If not
+                                                set, the peakfile name will be used instead
+    -d DISTANCE, --distance DISTANCE            Set a number which unit is 'base'. It will get peaks
+                                                within this distance from gene TSS. default:100000
+                                                (100kb)
+    --df DIFF_FDR                               Input a number 0~1 as a threshold to pick out the most
+                                                significant differential expressed genes by FDR,
+                                                DEFAULT = 1, that is select all the genes
+    --da DIFF_AMOUNT                            Get the most significant differential expressed genes
+                                                by the percentage(0-1) or number(larger than 1)Input a
+                                                number between 0-1, the rank based on fdr for example,
+                                                2000, so that the script will only consider top 2000
+                                                genes as the differentially expressed genes. DEFAULT =
+                                                0.5, that is select top 50 percent genes of up and
+                                                down seprately. NOTE: if you want to use diff_fdr,
+                                                please set this parameter to 1, otherwise it will get
+                                                the intersection of these two parameters
+    -c CUTOFF, --cutoff CUTOFF                  Input a number between 0~1 as a threshold to select
+                                                the closer target gene list(up regulate or down
+                                                regulate or both) with the p value was called by one
+                                                side ks-test, DEFAULT = 0.001
 
-  </help>
+    ]]></help>
+    <expand macro="citations" />
 </tool>
author	jjohnson
date	Thu, 22 Mar 2018 08:33:55 -0400
parents	20453b656907
children	067573bac905