Mercurial > repos > jjohnson > cistrome_beta
changeset 2:9c5241259454 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author | jjohnson |
---|---|
date | Thu, 22 Mar 2018 08:33:55 -0400 |
parents | 7f023a22da15 |
children | 067573bac905 |
files | beta_basic.xml beta_macros.xml beta_minus.xml beta_plus.xml tool_dependencies.xml |
diffstat | 5 files changed, 527 insertions(+), 555 deletions(-) [+] |
line wrap: on
line diff
--- a/beta_basic.xml Tue Sep 16 12:51:50 2014 -0500 +++ b/beta_basic.xml Thu Mar 22 08:33:55 2018 -0400 @@ -1,61 +1,61 @@ <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0"> - <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description> - <macros> - <import>beta_macros.xml</import> - </macros> - <expand macro="requirements" /> - <command> - BETA basic - #include source=$common_opts# - #include source=$genome_opts# - #include source=$extended_opts# - &> $log - </command> - <inputs> - <expand macro="common_params" /> - <expand macro="genome_params" /> - <expand macro="extended_params" /> - </inputs> - <expand macro="stdio" /> - <outputs> - <data format="txt" name="log" label="Log of BETA basic"/> - <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> - <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> - <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> - <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> - <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> - </outputs> - <tests> - <test> - <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> - <param name="distance" value="100000"/> - <param name="peaknumber" value="10000"/> - <param name="genomeName" value="hg19"/> - <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> - <param name="kind" value="LIM"/> - <param name="expreinfo" type="text" value="2,5,7"/> - <param name="gname2" 
value="Refseq"/> - <param name="diff_fdr" value="1.0"/> - <param name="diff_amount" value="0.5"/> - <param name="method" value="score"/> - <output name="log"> - <assert_contents> - <has_text_matching expression="Finished" /> - </assert_contents> - </output> - <output name="targetsoutput"> - <assert_contents> - <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> - </assert_contents> - </output> - <output name="targetpeaks"> - <assert_contents> - <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> - </assert_contents> - </output> - </test> - </tests> - <help> + <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description> + <macros> + <import>beta_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command><![CDATA[ + BETA basic + #include source=$common_opts# + #include source=$genome_opts# + #include source=$extended_opts# + &> $log + ]]></command> + <inputs> + <expand macro="common_params" /> + <expand macro="genome_params" /> + <expand macro="extended_params" /> + </inputs> + <outputs> + <data format="txt" name="log" label="Log of BETA basic"/> + <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> + <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> + <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> + <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> + <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" 
from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> + </outputs> + <tests> + <test> + <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> + <param name="distance" value="100000"/> + <param name="peaknumber" value="10000"/> + <param name="genomeName" value="hg19"/> + <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> + <param name="kind" value="LIM"/> + <param name="expreinfo" value="2,5,7"/> + <param name="gname2" value="Refseq"/> + <param name="diff_fdr" value="1.0"/> + <param name="diff_amount" value="0.5"/> + <param name="method" value="score"/> + <output name="log"> + <assert_contents> + <has_text_matching expression="Finished" /> + </assert_contents> + </output> + <output name="uptargetsoutput"> + <assert_contents> + <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> + </assert_contents> + </output> + <output name="uptargetpeaks"> + <assert_contents> + <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ ** BETA basic ** @EXTERNAL_DOCUMENTATION@ @@ -76,36 +76,36 @@ **Parameters** - **PEAKFILE file** contains peaks for the experiment in a bed - format file. Normally, it's produced by the peak calling tool. It's - required. + format file. Normally, it's produced by the peak calling tool. It's + required. - **EXPREFILE file** contains the differentially expressed genes in a tab - delimited text file. It's required. + delimited text file. It's required. - **Kind** The kind of your expression file format, LIM for LIMMA standard - output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, - BSF for BETA specific format, and O for other formats. + output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, + BSF for BETA specific format, and O for other formats. - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter. 
- **gname2** If this switch is on, gene or transcript IDs in files given - through -e will be considered as official gene symbols, DEFAULT=FALSE + through -e will be considered as official gene symbols, DEFAULT=FALSE - **EXPREINFO** is the columns info of the geneID, up/down status and statistcal - values column of your expression data,NOTE: use a comma as an connector. - for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column - and FDR in 7 column. + values column of your expression data,NOTE: use a comma as an connector. + for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column + and FDR in 7 column. - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. - It is a tab delimited text file with gene annotation with refseq and gene symbol. - Input this file only if your genome is neither hg19 nor mm9. - profiling + It is a tab delimited text file with gene annotation with refseq and gene symbol. + Input this file only if your genome is neither hg19 nor mm9. + profiling - **OUTPUT** to specify the output files directory - **bl** Whether or not to use CTCF boundary file to get the contributed peaks - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome - is neither hg19 nor mm9. + is neither hg19 nor mm9. - **NAME** specify the name of the output files. - **DISTANCE** specify the distance wich peaks within it will be considered. - **DIFF_FDR** specify the differential genes by the 3rd column in file input - via -e, genes with less than this value will be considered as the differentially - changed genes. + via -e, genes with less than this value will be considered as the differentially + changed genes. - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by - the 3rd column in file input via -e, genes ranked in the top # will be considered - as the differentially expressed genes. 
+ the 3rd column in file input via -e, genes ranked in the top # will be considered + as the differentially expressed genes. - **CUTOFF** specify a cutoff of ks-test in the function prediction part ----- @@ -114,75 +114,66 @@ :: - -h, --help show this help message and exit - -p PEAKFILE, --peakfile PEAKFILE - The bed format of peaks binding sites. (BETA support 3 - or 5 columns bed format, CHROM, START, END (NAME, - SCORE)) - -e EXPREFILE, --diff_expr EXPREFILE - The differential expression file get from limma for - MicroArray ddata and cuffdiff for RNAseq data - -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} - The kind of your expression file,this is required,it - can be LIM, CUF, BSF, O. LIM for LIMMA standard - format. CUF for CUFDIFF standard format, BSF for BETA - specific format and O for other formats, if is 'O', - columns infor required via --info - -g {hg19,mm9}, --genome {hg19,mm9} - Specify your species, hg19, mm9. For other genome - assembily versions of human and mouse or other - species, ignore this parameter. - --gname2 If this switch is on, gene or transcript IDs in files - given through -e will be considered as official gene - symbols, DEFAULT=FALSE - --info EXPREINFO Specify the geneID, up/down status and statistcal - values column of your expression data,NOTE: use a - comma as an connector. 
for example: 2,5,7 means geneID - in the 2nd column, Tscore in 5th column and FDR in 7 - column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff - and 1,2,3 for BETA specific format - -r REFERENCE, --reference REFERENCE - The refgene info file downloaded from UCSC genome - browser.input this file only if your genome is neither - hg19 nor mm9 - -o OUTPUT, --output OUTPUT - The directory to store all the output files, if you - don't set this, files will be output into the current - directory - --bl Whether or not use CTCF boundary to filter peaks - around a gene, DEFAULT=FALSE - --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you - set --bl and the genome is neither hg19 nor mm9 - --pn PEAKNUMBER The number of peaks you want to consider, - DEFAULT=10000 - --method {score,distance} - Define the method to do the TF/CR function prediction, - score for regulatory potential, distance for the - distance to the proximal binding peak. DEFAULT:SCORE - -n NAME, --name NAME This argument is used to name the result file.If not - set, the peakfile name will be used instead - -d DISTANCE, --distance DISTANCE - Set a number which unit is 'base'. It will get peaks - within this distance from gene TSS. default:100000 - (100kb) - --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most - significant differential expressed genes by FDR, - DEFAULT = 1, that is select all the genes - --da DIFF_AMOUNT Get the most significant differential expressed genes - by the percentage(0-1) or number(larger than 1)Input a - number between 0-1, the rank based on fdr for example, - 2000, so that the script will only consider top 2000 - genes as the differentially expressed genes. DEFAULT = - 0.5, that is select top 50 percent genes of up and - down seprately. 
NOTE: if you want to use diff_fdr, - please set this parameter to 1, otherwise it will get - the intersection of these two parameters - -c CUTOFF, --cutoff CUTOFF - Input a number between 0~1 as a threshold to select - the closer target gene list(up regulate or down - regulate or both) with the p value was called by one - side ks-test, DEFAULT = 0.001 + -h, --help show this help message and exit + -p PEAKFILE, --peakfile PEAKFILE The bed format of peaks binding sites. (BETA support 3 + or 5 columns bed format, CHROM, START, END (NAME, + SCORE)) + -e EXPREFILE, --diff_expr EXPREFILE The differential expression file get from limma for + MicroArray ddata and cuffdiff for RNAseq data + -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} The kind of your expression file,this is required,it + can be LIM, CUF, BSF, O. LIM for LIMMA standard + format. CUF for CUFDIFF standard format, BSF for BETA + specific format and O for other formats, if is 'O', + columns infor required via --info + -g {hg19,mm9}, --genome {hg19,mm9} Specify your species, hg19, mm9. For other genome + assembily versions of human and mouse or other + species, ignore this parameter. + --gname2 If this switch is on, gene or transcript IDs in files + given through -e will be considered as official gene + symbols, DEFAULT=FALSE + --info EXPREINFO Specify the geneID, up/down status and statistcal + values column of your expression data,NOTE: use a + comma as an connector. 
for example: 2,5,7 means geneID + in the 2nd column, Tscore in 5th column and FDR in 7 + column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff + and 1,2,3 for BETA specific format + -r REFERENCE, --reference REFERENCE The refgene info file downloaded from UCSC genome + browser.input this file only if your genome is neither + hg19 nor mm9 + -o OUTPUT, --output OUTPUT The directory to store all the output files, if you + don't set this, files will be output into the current + directory + --bl Whether or not use CTCF boundary to filter peaks + around a gene, DEFAULT=FALSE + --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you + set --bl and the genome is neither hg19 nor mm9 + --pn PEAKNUMBER The number of peaks you want to consider, + DEFAULT=10000 + --method {score,distance} Define the method to do the TF/CR function prediction, + score for regulatory potential, distance for the + distance to the proximal binding peak. DEFAULT:SCORE + -n NAME, --name NAME This argument is used to name the result file.If not + set, the peakfile name will be used instead + -d DISTANCE, --distance DISTANCE Set a number which unit is 'base'. It will get peaks + within this distance from gene TSS. default:100000 + (100kb) + --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most + significant differential expressed genes by FDR, + DEFAULT = 1, that is select all the genes + --da DIFF_AMOUNT Get the most significant differential expressed genes + by the percentage(0-1) or number(larger than 1)Input a + number between 0-1, the rank based on fdr for example, + 2000, so that the script will only consider top 2000 + genes as the differentially expressed genes. DEFAULT = + 0.5, that is select top 50 percent genes of up and + down seprately. 
NOTE: if you want to use diff_fdr, + please set this parameter to 1, otherwise it will get + the intersection of these two parameters + -c CUTOFF, --cutoff CUTOFF Input a number between 0~1 as a threshold to select + the closer target gene list(up regulate or down + regulate or both) with the p value was called by one + side ks-test, DEFAULT = 0.001 - </help> - + ]]></help> + <expand macro="citations" /> </tool>
--- a/beta_macros.xml Tue Sep 16 12:51:50 2014 -0500 +++ b/beta_macros.xml Thu Mar 22 08:33:55 2018 -0400 @@ -1,185 +1,187 @@ <macros> - <macro name="requirements"> - <requirements> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.15.0">R</requirement> - <requirement type="package" version="1.0.6">beta</requirement> - </requirements> - </macro> + <macro name="requirements"> + <requirements> + <requirement type="package" version="1.0.7">beta</requirement> + </requirements> + </macro> - <macro name="stdio"> - <stdio> - <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> - <exit_code range="1:" level="fatal" description="Error" /> - </stdio> - </macro> + <macro name="stdio"> + <stdio> + <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + </macro> - <macro name="common_params"> - <param format="bed" name="peakfile" type="data" label="BED file for Peaks"> - <validator type="unspecified_build" /> - </param> - <param name="output_dir" type="hidden" label="Name for the output files" value="BETA_OUTPUT"/> - <param name="name" type="hidden" label="Name for the output files" value="NA"/> - <param name="distance" type="integer" label="the distance from gene TSS within which peaks will be selected" value="100000"> - <validator type="in_range" max="20000000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000000" /> - </param> - <param name="peaknumber" type="integer" label="Peaks considered to contribute to the genes" value="10000"> - <validator type="in_range" max="200000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" /> - </param> - </macro> + <macro name="common_params"> + <param format="bed" name="peakfile" type="data" label="BED file for Peaks"> + <validator type="unspecified_build" /> + 
</param> + <param name="output_dir" type="hidden" label="Name for the output files" value="BETA_OUTPUT"/> + <param name="name" type="hidden" label="Name for the output files" value="NA"/> + <param name="distance" type="integer" label="the distance from gene TSS within which peaks will be selected" value="100000"> + <validator type="in_range" max="20000000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000000" /> + </param> + <param name="peaknumber" type="integer" label="Peaks considered to contribute to the genes" value="10000"> + <validator type="in_range" max="200000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" /> + </param> + </macro> - <macro name="boundary"> + <macro name="boundary"> <conditional name="boundary"> - <param name="boundaryLimit" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use CTCF boundary to filter peaks around a gene"/> - <when value="no"/> - <when value="yes"> - <yield /> - </when> + <param name="boundaryLimit" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use CTCF boundary to filter peaks around a gene"/> + <when value="no"/> + <when value="yes"> + <yield /> + </when> </conditional> - </macro> + </macro> - <macro name="genome_params"> - <conditional name="refGenome"> - <param name="genomeName" type="select" label="genome reference"> - <option value="hg19">hg19 (built-in)</option> - <option value="mm9">mm9 (built-in)</option> - <option value="other">other</option> - </param> - <when value="hg19"> - <expand macro="boundary" /> - </when> - <when value="mm9"> - <expand macro="boundary" /> - </when> - <when value="other"> - <param name="refseq" type="data" format="tabular" label="UCSC Refseq Genes (From UCSC Table Browser)" - help="Columns: name,chrom,strand,txStart,txEnd,name"/> - <expand macro="boundary"> - <param name="bl_bed" type="data" format="bed" label="BED format boundary file"/> - 
</expand> - </when> - </conditional> - </macro> + <macro name="genome_params"> + <conditional name="refGenome"> + <param name="genomeName" type="select" label="genome reference"> + <option value="hg19">hg19 (built-in)</option> + <option value="mm9">mm9 (built-in)</option> + <option value="other">other</option> + </param> + <when value="hg19"> + <expand macro="boundary" /> + </when> + <when value="mm9"> + <expand macro="boundary" /> + </when> + <when value="other"> + <param name="refseq" type="data" format="tabular" label="UCSC Refseq Genes (From UCSC Table Browser)" + help="Columns: name,chrom,strand,txStart,txEnd,name"/> + <expand macro="boundary"> + <param name="bl_bed" type="data" format="bed" label="BED format boundary file"/> + </expand> + </when> + </conditional> + </macro> - <macro name="refGenomeSourceConditional"> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Use a built in reference genome or own from your history" help="Genome Reference Fasta sequence"> - <option value="cached" selected="True">Use a built-in genome</option> - <option value="history">Use a genome from history</option> - </param> - <when value="cached"> - <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> - <options from_data_table="all_fasta"/> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> - </when> <!-- history --> - </conditional> <!-- refGenomeSource --> - </macro> + <macro name="refGenomeSourceConditional"> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Use a built in reference genome or one from your history" help="Genome Reference Fasta sequence"> + <option value="cached" selected="True">Use a built-in genome</option> + <option value="history">Use a genome from history</option> + </param> + <when value="cached"> + <param name="all_fasta_source" type="select" 
label="Source FASTA Sequence"> + <options from_data_table="all_fasta"/> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" label="Select the reference genome" /> + </when> <!-- history --> + </conditional> <!-- refGenomeSource --> + </macro> - <macro name="extended_params"> - <param format="txt" name="exprefile" type="data" label="TEXT file for differential expression data"> - <validator type="unspecified_build" /> - </param> - <conditional name="expression"> - <param name="kind" type="select" label="Expression FIle Type" help="Preset columns for Cuffdiff, LIMMA, and BETA Specific Format"> - <option value="CUF">RNA-seq data with Cuffdiff default format</option> - <option value="LIM">MicroArray data with LIMMA default format</option> - <option value="BSF">BETA Specific Format</option> - <option value="O">Other tools processed data with BETA specific format</option> + <macro name="extended_params"> + <param format="txt" name="exprefile" type="data" label="TEXT file for differential expression data"> + <validator type="unspecified_build" /> </param> - <when value="CUF"> - <param name="expreinfo" type="text" value="2,10,13" label="Column number of the geneid, regulate status and statistics value"/> - </when> - <when value="LIM"> - <param name="expreinfo" type="text" value="2,5,7" label="Column number of the geneid, regulate status and statistics value"/> - </when> - <when value="BSF"> - <param name="expreinfo" type="text" value="1,2,3" label="Column number of the geneid, regulate status and statistics value"/> - </when> - <when value="O"> - <param name="expreinfo" type="text" value="" label="Column number of the geneid, regulate status and statistics value is required"> - <validator type="regex" message="Enter column numbers:geneid,test_stat,value">^\d+,\d+,\d+$</validator> + <conditional name="expression"> + <param name="kind" type="select" label="Expression FIle Type" help="Preset columns for Cuffdiff, LIMMA, and BETA 
Specific Format"> + <option value="CUF">RNA-seq data with Cuffdiff default format</option> + <option value="LIM">MicroArray data with LIMMA default format</option> + <option value="BSF">BETA Specific Format</option> + <option value="O">Other tools processed data with BETA specific format</option> </param> - </when> - </conditional> - <param name="gname2" type="select" label="TRUE if gene ID in expression file identified by official gene symbol"> - <option value="Refseq">Refseq</option> - <option value="Gene_Symbol">Gene Symbol</option> - </param> - <param name="diff_fdr" type="float" label="get the most significant expression differentially changed genes by this cutoff based on fdr or pvalue" value="1.0"> - <validator type="in_range" max="1.0" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 1.0" /> - </param> - <param name="diff_amount" type="float" label="get the most significant expression differentially changed genes by amount" value="0.5"> - <validator type="in_range" max="20000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000" /> - </param> - <param name="method" type="select" label="method to do the TF/CR function prediction" optional="true"> - <option value="score">regulatory potential</option> - <option value="distance">distance to the nearest peak</option> - </param> - - </macro> + <when value="CUF"> + <param name="expreinfo" type="text" value="2,10,13" label="Column number of the geneid, regulate status and statistics value"/> + </when> + <when value="LIM"> + <param name="expreinfo" type="text" value="2,5,7" label="Column number of the geneid, regulate status and statistics value"/> + </when> + <when value="BSF"> + <param name="expreinfo" type="text" value="1,2,3" label="Column number of the geneid, regulate status and statistics value"/> + </when> + <when value="O"> + <param name="expreinfo" type="text" value="" label="Column number of the geneid, regulate 
status and statistics value is required"> + <validator type="regex" message="Enter column numbers:geneid,test_stat,value">^\d+,\d+,\d+$</validator> + </param> + </when> + </conditional> + <param name="gname2" type="select" label="TRUE if gene ID in expression file identified by official gene symbol"> + <option value="Refseq">Refseq</option> + <option value="Gene_Symbol">Gene Symbol</option> + </param> + <param name="diff_fdr" type="float" label="get the most significant expression differentially changed genes by this cutoff based on fdr or pvalue" value="1.0"> + <validator type="in_range" max="1.0" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 1.0" /> + </param> + <param name="diff_amount" type="float" label="get the most significant expression differentially changed genes by amount" value="0.5"> + <validator type="in_range" max="20000" min="0" message="The Relative distance is out of range, the parameter has to be between 0 to 20000" /> + </param> + <param name="method" type="select" label="method to do the TF/CR function prediction" optional="true"> + <option value="score">regulatory potential</option> + <option value="distance">distance to the nearest peak</option> + </param> + </macro> - <template name="common_opts"> - -p "$peakfile" - -d $distance --pn $peaknumber -o $output_dir -n $name - </template> + <template name="common_opts"> + -p "$peakfile" + -d $distance --pn $peaknumber -o $output_dir -n $name + </template> - <template name="genome_opts"> -#if $refGenome.genomeName == 'hg19': - -g $refGenome.genomeName - ## -r \$BETA_LIB_PATH/BETA/references/hg19.refseq - #if $refGenome.boundary.boundaryLimit: - --bl - ## --bf \$BETA_LIB_PATH/BETA/references/hg19_CTCF_bound.bed - #end if -#elif $refGenome.genomeName == 'mm9': - -g $refGenome.genomeName - ## -r \$BETA_LIB_PATH/BETA/references/mm9.refseq - #if $refGenome.boundary.boundaryLimit: - --bl - ## --bf \$BETA_LIB_PATH/BETA/references/mm9_CTCF_bound.bed - #end if 
-#else - -r $refGenome.refseq - #if $refGenome.boundary.boundaryLimit: - --bl - --bf $refGenome.boundary.bl_bed - #end if -#end if - </template> - <template name="ref_genome_seq_opts"> -#if $refGenomeSource.genomeSource == 'cached': - --gs $refGenomeSource.all_fasta_source.fields.path -#else - --gs $refGenomeSource.ownFile -#end if - </template> + <template name="genome_opts"> + #if $refGenome.genomeName == 'hg19': + -g $refGenome.genomeName + ## -r \$BETA_LIB_PATH/BETA/references/hg19.refseq + #if $refGenome.boundary.boundaryLimit: + --bl + ## --bf \$BETA_LIB_PATH/BETA/references/hg19_CTCF_bound.bed + #end if + #elif $refGenome.genomeName == 'mm9': + -g $refGenome.genomeName + ## -r \$BETA_LIB_PATH/BETA/references/mm9.refseq + #if $refGenome.boundary.boundaryLimit: + --bl + ## --bf \$BETA_LIB_PATH/BETA/references/mm9_CTCF_bound.bed + #end if + #else + -r $refGenome.refseq + #if $refGenome.boundary.boundaryLimit: + --bl + --bf $refGenome.boundary.bl_bed + #end if + #end if + </template> + <template name="ref_genome_seq_opts"> + #if $refGenomeSource.genomeSource == 'cached': + --gs $refGenomeSource.all_fasta_source.fields.path + #else + --gs $refGenomeSource.ownFile + #end if + </template> - <template name="extended_opts"> - -e "$exprefile" - -k $expression.kind --info $expression.expreinfo --method $method - --da $diff_amount --df $diff_fdr -c 1 -#if $gname2 == "Gene_Symbol": - --gname2" -#end if - </template> + <template name="extended_opts"> + -e "$exprefile" + -k $expression.kind --info $expression.expreinfo --method $method + --da $diff_amount --df $diff_fdr -c 1 + #if $gname2 == "Gene_Symbol": + --gname2" + #end if + </template> - <token name="@EXTERNAL_DOCUMENTATION@"> + <token name="@EXTERNAL_DOCUMENTATION@"> For details about this application, please go to: - http://cistrome.org/BETA/index.html + http://cistrome.org/BETA/index.html - </token> - <token name="@CITATION_SECTION@">------ + </token> + <token name="@CITATION_SECTION@"> **Citation** For the 
underlying tool, please cite the following publication: Wang, S., Sun, H., Ma, J., Zang, C., Wang, C., Wang, J., Tang Q, Meyer CA, Zhang Y, Liu, X. S. (2013). Target analysis by integration of transcriptome and ChIP-seq data with BETA. Nature protocols, 8(12), 2502-2515. PMID: 24263090 - </token> + </token> + <xml name="citations"> + <citations> + <citation type="doi">10.1038/nprot.2013.150</citation> + </citations> + </xml> </macros>
--- a/beta_minus.xml Tue Sep 16 12:51:50 2014 -0500 +++ b/beta_minus.xml Thu Mar 22 08:33:55 2018 -0400 @@ -1,49 +1,49 @@ <tool id="beta_minus" name="BETA-minus: Targets prediction with binding only" version="0.1.0"> - <description>Predict the factors (TFs or CRs) direct target genes by only binding data</description> - <macros> - <import>beta_macros.xml</import> - </macros> - <expand macro="requirements" /> - <command> - BETA minus - #include source=$common_opts# - #include source=$genome_opts# - &> $log - </command> - <inputs> - <expand macro="common_params" /> - <expand macro="genome_params" /> - </inputs> - <expand macro="stdio" /> - <outputs> - <data format="txt" name="log" label="Log of BETA minus"/> - <data format="tabular" name="targetsoutput" label="BETA predicted Targets" from_work_dir="BETA_OUTPUT/NA_targets.txt"/> - <data format="tabular" name="targetpeaks" label="BETA Target gene's associated peaks" from_work_dir="BETA_OUTPUT/NA_targets_associated_peaks.txt"/> - </outputs> - <tests> - <test> - <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> - <param name="distance" value="100000"/> - <param name="peaknumber" value="10000"/> - <param name="genomeName" value="hg19"/> - <output name="log"> - <assert_contents> - <has_text_matching expression="Finished" /> - </assert_contents> - </output> - <output name="targetsoutput"> - <assert_contents> - <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> - </assert_contents> - </output> - <output name="targetpeaks"> - <assert_contents> - <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> - </assert_contents> - </output> - </test> - </tests> - <help> + <description>Predict the factors (TFs or CRs) direct target genes by only binding data</description> + <macros> + <import>beta_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command><![CDATA[ + BETA minus + #include 
source=$common_opts# + #include source=$genome_opts# + &> $log + ]]></command> + <inputs> + <expand macro="common_params" /> + <expand macro="genome_params" /> + </inputs> + <outputs> + <data format="txt" name="log" label="Log of BETA minus"/> + <data format="tabular" name="targetsoutput" label="BETA predicted Targets" from_work_dir="BETA_OUTPUT/NA_targets.txt"/> + <data format="tabular" name="targetpeaks" label="BETA Target gene's associated peaks" from_work_dir="BETA_OUTPUT/NA_targets_associated_peaks.txt"/> + </outputs> + <tests> + <test> + <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> + <param name="distance" value="100000"/> + <param name="peaknumber" value="10000"/> + <param name="genomeName" value="hg19"/> + <output name="log"> + <assert_contents> + <has_text_matching expression="Finished" /> + </assert_contents> + </output> + <output name="targetsoutput"> + <assert_contents> + <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> + </assert_contents> + </output> + <output name="targetpeaks"> + <assert_contents> + <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ ** BETA minus ** @EXTERNAL_DOCUMENTATION@ @@ -64,13 +64,13 @@ **Parameters** - **PEAKFILE file** contains peaks for the experiment in a bed - format file. Normally, it's produced by the peak calling tool. It's - required. + format file. Normally, it's produced by the peak calling tool. It's + required. - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter. - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. - It is a tab delimited text file with gene annotation with refseq and gene symbol. - Input this file only if your genome is neither hg19 nor mm9. - profiling + It is a tab delimited text file with gene annotation with refseq and gene symbol. 
+ Input this file only if your genome is neither hg19 nor mm9. + profiling - **OUTPUT** to specify the output files directory - **bl** Whether or not to use CTCF boundary file to get the contributed peaks - **NAME** specify the name of the output files. @@ -83,29 +83,29 @@ :: - -h, --help show this help message and exit - -p PEAKFILE, --peakfile PEAKFILE - The bed format of peaks binding sites. - BETA supports 3 or 5 columns bed format: CHROM, START, END [NAME, SCORE] - -g {hg19,mm9}, --genome {hg19,mm9} - Specify your species, {hg19, mm9} - -r REFERENCE, --reference REFERENCE - the refgene info file downloaded from UCSC genome - browser.input this file only if your genome is neither - hg19 nor mm9 - -o OUTPUT, --output OUTPUT - the directory to store all the output files, if you - don't set this, files will be output into the current - directory - --bl whether or not use CTCF boundary to filter peaks - around a gene, DEFAULT=FALSE - --pn PEAKNUMBER the number of peaks you want to consider, DEFAULT=10000 - -n NAME, --name NAME this argument is used to name the result file.If not - set, the peakfile name will be used instead - -d DISTANCE, --distance DISTANCE - Set a number which unit is 'base'. It will get peaks - within this distance from gene TSS. default:100000 (100kb) + -h, --help show this help message and exit + -p PEAKFILE, --peakfile PEAKFILE + The bed format of peaks binding sites. 
+ BETA supports 3 or 5 columns bed format: CHROM, START, END [NAME, SCORE] + -g {hg19,mm9}, --genome {hg19,mm9} + Specify your species, {hg19, mm9} + -r REFERENCE, --reference REFERENCE + the refgene info file downloaded from UCSC genome + browser.input this file only if your genome is neither + hg19 nor mm9 + -o OUTPUT, --output OUTPUT + the directory to store all the output files, if you + don't set this, files will be output into the current + directory + --bl whether or not use CTCF boundary to filter peaks + around a gene, DEFAULT=FALSE + --pn PEAKNUMBER the number of peaks you want to consider, DEFAULT=10000 + -n NAME, --name NAME this argument is used to name the result file.If not + set, the peakfile name will be used instead + -d DISTANCE, --distance DISTANCE + Set a number which unit is 'base'. It will get peaks + within this distance from gene TSS. default:100000 (100kb) - </help> - + ]]></help> + <expand macro="citations" /> </tool>
--- a/beta_plus.xml Tue Sep 16 12:51:50 2014 -0500 +++ b/beta_plus.xml Thu Mar 22 08:33:55 2018 -0400 @@ -1,85 +1,84 @@ <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0"> - <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description> - <macros> - <import>beta_macros.xml</import> - </macros> - <expand macro="requirements" /> - <command> - BETA plus - #include source=$common_opts# - #include source=$genome_opts# - #include source=$ref_genome_seq_opts# - #include source=$extended_opts# - --mn $motifs - &> $log && - mkdir -p $motifresult.extra_files_path && - cp BETA_OUTPUT/motifresult/betamotif.html $motifresult && - cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path && - cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path && - cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path - - </command> - <inputs> - <expand macro="common_params" /> - <expand macro="genome_params" /> - <expand macro="refGenomeSourceConditional" /> - <expand macro="extended_params" /> - <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" - help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs"> - <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" /> - </param> - </inputs> - <expand macro="stdio" /> - <outputs> - <data format="txt" name="log" label="Log of BETA plus"/> - <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> - <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> - <data format="tabular" name="downtargetsoutput" label="BETA direct targets 
prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> - <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> - <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> - <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" /> - <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" /> - <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" /> - <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" /> - <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" /> - <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/> - </outputs> - <tests> - <test> - <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> - <param name="distance" value="100000"/> - <param name="peaknumber" value="10000"/> - <param name="genomeName" value="hg19"/> - <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> - <param name="kind" value="LIM"/> - <param name="expreinfo" type="text" value="2,5,7"/> - <param name="gname2" value="Refseq"/> - <param name="diff_fdr" value="1.0"/> - <param name="diff_amount" value="0.5"/> - <param name="method" value="score"/> - <output name="log"> - <assert_contents> - <has_text_matching expression="Finished" /> - </assert_contents> - </output> - <output name="uptargetsoutput"> - 
<assert_contents> - <has_text_matching expression="NM_001002231" /> - </assert_contents> - </output> - <output name="downtargetsoutput"> - <assert_contents> - <has_text_matching expression="NM_001280" /> - </assert_contents> - </output> - <output name="differentialmotifs"> - <assert_contents> - <has_text_matching expression="CDX1\tHomeodomain Family" /> - </assert_contents> - </output> - </test> - </tests> - <help> + <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description> + <macros> + <import>beta_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <command><![CDATA[ + BETA plus + #include source=$common_opts# + #include source=$genome_opts# + #include source=$ref_genome_seq_opts# + #include source=$extended_opts# + --mn $motifs + &> $log && + mkdir -p $motifresult.extra_files_path && + cp BETA_OUTPUT/motifresult/betamotif.html $motifresult && + cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path && + cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path && + cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path + ]]></command> + <inputs> + <expand macro="common_params" /> + <expand macro="genome_params" /> + <expand macro="refGenomeSourceConditional" /> + <expand macro="extended_params" /> + <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" + help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs"> + <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" /> + </param> + </inputs> + <outputs> + <data format="txt" name="log" label="Log of BETA plus"/> + <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> + <data format="tabular" 
name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> + <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> + <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> + <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> + <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" /> + <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" /> + <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" /> + <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" /> + <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" /> + <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/> + </outputs> + <tests> + <test> + <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> + <param name="distance" value="100000"/> + <param name="peaknumber" value="10000"/> + <param name="genomeName" value="hg19"/> + <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> + <param name="kind" value="LIM"/> + <param name="expreinfo" value="2,5,7"/> + <param name="gname2" value="Refseq"/> + <param name="diff_fdr" value="1.0"/> + <param name="diff_amount" value="0.5"/> + <param 
name="method" value="score"/> + <output name="log"> + <assert_contents> + <has_text_matching expression="Finished" /> + </assert_contents> + </output> + <output name="uptargetsoutput"> + <assert_contents> + <has_text_matching expression="NM_001002231" /> + </assert_contents> + </output> + <output name="downtargetsoutput"> + <assert_contents> + <has_text_matching expression="NM_001280" /> + </assert_contents> + </output> + <output name="differentialmotifs"> + <assert_contents> + <has_text_matching expression="CDX1\tHomeodomain Family" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ ** BETA plus ** @EXTERNAL_DOCUMENTATION@ @@ -89,7 +88,7 @@ This tool annotates the given intervals and scores with genome features such as gene body. Predicts Direct targets of TF and the active/repressive function -prediction. Does motif analysis at targets region as well. +prediction. Does motif analysis at targets region as well. It's the major module in CEAS package which is written by Hyunjin Gene Shin, published in Bioinformatics (pubmed id:19689956). @@ -103,37 +102,37 @@ **Parameters** - **PEAKFILE file** contains peaks for the experiment in a bed - format file. Normally, it's produced by the peak calling tool. It's - required. + format file. Normally, it's produced by the peak calling tool. It's + required. - **EXPREFILE file** contains the differentially expressed genes in a tab - delimited text file. It's required. + delimited text file. It's required. - **Kind** The kind of your expression file format, LIM for LIMMA standard - output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, - BSF for BETA specific format, and O for other formats. + output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, + BSF for BETA specific format, and O for other formats. - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter. 
- **genomereference** Genome reference data with fasta format - **gname2** If this switch is on, gene or transcript IDs in files given - through -e will be considered as official gene symbols, DEFAULT=FALSE + through -e will be considered as official gene symbols, DEFAULT=FALSE - **EXPREINFO** is the columns info of the geneID, up/down status and statistical - values column of your expression data,NOTE: use a comma as a connector. - for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column - and FDR in 7 column. + values column of your expression data,NOTE: use a comma as a connector. + for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column + and FDR in 7 column. - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. - It is a tab delimited text file with gene annotation with refseq and gene symbol. - Input this file only if your genome is neither hg19 nor mm9. - profiling + It is a tab delimited text file with gene annotation with refseq and gene symbol. + Input this file only if your genome is neither hg19 nor mm9. + profiling - **OUTPUT** to specify the output files directory - **bl** Whether or not to use CTCF boundary file to get the contributed peaks - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome - is neither hg19 nor mm9. + is neither hg19 nor mm9. - **NAME** specify the name of the output files. - **DISTANCE** specify the distance which peaks within it will be considered. - **DIFF_FDR** specify the differential genes by the 3rd column in file input - via -e, genes with less than this value will be considered as the differentially - changed genes. + via -e, genes with less than this value will be considered as the differentially + changed genes. - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by - the 3rd column in file input via -e, genes ranked in the top # will be considered - as the differentially expressed genes.
+ the 3rd column in file input via -e, genes ranked in the top # will be considered + as the differentially expressed genes. - **CUTOFF** specify a cutoff of ks-test in the function prediction part @@ -143,73 +142,65 @@ :: - -h, --help show this help message and exit - -p PEAKFILE, --peakfile PEAKFILE - The bed format of peaks binding sites. (BETA support 3 - or 5 columns bed format, CHROM, START, END (NAME, - SCORE)) - -e EXPREFILE, --diff_expr EXPREFILE - The differential expression file get from limma for - MicroArray ddata and cuffdiff for RNAseq data - -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} - The kind of your expression file,this is required,it - can be LIM, CUF, BSF, O. LIM for LIMMA standard - format. CUF for CUFDIFF standard format, BSF for BETA - specific format and O for other formats, if is 'O', - columns infor required via --info - -g {hg19,mm9}, --genome {hg19,mm9} - Specify your species, hg19, mm9 - --gs GENOMEREFERNCE GenomeReference file with fasta format - --gname2 If this switch is on, gene or transcript IDs in files - given through -e will be considered as official gene - symbols, DEFAULT=FALSE - --info EXPREINFO Specify the geneID, up/down status and statistcal - values column of your expression data,NOTE: use a - comma as an connector. 
for example: 2,5,7 means geneID - in the 2nd column, Tscore in 5th column and FDR in 7 - column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff - and 1,2,3 for BETA specific format - -r REFERENCE, --reference REFERENCE - The refgene info file downloaded from UCSC genome - browser.input this file only if your genome is neither - hg19 nor mm9 - -o OUTPUT, --output OUTPUT - The directory to store all the output files, if you - don't set this, files will be output into the current - directory - --bl Whether or not use CTCF boundary to filter peaks - around a gene, DEFAULT=FALSE - --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you - set --bl and the genome is neither hg19 nor mm9 - --pn PEAKNUMBER The number of peaks you want to consider, - DEFAULT=10000 - --method {score,distance} - Define the method to do the TF/CR function prediction, - score for regulatory potential, distance for the - distance to the proximal binding peak. DEFAULT:SCORE - -n NAME, --name NAME This argument is used to name the result file.If not - set, the peakfile name will be used instead - -d DISTANCE, --distance DISTANCE - Set a number which unit is 'base'. It will get peaks - within this distance from gene TSS. default:100000 - (100kb) - --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most - significant differential expressed genes by FDR, - DEFAULT = 1, that is select all the genes - --da DIFF_AMOUNT Get the most significant differential expressed genes - by the percentage(0-1) or number(larger than 1)Input a - number between 0-1, the rank based on fdr for example, - 2000, so that the script will only consider top 2000 - genes as the differentially expressed genes. DEFAULT = - 0.5, that is select top 50 percent genes of up and - down seprately. 
NOTE: if you want to use diff_fdr, - please set this parameter to 1, otherwise it will get - the intersection of these two parameters - -c CUTOFF, --cutoff CUTOFF - Input a number between 0~1 as a threshold to select - the closer target gene list(up regulate or down - regulate or both) with the p value was called by one - side ks-test, DEFAULT = 0.001 + -h, --help show this help message and exit + -p PEAKFILE, --peakfile PEAKFILE The bed format of peaks binding sites. (BETA support 3 + or 5 columns bed format, CHROM, START, END (NAME, + SCORE)) + -e EXPREFILE, --diff_expr EXPREFILE The differential expression file get from limma for + MicroArray ddata and cuffdiff for RNAseq data + -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} The kind of your expression file,this is required,it + can be LIM, CUF, BSF, O. LIM for LIMMA standard + format. CUF for CUFDIFF standard format, BSF for BETA + specific format and O for other formats, if is 'O', + columns infor required via --info + -g {hg19,mm9}, --genome {hg19,mm9} Specify your species, hg19, mm9 + --gs GENOMEREFERNCE GenomeReference file with fasta format + --gname2 If this switch is on, gene or transcript IDs in files + given through -e will be considered as official gene + symbols, DEFAULT=FALSE + --info EXPREINFO Specify the geneID, up/down status and statistcal + values column of your expression data,NOTE: use a + comma as an connector. 
for example: 2,5,7 means geneID + in the 2nd column, Tscore in 5th column and FDR in 7 + column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff + and 1,2,3 for BETA specific format + -r REFERENCE, --reference REFERENCE The refgene info file downloaded from UCSC genome + browser.input this file only if your genome is neither + hg19 nor mm9 + -o OUTPUT, --output OUTPUT The directory to store all the output files, if you + don't set this, files will be output into the current + directory + --bl Whether or not use CTCF boundary to filter peaks + around a gene, DEFAULT=FALSE + --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you + set --bl and the genome is neither hg19 nor mm9 + --pn PEAKNUMBER The number of peaks you want to consider, + DEFAULT=10000 + --method {score,distance} Define the method to do the TF/CR function prediction, + score for regulatory potential, distance for the + distance to the proximal binding peak. DEFAULT:SCORE + -n NAME, --name NAME This argument is used to name the result file.If not + set, the peakfile name will be used instead + -d DISTANCE, --distance DISTANCE Set a number which unit is 'base'. It will get peaks + within this distance from gene TSS. default:100000 + (100kb) + --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most + significant differential expressed genes by FDR, + DEFAULT = 1, that is select all the genes + --da DIFF_AMOUNT Get the most significant differential expressed genes + by the percentage(0-1) or number(larger than 1)Input a + number between 0-1, the rank based on fdr for example, + 2000, so that the script will only consider top 2000 + genes as the differentially expressed genes. DEFAULT = + 0.5, that is select top 50 percent genes of up and + down seprately. 
NOTE: if you want to use diff_fdr, + please set this parameter to 1, otherwise it will get + the intersection of these two parameters + -c CUTOFF, --cutoff CUTOFF Input a number between 0~1 as a threshold to select + the closer target gene list(up regulate or down + regulate or both) with the p value was called by one + side ks-test, DEFAULT = 0.001 - </help> + ]]></help> + <expand macro="citations" /> </tool>
--- a/tool_dependencies.xml Tue Sep 16 12:51:50 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="numpy" version="1.7.1"> - <repository changeset_revision="55a7a5e9d63f" name="package_numpy_1_7" owner="devteam" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="R" version="2.15.0"> - <repository changeset_revision="3a70cdc41d21" name="package_r_2_15_0" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="beta" version="1.0.6"> - <repository changeset_revision="1874a464e5d6" name="package_beta_1_0_6" owner="jjohnson" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>