Mercurial > repos > jjohnson > cistrome_ceas
view ceas.xml @ 2:45e094f8858f
Add SitePro tool
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Mon, 15 Dec 2014 15:31:58 -0600 |
parents | 4e52505adaa6 |
children |
line wrap: on
line source
<tool name="CEAS: Enrichment on chromosome and annotation" id="ceas_ceas" version="0.1.0">
  <!--
    Galaxy wrapper for the CEAS annotation step. The command below runs either
    "ceas" or "ceasBW" (for bigwig input), then renders the generated R script
    and copies the resulting PDF or joined PNG into the Galaxy output dataset.

    The macros expanded here (requirements_bx, ceasdb_ref, stdio), the
    $gtpath_ceasdb_ref template variable and the @...@ help tokens are not
    defined in this file; presumably they are provided by ceas_macros.xml.
    TODO confirm against that file.
  -->
  <description>Annotate the given intervals and scores with genome features such as gene body</description>
  <macros>
    <import>ceas_macros.xml</import>
  </macros>
  <expand macro="requirements_bx" />
  <!-- The template is wrapped in CDATA because it contains shell operators built from ampersands and angle brackets. -->
  <command><![CDATA[
#import os.path
## Optional gene groups: the first gene list plus every entry of the "more"
## repeat are joined with commas for --gn-groups, and their labels are joined
## the same way for --gn-group-names.
#if $genegroup.enable == "yes"
  #set $gngroups = str($genegroup.genelist)
  #set $gnlabels = str($genegroup.label)
  #for $m in $genegroup.more
    #set $gngroups = $gngroups+","+str($m.genelist)
    #set $gnlabels = $gnlabels+","+str($m.label)
  #end for
  #set $gngroupspara = "--gn-groups="+$gngroups
  ## The idtype select value is either empty or " --gname2", so it is appended
  ## verbatim after the quoted label list.
  #set $gnlabelspara = "--gn-group-names='"+$gnlabels+"'"+str($genegroup.idtype.value)
#else
  #set $gngroupspara = ""
  #set $gnlabelspara = ""
#end if
#set $sizes = str($lowersize) + "," + str($middlesize) + "," + str($uppersize)
#set $bisizes = str($lowerbisize) + "," + str($upperbisize)
## Pick the executable from the signal file type; bigwig input goes through
## ceasBW, which is additionally given a <dbkey>.len file via -l.
#if $wfile:
  #if $wfile.extension == "bigwig"
    ceasBW -w $wfile
    #set $dbkey = $wfile.metadata.dbkey
    #set $length_file = os.path.join( os.path.abspath($__app__.config.len_file_path), $dbkey+".len" )
    -l $length_file
  #else
    ceas -w $wfile
  #end if
#else
  ceas
#end if
#if $bfile:
  -b $bfile
#end if
#include source=$gtpath_ceasdb_ref#
--span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes --rel-dist=$reldist $gngroupspara $gnlabelspara --name=$name
## Portable form of the bash-only "&>" redirect: stdout and stderr both go to the log dataset.
> $log 2>&1
&& Rscript ceas_out.R
&&
#if str($imagetype) == "PNG":
  convert ceas_out.pdf ceas_out.png && convert ceas_out-*.png -append ceas_out_joint.png && cp ceas_out_joint.png $output
#else
  cp ceas_out.pdf $output
#end if
  ]]></command>
  <inputs>
    <expand macro="ceasdb_ref" />
    <!-- Fixed experiment name; the command refers to the resulting ceas_out.R / ceas_out.pdf by this name. -->
    <param name="name" type="hidden" value="ceas_out"/>
    <param format="wig,bigwig" name="wfile" type="data" label="wig / bigwig file" optional="true">
      <validator type="unspecified_build" />
    </param>
    <param format="bed" name="bfile" type="data" label="BED file(maximum 100000 lines)" optional="true">
      <validator type="unspecified_build" />
    </param>
    <param name="span" type="integer" label="Span" value="3000">
      <validator type="in_range" max="1000000" min="100" message="The Span is out of range, the parameter has to be between 100 to 1000000" />
    </param>
    <param name="pfres" type="integer" label="Profiling resolution" value="50">
      <validator type="in_range" max="1000" min="10" message="The Profiling Resolution is out of range, the parameter has to be between 10 to 1000" />
    </param>
    <!-- The five size parameters are integers (not free text) so the numeric in_range validators apply to numeric input. -->
    <param name="lowersize" type="integer" label="Promoter/downstream lower-interval" value="1000">
      <validator type="in_range" max="10000" min="100" message="The lower-interval is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="middlesize" type="integer" label="Promoter/downstream middle-interval" value="2000">
      <validator type="in_range" max="10000" min="100" message="The middle-interval is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="uppersize" type="integer" label="Promoter/downstream upper-interval" value="3000">
      <validator type="in_range" max="10000" min="100" message="The upper-interval is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="lowerbisize" type="integer" label="Bi-Promoter lower range" value="2500">
      <validator type="in_range" max="10000" min="100" message="The lower-range is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="upperbisize" type="integer" label="Bi-Promoter upper range" value="5000">
      <validator type="in_range" max="10000" min="100" message="The upper-range is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="reldist" type="integer" label="Relative distance" value="3000">
      <validator type="in_range" max="10000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param type="select" name="imagetype" display="radio" label="Image Type">
      <option value="PNG">PNG format</option>
      <option value="PDF">PDF format</option>
    </param>
    <conditional name="genegroup">
      <param name="enable" type="select" label="Specify gene list in the signal profiling" force_select="true">
        <option value="no">No</option>
        <option value="yes">Yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <!-- Gene lists use the "txt" datatype extension, the same one the log output below declares. -->
        <param format="txt" name="genelist" type="data" label="Gene List" optional="false"/>
        <param name="label" type="text" label="Gene List Label" optional="false" />
        <repeat name="more" title="Gene Lists">
          <param format="txt" name="genelist" type="data" label="Gene List" optional="false"/>
          <param name="label" type="text" label="Gene List Label" optional="false" />
        </repeat>
        <!-- The option values are spliced directly into the command line, hence the leading space. -->
        <param name="idtype" type="select" label="Are they ...">
          <option value="">refseq</option>
          <option value=" --gname2">genesymbol</option>
        </param>
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!-- PNG by default; switched to PDF when that image type is selected. -->
    <data format="png" name="output">
      <change_format>
        <when input="imagetype" value="PDF" format="pdf" />
      </change_format>
    </data>
    <data format="txt" name="log" label="ceas job log" />
  </outputs>
  <expand macro="stdio" />
  <tests>
    <!-- wig input, handled by the "ceas" branch of the command -->
    <test>
      <param name="wfile" value="treatment.wig" />
      <param name="bfile" value="peaks.bed" />
      <param name="span" value="3000" />
      <param name="pfres" value="50" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="2500" />
      <param name="upperbisize" value="5000" />
      <param name="reldist" value="3000" />
      <param name="refsrc" value="history"/>
      <param name="gdb" ftype="ceasdb" value="mm9.refGene.ceasdb"/>
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="log">
        <assert_contents>
          <has_text_matching expression="See ceas_out.pdf for the graphical results of CEAS" />
        </assert_contents>
      </output>
    </test>
    <!-- bigwig input, handled by the "ceasBW" branch of the command -->
    <test>
      <param name="wfile" value="treatment.bigwig" />
      <param name="bfile" value="peaks.bed" />
      <param name="span" value="3000" />
      <param name="pfres" value="50" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="2500" />
      <param name="upperbisize" value="5000" />
      <param name="reldist" value="3000" />
      <param name="refsrc" value="history"/>
      <param name="gdb" ftype="ceasdb" value="mm9.refGene.ceasdb"/>
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="log">
        <assert_contents>
          <has_text_matching expression="See ceas_out.pdf for the graphical results of CEAS" />
        </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[
**CEAS**

This tool annotates the given intervals and scores with genome
features such as gene body. It's the major module in CEAS package
which is written by Hyunjin Gene Shin, published in Bioinformatics
(pubmed id:19689956).

@EXTERNAL_DOCUMENTATION@

@CITATION_SECTION@

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **WIGGLE file** contains the scores for the experiment in a wiggle
  format file. Normally, it's produced by the peak calling tool. It's
  optional.
- **BED file** contains the peak locations for the experiment in a BED
  format file.
- **Span** from TSS and TTS in the gene-centered annotation. ChIP
  regions within this range from TSS and TTS are considered when
  calculating the coverage rates in promoter and downstream.
- **Profiling resolution** is the WIGGLE profiling resolution.
- **Promoter/downstream intervals** for ChIP region annotation are
  comma-separated three values or a single value can be given. If a
  single value is given, it will be segmented into three equal
  fractions (ie, 3000 is equivalent to 1000,2000,3000)
- **BiPromoter ranges** is for ChIP region annotation. It's
  comma-separated two values or a single value can be given. If a
  single value is given, it will be segmented into two equal fractions
  (ie, 5000 is equivalent to 2500,5000)
- **Relative distance** is the relative distance to TSS/TTS in WIGGLE
  file profiling
- **Genome Annotation Version** to specify the annotations according to
  the data set. The annotations are downloaded from UCSC genome site.
- **Image type** specify the output image format, either in PNG or in
  PDF format.
- If **Specify gene list in the signal profiling** is set, you can
  specify different gene groups for CEAS to put them together in the
  profile figure. You need to select several **Gene List** files from
  history which contains the RefSeq ids or Gene Symbols for each row,
  and **Gene List Label** for each gene list file.

-----

**Outputs**

- **PNG/PDF file** is the result for CEAS analysis, containing 5 pages.
- **LOG file** for job log. If you see errors, please attach this in
  the bug report

-----

**script parameter list of CEAS 0.9.8**

::

  Options:
    --version             show program's version number and exit
    -h, --help            Show this help message and exit.
    -b BED, --bed=BED     BED file of ChIP regions.
    -w WIG, --wig=WIG     WIG file for either wig profiling or genome background
                          annotation. WARNING: --bg flag must be set for genome
                          background re-annotation.
    -e EBED, --ebed=EBED  BED file of extra regions of interest (eg, non-coding
                          regions)
    -g GDB, --gt=GDB      Gene annotation table (eg, a refGene table in sqlite3
                          db format provided through the CEAS web,
                          http://liulab.dfci.harvard.edu/CEAS/download.html).
    --name=NAME           Experiment name. This will be used to name the output
                          files. If an experiment name is not given, the stem of
                          the input BED file name will be used instead (eg, if
                          'peaks.bed', 'peaks' will be used as a name.)
    --sizes=SIZES         Promoter (also dowsntream) sizes for ChIP region
                          annotation. Comma-separated three values or a single
                          value can be given. If a single value is given, it
                          will be segmented into three equal fractions (ie, 3000
                          is equivalent to 1000,2000,3000), DEFAULT:
                          1000,2000,3000. WARNING: Values > 10000bp are
                          automatically set to 10000bp.
    --bisizes=BISIZES     Bidirectional-promoter sizes for ChIP region
                          annotation Comma-separated two values or a single
                          value can be given. If a single value is given, it
                          will be segmented into two equal fractions (ie, 5000
                          is equivalent to 2500,5000) DEFAULT: 2500,5000bp.
                          WARNING: Values > 20000bp are automatically set to
                          20000bp.
    --bg                  Run genome BG annotation again. WARNING: This flag is
                          effective only if a WIG file is given through -w
                          (--wig). Otherwise, ignored.
    --span=SPAN           Span from TSS and TTS in the gene-centered annotation.
                          ChIP regions within this range from TSS and TTS are
                          considered when calculating the coverage rates in
                          promoter and downstream, DEFAULT=3000bp
    --pf-res=PF_RES       Wig profiling resolution, DEFAULT: 50bp. WARNING:
                          Value smaller than the wig interval (resolution) may
                          cause aliasing error.
    --rel-dist=REL_DIST   Relative distance to TSS/TTS in wig profiling,
                          DEFAULT: 3000bp
    --gn-groups=GN_GROUPS
                          Gene-groups of particular interest in wig profiling.
                          Each gene group file must have gene names in the 1st
                          column. The file names are separated by commas w/ no
                          space (eg, --gn-groups=top10.txt,bottom10.txt)
    --gn-group-names=GN_NAMES
                          The names of the gene groups in --gn-groups. The gene
                          group names are separated by commas. (eg, --gn-group-
                          names='top 10%,bottom 10%'). These group names appear
                          in the legends of the wig profiling plots. If no group
                          names given, the groups are represented as 'Group 1,
                          Group2,...Group n'.
    --gname2              Whether or not use the 'name2' column of the gene
                          annotation table when reading the gene IDs in the
                          files given through --gn-groups. This flag is
                          meaningful only with --gn-groups.
  ]]></help>
</tool>