Mercurial > repos > ryotas > cistrome
changeset 0:97bd5bb4204c default tip
commit
author | ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp> |
---|---|
date | Tue, 30 Dec 2014 18:45:34 +0900 |
parents | |
children | |
files | ceas.xml fcfunc.py macs.xml peak2gene.xml |
diffstat | 4 files changed, 1366 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- ceas.xml -->
<tool name="CEAS: Enrichment on chromosome and annotation" id="ceas_ceas">
  <!--
    Galaxy wrapper for CEAS.  The Cheetah template in <configfile name="shscript">
    is rendered into a bash script which validates the inputs, runs ceas (or
    ceasBW for bigwig input), renders the generated R script and moves the
    resulting PDF/PNG to $output.  The ceas console output is written to $log.
  -->
  <description>Annotate the given intervals and scores with genome features such as gene body</description>
  <command interpreter="command">/bin/bash $shscript </command>
  <inputs>
    <!-- NOTE(review): the script below has a branch for the "bigwig" extension, but this
         parameter only offers "wig" datasets, so that branch looks unreachable; confirm
         whether format="wig,bigwig" was intended. -->
    <param format="wig" name="wfile" type="data" label="wig file" optional="true">
      <validator type="unspecified_build" />
    </param>
    <!-- The limit in the label matches the line-count guard in the script (500000). -->
    <param format="bed" name="bfile" type="data" label="BED file(maximum 500000 lines)" optional="true">
      <validator type="unspecified_build" />
    </param>
    <param name="span" type="integer" label="Span" value="3000">
      <validator type="in_range" max="1000000" min="100" message="The Span is out of range, the parameter has to be between 100 to 1000000" />
    </param>
    <param name="pfres" type="integer" label="Profiling resolution" value="50">
      <validator type="in_range" max="1000" min="10" message="The Profiling Resolution is out of range, the parameter has to be between 10 to 1000" />
    </param>
    <!-- lower/middle/upper sizes are joined with commas and passed as sizes option -->
    <param name="lowersize" type="text" label="Promoter/downstream lower-interval" value="1000" >
      <validator type="in_range" max="10000" min="100" message="The lower-interval is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="middlesize" type="text" label="Promoter/downstream middle-interval" value="2000" >
      <validator type="in_range" max="10000" min="100" message="The middle-interval is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="uppersize" type="text" label="Promoter/downstream upper-interval" value="3000">
      <validator type="in_range" max="10000" min="100" message="The upper-interval is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <!-- lower/upper bi-promoter ranges are joined with a comma and passed as bisizes option -->
    <param name="lowerbisize" type="text" label="Bi-Promoter lower range" value="2500">
      <validator type="in_range" max="10000" min="100" message="The lower-range is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="upperbisize" type="text" label="Bi-Promoter upper range" value="5000">
      <validator type="in_range" max="10000" min="100" message="The upper-range is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param name="reldist" type="integer" label="Relative distance" value="3000">
      <validator type="in_range" max="10000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" />
    </param>
    <param type="select" name="imagetype" display="radio" label="Image Type">
      <option value="PNG">PNG format</option>
      <option value="PDF">PDF format</option>
    </param>

    <conditional name="genegroup">
      <param name="enable" type="select" label="Specify gene list in the signal profiling" force_select="true">
        <option value="no">No</option>
        <option value="yes">Yes</option>
      </param>

      <when value="no">
      </when>

      <when value="yes">
        <param format="text" name="genelist" type="data" label="Gene List" optional="false"/>
        <param name="label" type="text" label="Gene List Label" optional="false" />
        <repeat name="more" title="Gene Lists">
          <param format="text" name="genelist" type="data" label="Gene List" optional="false"/>
          <param name="label" type="text" label="Gene List Label" optional="false" />
        </repeat>
        <!-- The option value is appended verbatim to the ceas command line. -->
        <param name="idtype" type="select" label="Are they ...">
          <option value="">refseq</option>
          <option value=" --gname2">genesymbol</option>
        </param>
      </when>
    </conditional>

  </inputs>

  <outputs>
    <data format="png" name="output">
      <change_format>
        <when input="imagetype" value="PDF" format="pdf" />
      </change_format>
    </data>
    <data format="txt" name="log" label="ceas job log" />
  </outputs>

  <configfiles>
    <configfile name="shscript">
#!/bin/bash
#import os

## Shell metacharacters are produced through chr() so that the template stays
## well-formed XML and the dollar sign is not read as a Cheetah placeholder.
#set $dollar = chr(36)
#set $gt = chr(62)
#set $lt = chr(60)
#set $ad = chr(38)

## At least one of the two inputs must be supplied.
if [ $bfile == "None" ];then
    if [ $wfile == "None" ];then
        echo "Either wig or bed file is required!" ${gt}${ad}2
        exit 1;
    fi
fi


#if $bfile != None
    dbkey=$bfile.metadata.dbkey
#elif $wfile != None
    dbkey=$wfile.metadata.dbkey
#end if


##REMOVING WIG VALIDATORS
##if [ $wfile != "None" ];then
##    wigsize=`du -b $wfile | awk '{print ${dollar}1}'`
##
##    if [[ ${dollar}wigsize -gt 2097152000 ]];then
##        echo "wig file is too big! 2G is the maximum!" ${gt}${ad}2
##        exit;
##    fi
##fi


## Size and label checks; only the first gene list and its label are inspected here.
#if $genegroup.enable == "yes"
#set $gngroups = str($genegroup.genelist)
#set $gnlabels = str($genegroup.label)
lines=`wc -l $gngroups | tail -1 | awk '{print ${dollar}1}'`
if [[ ${dollar}lines -gt 100000 ]];then
    echo "Total lines of the gene list has to between 100 and 100000!" ${gt}${ad}2;
    exit 1;
fi
if [[ ${dollar}lines -lt 100 ]];then
    echo "Total lines of the gene list has to between 100 and 100000!" ${gt}${ad}2;
    exit 1;
fi
genelength=`echo $gnlabels |awk '{print length($0)}'`
if [[ ${dollar}genelength -gt 255 ]];then
    echo "Gene List Label exceed the limit of 255 characters!" ${gt}${ad}2;
    exit 1;
fi
#end if
## Build the comma-separated gene group options from the first list plus every repeat entry.
#if $genegroup.enable == "yes"
#set $gngroups = str($genegroup.genelist)
#set $gnlabels = str($genegroup.label)
#for $m in $genegroup.more
#set $gngroups = $gngroups+","+str($m.genelist)
#set $gnlabels = $gnlabels+","+str($m.label)
#end for
#set $gngroupspara = "--gn-groups="+$gngroups
#set $gnlabelspara = "--gn-group-names='"+$gnlabels+"'"+str($genegroup.idtype.value)
#else
#set $gngroupspara = ""
#set $gnlabelspara = ""
#end if

#set $path = $os.path.abspath($__app__.config.tool_path)

WIG=""
if [ $wfile != "None" ]; then #were we sent in a value for the wig file?
    WIG="-w $wfile"
fi

BED=""
if [ $bfile != "None" ]; then #were we sent in a value for the BED file?
    BED="-b $bfile"
    lines=`wc -l $bfile | tail -1 | awk '{print ${dollar}1}'`
    format=`$path/validation/fcfunc.py $bfile`

    if [[ ${dollar}lines -gt 500000 ]];then
        echo "BED file is too big! 500K lines are the maximum!" ${gt}${ad}2
        exit 1;
    fi
    if [[ ${dollar}format != "passed" ]]; then
        echo ${dollar}format ${gt}${ad}2
        exit 1;
    fi
fi

#set $sizes = str($lowersize) + "," + str($middlesize) + "," + str($uppersize)
#set $bisizes = str($lowerbisize) + "," + str($upperbisize)

#set $gtpath = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "ceaslib", "GeneTable", $dbkey )
#set $length_file = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "chromLen", $dbkey+".len" )


#if str($wfile) != "None"
#if $wfile.extension == "wig"
ceas ${dollar}WIG ${dollar}BED --span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes \
    --rel-dist=$reldist -g $gtpath $gngroupspara $gnlabelspara --name=ceas_out ${ad}${gt} $log
#elif $wfile.extension == "bigwig"
ceasBW ${dollar}WIG ${dollar}BED --span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes -l $length_file\
    --rel-dist=$reldist -g $gtpath $gngroupspara $gnlabelspara --name=ceas_out ${ad}${gt} $log
#end if
#else
ceas ${dollar}WIG ${dollar}BED --span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes \
    --rel-dist=$reldist -g $gtpath $gngroupspara $gnlabelspara --name=ceas_out ${ad}${gt} $log
#end if


R --vanilla ${lt} ceas_out.R ${ad}${gt}/dev/null
if [ $imagetype == "PNG" ]; then
convert ceas_out.pdf ceas_out.png
convert ceas_out-*.png -append ceas_out_joint.png
mv ceas_out_joint.png $output
else
mv ceas_out.pdf $output
fi

    </configfile>
  </configfiles>
  <!-- In every test the image is compared through "output" and the job log through "log". -->
  <tests>
    <test maxseconds="3600" name="CEAS_1">
      <param name="wfile" value="wiggle.wig" />
      <param name="bfile" value="bedfile.bed" />
      <param name="span" value="3000" />
      <param name="pfres" value="50" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="2500" />
      <param name="upperbisize" value="5000" />
      <param name="reldist" value="3000" />
      <param name="genome" value="hg18" />
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="output" file="ceas_1/ceas_1.pdf" />
      <output name="log" file="ceas_1/ceas_1.log" lines_diff="200" />
    </test>
    <test maxseconds="3600" name="CEAS_2">
      <param name="wfile" value="wiggle.wig" />
      <param name="bfile" value="bedfile.bed" />
      <param name="span" value="1000" />
      <param name="pfres" value="250" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="2500" />
      <param name="upperbisize" value="5000" />
      <param name="reldist" value="3000" />
      <param name="genome" value="hg18" />
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="output" file="ceas_2/ceas_2.pdf" />
      <output name="log" file="ceas_2/ceas_2.log" lines_diff="200" />
    </test>
    <test maxseconds="3600" name="CEAS_3">
      <param name="wfile" value="wiggle.wig" />
      <param name="bfile" value="bedfile.bed" />
      <param name="span" value="3000" />
      <param name="pfres" value="150" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="5000" />
      <param name="upperbisize" value="10000" />
      <param name="reldist" value="3000" />
      <param name="genome" value="hg18" />
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="output" file="ceas_3/ceas_3.pdf" />
      <output name="log" file="ceas_3/ceas_3.log" lines_diff="200" />
    </test>
    <test maxseconds="3600" name="CEAS_4">
      <param name="wfile" value="wiggle.wig" />
      <param name="bfile" value="bedfile.bed" />
      <param name="span" value="3000" />
      <param name="pfres" value="500" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="5000" />
      <param name="upperbisize" value="10000" />
      <param name="reldist" value="3000" />
      <param name="genome" value="hg18" />
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="output" file="ceas_4/ceas_4.pdf" />
      <output name="log" file="ceas_4/ceas_4.log" lines_diff="200" />
    </test>
    <test maxseconds="3600" name="CEAS_5">
      <param name="wfile" value="wiggle.wig" />
      <param name="bfile" value="bedfile.bed" />
      <param name="span" value="6000" />
      <param name="pfres" value="500" />
      <param name="lowersize" value="1000" />
      <param name="middlesize" value="2000" />
      <param name="uppersize" value="3000" />
      <param name="lowerbisize" value="5000" />
      <param name="upperbisize" value="10000" />
      <param name="reldist" value="3000" />
      <param name="genome" value="hg18" />
      <param name="imagetype" value="PDF" />
      <param name="enable" value="no" />
      <output name="output" file="ceas_5/ceas_5.pdf" />
      <output name="log" file="ceas_5/ceas_5.log" lines_diff="200" />
    </test>
  </tests>
  <help>
This tool annotates the given intervals and scores with genome
features such as gene body. It's the major module in CEAS package
which is written by Hyunjin Gene Shin, published in Bioinformatics
(pubmed id:19689956).

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **WIGGLE file** contains the scores for the experiment in a wiggle
  format file. Normally, it's produced by the peak calling tool. It's
  optional.
- **BED file** contains the peak locations for the experiment in a BED
  format file. Either a WIGGLE file or a BED file has to be given.
- **Span** from TSS and TTS in the gene-centered annotation. ChIP
  regions within this range from TSS and TTS are considered when
  calculating the coverage rates in promoter and downstream.
- **Profiling resolution** is the WIGGLE profiling resolution.
- **Promoter/downstream intervals** for ChIP region annotation are
  comma-separated three values or a single value can be given. If a
  single value is given, it will be segmented into three equal
  fractions (ie, 3000 is equivalent to 1000,2000,3000)
- **BiPromoter ranges** is for ChIP region annotation. It's
  comma-separated two values or a single value can be given. If a
  single value is given, it will be segmented into two equal fractions
  (ie, 5000 is equivalent to 2500,5000)
- **Relative distance** is the relative distance to TSS/TTS in WIGGLE file
  profiling
- **Genome annotation** is selected from the genome build (dbkey) of the
  input data set. The annotations are downloaded from UCSC genome site.
- **Image type** specify the output image format, either in PNG or in
  PDF format.
- If **Specify gene list in the signal profiling** is set, you can specify
  different gene groups for CEAS to put them together in the profile
  figure. You need to select several **Gene List** files from history which
  contains the RefSeq ids or Gene Symbols for each row, and
  **Gene List Label** for each gene list file.

-----

**Outputs**

- **PNG/PDF file** is the result for CEAS analysis, containing 5 pages.
- **LOG file** for job log. If you see errors, please attach this to
  the bug report

-----

**script parameter list of CEAS 0.9.8**

Options:
  --version             show program's version number and exit
  -h, --help            Show this help message and exit.
  -b BED, --bed=BED     BED file of ChIP regions.
  -w WIG, --wig=WIG     WIG file for either wig profiling or genome background
                        annotation. WARNING: --bg flag must be set for genome
                        background re-annotation.
  -e EBED, --ebed=EBED  BED file of extra regions of interest (eg, non-coding
                        regions)
  -g GDB, --gt=GDB      Gene annotation table (eg, a refGene table in sqlite3
                        db format provided through the CEAS web,
                        http://liulab.dfci.harvard.edu/CEAS/download.html).
  --name=NAME           Experiment name. This will be used to name the output
                        files. If an experiment name is not given, the stem of
                        the input BED file name will be used instead (eg, if
                        'peaks.bed', 'peaks' will be used as a name.)
  --sizes=SIZES         Promoter (also downstream) sizes for ChIP region
                        annotation. Comma-separated three values or a single
                        value can be given. If a single value is given, it
                        will be segmented into three equal fractions (ie, 3000
                        is equivalent to 1000,2000,3000), DEFAULT:
                        1000,2000,3000. WARNING: Values > 10000bp are
                        automatically set to 10000bp.
  --bisizes=BISIZES     Bidirectional-promoter sizes for ChIP region
                        annotation Comma-separated two values or a single
                        value can be given. If a single value is given, it
                        will be segmented into two equal fractions (ie, 5000
                        is equivalent to 2500,5000) DEFAULT: 2500,5000bp.
                        WARNING: Values > 20000bp are automatically set to
                        20000bp.
  --bg                  Run genome BG annotation again. WARNING: This flag is
                        effective only if a WIG file is given through -w
                        (--wig). Otherwise, ignored.
  --span=SPAN           Span from TSS and TTS in the gene-centered annotation.
                        ChIP regions within this range from TSS and TTS are
                        considered when calculating the coverage rates in
                        promoter and downstream, DEFAULT=3000bp
  --pf-res=PF_RES       Wig profiling resolution, DEFAULT: 50bp. WARNING:
                        Value smaller than the wig interval (resolution) may
                        cause aliasing error.
  --rel-dist=REL_DIST   Relative distance to TSS/TTS in wig profiling,
                        DEFAULT: 3000bp
  --gn-groups=GN_GROUPS
                        Gene-groups of particular interest in wig profiling.
                        Each gene group file must have gene names in the 1st
                        column. The file names are separated by commas w/ no
                        space (eg, --gn-groups=top10.txt,bottom10.txt)
  --gn-group-names=GN_NAMES
                        The names of the gene groups in --gn-groups. The gene
                        group names are separated by commas. (eg,
                        --gn-group-names='top 10%,bottom 10%'). These group
                        names appear in the legends of the wig profiling
                        plots. If no group names given, the groups are
                        represented as 'Group 1, Group2,...Group n'.
  --gname2              Whether or not use the 'name2' column of the gene
                        annotation table when reading the gene IDs in the
                        files given through --gn-groups. This flag is
                        meaningful only with --gn-groups.

  </help>

</tool>
#!/usr/bin/env python
"""fcfunc.py: quick sanity check of the leading lines of a BED file.

Usage: fcfunc.py BED_FILE

When no problem is found the script prints ``passed`` on stdout.  Otherwise it
prints a one-line ``Error with BED file format: ...`` message on stdout and
stops.  The result is reported through stdout rather than through the exit
status, so a caller can capture the output and compare it with ``passed``.
"""

import sys

# Only lines whose 1-based line number is below this value are inspected.
MAX_CHECKED_LINES = 20


def _fail(message):
    """Write a validation error to stdout and terminate via SystemExit.

    The exit status is deliberately left at its default: callers read the
    message from stdout instead of checking the return code.
    """
    sys.stdout.write('Error with BED file format: %s\n' % message)
    sys.exit()


def _is_item_rgb(value):
    """Return True for '0' or an 'R,G,B' triple with components in 0..255."""
    if value == '0':
        return True
    parts = value.split(',')
    return len(parts) == 3 and all(p.isdigit() and int(p) <= 255 for p in parts)


def checkFormat(file):
    """Validate the first lines of the BED file at path ``file``.

    Lines that are empty or contain ``track`` / ``browse`` are skipped.  Every
    other line with a line number below ``MAX_CHECKED_LINES`` must have at
    least three whitespace-separated fields, the same number of fields as the
    previously checked line, and well-formed values in the columns that are
    present:

    * fields 2, 3, 7, 8 and 10 are non-negative integers, with field 3 not
      smaller than field 2;
    * field 6 is ``+`` or ``-``;
    * field 9 is ``0`` or an ``R,G,B`` value;
    * fields 11 and 12 are comma-separated integer lists whose length equals
      field 10.

    Returns None when every inspected line is acceptable.  On the first
    problem an error line is written to stdout and SystemExit is raised.
    """
    prev_len = 0  # field count of the previously checked line (0 = none yet)

    with open(file) as handle:
        for n, line in enumerate(handle, 1):
            if n >= MAX_CHECKED_LINES:
                # Nothing past this point is inspected; no need to read on.
                break

            line = line.rstrip('\n\t ')
            if not line or 'track' in line or 'browse' in line:
                continue

            row = line.split()
            rowlen = len(row)

            if rowlen < 3:
                _fail('need atleast 3 fields per line')
            if prev_len != 0 and rowlen != prev_len:
                _fail('number of fields do not match')

            block_count = None
            for m, value in enumerate(row):
                field = m + 1
                if m in (1, 2, 6, 7, 9):
                    if not value.isdigit():
                        _fail('line%d,field%d need a number' % (n, field))
                    # Compare only after both coordinates are known to be
                    # numeric, so bad input yields a message, not a traceback.
                    if m == 2 and int(value) < int(row[1]):
                        _fail('field 3 needs to be greater than field 2')
                    if m == 9:
                        block_count = int(value)
                elif m == 5:
                    if value not in ('+', '-'):
                        _fail('need +/- in line%d,field%d' % (n, field))
                elif m == 8:
                    if not _is_item_rgb(value):
                        _fail('line%d,field%d needs to be 0 or an R,G,B value'
                              % (n, field))
                elif m in (10, 11):
                    if ',' not in value:
                        _fail('need comma separated list at line%d,field%d'
                              % (n, field))
                    items = value.strip(',').split(',')
                    if len(items) != block_count:
                        _fail('block count does not match list length at '
                              'line%d,field%d' % (n, field))
                    for item in items:
                        if not item.isdigit():
                            _fail('need list of numbers at line%d,field%d'
                                  % (n, field))

            prev_len = rowlen


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write('usage: fcfunc.py BED_FILE\n')
        sys.exit(1)
    checkFormat(sys.argv[1])
    sys.stdout.write('passed')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macs.xml Tue Dec 30 18:45:34 2014 +0900 @@ -0,0 +1,785 @@ +<tool name="MACS" id="cistrome_macs"> + <description>Model-based Analysis for ChIP-Seq</description> + <command interpreter="command">/bin/bash $shscript</command> + <inputs> + <param format="interval,sam,bam,eland,elandmulti,bed" name="tfile" type="data" label="Treatment file"/> + <param format="interval,sam,bam,eland,elandmulti,bed" name="cfile" type="data" label="Input file" optional="true"/> + <param name="format" type="select" label="Format"> + <option value="AUTO">Auto</option> + <option value="BED">Bed</option> + <option value="SAM">SAM (support PES)</option> + <option value="BAM">BAM (support PES)</option> + <option value="BOWTIE">Bowtie default format</option> + <option value="ELAND">ELAND_result</option> + <option value="ELANDMULTI">ELAND_multi</option> + <option value="ELANDEXPORT">ELAND_export</option> + </param> + <conditional name="genome_size_cond"> + <param name="genome_size" type="select" label="Effective Genome Size"> + <option value="2770000000">Human (hg18)</option> + <option value="2790000000">Human (hg19)</option> + <option value="1870000000">Mouse (mm8)</option> + <option value="1910000000">Mouse (mm9)</option> + <option value="90300000">C elegans (ce4)</option> + <option value="90300000">C elegans (ce6)</option> + <option value="119000000">Drosophila (dm2)</option> + <option value="152000000">Drosophila (dm3)</option> + <option value="OTHER">Other</option> + </param> + <when value="OTHER"> + <param name="genome_size_other" type="text" label="Custom Genome Size"/> + </when> + <when value="2770000000"/> + <when value="2790000000"/> + <when value="1870000000"/> + <when value="1910000000"/> + <when value="90300000"/> + <when value="90300000"/> + <when value="119000000"/> + <when value="152000000"/> + </conditional> + <param name="tag_size" type="integer" label="Tag size (Optional)" value="25" optional="true"> + <validator type="in_range" 
max="1000" min="20" message="Tag size is out of range, Tag size has to be between 20 to 1000" /> + </param> + <param name="pvalue" type="float" label="P-Value" value="0.00001"> + <validator type="in_range" max="1" min="0" message="Pvalue is out of range, Pvalue has to be between 0 to 1" /> + </param> + <param name="keep_dup" type="select" label="Keep duplicate tags at the exact same location?" display="radio"> + <option value="all" >Keep ALL</option> + <option value="auto" >Auto by Binomial</option> + <option value="1" selected="true">Keep Single</option> + </param> + + <conditional name="model"> + <param name="use_model" type="select" label="Use Model?"> + <option value="">True</option> + <option value="nomodel">False</option> + </param> + <when value=""> + <param name="sfold" type="text" label="small fold enrichment for model building" value="10" > + <validator type="in_range" max="100" min="5" message="Small fold enrichment is out of range, Small fold enrichment has to be between 5 to 100" /> + </param> + <param name="lfold" type="text" label="large fold" value="30" > + <validator type="in_range" max="100" min="5" message="Large Fold is out of range, Large Fold has to be between 5 to 100" /> + </param> + </when> + <when value="nomodel"> + <param name="shift_size" type="text" label="Shift size" value="100"> + <validator type="in_range" max="1000" min="50" message="Shift size is out of range, Shift size has to be between 50 to 1000" /> + </param> + </when> + </conditional> + + <conditional name="advopt"> + <param name="advopt_select" type="select" label="Advanced Options"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <param name="bandwidth" type="text" label="Bandwidth" value="300"> + <validator type="in_range" max="1000" min="100" message="Bandwidth is out of range, Bandwidth has to be between 100 to 2000" /> + </param> + <param name="lambda" type="select" label="Use Lambda?" 
display="radio"> + <option value="">True</option> + <option value="nolambda">False</option> + </param> + <param name="lambda_small" type="text" label="Small Lambda" value="1000"> + <validator type="in_range" max="1000000" min="100" message="Small Lambda is out of range, Small Lambda has to be between 100 to 1000000" /> + </param> + <param name="lambda_large" type="text" label="Large Lambda" value="10000"> + <validator type="in_range" max="1000000" min="100" message="Large Lambda is out of range, Large Lambda has to be between 100 to 1000000" /> + </param> + <param name="make_wig" type="select" label="Generate a wig file?" display="radio"> + <option value="wig" selected="true">Yes</option> + <option value="" >No</option> + </param> + </when> + <when value="no" /> + </conditional> + + <conditional name="diag_report"> + <param name="diag_report_select" type="select" label="Diagnosis Report"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <param name="femin" type="text" label="Minimum Fold Enrichment" value="0"> + <validator type="in_range" max="100" min="0" message="Minimum Fold Enrichment is out of range, Minimum Fold Enrichment has to be between 0 to 100" /> + </param> + <param name="femax" type="text" label="Maximum Fold Enrichment(optional)" optional="true"> + <validator type="in_range" max="100" min="0" message="Maximum Fold Enrichment is out of range, Maximum Fold Enrichment has to be between Minimum Fold Enrichment to 100" /> + </param> + <param name="festep" type="text" label="Fold Enrichment Step" value="20"> + <validator type="in_range" max="100" min="0" message="Fold Enrichment Step is out of range, Fold Enrichment Step has to be between 0 to 100" /> + </param> + </when> + <when value="no" /> + </conditional> + </inputs> + <outputs> + <data format="bed" name="bedoutput" label="MACS peaks on ${tfile.name}" /> + <data format="bed" name="summitsoutput" label="MACS summits on ${tfile.name}" /> + <data 
format="tabular" name="xlsoutput" label="MACS xls on ${tfile.name}" /> + <data format="wig" name="wigoutput" label="MACS wiggle on ${tfile.name}" /> + <data format="txt" name="log" label="MACS job log on ${tfile.name}" /> + <data format="tabular" name="xlsdiagreport" label="MACS diagnosis report on ${tfile.name}" /> + </outputs> + + <configfiles> + <configfile name="shscript"> +#!/bin/bash +#set $dollar = chr(36) +#set $gt = chr(62) +#set $lt = chr(60) +#set $ad = chr(38) + +tfilesize=`du -b $tfile | awk '{print ${dollar}1}'` + +if [[ ${dollar}tfilesize -gt 10000000000 ]];then + echo "Treatment file is too big! 10G is the maximum!" ${gt}${ad}2 + exit; +fi + +if [ $cfile != "None" ];then + cfilesize=`du -b $cfile | awk '{print ${dollar}1}'` + + if [[ ${dollar}cfilesize -gt 10000000000 ]];then + echo "Control file is too big! 10G is the maximum!" ${gt}${ad}2 + exit; + fi +fi + +#if $model.use_model == "" +#if $model.sfold.value > $model.lfold.value + echo "Large Fold has to be greater than Small Fold" ${gt}${ad}2 + exit; +#end if +#end if + +#if $diag_report.diag_report_select == "yes" +#if $diag_report.femin.value > $diag_report.femax.value + echo "Maximum Fold Enrichment has to be greater than Minimum Fold Enrichment" ${gt}${ad}2 + exit; +#end if +#end if + +#if $advopt.advopt_select == "yes" +#if $advopt.lambda_small.value > $advopt.lambda_large.value + echo "Large lambda has to be greater than Small lambda" ${gt}${ad}2 + exit; +#end if +#end if + +#NOTE: IF --nomodel is used, then we feed in the shiftsize param, ELSE feed in mfold param. 
+#if $model.use_model == "nomodel" +#set $m="--nomodel --shiftsize "+str($model.shift_size) +#else +#set $m="--mfold "+str($model.sfold)+","+str($model.lfold) +#end if + +#NOTE: the control file is optional, if it is unspecified it is sent as 'None' +#if str($cfile) != "None" +#set $c = " -c "+ str($cfile) +#else +#set $c = "" +#end if + +##set $extracommand = "touch "+str($wigoutput) + +#if $advopt.advopt_select == "yes" +#if str($advopt.lambda) == "nolambda" +#set $lamb="--"+str($advopt.lambda) +#else +#set $lamb="" +#end if +#if str($advopt.make_wig) == "wig" +#set $mkwig="--"+str($advopt.make_wig) +#else +#set $mkwig="" +#end if +#set $advparam = "--bw="+str($advopt.bandwidth)+" "+str($lamb)+" --slocal="+str($advopt.lambda_small)+" --llocal="+str($advopt.lambda_large)+" "+str($mkwig) +#if $advopt.make_wig.value != "" +#set $extracommand = "zcat macs_output_MACS_wiggle/treat/*.gz > "+str($wigoutput) +#end if +#else +#set $advparam = "--bw=300 --wig" +#set $extracommand = "zcat macs_output_MACS_wiggle/treat/*.gz > "+str($wigoutput) +#end if + +#if $diag_report.diag_report_select == "yes" +#set $diagparam = "--diag --fe-min=0 --fe-step=20" +#if $diag_report.femin.value != "" +#set $diagparam = $diagparam.replace("--fe-min=0", "--fe-min="+str($diag_report.femin.value)) +#end if +#if $diag_report.festep.value != "" +#set $diagparam = $diagparam.replace("--fe-step=20", "--fe-step="+str($diag_report.festep.value)) +#end if +#if str($diag_report.femax.value) != "" +#set $diagparam = $diagparam+" --fe-max="+str($diag_report.femax.value) +#end if +#else +#set $diagparam = " " +#end if + +#set genomeSize = $genome_size_cond.genome_size +#if $genome_size_cond.genome_size == "OTHER" +#set $genomeSize = $genome_size_cond.genome_size_other +#end if + +#set $keepdup = "--keep-dup "+ str($keep_dup) + +macs14 -t $tfile $c --format=$format --gsize=$genomeSize --tsize=$tag_size $m $advparam --pvalue=$pvalue --name=macs_output $diagparam --single-profile $keepdup ${ad}${gt} $log 
+cp macs_output_peaks.bed $bedoutput +cp macs_output_summits.bed $summitsoutput +cp macs_output_peaks.xls $xlsoutput +#if $diag_report.diag_report_select == "yes" +cp macs_output_diag.xls $xlsdiagreport +#end if + +#if $advopt.advopt_select == "yes" +#if $advopt.make_wig.value != "" +$extracommand +#end if +#else +$extracommand +#end if + </configfile> + </configfiles> + + <tests> + <test maxseconds="3600" name="TreatmentFile"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_1/macs_output_peaks.bed" /> + <output name="output" file="macs_1/macs_output_summits.bed" /> + <output name="output" file="macs_1/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_1/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_1/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_1/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="ControlFile"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" value="macs_control_hg18.bed" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" 
file="macs_2/macs_output_peaks.bed" /> + <output name="output" file="macs_2/macs_output_summits.bed" /> + <output name="output" file="macs_2/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_2/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_2/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_2/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="GSize"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_3/macs_output_peaks.bed" /> + <output name="output" file="macs_3/macs_output_summits.bed" /> + <output name="output" file="macs_3/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_3/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_3/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_3/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="TagSize"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" 
file="macs_4/macs_output_peaks.bed" /> + <output name="output" file="macs_4/macs_output_summits.bed" /> + <output name="output" file="macs_4/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_4/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_4/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_4/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="Pvalue"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_5/macs_output_peaks.bed" /> + <output name="output" file="macs_5/macs_output_summits.bed" /> + <output name="output" file="macs_5/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_5/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_5/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_5/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="UseModel"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" 
file="macs_6/macs_output_peaks.bed" /> + <output name="output" file="macs_6/macs_output_summits.bed" /> + <output name="output" file="macs_6/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_6/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_6/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_6/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="ShiftSize"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="nomodel" /> + <param name="shift_size" value="100" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_7/macs_output_peaks.bed" /> + <output name="output" file="macs_7/macs_output_summits.bed" /> + <output name="output" file="macs_7/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_7/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_7/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_7/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="advopt_1"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="nomodel" /> + <param name="shift_size" value="100" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="" /> + <param name="lambda_small" value="1000" /> + <param 
name="lambda_large" value="10000" /> + <param name="make_wig" value="wig" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_8/macs_output_peaks.bed" /> + <output name="output" file="macs_8/macs_output_summits.bed" /> + <output name="output" file="macs_8/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_8/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_8/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_8/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="advopt_2"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_9/macs_output_peaks.bed" /> + <output name="output" file="macs_9/macs_output_summits.bed" /> + <output name="output" file="macs_9/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_9/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_9/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_9/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="Bandwidth"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" 
value="30" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="" /> + <param name="lambda_small" value="1000" /> + <param name="lambda_large" value="10000" /> + <param name="make_wig" value="wig" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_10/macs_output_peaks.bed" /> + <output name="output" file="macs_10/macs_output_summits.bed" /> + <output name="output" file="macs_10/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_10/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_10/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_10/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="UseLambda_1"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="" /> + <param name="lambda_small" value="1000" /> + <param name="lambda_large" value="10000" /> + <param name="make_wig" value="wig" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_11/macs_output_peaks.bed" /> + <output name="output" file="macs_11/macs_output_summits.bed" /> + <output name="output" file="macs_11/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_11/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_11/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_11/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test 
maxseconds="3600" name="UseLambda_2"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="nolambda" /> + <param name="lambda_small" value="1000" /> + <param name="lambda_large" value="10000" /> + <param name="make_wig" value="wig" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_12/macs_output_peaks.bed" /> + <output name="output" file="macs_12/macs_output_summits.bed" /> + <output name="output" file="macs_12/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_12/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_12/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_12/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="LambdaSet"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="" /> + <param name="lambda_small" value="1000" /> + <param name="lambda_large" value="10000" /> + <param name="make_wig" value="wig" /> + <param name="diag_report_select" value="no" /> + <output name="output" 
file="macs_13/macs_output_peaks.bed" /> + <output name="output" file="macs_13/macs_output_summits.bed" /> + <output name="output" file="macs_13/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_13/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_13/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_13/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="wig_1"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="" /> + <param name="lambda_small" value="1000" /> + <param name="lambda_large" value="10000" /> + <param name="make_wig" value="wig" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_14/macs_output_peaks.bed" /> + <output name="output" file="macs_14/macs_output_summits.bed" /> + <output name="output" file="macs_14/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_14/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_14/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_14/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="wig_2"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> 
+ <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="yes" /> + <param name="bandwidth" value="300" /> + <param name="lambda" value="" /> + <param name="lambda_small" value="1000" /> + <param name="lambda_large" value="10000" /> + <param name="make_wig" value="" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_15/macs_output_peaks.bed" /> + <output name="output" file="macs_15/macs_output_summits.bed" /> + <output name="output" file="macs_15/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_15/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_15/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_15/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="diag_1"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="yes" /> + <param name="femin" value="0" /> + <param name="femax" value="20" /> + <param name="festep" value="20" /> + <output name="output" file="macs_16/macs_output_peaks.bed" /> + <output name="output" file="macs_16/macs_output_summits.bed" /> + <output name="output" file="macs_16/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_16/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_16/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_16/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="diag_2"> + <param name="tfile" 
value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="no" /> + <output name="output" file="macs_17/macs_output_peaks.bed" /> + <output name="output" file="macs_17/macs_output_summits.bed" /> + <output name="output" file="macs_17/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_17/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_17/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_17/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="femin"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="yes" /> + <param name="femin" value="0" /> + <param name="femax" value="20" /> + <param name="festep" value="20" /> + <output name="output" file="macs_18/macs_output_peaks.bed" /> + <output name="output" file="macs_18/macs_output_summits.bed" /> + <output name="output" file="macs_18/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_18/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_18/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_18/macs_output_diag.xls" 
lines_diff = "20"/> + </test> + <test maxseconds="3600" name="femax"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="yes" /> + <param name="femin" value="0" /> + <param name="femax" value="20" /> + <param name="festep" value="20" /> + <output name="output" file="macs_19/macs_output_peaks.bed" /> + <output name="output" file="macs_19/macs_output_summits.bed" /> + <output name="output" file="macs_19/macs_output_peaks.xls" lines_diff = "4" /> + <output name="output" file="macs_19/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_19/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_19/macs_output_diag.xls" lines_diff = "20"/> + </test> + <test maxseconds="3600" name="festep"> + <param name="tfile" value="macs_treatment_hg18.bed" /> + <param name="cfile" /> + <param name="format" value="AUTO" /> + <param name="genome_size" value="2770000000" /> + <param name="tag_size" value="25" /> + <param name="pvalue" value="0.00001" /> + <param name="keep_dup" value="1" /> + <param name="use_model" value="" /> + <param name="sfold" value="10" /> + <param name="lfold" value="30" /> + <param name="advopt_select" value="no" /> + <param name="diag_report_select" value="yes" /> + <param name="femin" value="0" /> + <param name="femax" value="20" /> + <param name="festep" value="20" /> + <output name="output" file="macs_20/macs_output_peaks.bed" /> + <output name="output" file="macs_20/macs_output_summits.bed" /> + <output name="output" file="macs_20/macs_output_peaks.xls" lines_diff = "4" /> + <output 
name="output" file="macs_20/macs_output_treat_afterfiting_all.wig" /> + <output name="output" file="macs_20/macs_output.log" lines_diff = "200"/> + <output name="output" file="macs_20/macs_output_diag.xls" lines_diff = "20"/> + </test> + </tests> + <help> +This tool performs peak calling for ChIP-Seq data. MACS is developed +in Xiaole Shirley Liu's lab, by Tao Liu and Yong Zhang, and published +in Genome Biology (pubmed: 18798982). The version deployed here is +1.4.0rc2. + +.. class:: infomark + +**TIP:** Please first upload your treatment and control files using the **Upload File from your computer tool**. + +.. class:: infomark + +**TIP:** If you choose to generate a wiggle file, it will take a longer +time. + +----- + +**Parameters** + +- **Treatment file** The input file for ChIP/treatment channel chosen from the + history. Appropriate formats are BED, ELAND, and ELAND_MULTI. SAM and BAM are + also supported. MACS can accept paired-end sequencing data in SAM/BAM format. +- **Control file** The input file for input/control channel chosen + from the history. +- **Format** The format of input files. Choices are AUTO (Auto detect format), + BED ( >= 6 columns), ELAND ( eland_result), ELAND_MULTI ( eland_multi), + ELANDMULTIPET, ELANDEXPORT, SAM (SAM format), BAM ( binary BAM format) + and BOWTIE ( bowtie .map output) +- **Effective genome size** Select the desired genome assembly and the correct + size will be selected. The choices are hg18(2.77e9), hg19(2.79e9), mm8(1.87e9), + mm9(1.91e9), ce4(9.03e7), ce6(9.03e7), dm2(1.19e8), and dm3(1.52e8). +- **Tag size** is the size of reads. +- **P-Value** is the pvalue cutoff. Default is 0.00001, for looser + results, try 0.001 instead. +- **Use Model** is whether or not to use the MACS paired peaks model. +- **Model fold** is available when **Use Model** is true, which is the + fold change used to choose paired peaks to build the paired peaks model.
Users + need to set a lower(smaller) and upper(larger) number for fold change + so that MACS will only use the peaks within this fold-change range to + build the model. +- **Shift size** is available when **Use Model** is false, which will + represent the *HALF* of the fragment size of your sample. If your + sonication and size selection size is 300 bps, after you trim out + nearly 100 bps adapters, the fragment size is about 200 bps, so you + can specify 100 here. +- **Keep Duplicate Tags** It controls the MACS behavior towards + duplicate tags at the exact same location -- the same coordinates and + the same strand. The default 'auto' option makes MACS calculate the + maximum tags at the exact same location based on binomial distribution + using 1e-5 as pvalue cutoff; and the 'all' option keeps every tag. + If an integer is given, at most this number of tags will be kept at + the same location. Default: 1 +- **Advanced Options** to turn on advanced settings. You will lose + advanced settings and go back to default parameters when you choose + 'No'. +- **Bandwidth (Advanced)** is the bandwidth to scan for paired peaks when **Use + Model** is on, and is the *HALF* of the window size to detect actual + peaks when **Use Model** is off. +- **Use Lambda (Advanced)** to turn on or off local lambda model which + can use the local bias at peak regions to throw out false positives. +- **Small Lambda (Advanced)** The small nearby region in basepairs to calculate + dynamic lambda. This is used to capture the bias near the peak summit region. + Invalid if there is no control data. DEFAULT: 1000 +- **Large Lambda (Advanced)** The large nearby region in basepairs to calculate + dynamic lambda. This is used to capture the surrounding bias. DEFAULT: 10000. +- **Generate a wig file (Advanced)** to turn on or off the wiggle file + generation for ChIP channel. The wiggle file is the fragment pileup + accumulations at every 10 bps.
+- **Diagnostic Report** whether or not to produce a diagnosis report. +- **Minimum Fold Enrichment (Diagnosis)** Minimum fold enrichment to consider. +- **Maximum Fold Enrichment (Diagnosis)** Maximum fold enrichment to consider. + Leave blank for default max value. +- **Fold Enrichment Step (Diagnosis)** Interval of fold enrichment. + +----- + +**Outputs** + +- **BED file** for peak locations in BED format. Typically used in gene association study like CEAS, or correlation calculation. +- **BED file** for peak summit locations in BED format. Typically used in DNA motif analysis or conservation check. +- **XLS file** for detailed information in a tab-delimited file. +- **WIGGLE file** for fragment pileup at every 10 bps of ChIP channel + in WIGGLE format. +- **LOG file** for job log. If you see errors, please attach this in + the bug report. +- **Diagnosis Report** empty unless diagnosis report is set to yes + + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/peak2gene.xml Tue Dec 30 18:45:34 2014 +0900
@@ -0,0 +1,93 @@
+<tool name="peak2gene: Peak Center Annotation" id="ceas_peak2gene">
+  <!-- Galaxy wrapper that runs PCGA.py on a BED file through the generated "shscript" config file below. -->
+  <!--<description>Input a peak file, and It will search each peak on UCSC GeneTable to get the refGenes near the peak center.</description>
+  -->
+  <description>This tool is abolished, please use BETA-minus instead.</description>
+  <command interpreter="command">/bin/bash $shscript</command>
+  <inputs>
+    <param format="bed" name="bfile" type="data" label="BED file(center will be aligned, 100,000 lines max)">
+      <validator type="unspecified_build" />
+    </param>
+    <param name="dist" type="integer" label="Distance from peak center" value="30000">
+      <validator type="in_range" max="30000" min="300" message="distance is out of range, distance has to be between 300 to 30000" />
+    </param>
+    <param name="genetype" type="select" label="which you want to output for gene name">
+      <option value="--symbol">Offical gene symbol</option>
+      <option value=" ">Refseq gene</option>
+    </param>
+    <param name="op" type="select" label="Which gene you want to output. eg) up(uptream of gene overlaps with peak center)">
+      <option value="all">all</option>
+      <option value="up">up</option>
+      <option value="down">down</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="bed" name="output_gene_annotation" label="annotation for each gene" />
+    <data format="bed" name="output_peak_annotation" label="annotation for each peak" />
+    <data format="txt" name="log" label="GetRef log" />
+  </outputs>
+  <configfiles>
+    <configfile name="shscript">
+#!/bin/bash
+## Cheetah template that is rendered into the bash script executed by the command above.
+
+#import os
+
+## Shell metacharacters are produced with chr() so they need no XML or Cheetah escaping.
+#set $dollar = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $ad = chr(38)
+
+#set $path = $os.path.abspath($__app__.config.tool_path)
+
+lines=`wc -l $bfile | tail -1 | awk '{print ${dollar}1}'`
+format=`$path/cistrome/fcfunc.py $bfile`
+
+## Enforce the 100,000 line limit announced in the input label and in the message below.
+if [[ ${dollar}lines -gt 100000 ]];then
+    echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
+    exit 1
+fi
+## Any fcfunc.py output other than "passed" is echoed to stderr and aborts with a failing status.
+if [[ ${dollar}format != "passed" ]]; then
+    echo ${dollar}format ${gt}${ad}2
+    exit 1
+fi
+
+#set $advparam = ""
+#if $dist
+#set $advparam = $advparam+"-d "+str($dist)
+#end if
+
+#set $gtpath = os.path.join( os.path.abspath("tool-data"), "ceaslib", "GeneTable", $bfile.metadata.dbkey )
+
+PCGA.py -t $bfile --name=output -g $gtpath --op=$op $genetype $advparam ${ad}${gt} $log
+mv output_peaks_annotation.txt $output_peak_annotation
+mv output_gene_annotation.txt $output_gene_annotation
+    </configfile>
+  </configfiles>
+  <tests>
+    <!-- The genome build is given as the dataset dbkey because the script reads $bfile.metadata.dbkey; the tool declares no "genome" input. -->
+    <test>
+      <param name="bfile" value="peak.bed" ftype="bed" dbkey="hg19" />
+      <param name="dist" value="10000" />
+      <param name="genetype" value="--symbol" />
+      <param name="op" value="up" />
+      <output name="output_gene_annotation" file="PCGA_test_gene_annotation.txt" lines_diff="100" />
+      <output name="output_peak_annotation" file="PCGA_test_peaks_annotation.txt" lines_diff="100" />
+    </test>
+    <test>
+      <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg19" />
+      <param name="dist" value="10000" />
+      <param name="genetype" value=" " />
+      <param name="op" value="down" />
+      <output name="output_gene_annotation" file="getref_result_gene.bed" />
+      <output name="output_peak_annotation" file="getref_result_peak.bed" />
+    </test>
+  </tests>
+  <help>
+This tool is abolished, please use BETA-minus instead.
+  </help>
+
+</tool>