changeset 0:97bd5bb4204c default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:45:34 +0900
parents
children
files ceas.xml fcfunc.py macs.xml peak2gene.xml
diffstat 4 files changed, 1366 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ceas.xml	Tue Dec 30 18:45:34 2014 +0900
@@ -0,0 +1,400 @@
+<tool name="CEAS: Enrichment on chromosome and annotation" id="ceas_ceas">
+  <description>Annotate the given intervals and scores with genome features such as gene body</description>
+  <command interpreter="command">/bin/bash $shscript </command>
+  <inputs>
+    <param format="wig" name="wfile" type="data" label="wig file" optional="true">
+      <validator type="unspecified_build" />
+    </param>
+    <param format="bed" name="bfile" type="data" label="BED file(maximum 500000 lines)" optional="true"> <!-- limit mirrors the line-count check in the shscript configfile -->
+      <validator type="unspecified_build" />
+    </param>
+    <param name="span" type="integer" label="Span" value="3000">
+    	<validator type="in_range" max="1000000" min="100" message="The Span is out of range, the parameter has to be between 100 to 1000000" />
+    </param>
+    <param name="pfres" type="integer" label="Profiling resolution" value="50">
+   		<validator type="in_range" max="1000" min="10" message="The Profiling Resolution is out of range, the parameter has to be between 10 to 1000" />
+    </param>
+    <param name="lowersize" type="integer" label="Promoter/downstream lower-interval" value="1000" > <!-- the five sizes below are integers like span/pfres/reldist; they are joined into the ceas sizes and bisizes options -->
+      <validator type="in_range" max="10000" min="100" message="The lower-interval is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="middlesize" type="integer" label="Promoter/downstream middle-interval" value="2000" >
+      <validator type="in_range" max="10000" min="100" message="The middle-interval is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="uppersize" type="integer" label="Promoter/downstream upper-interval" value="3000">
+      <validator type="in_range" max="10000" min="100" message="The upper-interval is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="lowerbisize" type="integer" label="Bi-Promoter lower range" value="2500">
+      <validator type="in_range" max="10000" min="100" message="The lower-range is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="upperbisize" type="integer" label="Bi-Promoter upper range" value="5000">
+      <validator type="in_range" max="10000" min="100" message="The upper-range is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param name="reldist" type="integer" label="Relative distance" value="3000">
+    	<validator type="in_range" max="10000" min="100" message="The Relative distance is out of range, the parameter has to be between 100 to 10000" />
+    </param>
+    <param type="select" name="imagetype" display="radio" label="Image Type">
+      <option value="PNG">PNG format</option>
+      <option value="PDF">PDF format</option>
+    </param>
+
+    <conditional name="genegroup">
+      <param name="enable" type="select" label="Specify gene list in the signal profiling" force_select="true">
+	<option value="no">No</option>
+	<option value="yes">Yes</option>
+      </param>
+
+      <when value="no">
+      </when>
+
+      <when value="yes">
+	<param format="text" name="genelist" type="data" label="Gene List" optional="false"/>
+	<param name="label" type="text" label="Gene List Label" optional="false" />
+	<repeat name="more" title="Gene Lists">
+	  <param format="text" name="genelist" type="data" label="Gene List" optional="false"/>
+	  <param name="label" type="text" label="Gene List Label" optional="false" />
+	</repeat>
+	<param name="idtype" type="select" label="Are they ...">
+	  <option value="">refseq</option>
+	  <option value=" --gname2">genesymbol</option>
+	</param>
+      </when>
+    </conditional>
+
+  </inputs>
+
+  <outputs>
+    <data format="png" name="output">
+      <change_format>
+	<when input="imagetype" value="PDF" format="pdf" />
+      </change_format>
+    </data>
+    <data format="txt" name="log" label="ceas job log" />
+  </outputs>
+
+  <configfiles>
+    <configfile name="shscript">
+#!/bin/bash
+#import os
+
+#set $dollar = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $ad = chr(38)
+
+if [ $bfile == "None" ];then
+		if [ $wfile == "None" ];then
+				echo "Either wig or bed file is required!" ${gt}${ad}2
+        exit;
+		fi
+fi
+
+
+#if $bfile != None
+    dbkey=$bfile.metadata.dbkey
+#elif $wfile != None
+    dbkey=$wfile.metadata.dbkey
+#end if
+
+
+##REMOVING WIG VALIDATORS
+##if [ $wfile != "None" ];then
+##    wigsize=`du -b $wfile | awk '{print ${dollar}1}'`
+##    
+##    if [[ ${dollar}wigsize -gt 2097152000 ]];then
+##        echo "wig file is too big! 2G is the maximum!" ${gt}${ad}2
+##        exit;
+##    fi
+##fi
+
+
+#if $genegroup.enable == "yes"
+## Sanity-check the first gene list before CEAS runs: its line count must lie
+## between 100 and 100000 and its label must not exceed 255 characters.
+#set $gngroups = str($genegroup.genelist)
+#set $gnlabels = str($genegroup.label)
+lines=`wc -l $gngroups | tail -1 | awk '{print ${dollar}1}'`
+## Both range violations share one message, so they are tested together.
+if [[ ${dollar}lines -gt 100000 || ${dollar}lines -lt 100 ]];then
+    echo "Total lines of the gene list has to be between 100 and 100000!" ${gt}${ad}2;
+    exit;
+fi
+## The label length is measured with awk and capped at 255 characters.
+genelength=`echo $gnlabels |awk '{print length($0)}'`
+if [[ ${dollar}genelength -gt 255 ]];then
+    echo "Gene List Label exceed the limit of 255 characters!" ${gt}${ad}2;
+    exit;
+fi
+#end if
+#if $genegroup.enable == "yes"
+#set $gngroups = str($genegroup.genelist)
+#set $gnlabels = str($genegroup.label)
+#for $m in $genegroup.more
+#set $gngroups = $gngroups+","+str($m.genelist)
+#set $gnlabels = $gnlabels+","+str($m.label)
+#end for
+#set $gngroupspara = "--gn-groups="+$gngroups
+#set $gnlabelspara = "--gn-group-names='"+$gnlabels+"'"+str($genegroup.idtype.value)
+#else
+#set $gngroupspara = ""
+#set $gnlabelspara = ""
+#end if
+
+#set $path = $os.path.abspath($__app__.config.tool_path)
+
+WIG=""
+if [ $wfile != "None" ]; then #were we sent in a value for the wig file?
+   WIG="-w $wfile"
+fi
+
+BED=""
+if [ $bfile != "None" ]; then #were we sent in a value for the BED file?
+   BED="-b $bfile"
+   lines=`wc -l $bfile | tail -1 | awk '{print ${dollar}1}'`
+   format=`$path/validation/fcfunc.py $bfile`
+
+   if [[ ${dollar}lines -gt 500000 ]];then
+      echo "BED file is too big! 500K lines are the maximum!" ${gt}${ad}2
+      exit;
+   fi
+   if [[ ${dollar}format != "passed" ]]; then
+      echo ${dollar}format ${gt}${ad}2
+      exit;
+   fi
+fi
+
+#set $sizes = str($lowersize) + "," + str($middlesize) + "," + str($uppersize)
+#set $bisizes = str($lowerbisize) + "," + str($upperbisize)
+
+#set $gtpath = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "ceaslib", "GeneTable", $dbkey )
+#set $length_file = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "chromLen", $dbkey+".len" )
+
+
+#if str($wfile) != "None"
+#if $wfile.extension == "wig"
+ceas ${dollar}WIG ${dollar}BED --span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes \
+   --rel-dist=$reldist -g $gtpath $gngroupspara $gnlabelspara --name=ceas_out ${ad}${gt} $log
+#elif $wfile.extension == "bigwig"
+ceasBW ${dollar}WIG ${dollar}BED --span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes -l $length_file\
+   --rel-dist=$reldist -g $gtpath $gngroupspara $gnlabelspara --name=ceas_out ${ad}${gt} $log
+#end if
+#else
+ceas ${dollar}WIG ${dollar}BED --span=$span --pf-res=$pfres --sizes=$sizes --bisizes=$bisizes \
+   --rel-dist=$reldist -g $gtpath $gngroupspara $gnlabelspara --name=ceas_out ${ad}${gt} $log
+#end if
+
+
+R --vanilla ${lt} ceas_out.R ${ad}${gt}/dev/null
+if [ $imagetype == "PNG" ]; then
+convert ceas_out.pdf ceas_out.png
+convert ceas_out-*.png -append ceas_out_joint.png
+mv ceas_out_joint.png $output
+else
+mv ceas_out.pdf $output
+fi
+
+    </configfile>
+  </configfiles>
+ <tests>
+  <test maxseconds="3600" name="CEAS_1">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="3000" />
+    <param name="pfres" value="50" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="2500" />
+    <param name="upperbisize" value="5000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="output" file="ceas_1/ceas_1.pdf" />
+    <output name="log" file="ceas_1/ceas_1.log" lines_diff = "200" /> <!-- compared against the "log" data output -->
+  </test>
+  <test maxseconds="3600" name="CEAS_2">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="1000" />
+    <param name="pfres" value="250" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="2500" />
+    <param name="upperbisize" value="5000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="output" file="ceas_2/ceas_2.pdf" />
+    <output name="log" file="ceas_2/ceas_2.log" lines_diff = "200" /> <!-- compared against the "log" data output -->
+  </test>
+  <test maxseconds="3600" name="CEAS_3">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="3000" />
+    <param name="pfres" value="150" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="5000" />
+    <param name="upperbisize" value="10000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="output" file="ceas_3/ceas_3.pdf" />
+    <output name="log" file="ceas_3/ceas_3.log" lines_diff = "200" /> <!-- compared against the "log" data output -->
+  </test>
+  <test maxseconds="3600" name="CEAS_4">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="3000" />
+    <param name="pfres" value="500" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="5000" />
+    <param name="upperbisize" value="10000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="output" file="ceas_4/ceas_4.pdf" />
+    <output name="log" file="ceas_4/ceas_4.log" lines_diff = "200" /> <!-- compared against the "log" data output -->
+  </test>
+  <test maxseconds="3600" name="CEAS_5">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="6000" />
+    <param name="pfres" value="500" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="5000" />
+    <param name="upperbisize" value="10000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="output" file="ceas_5/ceas_5.pdf" />
+    <output name="log" file="ceas_5/ceas_5.log" lines_diff = "200" /> <!-- compared against the "log" data output -->
+  </test>
+</tests> 
+  <help>
+This tool annotates the given intervals and scores with genome
+features such as gene body. It's the major module in CEAS package
+which is written by Hyunjin Gene Shin, published in Bioinformatics
+(pubmed id:19689956).
+
+.. class:: warningmark
+
+**NEED IMPROVEMENT**
+
+-----
+
+**Parameters**
+
+- **WIGGLE file** contains the scores for the experiment in a wiggle
+  format file. Normally, it's produced by the peak calling tool. It's
+  optional.
+- **BED file** contains the peak locations for the experiment in a BED
+  format file.
+- **Span** from TSS and TTS in the gene-centered annotation. ChIP
+  regions within this range from TSS and TTS are considered when
+  calculating the coverage rates in promoter and downstream.
+- **Profiling resolution** is the WIGGLE profiling resolution.
+- **Promoter/downstream intervals** for ChIP region annotation are
+  comma-separated three values or a single value can be given. If a
+  single value is given, it will be segmented into three equal
+  fractions (ie, 3000 is equivalent to 1000,2000,3000)
+- **BiPromoter ranges** is for ChIP region annotation. It's
+  comma-separated two values or a single value can be given. If a
+  single value is given, it will be segmented into two equal fractions
+  (ie, 5000 is equivalent to 2500,5000) 
+- **Relative distance** is the relative distance to TSS/TTS in WIGGLE file
+  profiling
+- **Genome Annotation Version** to specify the annotations according to
+  the data set. The annotations are downloaded from UCSC genome site.
+- **Image type** specify the output image format, either in PNG or in
+  PDF format.
+- If **Specify gene list in the signal profiling** is set, you can specify
+  different gene groups for CEAS to put them together in the profile
+  figure. You need to select several **Gene List** files from history which
+  contains the RefSeq ids or Gene Symbols for each row, and
+  **Gene List Label** for each gene list file.
+
+-----
+
+**Outputs**
+
+- **PNG/PDF file** is the result for CEAS analysis, containing 5 pages.
+- **LOG file** for job log. If you see errors, please attach this in
+  the bug report
+
+-----
+
+**script parameter list of CEAS 0.9.8**
+
+Options:
+  --version             show program's version number and exit
+  -h, --help            Show this help message and exit.
+  -b BED, --bed=BED     BED file of ChIP regions.
+  -w WIG, --wig=WIG     WIG file for either wig profiling or genome background
+                        annotation. WARNING: --bg flag must be set for genome
+                        background re-annotation.
+  -e EBED, --ebed=EBED  BED file of extra regions of interest (eg, non-coding
+                        regions)
+  -g GDB, --gt=GDB      Gene annotation table (eg, a refGene table in sqlite3
+                        db format provided through the CEAS web,
+                        http://liulab.dfci.harvard.edu/CEAS/download.html).
+  --name=NAME           Experiment name. This will be used to name the output
+                        files. If an experiment name is not given, the stem of
+                        the input BED file name will be used instead (eg, if
+                        'peaks.bed', 'peaks' will be used as a name.)
+  --sizes=SIZES         Promoter (also downstream) sizes for ChIP region
+                        annotation. Comma-separated three values or a single
+                        value can be given. If a single value is given, it
+                        will be segmented into three equal fractions (ie, 3000
+                        is equivalent to 1000,2000,3000), DEFAULT:
+                        1000,2000,3000. WARNING: Values > 10000bp are
+                        automatically set to 10000bp.
+  --bisizes=BISIZES     Bidirectional-promoter sizes for ChIP region
+                        annotation Comma-separated two values or a single
+                        value can be given. If a single value is given, it
+                        will be segmented into two equal fractions (ie, 5000
+                        is equivalent to 2500,5000) DEFAULT: 2500,5000bp.
+                        WARNING: Values > 20000bp are automatically set to
+                        20000bp.
+  --bg                  Run genome BG annotation again. WARNING: This flag is
+                        effective only if a WIG file is given through -w
+                        (--wig). Otherwise, ignored.
+  --span=SPAN           Span from TSS and TTS in the gene-centered annotation.
+                        ChIP regions within this range from TSS and TTS are
+                        considered when calculating the coverage rates in
+                        promoter and downstream, DEFAULT=3000bp
+  --pf-res=PF_RES       Wig profiling resolution, DEFAULT: 50bp. WARNING:
+                        Value smaller than the wig interval (resolution) may
+                        cause aliasing error.
+  --rel-dist=REL_DIST   Relative distance to TSS/TTS in wig profiling,
+                        DEFAULT: 3000bp
+  --gn-groups=GN_GROUPS
+                        Gene-groups of particular interest in wig profiling.
+                        Each gene group file must have gene names in the 1st
+                        column. The file names are separated by commas w/ no
+                        space (eg, --gn-groups=top10.txt,bottom10.txt)
+  --gn-group-names=GN_NAMES
+                        The names of the gene groups in --gn-groups. The gene
+                        group names are separated by commas. (eg, --gn-group-
+                        names='top 10%,bottom 10%'). These group names appear
+                        in the legends of the wig profiling plots. If no group
+                        names given, the groups are represented as 'Group 1,
+                        Group2,...Group n'.
+  --gname2              Whether or not use the 'name2' column of the gene
+                        annotation table when reading the gene IDs in the
+                        files given through --gn-groups. This flag is
+                        meaningful only with --gn-groups.
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fcfunc.py	Tue Dec 30 18:45:34 2014 +0900
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+import sys
+
+def checkFormat(file):
+    """Validate the first 19 lines of a BED file.
+
+    Empty lines and lines containing 'track' or 'browse' are skipped.  On the
+    first violation a message is written to stdout and sys.exit() is called;
+    when nothing is wrong the function simply returns None.
+
+    file -- path of the BED file to check
+    """
+    f = open(file)
+    n = 0      #line count
+    oldlen=0   #length of previous line
+
+    for line in f:
+        line=line.rstrip('\n\t ')
+        n=n+1
+        if line.find('track') == -1 and line.find('browse') == -1 and line and n<20:
+            row=line.split();
+            rowlen=len(row)
+
+            #check for 3 fields
+            if rowlen < 3:
+                sys.stdout.write('Error with BED file format: need atleast 3 fields per line\n')
+                sys.exit()
+
+            #check that field 3 is not smaller than field 2; non-numeric
+            #fields are reported by the per-field checks below instead
+            if row[1].isdigit() and row[2].isdigit() and int(row[2]) < int(row[1]):
+                sys.stdout.write('Error with BED file format: field 3 needs to be greater than field 2\n')
+                sys.exit()
+
+            #check each field
+            m=0   #field number
+            while m < len(row):
+                #check if length of lines are consistent
+                if rowlen != oldlen and oldlen != 0:
+                    sys.stdout.write('Error with BED file format: number of fields do not match\n')
+                    sys.exit()
+                #column 1 & 4: not validated
+                #column 5: not validated
+                #column 2
+                elif m==1:
+                    if row[m].isdigit() == 0:
+                        sys.stdout.write('Error with BED file format: line%d,field%d need a number\n' %(n,m+1))
+                        sys.exit()
+                    elif int(row[m]) < 0:
+                        sys.stdout.write('Error with BED file format: line%d,field%d can not be negative\n' %(n,m+1))
+                        sys.exit()
+                #column 3, 7, 8, 10
+                elif (m==2 or m==6 or m==7 or m==9):
+                    if row[m].isdigit() == 0:
+                        sys.stdout.write('Error with BED file format: line%d,field%d need a number\n' %(n,m+1))
+                        sys.exit()
+                    elif m==9:
+                        blockCount=int(row[9])
+                #column 6
+                elif m==5 and (row[5] != '-' and row[5] != '+'):
+                    sys.stdout.write('Error with BED file format: need +/- in line%d,field%d\n' %(n,m+1))
+                    sys.exit()
+                #column 9
+                elif m==8 and row[8] != '0':
+                    sys.stdout.write('Error with BED file format: line%d,field%d is always 0\n' %(n,m+1))
+                    sys.exit()
+                #column 11 & 12
+                elif m==10 or m==11:
+                    if row[m].find(',')==-1:
+                        sys.stdout.write('Error with BED file format: need comma separated list at line%d,field%d\n' %(n,m+1))
+                        sys.exit()
+                    else:
+                        col=row[m].strip(',').split(',')
+                        if len(col) != blockCount:
+                            sys.stdout.write('Error with BED file format: block count does not match list length at line%d,field%d\n' %(n,m+1))
+                            sys.exit()
+                        else:
+                            for l in col:
+                                if l.isdigit() == 0:
+                                    sys.stdout.write('Error with BED file format: need list of numbers at line%d,field%d\n' %(n,m+1))
+                                    sys.exit()
+                m=m+1
+                oldlen=rowlen
+        #nothing past line 19 is ever validated, so stop reading here
+        elif n>=20:
+            break
+    #error paths leave through sys.exit(); close the handle on the success path
+    f.close()
+if __name__ == '__main__':  # script entry point: argv[1] is the path of the BED file to check
+    checkFormat(sys.argv[1])  # writes an error to stdout and exits on the first violation
+    sys.stdout.write('passed')  # only reached when checkFormat returned normally
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macs.xml	Tue Dec 30 18:45:34 2014 +0900
@@ -0,0 +1,785 @@
+<tool name="MACS" id="cistrome_macs">
+  <description>Model-based Analysis for ChIP-Seq</description>
+  <command interpreter="command">/bin/bash $shscript</command>
+  <inputs>
+    <param format="interval,sam,bam,eland,elandmulti,bed" name="tfile" type="data" label="Treatment file"/>
+    <param format="interval,sam,bam,eland,elandmulti,bed" name="cfile" type="data" label="Input file" optional="true"/>
+    <param name="format" type="select" label="Format">
+      <option value="AUTO">Auto</option>
+      <option value="BED">Bed</option>
+      <option value="SAM">SAM (support PES)</option>
+      <option value="BAM">BAM (support PES)</option>
+      <option value="BOWTIE">Bowtie default format</option>
+      <option value="ELAND">ELAND_result</option>
+      <option value="ELANDMULTI">ELAND_multi</option>
+      <option value="ELANDEXPORT">ELAND_export</option>
+    </param>
+    <conditional name="genome_size_cond">
+      <param name="genome_size" type="select" label="Effective Genome Size">
+	<option value="2770000000">Human (hg18)</option>
+	<option value="2790000000">Human (hg19)</option>
+	<option value="1870000000">Mouse (mm8)</option>
+	<option value="1910000000">Mouse (mm9)</option>
+	<option value="90300000">C elegans (ce4)</option>
+	<option value="90300000">C elegans (ce6)</option>
+	<option value="119000000">Drosophila (dm2)</option>
+	<option value="152000000">Drosophila (dm3)</option>
+	<option value="OTHER">Other</option>
+      </param>
+      <when value="OTHER">
+	<param name="genome_size_other" type="text" label="Custom Genome Size"/>
+      </when>
+      <when value="2770000000"/>
+      <when value="2790000000"/>
+      <when value="1870000000"/>
+      <when value="1910000000"/>
+      <when value="90300000"/>
+      <when value="90300000"/>
+      <when value="119000000"/>
+      <when value="152000000"/>
+    </conditional>
+    <param name="tag_size" type="integer" label="Tag size (Optional)" value="25" optional="true">
+      <validator type="in_range" max="1000" min="20" message="Tag size is out of range, Tag size has to be between 20 to 1000" />
+    </param>
+    <param name="pvalue" type="float" label="P-Value" value="0.00001">
+      <validator type="in_range" max="1" min="0" message="Pvalue is out of range, Pvalue has to be between 0 to 1" />
+    </param>
+    <param name="keep_dup" type="select" label="Keep duplicate tags at the exact same location?" display="radio">
+      <option value="all" >Keep ALL</option>
+      <option value="auto" >Auto by Binomial</option>
+      <option value="1" selected="true">Keep Single</option>
+    </param>
+    
+    <conditional name="model">
+      <param name="use_model" type="select" label="Use Model?">
+        <option value="">True</option>
+        <option value="nomodel">False</option>
+      </param>
+      <when value="">
+	<param name="sfold" type="text" label="small fold enrichment for model building" value="10" >
+	  <validator type="in_range" max="100" min="5" message="Small fold enrichment is out of range, Small fold enrichment has to be between 5 to 100" />
+	</param>
+	<param name="lfold" type="text" label="large fold" value="30" >
+	  <validator type="in_range" max="100" min="5" message="Large Fold is out of range, Large Fold has to be between 5 to 100" />
+	</param>
+      </when>
+      <when value="nomodel">
+	<param name="shift_size" type="text" label="Shift size" value="100">
+    <validator type="in_range" max="1000" min="50" message="Shift size is out of range, Shift size has to be between 50 to 1000" />    	
+	</param>
+      </when>
+    </conditional>
+
+    <conditional name="advopt">
+      <param name="advopt_select" type="select" label="Advanced Options">
+	<option value="no">No</option>
+	<option value="yes">Yes</option>
+      </param>
+      <when value="yes">
+	<param name="bandwidth" type="text" label="Bandwidth" value="300">
+	  <validator type="in_range" max="1000" min="100" message="Bandwidth is out of range, Bandwidth has to be between 100 to 1000" />
+	</param>
+	<param name="lambda" type="select" label="Use Lambda?" display="radio">
+          <option value="">True</option>
+          <option value="nolambda">False</option>
+	</param>
+	<param name="lambda_small" type="text" label="Small Lambda" value="1000">
+	  <validator type="in_range" max="1000000" min="100" message="Small Lambda is out of range, Small Lambda has to be between 100 to 1000000" />
+	</param>
+	<param name="lambda_large" type="text" label="Large Lambda" value="10000">
+	  <validator type="in_range" max="1000000" min="100" message="Large Lambda is out of range, Large Lambda has to be between 100 to 1000000" />
+	</param>
+	<param name="make_wig" type="select" label="Generate a wig file?" display="radio">
+          <option value="wig" selected="true">Yes</option>
+          <option value="" >No</option>
+	</param>
+      </when>
+      <when value="no" />
+    </conditional>
+    
+    <conditional name="diag_report">
+      <param name="diag_report_select" type="select" label="Diagnosis Report">
+	<option value="no">No</option>
+	<option value="yes">Yes</option>
+      </param>
+      <when value="yes">
+	<param name="femin" type="text" label="Minimum Fold Enrichment" value="0">
+	  <validator type="in_range" max="100" min="0" message="Minimum Fold Enrichment is out of range, Minimum Fold Enrichment has to be between 0 to 100" />
+	</param>
+	<param name="femax" type="text" label="Maximum Fold Enrichment(optional)" optional="true">
+	  <validator type="in_range" max="100" min="0" message="Maximum Fold Enrichment is out of range, Maximum Fold Enrichment has to be between Minimum Fold Enrichment to 100" />
+	</param>
+	<param name="festep" type="text" label="Fold Enrichment Step" value="20">
+	  <validator type="in_range" max="100" min="0" message="Fold Enrichment Step is out of range, Fold Enrichment Step has to be between 0 to 100" />
+	</param>
+      </when>
+      <when value="no" />
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="bed" name="bedoutput" label="MACS peaks on ${tfile.name}" />
+    <data format="bed" name="summitsoutput" label="MACS summits on ${tfile.name}" />
+    <data format="tabular" name="xlsoutput" label="MACS xls on ${tfile.name}" />
+    <data format="wig" name="wigoutput" label="MACS wiggle on ${tfile.name}" />
+    <data format="txt" name="log" label="MACS job log on ${tfile.name}" />
+    <data format="tabular" name="xlsdiagreport" label="MACS diagnosis report on ${tfile.name}" />
+  </outputs>
+
+  <configfiles>
+    <configfile name="shscript">
+#!/bin/bash
+#set $dollar = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $ad = chr(38)
+
+tfilesize=`du -b $tfile | awk '{print ${dollar}1}'`
+
+if [[ ${dollar}tfilesize -gt 10000000000 ]];then
+    echo "Treatment file is too big! 10G is the maximum!" ${gt}${ad}2
+    exit;
+fi
+
+if [ $cfile != "None" ];then
+    cfilesize=`du -b $cfile | awk '{print ${dollar}1}'`
+    
+    if [[ ${dollar}cfilesize -gt 10000000000 ]];then
+        echo "Control file is too big! 10G is the maximum!" ${gt}${ad}2
+        exit;
+    fi
+fi
+
+## These settings are text parameters, so their raw values are strings. Convert
+## them with float() before comparing; a plain string comparison orders them
+## lexicographically (for example "5" sorts after "30").
+#if $model.use_model == "" and float($model.sfold.value) > float($model.lfold.value)
+    echo "Large Fold has to be greater than Small Fold" ${gt}${ad}2
+    exit;
+#end if
+
+## femax is optional; the range check only applies when a value was entered.
+## The bounds are converted with float() so they compare as numbers.
+#if $diag_report.diag_report_select == "yes" and str($diag_report.femax.value) not in ("", "None") and float($diag_report.femin.value) > float($diag_report.femax.value)
+    echo "Maximum Fold Enrichment has to be greater than Minimum Fold Enrichment" ${gt}${ad}2
+    exit;
+#end if
+
+## Small lambda may not exceed large lambda; both are compared as numbers.
+#if $advopt.advopt_select == "yes" and float($advopt.lambda_small.value) > float($advopt.lambda_large.value)
+    echo "Large lambda has to be greater than Small lambda" ${gt}${ad}2
+    exit;
+#end if
+
+#NOTE: IF --nomodel is used, then we feed in the shiftsize param, ELSE feed in mfold param.
+#if $model.use_model == "nomodel"
+#set $m="--nomodel --shiftsize "+str($model.shift_size)
+#else
+#set $m="--mfold "+str($model.sfold)+","+str($model.lfold)
+#end if
+
+#NOTE: the control file is optional, if it is unspecified it is sent as 'None'
+#if str($cfile) != "None"
+#set $c = " -c "+ str($cfile)
+#else
+#set $c = ""
+#end if
+
+##set $extracommand = "touch "+str($wigoutput)
+
+#if $advopt.advopt_select == "yes"
+#if str($advopt.lambda) == "nolambda"
+#set $lamb="--"+str($advopt.lambda)
+#else
+#set $lamb=""
+#end if
+#if str($advopt.make_wig) == "wig"
+#set $mkwig="--"+str($advopt.make_wig)
+#else
+#set $mkwig=""
+#end if
+#set $advparam = "--bw="+str($advopt.bandwidth)+" "+str($lamb)+" --slocal="+str($advopt.lambda_small)+" --llocal="+str($advopt.lambda_large)+" "+str($mkwig)
+#if $advopt.make_wig.value != ""
+#set $extracommand = "zcat macs_output_MACS_wiggle/treat/*.gz > "+str($wigoutput)
+#end if
+#else
+#set $advparam = "--bw=300 --wig"
+#set $extracommand = "zcat macs_output_MACS_wiggle/treat/*.gz > "+str($wigoutput)
+#end if
+
+#if $diag_report.diag_report_select == "yes"
+#set $diagparam = "--diag --fe-min=0 --fe-step=20"
+#if $diag_report.femin.value != ""
+#set $diagparam = $diagparam.replace("--fe-min=0", "--fe-min="+str($diag_report.femin.value))
+#end if
+#if $diag_report.festep.value != ""
+#set $diagparam = $diagparam.replace("--fe-step=20", "--fe-step="+str($diag_report.festep.value))
+#end if
+#if str($diag_report.femax.value) != ""
+#set $diagparam = $diagparam+" --fe-max="+str($diag_report.femax.value)
+#end if
+#else
+#set $diagparam = " "
+#end if
+
+#set genomeSize = $genome_size_cond.genome_size
+#if $genome_size_cond.genome_size == "OTHER"
+#set $genomeSize = $genome_size_cond.genome_size_other
+#end if
+
+#set $keepdup = "--keep-dup "+ str($keep_dup)
+
+macs14 -t $tfile $c --format=$format --gsize=$genomeSize --tsize=$tag_size $m $advparam --pvalue=$pvalue --name=macs_output $diagparam --single-profile $keepdup ${ad}${gt} $log
+cp macs_output_peaks.bed $bedoutput
+cp macs_output_summits.bed $summitsoutput
+cp macs_output_peaks.xls $xlsoutput
+#if $diag_report.diag_report_select == "yes"
+cp macs_output_diag.xls $xlsdiagreport
+#end if
+
+#if $advopt.advopt_select == "yes"
+#if $advopt.make_wig.value != ""
+$extracommand 
+#end if
+#else
+$extracommand
+#end if
+    </configfile>
+  </configfiles>
+
+  <tests>
+    <test maxseconds="3600" name="TreatmentFile">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_1/macs_output_peaks.bed" />
+      <output name="output" file="macs_1/macs_output_summits.bed" /> 
+      <output name="output" file="macs_1/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_1/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_1/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_1/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="ControlFile">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" value="macs_control_hg18.bed" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_2/macs_output_peaks.bed" />
+      <output name="output" file="macs_2/macs_output_summits.bed" />
+      <output name="output" file="macs_2/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_2/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_2/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_2/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="GSize">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_3/macs_output_peaks.bed" />
+      <output name="output" file="macs_3/macs_output_summits.bed" />
+      <output name="output" file="macs_3/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_3/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_3/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_3/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="TagSize">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_4/macs_output_peaks.bed" />
+      <output name="output" file="macs_4/macs_output_summits.bed" />
+      <output name="output" file="macs_4/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_4/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_4/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_4/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="Pvalue">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_5/macs_output_peaks.bed" />
+      <output name="output" file="macs_5/macs_output_summits.bed" />
+      <output name="output" file="macs_5/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_5/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_5/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_5/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="UseModel">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_6/macs_output_peaks.bed" />
+      <output name="output" file="macs_6/macs_output_summits.bed" />
+      <output name="output" file="macs_6/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_6/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_6/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_6/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="ShiftSize">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="nomodel" />
+      <param name="shift_size" value="100" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_7/macs_output_peaks.bed" />
+      <output name="output" file="macs_7/macs_output_summits.bed" />
+      <output name="output" file="macs_7/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_7/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_7/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_7/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="advopt_1">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="nomodel" />
+      <param name="shift_size" value="100" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="wig" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_8/macs_output_peaks.bed" />
+      <output name="output" file="macs_8/macs_output_summits.bed" />
+      <output name="output" file="macs_8/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_8/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_8/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_8/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="advopt_2">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_9/macs_output_peaks.bed" />
+      <output name="output" file="macs_9/macs_output_summits.bed" />
+      <output name="output" file="macs_9/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_9/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_9/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_9/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="Bandwidth">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="wig" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_10/macs_output_peaks.bed" />
+      <output name="output" file="macs_10/macs_output_summits.bed" />
+      <output name="output" file="macs_10/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_10/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_10/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_10/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="UseLambda_1">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="wig" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_11/macs_output_peaks.bed" />
+      <output name="output" file="macs_11/macs_output_summits.bed" />
+      <output name="output" file="macs_11/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_11/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_11/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_11/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="UseLambda_2">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="nolambda" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="wig" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_12/macs_output_peaks.bed" />
+      <output name="output" file="macs_12/macs_output_summits.bed" />
+      <output name="output" file="macs_12/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_12/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_12/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_12/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="LambdaSet">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="wig" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_13/macs_output_peaks.bed" />
+      <output name="output" file="macs_13/macs_output_summits.bed" />
+      <output name="output" file="macs_13/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_13/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_13/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_13/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="wig_1">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="wig" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_14/macs_output_peaks.bed" />
+      <output name="output" file="macs_14/macs_output_summits.bed" />
+      <output name="output" file="macs_14/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_14/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_14/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_14/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="wig_2">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="yes" />
+      <param name="bandwidth" value="300" />
+      <param name="lambda" value="" />
+      <param name="lambda_small" value="1000" />
+      <param name="lambda_large" value="10000" />
+      <param name="make_wig" value="" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_15/macs_output_peaks.bed" />
+      <output name="output" file="macs_15/macs_output_summits.bed" />
+      <output name="output" file="macs_15/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_15/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_15/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_15/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="diag_1">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="yes" />
+      <param name="femin" value="0" />
+      <param name="femax" value="20" />
+      <param name="festep" value="20" />
+      <output name="output" file="macs_16/macs_output_peaks.bed" />
+      <output name="output" file="macs_16/macs_output_summits.bed" />
+      <output name="output" file="macs_16/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_16/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_16/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_16/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="diag_2">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="no" />
+      <output name="output" file="macs_17/macs_output_peaks.bed" />
+      <output name="output" file="macs_17/macs_output_summits.bed" />
+      <output name="output" file="macs_17/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_17/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_17/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_17/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="femin">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="yes" />
+      <param name="femin" value="0" />
+      <param name="femax" value="20" />
+      <param name="festep" value="20" />
+      <output name="output" file="macs_18/macs_output_peaks.bed" />
+      <output name="output" file="macs_18/macs_output_summits.bed" />
+      <output name="output" file="macs_18/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_18/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_18/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_18/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="femax">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="yes" />
+      <param name="femin" value="0" />
+      <param name="femax" value="20" />
+      <param name="festep" value="20" />
+      <output name="output" file="macs_19/macs_output_peaks.bed" />
+      <output name="output" file="macs_19/macs_output_summits.bed" />
+      <output name="output" file="macs_19/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_19/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_19/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_19/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+    <test maxseconds="3600" name="festep">
+      <param name="tfile" value="macs_treatment_hg18.bed" />
+      <param name="cfile" />
+      <param name="format" value="AUTO" />
+      <param name="genome_size" value="2770000000" />
+      <param name="tag_size" value="25" />
+      <param name="pvalue" value="0.00001" />
+      <param name="keep_dup" value="1" />
+      <param name="use_model" value="" />
+      <param name="sfold" value="10" />
+      <param name="lfold" value="30" />
+      <param name="advopt_select" value="no" />
+      <param name="diag_report_select" value="yes" />
+      <param name="femin" value="0" />
+      <param name="femax" value="20" />
+      <param name="festep" value="20" />
+      <output name="output" file="macs_20/macs_output_peaks.bed" />
+      <output name="output" file="macs_20/macs_output_summits.bed" />
+      <output name="output" file="macs_20/macs_output_peaks.xls" lines_diff = "4" />
+      <output name="output" file="macs_20/macs_output_treat_afterfiting_all.wig" />
+      <output name="output" file="macs_20/macs_output.log" lines_diff = "200"/>
+      <output name="output" file="macs_20/macs_output_diag.xls" lines_diff = "20"/>
+    </test>
+  </tests>
+  <help>
+This tool performs peak calling for ChIP-Seq data. MACS was developed
+in Xiaole Shirley Liu's lab by Tao Liu and Yong Zhang, and published
+in Genome Biology (PubMed: 18798982). The version deployed here is
+1.4.0rc2.
+
+.. class:: infomark
+
+**TIP:** Please first upload your treatment and control files using the **Upload File from your computer tool**.
+
+.. class:: infomark
+
+**TIP:** If you choose to generate a wiggle file, the job will take
+longer to run.
+
+-----
+
+**Parameters**
+
+- **Treatment file** The input file for the ChIP/treatment channel, chosen from the
+  history. Appropriate formats are BED, ELAND, and ELAND_MULTI. SAM and BAM are
+  also supported. MACS can accept paired-end sequencing data in SAM/BAM format.
+- **Control file** The input file for input/control channel chosen
+  from the history. 
+- **Format** The format of input files. Choices are AUTO (auto-detect format),
+  BED (>= 6 columns), ELAND (eland_result), ELAND_MULTI (eland_multi),
+  ELANDMULTIPET, ELANDEXPORT, SAM (SAM format), BAM (binary BAM format),
+  and BOWTIE (bowtie .map output).
+- **Effective genome size** Select the desired genome assembly and the correct 
+  size will be selected. The choices are hg18(2.77e9), hg19(2.79e9), mm8(1.87e9), 
+  mm9(1.91e9), ce4(9.03e7), ce6(9.03e7), dm2(1.19e8), and dm3(1.52e8).
+- **Tag size** is the size of reads.
+- **P-Value** is the p-value cutoff. Default is 0.00001; for looser
+  results, try 0.001 instead.
+- **Use Model** is whether or not to use the MACS paired-peaks model.
+- **Model fold** is available when **Use Model** is true. It is the
+  fold-change range used to choose paired peaks for building the model. Users
+  need to set a lower (smaller) and an upper (larger) fold-change value
+  so that MACS will only use peaks within this fold-change range to
+  build the model.
+- **Shift size** is available when **Use Model** is false. It
+  represents *HALF* of the fragment size of your sample. If your
+  sonication and size-selection size is 300 bp, then after trimming
+  roughly 100 bp of adapters the fragment size is about 200 bp, so you
+  can specify 100 here.
+- **Keep Duplicate Tags** controls the MACS behavior towards
+  duplicate tags at the exact same location -- the same coordinates and
+  the same strand. The default 'auto' option makes MACS calculate the
+  maximum tags at the exact same location based on a binomial distribution
+  using 1e-5 as the p-value cutoff; the 'all' option keeps every tag.
+  If an integer is given, at most this number of tags will be kept at
+  the same location. Default: 1
+- **Advanced Options** to turn on advanced settings. You will lose
+  advanced settings and go back to default parameters when you choose
+  'No'.
+- **Bandwidth (Advanced)** is the bandwidth to scan for paired peaks when **Use
+  Model** is on, and is the *HALF* of the window size to detect actual
+  peaks when **Use Model** is off.
+- **Use Lambda (Advanced)** to turn on or off local lambda model which
+  can use the local bias at peak regions to throw out false positives.
+- **Small Lambda (Advanced)** The small nearby region in basepairs to calculate
+  dynamic lambda. This is used to capture the bias near the peak summit region. 
+  Invalid if there is no control data. DEFAULT: 1000
+- **Large Lambda (Advanced)** The large nearby region in basepairs to calculate
+  dynamic lambda. This is used to capture the surround bias. DEFAULT: 10000.
+- **Generate a wig file (Advanced)** to turn on or off the wiggle file
+  generation for ChIP channel. The wiggle file is the fragment pileup
+  accumulations at every 10 bps.
+- **Diagnostic Report** whether or not to produce a diagnosis report.
+- **Minimum Fold Enrichment (Diagnosis)** Minimum fold enrichment to consider. 
+- **Maximum Fold Enrichment (Diagnosis)** Maximum fold enrichment to consider.
+  Leave blank for default max value.
+- **Fold Enrichment Step (Diagnosis)** Interval of fold enrichment. 
+
+-----
+
+**Outputs**
+
+- **BED file** for peak locations in BED format. Typically used in gene association study like CEAS, or correlation calculation.
+- **BED file** for peak summits locations in BED format. Typically used in DNA motif analysis or conservation check.
+- **XLS file** for detailed information in a tab-delimited file.
+- **WIGGLE file** for fragment pileup at every 10 bp of the ChIP channel
+  in WIGGLE format.
+- **LOG file** for the job log. If you see errors, please attach this to
+  the bug report.
+- **Diagnosis Report** empty unless diagnosis report is set to yes
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peak2gene.xml	Tue Dec 30 18:45:34 2014 +0900
@@ -0,0 +1,89 @@
+<tool name="peak2gene: Peak Center Annotation" id="ceas_peak2gene">
+  <!-- Deprecated Galaxy wrapper around PCGA.py: annotates BED peak centers with genes from the ceaslib GeneTable. -->
+  <!--<description>Input a peak file, and It will search each peak on UCSC GeneTable to get the refGenes near the peak center.</description>
+  -->
+  <description>This tool is abolished, please use BETA-minus instead.</description>
+  <command interpreter="command">/bin/bash $shscript</command>
+  <inputs>
+    <param format="bed" name="bfile" type="data" label="BED file(center will be aligned, 100,000 lines max)">
+      <validator type="unspecified_build" />
+    </param>
+    <param name="dist" type="integer" label="Distance from peak center" value="30000">
+      <validator type="in_range" max="30000" min="300" message="distance is out of range, distance has to be between 300 to 30000" />
+    </param>
+    <param name="genetype" type="select" label="which you want to output for gene name">
+      <option value="--symbol">Official gene symbol</option>
+      <option value=" ">Refseq gene</option>
+    </param>
+    <param name="op" type="select" label="Which gene you want to output. eg) up(upstream of gene overlaps with peak center)">
+      <option value="all">all</option>
+      <option value="up">up</option>
+      <option value="down">down</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="bed" name="output_gene_annotation" label="annotation for each gene" />
+    <data format="bed" name="output_peak_annotation" label="annotation for each peak" />
+    <data format="txt" name="log" label="GetRef log" />
+  </outputs>
+  <configfiles>
+    <configfile name="shscript">
+#!/bin/bash
+## Cheetah template rendered to a bash script: validates the BED input, then runs PCGA.py.
+#import os
+
+#set $dollar = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $ad = chr(38)
+## The chr() placeholders above keep literal dollar, angle-bracket and ampersand characters out of this template.
+#set $path = $os.path.abspath($__app__.config.tool_path)
+
+lines=`wc -l $bfile | tail -1 | awk '{print ${dollar}1}'`
+format=`$path/cistrome/fcfunc.py $bfile`
+## Enforce the advertised 100,000-line limit; failures exit non-zero so they show up in the exit status.
+if [[ ${dollar}lines -gt 100000 ]];then
+   echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
+   exit 1;
+fi
+if [[ ${dollar}format != "passed" ]]; then
+   echo ${dollar}format ${gt}${ad}2
+   exit 1;
+fi
+
+#set $advparam = ""
+#if $dist
+#set $advparam = $advparam+"-d "+str($dist)
+#end if
+
+#set $gtpath = os.path.join( os.path.abspath("tool-data"), "ceaslib", "GeneTable", $bfile.metadata.dbkey )
+
+PCGA.py -t $bfile --name=output -g $gtpath --op=$op $genetype $advparam ${ad}${gt} $log
+mv output_peaks_annotation.txt $output_peak_annotation
+mv output_gene_annotation.txt $output_gene_annotation
+    </configfile>
+  </configfiles>
+  <!-- A tool takes a single tests element, so both cases are merged here; dbkey on bfile replaces the undefined "genome" param. -->
+  <tests>
+    <test>
+      <param name="bfile" value="peak.bed" dbkey="hg19" />
+      <param name="dist" value="10000" />
+      <param name="genetype" value="--symbol" />
+      <param name="op" value="up" />
+      <output name="output_gene_annotation" file="PCGA_test_gene_annotation.txt" lines_diff="100" />
+      <output name="output_peak_annotation" file="PCGA_test_peaks_annotation.txt" lines_diff="100" />
+    </test>
+    <test>
+      <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg19" />
+      <param name="dist" value="10000" />
+      <param name="genetype" value=" " />
+      <param name="op" value="down" />
+      <output name="output_gene_annotation" file="getref_result_gene.bed" />
+      <output name="output_peak_annotation" file="getref_result_peak.bed" />
+    </test>
+  </tests>
+  <help>
+This tool is abolished, please use BETA-minus instead.
+  </help>
+
+</tool>