diff macs2_callpeak.xml @ 0:9c157b556c33 draft

Uploaded
author iuc
date Thu, 16 Jan 2014 13:31:17 -0500
parents
children d202e3d663bb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macs2_callpeak.xml	Thu Jan 16 13:31:17 2014 -0500
@@ -0,0 +1,253 @@
+<tool id="macs2_callpeak" name="Call peaks" version="2.0.10.0">
+    <description>from alignment results</description>
+    <requirements>
+        <requirement type="python-module">macs2</requirement>
+        <requirement type="python-module">numpy</requirement>
+        <requirement type="package" version="2.0.10.2">macs2</requirement>
+        <requirement type="package" version="1.7.1">numpy</requirement>
+        <!-- TODO: awk (used by the command for the interval conversion) and R (needed for the planned model plot) are not declared here -->
+    </requirements>
+    <command>
+        ## Cheetah template that builds the macs2 callpeak command line. macs2's stderr is
+        ## captured in $temp_stderr so the html report can use it and it can be replayed at the end.
+        #set $temp_stderr = 'macs2_stderr'
+        macs2 callpeak
+
+            ## --name is the prefix of every file macs2 writes (MACS2_peaks.xls, MACS2_summits.bed, ...)
+            --name "MACS2"
+            -t #echo ' '.join( map(str, $input_treatment_file) )#
+
+            #if ' '.join( map(str, $input_control_file) ) != 'None':
+                -c #echo ' '.join( map(str, $input_control_file) )#
+            #end if
+
+        #for $ifile in $input_treatment_file:
+            --format='$ifile.ext.upper()'
+        #end for
+
+        #if $effective_genome_size_options.effective_genome_size_options_selector == 'user_defined':
+            --gsize $effective_genome_size_options.gsize
+        #else:
+            --gsize $effective_genome_size_options.effective_genome_size_options_selector
+        #end if
+
+        --bw='$bw'
+
+        ##advanced options
+        #if str( $advanced_options.advanced_options_selector ) == 'on':
+            --mfold $advanced_options.mfoldlo $advanced_options.mfoldhi
+            $advanced_options.nolambda
+
+            #if str($advanced_options.broad_options.broad_options_selector) == '--broad':
+                --broad
+                --broad-cutoff='$advanced_options.broad_options.broad_cutoff'
+            #end if
+
+        #else:
+            --mfold 10 30
+        #end if
+
+        $bdg
+
+        ##pq value select options
+        #if str( $pq_options.pq_options_selector ) == 'qvalue':
+            --qvalue $pq_options.qvalue
+        #else:
+            --pvalue $pq_options.pvalue
+        #end if
+
+        ##model options
+        #if str( $nomodel_type.nomodel_type_selector ) == 'nomodel':
+            --nomodel --shiftsize='$nomodel_type.shiftsize'
+        #end if
+
+        2> $temp_stderr;
+        #######################################################
+        ## move files generated by callpeak command
+
+        ## TODO
+        ## run R to create pdf from model script
+        ##if os.path.exists( os.path.join( tmp_dir, "MACS2_PREFIX_model.r" ) ):
+        ##    cmdline = 'Rscript "MACS2_PREFIX_model.r" > "MACS2_PREFIX_model.r.log"' )
+        ##    proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
+        ##    proc.wait()
+
+
+        ## move bed out to proper output file
+        ##set $file = os.path.join( $tmp_dir, "%s_peaks.bed" % $experiment_name )
+        ##if os.path.exists( $file ):
+        ##    mv $file $output_bed_file
+        ##end if
+
+        ## OICR peak_xls file
+        ##set $file = os.path.join( $tmp_dir, "%s_peaks.xls" % $experiment_name )
+        ##if os.path.exists( $file ):
+        ##    mv $file output_peaks $output_peaks_file
+        ##end if
+
+        ### peaks.encodepeaks (narrowpeaks) file
+        ##set $file = os.path.join ( $tmp_dir, "%s_peaks.encodePeak" % $experiment_name )
+        ##if os.path.exists( $file ):
+        ##    mv $file $output_narrowpeaks_file
+
+
+        ## Convert the macs2 peaks table into an interval dataset. The file test has to run in the
+        ## shell at job time, because the file does not exist yet while this template is filled in.
+        ## awk keeps tabs, passes comment rows through, comments out the header, makes starts 0-based.
+        #if 'peaks_interval' in str($outputs).split(','):
+            if [ -f MACS2_peaks.xls ];
+            then
+                echo '#peaks file' > $output_xls_to_interval_peaks_file;
+                awk 'BEGIN { FS = OFS = "\t" } NF == 0 { next } /^#/ { print; next } \$2 ~ /^[0-9]+\$/ { \$2 -= 1; print; next } { print "#" \$0 }' MACS2_peaks.xls >> $output_xls_to_interval_peaks_file;
+            fi;
+        #end if
+
+        #if 'html' in str($outputs).split(','):
+            ## if output files exist, copy them to the extra_files_path and create a html result page linking to them.
+            ## Copy instead of move, so the files stay in the working directory for the other outputs.
+            count=`ls -1 MACS2* 2>/dev/null | wc -l`;
+            if [ \$count != 0 ];
+            then
+                mkdir -p $output_extra_files.extra_files_path;
+                cp MACS2* $output_extra_files.extra_files_path;
+                python $__tool_directory__/dir2html.py $output_extra_files.extra_files_path $temp_stderr > $output_extra_files;
+            fi;
+        #end if
+
+        ## print the captured macs2 log on stdout (cat's own stderr is folded into stdout as well)
+        cat $temp_stderr 2>&#38;1
+
+    </command>
+    <inputs>
+        <param name="input_control_file" type="data" format="bam,sam,bed" multiple="True" optional="True" label="ChIP-Seq Control File" />
+        <param name="input_treatment_file" type="data" format="bam,sam,bed" multiple="True" label="ChIP-Seq Treatment File" />
+        <!-- Genome size passed as gsize: a preset value or a user-supplied integer. NOTE(review): the MACS README lists smaller effective sizes (hs 2.7e9, mm 1.87e9, dm 1.2e8, ce 9e7); confirm these presets are intended -->
+        <conditional name="effective_genome_size_options">
+            <param name="effective_genome_size_options_selector" type="select" label="Effective genome size" help="--gsize">
+                <option value="3300000000">Human (3.300.000.000)</option>
+                <option value="3000000000">Mouse (3.000.000.000)</option>
+                <option value="190000000">Fly (190.000.000)</option>
+                <option value="130000000">Worm (130.000.000)</option>
+                <option value="user_defined">User defined</option>
+            </param>
+            <when value="user_defined">
+                <param name="gsize" type="integer" size="12" label="Effective genome size" value=""/>
+            </when>
+        </conditional>
+        <!-- Passed straight to the command line: bw as a value, bdg as the -B flag when checked -->
+        <param name="bw" type="integer" label="Band width" value="300" help="(--bw)"/>
+        <param name="bdg" type="boolean" truevalue="-B" falsevalue="" checked="False" label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph" help="Files are located in the html report."/>
+        <!-- Peak cutoff: the command emits either the q-value or the p-value option, depending on this selector -->
+        <conditional name="pq_options">
+            <param name="pq_options_selector" type="select" label="Peak detection based on" help="default uses q-value">
+                <option value="qvalue">q-value</option>
+                <option value="pvalue">p-value</option>
+            </param>
+            <when value="pvalue">
+                <param name="pvalue" type="float" label="p-value cutoff for peak detection" value="1e-2" help="default: 1e-2 (--pvalue)"/>
+            </when>
+            <when value="qvalue">
+                <param name="qvalue" type="float" label="q-value cutoff for peak detection" value="5e-2" help="default: 5e-2 (--qvalue)"/>
+            </when> 
+        </conditional>
+        <!-- Shifting model: choosing "nomodel" adds the nomodel flag plus the shift size entered here -->
+        <conditional name="nomodel_type">
+            <param name="nomodel_type_selector" type="select" label="Build Model">
+                <option value="nomodel">Do not build the shifting model (--nomodel)</option>
+                <option value="create_model" selected="true">Build the shifting model</option>
+            </param>
+            <when value="nomodel">
+                <param name="shiftsize" type="integer" label="Arbitrary shift size in bp" value="100" help="(--shiftsize)"/>
+            </when>
+        </conditional>
+        <!-- Which datasets to create; each value is tested by a filter in the outputs section and, for html and peaks_interval, by the command -->
+        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Outputs">
+            <option value="peaks_bed" selected="True">Peaks as bed file</option>
+            <option value="html">Summary page (html)</option>
+            <option value="narrow">narrow Peaks</option>
+            <option value="broad">broad Peaks</option>
+            <option value="gapped">gapped Peaks</option>
+            <option value="summits" selected="true">summits</option>
+            <option value="peaks_interval">Peaks as interval file</option>
+            <!-- Intended to reject a submission with no output selected (see the message text) -->
+            <validator type="no_options" message="Please select at least one output file." />
+        </param>
+        <!-- Advanced options; while hidden ("off") the command falls back to an mfold range of 10 30 and emits none of the flags below -->
+        <conditional name="advanced_options">
+            <param name="advanced_options_selector" type="select" label="Advanced options">
+                <option value="off">Hide advanced options</option>
+                <option value="on">Display advanced options</option>
+            </param>
+            <when value="on">
+                <param name="mfoldlo" type="integer" label="Fold-enrichment lower limit" value="10" help="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (--mfold)"/>
+                <param name="mfoldhi" type="integer" label="Fold-enrichment upper-limit" value="30" help="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (--mfold)"/>
+                <param name="nolambda" label="Use fixed background lambda as local lambda for every peak region" type="boolean" truevalue="--nolambda" falsevalue="" checked="False" help="up to 9X more time consuming (--nolambda)"/>
+                <conditional name="broad_options">
+                    <param name="broad_options_selector" type="select" label="Composite broad regions" help="by putting nearby highly enriched regions into a broad region with loose cutoff (--broad)">
+                        <option value="">No broad regions</option>
+                        <option value="--broad">broad regions</option>
+                    </param>
+                    <when value="--broad">
+                        <param name="broad_cutoff" type="float" label="Cutoff for broad region" value="0.1" help="value is either p-value or q-value as specified above (--broad-cutoff)"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="off" />
+        </conditional>
+
+    </inputs>
+    <outputs>
+        <!-- callpeak outputs: macs2 names its files after the run name, which the command sets to "MACS2", so every from_work_dir starts with MACS2_ -->
+        <data name="output_bed" format="tabular" from_work_dir="MACS2_peaks.xls" label="${tool.name} on ${on_string} (peaks: xls)">
+            <filter>'peaks_bed' in outputs</filter>
+        </data>
+        <data name="output_narrowpeaks" format="tabular" from_work_dir="MACS2_peaks.narrowPeak" label="${tool.name} on ${on_string} (narrow Peaks)">
+            <filter>'narrow' in outputs</filter>
+        </data>
+        <data name="output_broadpeaks" format="tabular" from_work_dir="MACS2_peaks.broadPeak" label="${tool.name} on ${on_string} (broad Peaks)">
+            <filter>'broad' in outputs</filter>
+        </data>
+        <data name="output_gappedpeaks" format="tabular" from_work_dir="MACS2_peaks.gappedPeak" label="${tool.name} on ${on_string} (gapped Peaks)">
+            <filter>'gapped' in outputs</filter>
+        </data>
+        <data name="output_summits" format="bed" from_work_dir="MACS2_summits.bed" label="${tool.name} on ${on_string} (summits)">
+            <filter>'summits' in outputs</filter>
+        </data>
+        <data name="output_xls_to_interval_peaks_file" format="interval" label="${tool.name} on ${on_string} (peaks: interval)">
+            <filter>'peaks_interval' in outputs</filter>
+        </data>
+        <data name="output_extra_files" format="html" label="${tool.name} on ${on_string} (html report)">
+            <filter>'html' in outputs</filter>
+        </data>
+    </outputs>
+  <tests>
+    <!-- TODO: no functional tests have been written for macs2_callpeak yet -->
+  </tests>
+  <help>
+**What it does**
+
+With the improvement of sequencing techniques, chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-Seq)
+is becoming a popular way to study genome-wide protein-DNA interactions. To address the lack of powerful ChIP-Seq analysis methods, we present a novel algorithm, named Model-based Analysis of ChIP-Seq (MACS), for
+identifying transcription factor binding sites. MACS captures the influence of genome complexity to evaluate the significance of enriched ChIP regions, and MACS improves the spatial resolution of
+binding sites by combining the information of both sequencing tag position and orientation. MACS can be easily used for ChIP-Seq data alone, or with a control sample to increase specificity.
+
+View the original MACS2 documentation: https://github.com/taoliu/MACS/blob/master/README
+
+------
+
+**Usage**
+
+**Peak Calling**: Main MACS2 Function to Call peaks from alignment results.
+
+**Compare .bdg files**: Deduct noise by comparing two signal tracks in bedGraph.
+
+
+------
+
+**Citation**
+
+For the underlying tool, please cite Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137.
+
+Integration of MACS2 with Galaxy was performed by Ziru Zhou ( ziruzhou@gmail.com ). Please send your comments/questions to modENCODE DCC at help@modencode.org.
+  </help>
+</tool>