view macs2_callpeak.xml @ 1:d202e3d663bb draft

Uploaded
author iuc
date Thu, 16 Jan 2014 15:44:43 -0500
parents 9c157b556c33
children da0a1fcf7fe0
line wrap: on
line source

<tool id="macs2_callpeak" name="Call peaks" version="2.0.10.0">
    <!-- Shared macro definitions; imported first so the expansions below read top-down. -->
    <macros>
        <import>macs2_macros.xml</import>
    </macros>
    <description>from alignment results</description>
    <expand macro="requirements">
        <!-- R 3.0.1 is requested here on top of the shared requirements macro. -->
        <requirement type="package" version="3.0.1">R_3_0_1</requirement>
        <!-- NOTE(review): awk is called by the command section but has no requirement entry in this file; confirm it is provided by the macro or by the host system. -->
    </expand>
    <expand macro="version_command" />
    <!--
        Shell command line, written as a Cheetah template. In order:
          1. run "macs2 callpeak" with the output prefix MACS2 and capture its stderr in a scratch file;
          2. if requested, convert MACS2_peaks.xls into the interval output;
          3. if requested, copy every MACS2* result file into the extra files directory of the html output
             and render an index page with dir2html.py;
          4. replay the captured stderr on stdout.
        Notes inside the template use Cheetah "##" comments so that they never reach the shell.
    -->
    <command>
        #set $temp_stderr = 'macs2_stderr'
        macs2 callpeak

            --name "MACS2"
            -t #echo ' '.join( map(str, $input_treatment_file) )#

            #if ' '.join( map(str, $input_control_file) ) != 'None':
                -c #echo ' '.join( map(str, $input_control_file) )#
            #end if

        ## one --format flag is written per selected treatment dataset
        #for $ifile in $input_treatment_file:
            --format='$ifile.ext.upper()'
        #end for

        @effective_genome_size@

        --bw='$band_width'

        ##advanced options
        #if str( $advanced_options.advanced_options_selector ) == 'on':
            --mfold $advanced_options.mfoldlo $advanced_options.mfoldhi
            $advanced_options.nolambda

            ## broad_cutoff only exists in the '--broad' case of the conditional
            #if str($advanced_options.broad_options.broad_options_selector) == '--broad':
                --broad
                --broad-cutoff='$advanced_options.broad_options.broad_cutoff'
            #end if

        #else:
            --mfold 10 30
        #end if

        $bdg

        ##pq value select options
        #if str( $pq_options.pq_options_selector ) == 'qvalue':
            --qvalue $pq_options.qvalue
        #else:
            --pvalue $pq_options.pvalue
        #end if

        ##model options
        #if str( $nomodel_type.nomodel_type_selector ) == 'nomodel':
            --nomodel --shiftsize='$nomodel_type.shiftsize'
        #end if

        ## macs2 logs to stderr; keep it in a scratch file so it can be embedded in the html report
        2> $temp_stderr;
        #######################################################
        ## move files generated by callpeak command

        ## TODO
        ## run R to create pdf from model script
        ##if os.path.exists( os.path.join( tmp_dir, "MACS2_PREFIX_model.r" ) ):
        ##    cmdline = 'Rscript "MACS2_PREFIX_model.r" > "MACS2_PREFIX_model.r.log"' )
        ##    proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
        ##    proc.wait()


        ## move bed out to proper output file
        ##set $file = os.path.join( $tmp_dir, "%s_peaks.bed" % $experiment_name )
        ##if os.path.exists( $file ):
        ##    mv $file $output_bed_file
        ##end if

        ## OICR peak_xls file
        ##set $file = os.path.join( $tmp_dir, "%s_peaks.xls" % $experiment_name )
        ##if os.path.exists( $file ):
        ##    mv $file output_peaks $output_peaks_file
        ##end if

        ### peaks.encodepeaks (narrowpeaks) file
        ##set $file = os.path.join ( $tmp_dir, "%s_peaks.encodePeak" % $experiment_name )
        ##if os.path.exists( $file ):
        ##    mv $file $output_narrowpeaks_file


        ##parse xls files to interval files as needed
        #if 'peaks_interval' in str($outputs).split(','):
            ## The existence test has to run in the shell, after macs2 has finished: a template-time
            ## os.path.exists() is evaluated before the job has written any file.
            ## The file name follows from the --name "MACS2" prefix used above.
            if [ -f MACS2_peaks.xls ];
            then
                echo '#peaks file' > $output_xls_to_interval_peaks_file;
                awk '$2-=1' MACS2_peaks.xls >> $output_xls_to_interval_peaks_file;
            fi;
        #end if

        #if 'html' in str($outputs).split(','):
            ## if output files exist, copy them to the extra_files_path and create a html result page linking to them.
            ## They are copied rather than moved so that the outputs collected with from_work_dir still find their files.
            count=`ls -1 MACS2* 2>/dev/null | wc -l`;
            if [ \$count != 0 ];
            then
                mkdir -p $output_extra_files.extra_files_path;
                cp MACS2* $output_extra_files.extra_files_path;
                ## dir2html.py is resolved relative to the directory this tool XML lives in
                python "$__tool_directory__/dir2html.py" $output_extra_files.extra_files_path $temp_stderr > $output_extra_files;
            fi;
        #end if

        cat $temp_stderr 2>&amp;1
    </command>
    <inputs>
        <!-- Alignment datasets: one or more treatment files are required, control files are optional. -->
        <param name="input_control_file" type="data" format="bam,sam,bed" multiple="true" optional="true" label="ChIP-Seq Control File" />
        <param name="input_treatment_file" type="data" format="bam,sam,bed" multiple="true" label="ChIP-Seq Treatment File" />

        <expand macro="conditional_effective_genome_size" />

        <param name="band_width" type="integer" value="300" label="Band width for picking regions to compute fragment size"
            help="This value is only used while building the shifting model." />
        <!-- Inserted verbatim into the command line: "-B" when checked, nothing otherwise. -->
        <param name="bdg" type="boolean" truevalue="-B" falsevalue="" checked="false" label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph" help="Files are located in the html report."/>

        <!-- Exactly one of the two cutoffs is passed to macs2, depending on this selector. -->
        <conditional name="pq_options">
            <param name="pq_options_selector" type="select" label="Peak detection based on" help="default uses q-value">
                <option value="qvalue">q-value</option>
                <option value="pvalue">p-value</option>
            </param>
            <when value="pvalue">
                <param name="pvalue" type="float" label="p-value cutoff for peak detection" value="1e-2" help="default: 1e-2 (--pvalue)"/>
            </when>
            <when value="qvalue">
                <param name="qvalue" type="float" label="q-value cutoff for peak detection" value="5e-2" help="default: 5e-2 (--qvalue)"/>
            </when>
        </conditional>

        <conditional name="nomodel_type">
            <param name="nomodel_type_selector" type="select" label="Build Model">
                <option value="nomodel">Do not build the shifting model (--nomodel)</option>
                <option value="create_model" selected="true">Build the shifting model</option>
            </param>
            <when value="nomodel">
                <param name="shiftsize" type="integer" label="Arbitrary shift size in bp" value="100" help="(--shiftsize)"/>
            </when>
            <!-- Declared explicitly so that every select option has a matching case; it adds no parameters. -->
            <when value="create_model" />
        </conditional>

        <!-- Comma-separated list of requested results; the command section and the output filters both test membership in it. -->
        <param name="outputs" type="select" display="checkboxes" multiple="true" label="Outputs">
            <option value="peaks_bed" selected="true">Peaks as bed file</option>
            <option value="html">Summary page (html)</option>
            <option value="narrow">narrow Peaks</option>
            <option value="broad">broad Peaks</option>
            <option value="gapped">gapped Peaks</option>
            <option value="summits" selected="true">summits</option>
            <option value="peaks_interval">Peaks as interval file</option>

            <validator type="no_options" message="Please select at least one output file." />
        </param>

        <!-- When set to "off" the command falls back to the fixed fold-enrichment range 10 30. -->
        <conditional name="advanced_options">
            <param name="advanced_options_selector" type="select" label="Advanced options">
                <option value="off">Hide advanced options</option>
                <option value="on">Display advanced options</option>
            </param>
            <when value="on">
                <param name="mfoldlo" type="integer" label="Fold-enrichment lower limit" value="10" help="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (--mfold)"/>
                <param name="mfoldhi" type="integer" label="Fold-enrichment upper-limit" value="30" help="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (--mfold)"/>
                <param name="nolambda" label="Use fixed background lambda as local lambda for every peak region" type="boolean" truevalue="--nolambda" falsevalue="" checked="false" help="up to 9X more time consuming (--nolambda)"/>
                <conditional name="broad_options">
                    <param name="broad_options_selector" type="select" label="Composite broad regions" help="by putting nearby highly enriched regions into a broad region with loose cutoff (--broad)">
                        <option value="">No broad regions</option>
                        <option value="--broad">broad regions</option>
                    </param>
                    <!-- Empty case for "No broad regions", declared so that every select option has a matching case. -->
                    <when value="" />
                    <when value="--broad">
                        <param name="broad_cutoff" type="float" label="Cutoff for broad region" value="0.1" help="value is either p-value or q-value as specified above (--broad-cutoff)"/>
                    </when>
                </conditional>
            </when>
            <when value="off" />
        </conditional>

    </inputs>
    <outputs>
        <!--
            callpeak outputs. The command section runs macs2 with the output prefix "MACS2", so the result
            files in the job working directory are named MACS2_peaks.xls, MACS2_summits.bed and so on;
            from_work_dir has to use exactly those names for the files to be collected.
            Each dataset is only created when its key is ticked in the "outputs" select parameter.
        -->
        <data name="output_bed" format="tabular" from_work_dir="MACS2_peaks.xls" label="${tool.name} on ${on_string} (peaks: xls)">
            <filter>'peaks_bed' in outputs</filter>
        </data>
        <data name="output_narrowpeaks" format="tabular" from_work_dir="MACS2_peaks.narrowPeak" label="${tool.name} on ${on_string} (narrow Peaks)">
            <filter>'narrow' in outputs</filter>
        </data>
        <data name="output_broadpeaks" format="tabular" from_work_dir="MACS2_peaks.broadPeak" label="${tool.name} on ${on_string} (broad Peaks)">
            <filter>'broad' in outputs</filter>
        </data>
        <data name="output_gappedpeaks" format="tabular" from_work_dir="MACS2_peaks.gappedPeak" label="${tool.name} on ${on_string} (gapped Peaks)">
            <filter>'gapped' in outputs</filter>
        </data>
        <data name="output_summits" format="bed" from_work_dir="MACS2_summits.bed" label="${tool.name} on ${on_string} (summits)">
            <filter>'summits' in outputs</filter>
        </data>
        <!-- The two datasets below are written directly by the command section, not collected from the working directory. -->
        <data name="output_xls_to_interval_peaks_file" format="interval" label="${tool.name} on ${on_string} (peaks: interval)">
            <filter>'peaks_interval' in outputs</filter>
        </data>
        <data name="output_extra_files" format="html" label="${tool.name} on ${on_string} (html report)">
            <filter>'html' in outputs</filter>
        </data>
    </outputs>
  <tests>
    <!-- Placeholder: no functional tests are defined for macs2 yet. -->
  </tests>
  <help>
**What it does**

As sequencing techniques improve, chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-Seq)
is becoming a popular way to study genome-wide protein-DNA interactions. To address the lack of powerful ChIP-Seq analysis methods, we present a novel algorithm, named Model-based Analysis of ChIP-Seq (MACS), for
identifying transcription factor binding sites. MACS captures the influence of genome complexity to evaluate the significance of enriched ChIP regions, and it improves the spatial resolution of
binding sites by combining the information of both sequencing tag position and orientation. MACS can easily be used for ChIP-Seq data alone, or together with a control sample to increase specificity.

View the original MACS2 documentation: https://github.com/taoliu/MACS/blob/master/README

------

**Usage**

**Peak Calling**: Main MACS2 function to call peaks from alignment results.

**Compare .bdg files**: Subtract noise by comparing two signal tracks in bedGraph format.


------

**Citation**

For the underlying tool, please cite Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137.

Integration of MACS2 with Galaxy performed by Ziru Zhou ( ziruzhou@gmail.com ). Please send your comments/questions to modENCODE DCC at help@modencode.org.
  </help>
</tool>