view segemehl.xml @ 0:94926c35b6f3 draft

initial upload
author rnateam
date Thu, 17 Oct 2013 04:07:29 -0400
parents
children 468b59eae694
line wrap: on
line source

<tool id="segemehl" name="segemehl" version="0.1">
    <!-- One-line summary of the tool. -->
    <description>suffix arrays based short read aligner</description>
    <!-- External dependency: the segemehl package, version 0.1.6. -->
    <requirements>
        <requirement type="package" version="0.1.6">segemehl</requirement>
    </requirements>
    <command>
        ## Command template (Cheetah) that assembles the segemehl call.
        ## Dollar signs meant for the shell are backslash-escaped so that
        ## Cheetah passes them through instead of resolving them itself.

        ## Step 1: decide which index to use. Steps are chained so that a
        ## failed index build stops the job before mapping starts.
        #if str( $refGenomeSource.genomeSource ) == "history":
            ## reference taken from the history: build an index for it first
            ## NOTE(review): the temporary index file is not removed afterwards
            temp_index=\$(mktemp) &amp;&amp;
            segemehl.x -x "\$temp_index" -d "${refGenomeSource.own_reference_genome}" &amp;&amp;
        #else:
            ## built-in reference: use the index path from the data table
            temp_index="${refGenomeSource.index.fields.index_path}" &amp;&amp;
        #end if

        ## Step 2: execute segemehl
        segemehl.x

        ## number of threads: slots assigned to the job, 4 when unset
        -t "\${GALAXY_SLOTS:-4}"

        ## db file path: must match the source the index belongs to
        #if str( $refGenomeSource.genomeSource ) == "history":
            -d "${refGenomeSource.own_reference_genome}"
        #else:
            -d "${refGenomeSource.index.fields.db_path}"
        #end if

        -i "\$temp_index"

        ## check for single/pair-end
        #if str( $library.type ) == "single":
            ## collect every read file of the repeat
            #set $query_list = list()
            #for $fastq in $library.reads:
                #silent $query_list.append('%s' %($fastq.input_query))
            #end for
            ## NOTE(review): the files are passed as one space-separated
            ## argument - confirm that segemehl accepts several query files
            -q "#echo ' '.join( $query_list )#"
        #else:
            ## collect first and second mates in matching order
            #set $mate1 = list()
            #set $mate2 = list()
            #for $mate_pair in $library.mate_list:
                #silent $mate1.append( str($mate_pair.first_strand_query) )
                #silent $mate2.append( str($mate_pair.second_strand_query) )
            #end for

            -q #echo ','.join($mate1)
            -p #echo ','.join($mate2)

            -I $library.maxinsertsize
        #end if
        -m $minsize
        -A $accuracy
        -H $hitstrategy
        ## adapters are optional; only pass them when the field is not blank
        #if str( $prime5 ).strip():
            -P "$prime5"
        #end if
        #if str( $prime3 ).strip():
            -Q "$prime3"
        #end if
        ## boolean switches expand to their flag or to nothing
        $polyA
        $autoclip
        $hardclip
        $order
        -s
        -o "$segemehl_out"
    </command>
    <stdio>
        <!-- Mark the job as failed when the text "Exit forced" is matched;
             source="both" is the scope the rule is applied to. -->
        <regex match="Exit forced" source="both" level="fatal" description="Execution halted." />
    </stdio>
    <inputs>

        <!-- Reference genome: either a built-in index from the
             segemehl_indexes data table or a FASTA file from the history. -->
        <conditional name="refGenomeSource">
          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
            <option value="indexed">Use a built-in index</option>
            <option value="history">Use one from the history</option>
          </param>
          <when value="indexed">
            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
              <options from_data_table="segemehl_indexes">
                <column name="value" index="0"/>
                <column name="dbkey" index="1"/>
                <column name="name" index="2"/>
                <column name="db_path" index="3"/>
                <column name="index_path" index="4"/>
                <filter type="sort_by" column="2"/>
                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
              </options>
            </param>
          </when>  <!-- built-in -->
          <when value="history">
            <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
          </when>  <!-- history -->
        </conditional>  <!-- refGenomeSource -->


        <!-- Read library: single-end files or first/second strand pairs. -->
        <conditional name="library">
            <param name="type" type="select" label="Is this library paired-end?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <!-- at least one read file is required, as for the paired case -->
                <repeat name="reads" title="FASTQ/FASTA files" min="1">
                    <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
                </repeat>
            </when>
            <when value="paired">
                <repeat name="mate_list" title="Paired End Pairs" min="1">
                    <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
                    <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
                </repeat>
                <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end) (default:5000)" />
            </when>
        </conditional>


        <!-- Alignment settings passed to segemehl as -m, -A and -H. -->
        <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries (default:12)">
            <validator type="in_range" min="1"/>
        </param>
        <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment (default:85)" >
            <validator type="in_range" min="1" max="100"/>
        </param>
        <!-- only the two values named in the label are accepted -->
        <param name="hitstrategy" type="integer" value="1" size="5" label="report only best scoring hits (=1) or all (=0) (default:1)">
            <validator type="in_range" min="0" max="1"/>
        </param>
        <!-- Optional adapter sequences; left blank they are not passed on. -->
        <param name="prime5" type="text" size="80" label="add 5' adapter (default:none)" />
        <param name="prime3" type="text" size="80" label="add 3' adapter (default:none)" />
        <!-- Switches: each one contributes its flag when checked, nothing otherwise. -->
        <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail"/>
        <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter"/>
        <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping"/>
        <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position (might take a while!)"/>
    </inputs>

    <outputs>
        <!-- Alignment file in SAM format; the command hands it to segemehl via -o. -->
        <data name="segemehl_out" format="sam" label="Read alignments on ${on_string}"/>
    </outputs>
    <help>

.. class:: infomark

**What it does** 

Segemehl is a short read mapper that supports gapped alignments.

segemehl is software for mapping short sequencer reads to reference genomes. 
Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. 
Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly. 
segemehl implements a matching strategy based on enhanced suffix arrays (ESA). It now supports the SAM format, 
reads gzipped queries to save both disk and memory space, and allows bisulfite sequencing mapping and split read mapping.

    </help>
</tool>