Mercurial > repos > rnateam > segemehl

<tool id="segemehl" name="segemehl" version="0.2.0.4">
  <description>short read mapping with gaps</description>
  <requirements>
    <!-- package expected to supply the segemehl.x executable called in the command section -->
    <requirement version="0.2.0" type="package">segemehl</requirement>
  </requirements>
  <stdio>
    <!-- flag the job as failed when "Exit forced" is matched on either output stream -->
    <regex match="Exit forced" source="both" level="fatal" description="Execution halted." />
  </stdio>
  <command>
    <![CDATA[
## UNIMPLEMENTED
## [SEEDEXTENSIONPARAMS]
## -e, --extensionscore <n>        score of a match during extension (default:2)
## -n, --extensionpenalty <n>      penalty for a mismatch during extension (default:4)
## -X, --dropoff <n>               dropoff parameter for extension (default:8)
##  --showalign

        ## Reference handling: a genome taken from the history has no index yet,
        ## so one is built in a scratch directory before the mapping run.
        #if str( $refGenomeSource.genomeSource ) == "history":
            #set $reference = $refGenomeSource.own_reference_genome
            #set $temp_index = './temp_index/temp.idx'
            mkdir ./temp_index/ &&
            segemehl.x -x '$temp_index' -d '$reference' &&
        #else:
            #set $reference = $refGenomeSource.index.fields.db_path
            #set $temp_index = $refGenomeSource.index.fields.index_path
        #end if

        ## Paired-end input: collect the (shell-quoted) mate files of every entry
        ## of the mate_list repeat. #silent keeps the None returned by append()
        ## out of the generated command line.
        #if str( $library.type ) != "single":
            #set $first_mates = []
            #set $second_mates = []
            #for $pair in $library.mate_list:
                #silent $first_mates.append( "'%s'" % $pair.first_strand_query )
                #silent $second_mates.append( "'%s'" % $pair.second_strand_query )
            #end for
            #if len( $first_mates ) == 1:
                #set $query_mate1 = $first_mates[0]
                #set $query_mate2 = $second_mates[0]
            #else:
                ## NOTE(review): -q/-p are assumed to take a single file each, so
                ## several pairs are concatenated in pair order; all pairs must
                ## share one format (all FASTQ or all FASTA) - confirm.
                #set $all_first = ' '.join( $first_mates )
                #set $all_second = ' '.join( $second_mates )
                cat $all_first > ./mate1_reads &&
                cat $all_second > ./mate2_reads &&
                #set $query_mate1 = './mate1_reads'
                #set $query_mate2 = './mate2_reads'
            #end if
        #end if

        ## execute segemehl
            segemehl.x

        ## number of threads
            -t "\${GALAXY_SLOTS:-12}"

        -d '$reference'
        -i '$temp_index'

        ## reads: one query file for single-end, one file per mate for paired-end
        #if str( $library.type ) == "single":
            -q '${library.input_query}'
        #else
            -q $query_mate1
            -p $query_mate2
            -I ${library.maxinsertsize}
        #end if

        ## bisulfite mode is only passed when a protocol other than "0" is selected
        #if str( $bisulfite ) != "0":
            -F $bisulfite
        #end if
        -m $minsize
        -A $accuracy
        -H $hitstrategy

        ## adapters are optional free-text fields; skip them when left blank
        #if str( $prime5 ).strip():
            -P "$prime5"
        #end if
        #if str( $prime3 ).strip():
            -Q "$prime3"
        #end if
        -R $clipacc

        ## boolean switches expand to their flag or to an empty string
        $polyA
        $autoclip
        $hardclip
        $order

        ## an empty or zero maxout leaves the option off the command line
        #if $maxout:
            --maxout $maxout
        #end if

        #if str( $splitreads.splits ) == "splits":
            --splits
            --minsplicecover $splitreads.minsplicecover
            --minfragscore $splitreads.minfragscore
            --minfraglen $splitreads.minfraglen
            --splicescorescale $splitreads.splicescorescale
            --maxsplitevalue $splitreads.maxsplitevalue
        #end if
        -M $maxinterval
        -E $evalue
        -D $differences
        -J $jump
        -s
        -o '$segemehl_out'

        ## unmatched reads are written only on request
        #if str( $nomatchfilename ) == 'yes':
           -u '$segemehl_outunmatched'
        #end if
    ]]>
  </command>
  <inputs>
    <!-- reference genome: either an entry of the segemehl_indexes data table or a FASTA dataset -->
    <conditional name="refGenomeSource">
      <param name="genomeSource"
             type="select"
             label="Will you select a reference genome from your history or use a built-in index?"
             help="Built-ins were indexed using default options">
        <option value="indexed">Use a built-in index</option>
        <option value="history">Use one from the history</option>
      </param>
      <!-- built-in index -->
      <when value="indexed">
        <param name="index"
               type="select"
               label="Select a reference genome"
               help="If your genome of interest is not listed, contact your Galaxy admin">
          <options from_data_table="segemehl_indexes">
            <column name="value" index="0"/>
            <column name="dbkey" index="1"/>
            <column name="name" index="2"/>
            <column name="db_path" index="3"/>
            <column name="index_path" index="4"/>
            <filter type="sort_by" column="2"/>
            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
          </options>
        </param>
      </when>
      <!-- reference from the history -->
      <when value="history">
        <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" />
      </when>
    </conditional>

    <!-- read input: one dataset for single-end, a repeat of mate pairs for paired-end -->
    <conditional name="library">
      <param name="type" type="select" label="Is this library paired-end?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
      <when value="single">
        <!-- TODO: an earlier draft declared this parameter with multiple="True" -->
        <param name="input_query"
               type="data"
               format="fastqsanger,fastqillumina,fastq,fasta"
               label="Reads in FASTQ/FASTA files" />
      </when>
      <when value="paired">
        <!-- TODO: support paired collections -->
        <repeat name="mate_list" title="Paired End Pairs" min="1">
          <param name="first_strand_query"
                 type="data"
                 format="fastqsanger,fastqillumina,fastq,fasta"
                 label="Reads from first strand" />
          <param name="second_strand_query"
                 type="data"
                 format="fastqsanger,fastqillumina,fastq,fasta"
                 label="Reads from second strand" />
        </repeat>
        <param argument="--maxinsertsize"
               type="integer"
               value="5000"
               label="Maximum size of the inserts (paired end)"
               help="default: 5000" />
      </when>
    </conditional>

    <!-- split/spliced read detection; the extra options are only shown (and passed) for "splits" -->
    <conditional name="splitreads">
      <!-- selector, not a command-line flag spelling, hence name= rather than argument= -->
      <param name="splits" type="select" label="Detect split/spliced reads">
        <option value="nosplit">No splits</option>
        <option value="splits">Split reads</option>
      </param>
      <when value="splits">
        <param argument="--minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" />
        <param argument="--minfragscore" type="integer" value="18" label="Min score of a spliced fragment" />
        <param argument="--minfraglen" type="integer" value="20" label="Min length of a spliced fragment" />
        <param argument="--splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score"
               help="Report only if this value x score is larger than next best spliced alignment" />
        <param argument="--maxsplitevalue" type="float" min="0" value="50.000000" label="max evalue for splits"/>
      </when>
      <when value="nosplit">
      </when>
    </conditional>
    <!-- bisulfite protocol; "0" means the option is left off the command line -->
    <param argument="--bisulfite" type="select" label="Bisulfite mapping">
      <option value="0">No bisulfite mapping</option>
      <option value="1">bisulfite mapping with methylC-seq/Lister et al.</option>
      <option value="2">bs-seq/Cokus et al. protocol</option>
    </param>
    <param argument="--minsize" type="integer" value="12" min="1" label="Minimum size of queries" />
    <!-- the command template skips this option when the value is empty or 0 -->
    <param argument="--maxout" type="integer" min="0" value="0" optional="True"
           label="Maximum number of alignments that will be reported"/>
    <param argument="--accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" />
    <param argument="--hitstrategy" type="select" label="Hits to report?">
      <option value="1">report only best scoring hits</option>
      <option value="0">report all scoring hits</option>
    </param>
    <!-- adapter sequences; blank values are not passed to segemehl -->
    <param argument="--prime5" type="text" label="add 5' adapter" help="default: none" />
    <param argument="--prime3" type="text" label="add 3' adapter" help="default: none"/>
    <param argument="--clipacc" value="70" type="integer" label="clipping accuracy" />
    <!-- boolean switches: checked inserts the flag, unchecked inserts nothing -->
    <param argument="--polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" />
    <param argument="--autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter"/>
    <param argument="--hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping"/>
    <param argument="--order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" />
    <!-- seed search settings -->
    <param argument="--differences" type="integer" min="0" value="1" label="search seeds initially with n differences"/>
    <param argument="--jump" type="integer" value="0" min="0" label="search seeds with jump size" help="0 = automatic (default: 0)"/>
    <param argument="--evalue" type="float" min="0" value="5.000000" label="max evalue"/>
    <param argument="--maxinterval" type="integer" min="1" value="100" label="maximum width of a suffix array interval, i.e. a query seed will be omitted if it matches more than n times"/>
    <!-- "yes" makes the command template add -u with the unmatched-reads output -->
    <param argument="--nomatchfilename" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Output unmatched reads"/>
  </inputs>
  <outputs>
    <!-- alignments written via -o, declared as SAM -->
    <data format="sam" name="segemehl_out" label="${tool.name} on ${on_string}"/>
    <!-- Unmatched reads written via -u. The filter must name an existing input:
         the boolean "nomatchfilename" is the switch that makes the command emit -u. -->
    <data format="fastq" name="segemehl_outunmatched" label="${tool.name} unaligned reads ${on_string}">
      <filter>nomatchfilename</filter>
    </data>
  </outputs>
  <tests>
    <!-- single-end mapping against a history reference, no split-read detection -->
    <test>
      <param name="genomeSource" value="history" />
      <param name="own_reference_genome" value="chr1.fa" />
      <!-- "library" is the conditional; its selector parameter is "type" -->
      <param name="library|type" value="single" />
      <param name="input_query" value="test.fastq" />
      <param name="splits" value="nosplit" />
      <output name="segemehl_out" file="testmap.sam" lines_diff="2" />
    </test>
    <!-- split-read mapping that also requests the unmatched-reads output -->
    <test>
      <param name="genomeSource" value="history" />
      <param name="own_reference_genome" value="chr1.fa" />
      <param name="library|type" value="single" />
      <param name="input_query" value="test.fastq" />
      <param name="splits" value="splits" />
      <param name="minsplicecover" value="40" />
      <param name="nomatchfilename" value="yes" />
      <output name="segemehl_out" file="testmap2.sam" lines_diff="2" />
      <output name="segemehl_outunmatched" file="testmap2.fastq" />
    </test>
  </tests>
  <help>
    <![CDATA[

.. class:: infomark

**What it does**

Segemehl_ is a short read mapper with gaps.

Segemehl_ is a software to map short sequencer reads to reference genomes.
Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.

.. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/


    ]]>
  </help>
  <!-- reference to cite for this tool, given as a DOI -->
  <citations>
    <citation type="doi">10.1371/journal.pcbi.1000502</citation>
  </citations>
</tool>
author	bgruening
date	Thu, 23 Aug 2018 13:27:21 -0400
parents	90b8941e5115
children