changeset 0:328222bf2289 draft default tip

Uploaded
author soranzo
date Fri, 06 Sep 2013 11:34:34 -0400
parents
children
files COPYING blat_wrapper.py blat_wrapper.xml tool_dependencies.xml
diffstat 4 files changed, 396 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COPYING	Fri Sep 06 11:34:34 2013 -0400
@@ -0,0 +1,23 @@
+Copyright © 2013 CRS4 Srl. http://www.crs4.it/
+Created by:
+Andrea Pinna <andrea.pinna@crs4.it>
+Nicola Soranzo <nicola.soranzo@crs4.it>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_wrapper.py	Fri Sep 06 11:34:34 2013 -0400
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+"""
+BLAT
+version 0.2.1 (andrea.pinna@crs4.it)
+"""
+
+import optparse
+import subprocess
+import sys
+
+def check_password(password_file, input_password):
+    with open(password_file, 'r') as f:
+        lines = f.readlines()
+    
+    continue_reading = True
+    i = 0
+    is_correct = False
+    while continue_reading and i < len(lines):
+        current_password = lines[i].split('\t')[0]
+        current_user = lines[i].split('\t')[1][0:-1]
+        #print 'Current password: %s' % current_password
+        #print 'Current user: %s' % current_user
+        if current_password == input_password:
+            continue_reading = False
+            is_correct = True
+            #print 'User: %s' % current_user
+        i = i + 1
+        #print 'Current item: %s\n' % i
+    
+    if is_correct:
+        print 'User \'%s\' allowed to BLAT!\n' % current_user
+    else:
+        print 'Password does not match!\n'
+    return is_correct
+
+
+def __main__():
+    # load arguments
+    print 'Parsing input options...'
+    parser = optparse.OptionParser()
+    parser.add_option('--database', dest='database', help='')
+    parser.add_option('--query', dest='query', help='')
+    parser.add_option('--t', dest='databaseType', choices=['dna', 'prot', 'dnax'], help='')
+    parser.add_option('--q', dest='queryType', choices=['dna', 'rna', 'prot', 'dnax', 'rnax'], help='')
+    parser.add_option('--ooc', dest='ooc', help='')
+    parser.add_option('--tileSize', dest='tileSize', type='int', help='')
+    parser.add_option('--stepSize', dest='stepSize', type='int', help='')
+    parser.add_option('--oneOff', action='store_true', dest='oneOff', help='')
+    parser.add_option('--minMatch', dest='minMatch', type='int', help='')
+    parser.add_option('--minScore', dest='minScore', type='int', help='')
+    parser.add_option('--minIdentity', dest='minIdentity', type='int', help='')
+    parser.add_option('--maxGap', dest='maxGap', type='int', help='')
+    parser.add_option('--noHead', action='store_true', dest='noHead', help='')
+    parser.add_option('--makeOoc', dest='makeOoc', help='')
+    parser.add_option('--repMatch', dest='repMatch', type='int', help='')
+    parser.add_option('--maskType', dest='maskType', choices=['lower', 'upper', 'out', 'file.out'], help='')
+    parser.add_option('--qMaskType', dest='qMaskType', choices=['lower', 'upper', 'out', 'file.out'], help='')
+    parser.add_option('--repeatsType', dest='repeatsType', choices=['lower', 'upper', 'out', 'file.out'], help='')
+    parser.add_option('--minRepDivergence', dest='minRepDivergence', type='int', help='')
+    parser.add_option('--dots', dest='dots', type='int', help='')
+    parser.add_option('--trimT', action='store_true', dest='trimT', help='')
+    parser.add_option('--noTrimA', action='store_true', dest='noTrimA', help='')
+    parser.add_option('--trimHardA', action='store_true', dest='trimHardA', help='')
+    parser.add_option('--fastMap', action='store_true', dest='fastMap', help='')
+    parser.add_option('--outType', dest='outType', choices=['psl', 'pslx', 'axt', 'maf', 'sim4', 'wublast', 'blast', 'blast8', 'blast9'], help='')
+    parser.add_option('--fine', action='store_true', dest='fine', help='')
+    parser.add_option('--maxIntron', dest='maxIntron', type='int', help='')
+    parser.add_option('--extendThroughN', action='store_true', dest='extendThroughN', help='')
+    parser.add_option('--blat_password', dest='blat_password', help='')
+    parser.add_option('--output', dest='output', help='')
+    parser.add_option('--logfile', dest='logfile', help='')
+    (options, args) = parser.parse_args()
+    if len(args) > 0:
+        parser.error('Wrong number of arguments')
+    
+    # check BLAT password
+    is_correct = check_password('/u/galaxy/galaxy_passwords.tsv', options.blat_password)
+    if is_correct:
+        print 'Correct BLAT password!'
+    else:
+        sys.exit('Wrong BLAT password!!!\n')
+    
+    # build BLAT command to be executed
+    # database file
+    database = options.database
+    # query file
+    query = options.query
+    # database type
+    t = "-t=%s" % (options.databaseType) if options.databaseType else ''
+    # query type
+    q = "-q=%s" % (options.queryType) if options.queryType else ''
+    # tile size
+    tileSize = "-tileSize=%d" % (options.tileSize) if options.tileSize is not None and options.tileSize >= 1 else ''
+    # step size
+    stepSize = "-stepSize=%d" % (options.stepSize) if options.stepSize is not None and options.stepSize >= 1 else ''
+    # one off
+    oneOff = '-oneOff=1' if options.oneOff else ''
+    # min match
+    minMatch = "-minMatch=%d" % (options.minMatch) if options.minMatch is not None and options.minMatch >= 0 else ''
+    # min score
+    minScore = "-minScore=%d" % (options.minScore) if options.minScore is not None and options.minScore >= 1 else ''
+    # min identity
+    minIdentity = "-minIdentity=%d" % (options.minIdentity) if options.minIdentity is not None and options.minIdentity >= 1 else ''
+    # max gap
+    maxGap = "-maxGap=%d" % (options.maxGap) if options.maxGap is not None and options.maxGap >= 1 else ''
+    # no head
+    noHead = '-noHead' if options.noHead else ''
+    # rep match
+    repMatch = "-repMatch=%d" % (options.repMatch) if options.repMatch is not None and options.repMatch >= 1 else ''
+    # mask type
+    maskType = "-mask=%s" % (options.maskType) if options.maskType else ''
+    # qmask type
+    qMaskType = "-qMask=%s" % (options.qMaskType) if options.qMaskType else ''
+    # repeats type
+    repeatsType = "-repeats=%s" % (options.repeatsType) if options.repeatsType else ''
+    # min rep divergence
+    minRepDivergence = "-minRepDivergence=%d" % (options.minRepDivergence) if options.minRepDivergence is not None and options.minRepDivergence >= 1 else ''
+    # dots
+    dots = "-dots=%d" % (options.dots) if options.dots is not None and options.dots >= 1 else ''
+    # trim t
+    trimT = '-trimT' if options.trimT else ''
+    # no trim a
+    noTrimA = '-noTrimA' if options.noTrimA else ''
+    # trim hard a
+    trimHardA = '-trimHardA' if options.trimHardA else ''
+    # fast map
+    fastMap = '-fastMap' if options.fastMap else ''
+    # output type
+    outType = "-out=%s" % (options.outType) if options.outType else ''
+    # fine
+    fine = '-fine' if options.fine else ''
+    # max intron
+    maxIntron = "-maxIntron=%d" % (options.maxIntron) if options.maxIntron is not None and options.maxIntron >= 1 else ''
+    # extend through n
+    extendThroughN = '-extendThroughN' if options.extendThroughN else ''
+    # output file
+    output = options.output
+    logfile = options.logfile
+    
+    # Build BLAT command
+    cmd = ' '.join(['blat', database, query, t, q, tileSize, stepSize, oneOff, minMatch, minScore, minIdentity, maxGap, noHead, repMatch, maskType, qMaskType, repeatsType, minRepDivergence, dots, trimT, noTrimA, trimHardA, fastMap, outType, fine, maxIntron, extendThroughN, output])
+    print '\nBLAT command to be executed:\n ' + cmd
+    
+    # Execution of BLAT
+    print 'Executing BLAT...'
+    log = open(logfile, 'w') if logfile else sys.stdout
+    try:
+        subprocess.check_call(cmd, stdout=log, shell=True)
+    finally:
+        if log != sys.stdout:
+            log.close()
+    print 'BLAT executed!'
+
+
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_wrapper.xml	Fri Sep 06 11:34:34 2013 -0400
@@ -0,0 +1,211 @@
+<tool id="blat_wrapper" name="BLAT" version="0.2">
+  <description></description>
+  <requirements>
+    <requirement type="package" version="3.5">blat</requirement>
+  </requirements>
+  <command interpreter="python">
+    blat_wrapper.py --database $database --query $query --t=$databaseType --q=$queryType
+    #if str($tileSize)
+      --tileSize=$tileSize
+    #end if
+    #if str($stepSize)
+      --stepSize=$stepSize
+    #end if
+    #if $oneOff
+      --oneOff
+    #end if
+    #if str($minMatch)
+      --minMatch=$minMatch
+    #end if
+    #if str($minScore)
+      --minScore=$minScore
+    #end if
+    #if str($minIdentity)
+      --minIdentity=$minIdentity
+    #end if
+    #if str($maxGap)
+      --maxGap=$maxGap
+    #end if
+    #if $noHead
+      --noHead 
+    #end if
+    #if str($repMatch)
+      --repMatch=$repMatch
+    #end if
+    #if $maskType
+      --maskType=$maskType
+    #end if
+    #if $qMaskType
+      --qMaskType=$qMaskType
+    #end if
+    #if $repeatsType
+      --repeatsType=$repeatsType
+    #end if
+    #if str($minRepDivergence)
+      --minRepDivergence=$minRepDivergence
+    #end if
+    #if str($dots)
+      --dots=$dots
+    #end if
+    #if $trimT
+      --trimT
+    #end if
+    #if $noTrimA
+      --noTrimA
+    #end if
+    #if $trimHardA
+      --trimHardA
+    #end if
+    #if $fastMap
+      --fastMap
+    #end if
+    #if $outType
+      --outType=$outType
+    #end if
+    #if $fine
+      --fine
+    #end if
+    #if str($maxIntron)
+      --maxIntron=$maxIntron
+    #end if
+    #if $extendThroughN
+      --extendThroughN
+    #end if
+    --blat_password "$blat_password"
+    --logfile $logfile --output $output
+  </command>
+
+  <inputs>
+    <param name="database" type="data" format="fasta,twobit" label="Database" help="FASTA or 2bit format" />
+    <param name="query" type="data" format="fasta,twobit" label="Query" help="FASTA or 2bit format" />
+
+    <param name="databaseType" type="select" label="Select the database type (-t)">
+        <option value="dna" selected="true">DNA sequence</option>
+        <option value="prot">Protein sequence</option>
+        <option value="dnax">DNA sequence translated in six frames to protein</option>
+    </param>
+
+    <param name="queryType" type="select" label="Select the query type (-q)">
+        <option value="dna" selected="true">DNA sequence</option>
+        <option value="rna">RNA sequence</option>
+        <option value="prot">Protein sequence</option>
+        <option value="dnax">DNA sequence translated in six frames to protein</option>
+        <option value="rnax">DNA sequence translated in three frames to protein</option>
+    </param>
+
+<!--    <param name="ooc" type="data" format="ooc" optional="true" label="Over-occurring N-mers file (-ooc) produced with blat -makeOoc" help="Use N as tileSize below." /> This should wait for a makeOoc wrapper -->
+
+    <param name="tileSize" type="integer" value="" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Usually between 8 and 12. Default is 11 for DNA and 5 for protein." />
+
+    <param name="stepSize" type="integer" value="" optional="true" label="Spacing between tiles (-stepSize)" help="Default is tileSize." />
+
+    <param name="oneOff" type="boolean" checked="false" label="If set, this allows one mismatch in tile and still triggers an alignment (-oneOff)" />
+
+    <param name="minMatch" type="integer" value="" optional="true" label="Number of tile matches (-minMatch)" help="Default is 2 for nucleotide, 1 for protein." />
+
+    <param name="minScore" type="integer" value="" optional="true" label="Minimum score (-minScore)" help="It is the matches minus the mismatches minus some sort of gap penalty. Default is 30." />
+
+    <param name="minIdentity" type="integer" value="" optional="true" label="Minimum sequence identity (%) (-minIdentity)" help="Default is 90 for nucleotide searches, 25 for protein or translated protein searches." />
+
+    <param name="maxGap" type="integer" value="" optional="true" label="Maximum gap between tiles in a clump (-maxGap)" help="Usually set from 0 to 3. Default is 2. Only relevant for minMatch > 1." />
+
+    <param name="noHead" type="boolean" checked="false" label="Suppress .psl header (-noHead)" help="If selected, the output is a tab-separated value file." />
+
+<!--    <param name="makeOoc" type="boolean" checked="false" label="Make overused tile file N.ooc (-makeOoc)" help="Target needs to be a complete genome." /> This should go in a separate wrapper since after making the ooc file, Blat exits -->
+
+    <param name="repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused (-repMatch)" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate." />
+
+    <param name="maskType" optional="true" type="select" label="Select how to mask out repeats in database sequences (-mask)" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored completely in protein or translated searches">
+        <option value="">...</option>
+        <option value="lower">Mask out lower cased sequence</option>
+        <option value="upper">Mask out upper cased sequence</option>
+<!--        <option value="out">Mask out according to database.out RepeatMasker.out file</option>
+        <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
+    </param>
+
+    <param name="qMaskType" optional="true" type="select" label="Select how to mask out repeats in query sequences (-qMask)">
+        <option value="">...</option>
+        <option value="lower">Mask out lower cased sequence</option>
+        <option value="upper">Mask out upper cased sequence</option>
+<!--        <option value="out">Mask out according to database.out RepeatMasker .out file</option>
+        <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
+    </param>
+
+    <param name="repeatsType" optional="true" type="select" label="Select repeat type if matches in repeat areas should be reported separately from matches in other areas (-repeats)" help="If mask type is selected, this should be left unselected. If mask type is unselected, then this can be selected">
+        <option value="">...</option>
+        <option value="lower">Mask out lower cased sequence</option>
+        <option value="upper">Mask out upper cased sequence</option>
+<!--        <option value="out">Mask out according to database.out RepeatMasker.out file</option>
+        <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
+    </param>
+
+    <param name="minRepDivergence" type="integer" optional="true" value="" label="Minimum percent divergence of repeats to allow them to be unmasked (-minRepDivergence)" help="Default is 15. Only relevant for masking using RepeatMasker .out files." />
+
+    <param name="dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log (-dots)" help="Dots show program's progress." />
+
+    <param name="trimT" type="boolean" checked="false" label="Trim leading poly-T (-trimT)" />
+
+    <param name="noTrimA" type="boolean" checked="false" label="Don't trim trailing poly-A (-noTrimA)" />
+
+    <param name="trimHardA" type="boolean" checked="false" label="Remove poly-A tail from qSize and alignments in .psl output (-trimHardA)" help="" />
+
+    <param name="fastMap" type="boolean" checked="false" label="Run for fast DNA/DNA remapping (-fastMap)" help="It does not allow introns and requires high %ID." />
+
+    <param name="outType" optional="true" type="select" label="Select output file format (-out)">
+        <option value="">...</option>
+        <option value="psl">Tab separated format, no sequence</option>
+        <option value="pslx">Tab separated format with sequence</option>
+        <option value="axt">Blastz-associated axt format</option>
+        <option value="maf">Multiz-associated maf format</option>
+        <option value="sim4">Similar to sim4 format</option>
+        <option value="wublast">Similar to wublast format</option>
+        <option value="blast">Similar to NCBI blast format</option>
+        <option value="blast8">NCBI blast tabular format</option>
+        <option value="blast9">NCBI blast tabular format with comments</option>
+    </param>
+
+    <param name="fine" type="boolean" checked="false" label="Refine search for small initial and terminal exons (-fine)" help="For high-quality mRNAs. Not recommended for ESTs." />
+
+    <param name="maxIntron" type="integer" value="" optional="true" label="Maximum intron size (-maxIntron)" help="Default is 750000." />
+
+    <param name="extendThroughN" type="boolean" checked="false" label="Allow extension of alignment through large blocks of N's (-extendThroughN)" help="" />
+
+    <param name="blat_password" type="text" value="" label="Insert password to run BLAT">
+      <validator type="empty_field" />
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
+    <data name="output" format="tabular" label="${tool.name} on ${on_string}: alignment" />
+  </outputs>
+
+  <tests>
+
+  </tests>
+  <help>
+**What it does**
+
+BLAT produces two major classes of alignments:
+
+- at the DNA level between two sequences that are of 95% or greater identity, but which may include large inserts;
+- at the protein or translated DNA level between sequences that are of 80% or greater identity and may also include large inserts.
+
+The output of BLAT is flexible. By default it is a simple tab-delimited file which describes the alignment, but which does not include the sequence of the alignment itself. Optionally it can produce BLAST and WU-BLAST compatible output as well as a number of other formats.
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _MIT license: http://opensource.org/licenses/MIT
+
+If you use this tool in Galaxy, please cite the website http://orione.crs4.it/
+
+This tool uses `BLAT`_, which is licensed separately. Please cite |Kent2002|_.
+
+.. _BLAT: http://genome.ucsc.edu/FAQ/FAQblat.html
+.. |Kent2002| replace:: Kent, W. J. (2002) BLAT – The BLAST-Like Alignment Tool. *Genome Res.* 12(4), 656-664
+.. _Kent2002: http://genome.cshlp.org/content/12/4/656
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Sep 06 11:34:34 2013 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="blat" version="3.5">
+    <repository changeset_revision="509545f4884e" name="package_blat_35x1" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>