Mercurial > repos > soranzo > blat
changeset 0:328222bf2289 draft default tip
Uploaded
author | soranzo |
---|---|
date | Fri, 06 Sep 2013 11:34:34 -0400 |
parents | |
children | |
files | COPYING blat_wrapper.py blat_wrapper.xml tool_dependencies.xml |
diffstat | 4 files changed, 396 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Fri Sep 06 11:34:34 2013 -0400 @@ -0,0 +1,23 @@ +Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Andrea Pinna <andrea.pinna@crs4.it> +Nicola Soranzo <nicola.soranzo@crs4.it> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# File: blat_wrapper.py (changeset diff: --- /dev/null +++ b/blat_wrapper.py, Fri Sep 06 11:34:34 2013 -0400)
# -*- coding: utf-8 -*-
"""
BLAT
version 0.2.1 (andrea.pinna@crs4.it)

Galaxy wrapper around the ``blat`` executable: it parses the wrapper options,
checks the supplied BLAT password against a tab-separated password file,
translates the options into a ``blat`` command line and runs it, sending the
standard output of ``blat`` to the requested log file.

All ``print`` calls take a single argument so that the output is the same
under Python 2 (where this wrapper was written) and Python 3.
"""

import optparse
import subprocess
import sys

# Tab-separated "password<TAB>user" file consulted before BLAT is run.
# NOTE(review): passwords are stored and compared in clear text and are passed
# on the command line; this is kept for compatibility with the existing setup.
_PASSWORD_FILE = '/u/galaxy/galaxy_passwords.tsv'

_MASK_CHOICES = ['lower', 'upper', 'out', 'file.out']


def check_password(password_file, input_password):
    """Check ``input_password`` against the entries of ``password_file``.

    Each line of ``password_file`` is expected to hold a password and a user
    name separated by a tab; any further columns are ignored. Blank lines and
    lines without a tab are skipped instead of aborting the check, and the
    user name is taken verbatim whether or not the line ends with a newline.

    A message reporting the outcome is printed to standard output.

    :param password_file: path of the tab-separated password file
    :param input_password: password supplied by the user
    :return: ``True`` if a line with a matching password exists, else ``False``
    :raises IOError: if ``password_file`` cannot be opened
    """
    matched_user = None
    with open(password_file, 'r') as f:
        for line in f:
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                # Blank or malformed line: there is no user to report.
                continue
            if fields[0] == input_password:
                matched_user = fields[1]
                break

    if matched_user is None:
        print('Password does not match!\n')
        return False
    print('User \'%s\' allowed to BLAT!\n' % matched_user)
    return True


def _build_parser():
    """Return the option parser describing every wrapper option."""
    parser = optparse.OptionParser()
    parser.add_option('--database', dest='database', help='')
    parser.add_option('--query', dest='query', help='')
    parser.add_option('--t', dest='databaseType', choices=['dna', 'prot', 'dnax'], help='')
    parser.add_option('--q', dest='queryType', choices=['dna', 'rna', 'prot', 'dnax', 'rnax'], help='')
    parser.add_option('--ooc', dest='ooc', help='')
    parser.add_option('--tileSize', dest='tileSize', type='int', help='')
    parser.add_option('--stepSize', dest='stepSize', type='int', help='')
    parser.add_option('--oneOff', action='store_true', dest='oneOff', help='')
    parser.add_option('--minMatch', dest='minMatch', type='int', help='')
    parser.add_option('--minScore', dest='minScore', type='int', help='')
    parser.add_option('--minIdentity', dest='minIdentity', type='int', help='')
    parser.add_option('--maxGap', dest='maxGap', type='int', help='')
    parser.add_option('--noHead', action='store_true', dest='noHead', help='')
    parser.add_option('--makeOoc', dest='makeOoc', help='')
    parser.add_option('--repMatch', dest='repMatch', type='int', help='')
    parser.add_option('--maskType', dest='maskType', choices=_MASK_CHOICES, help='')
    parser.add_option('--qMaskType', dest='qMaskType', choices=_MASK_CHOICES, help='')
    parser.add_option('--repeatsType', dest='repeatsType', choices=_MASK_CHOICES, help='')
    parser.add_option('--minRepDivergence', dest='minRepDivergence', type='int', help='')
    parser.add_option('--dots', dest='dots', type='int', help='')
    parser.add_option('--trimT', action='store_true', dest='trimT', help='')
    parser.add_option('--noTrimA', action='store_true', dest='noTrimA', help='')
    parser.add_option('--trimHardA', action='store_true', dest='trimHardA', help='')
    parser.add_option('--fastMap', action='store_true', dest='fastMap', help='')
    parser.add_option('--outType', dest='outType',
                      choices=['psl', 'pslx', 'axt', 'maf', 'sim4', 'wublast',
                               'blast', 'blast8', 'blast9'],
                      help='')
    parser.add_option('--fine', action='store_true', dest='fine', help='')
    parser.add_option('--maxIntron', dest='maxIntron', type='int', help='')
    parser.add_option('--extendThroughN', action='store_true', dest='extendThroughN', help='')
    parser.add_option('--blat_password', dest='blat_password', help='')
    parser.add_option('--output', dest='output', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    return parser


def _build_blat_command(options):
    """Translate parsed wrapper ``options`` into a ``blat`` argument list.

    Options that were not supplied (or whose integer value is below the
    accepted minimum) are left out so that ``blat`` applies its own default.
    The result is a list, one element per argument, meant to be executed
    without a shell so that file paths are never re-interpreted.
    """
    cmd = ['blat', options.database, options.query]

    def add_value(flag, value):
        # String-valued option, emitted only when a non-empty value is given.
        if value:
            cmd.append('-%s=%s' % (flag, value))

    def add_int(flag, value, minimum=1):
        # Integer option, emitted only when given and not below ``minimum``.
        if value is not None and value >= minimum:
            cmd.append('-%s=%d' % (flag, value))

    def add_switch(argument, enabled):
        # Boolean option, emitted verbatim when enabled.
        if enabled:
            cmd.append(argument)

    add_value('t', options.databaseType)
    add_value('q', options.queryType)
    add_int('tileSize', options.tileSize)
    add_int('stepSize', options.stepSize)
    add_switch('-oneOff=1', options.oneOff)
    add_int('minMatch', options.minMatch, minimum=0)
    add_int('minScore', options.minScore)
    add_int('minIdentity', options.minIdentity)
    # 0 is a legitimate -maxGap value, so it must not be dropped.
    add_int('maxGap', options.maxGap, minimum=0)
    add_switch('-noHead', options.noHead)
    add_int('repMatch', options.repMatch)
    add_value('mask', options.maskType)
    add_value('qMask', options.qMaskType)
    add_value('repeats', options.repeatsType)
    add_int('minRepDivergence', options.minRepDivergence)
    add_int('dots', options.dots)
    add_switch('-trimT', options.trimT)
    add_switch('-noTrimA', options.noTrimA)
    add_switch('-trimHardA', options.trimHardA)
    add_switch('-fastMap', options.fastMap)
    add_value('out', options.outType)
    add_switch('-fine', options.fine)
    add_int('maxIntron', options.maxIntron)
    add_switch('-extendThroughN', options.extendThroughN)
    # The output file is the last positional argument of blat.
    cmd.append(options.output)
    return cmd


def __main__():
    """Parse the options, verify the BLAT password and run ``blat``.

    Exits through ``parser.error`` on unexpected positional arguments or a
    missing ``--database``/``--query``/``--output``, and through ``sys.exit``
    when the password is wrong.

    :raises subprocess.CalledProcessError: if ``blat`` exits with a non-zero
        status
    """
    # load arguments
    print('Parsing input options...')
    parser = _build_parser()
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    for required in ('database', 'query', 'output'):
        # Fail with a usage message rather than a TypeError further down.
        if not getattr(options, required):
            parser.error('Missing required option --%s' % required)

    # check BLAT password
    if check_password(_PASSWORD_FILE, options.blat_password):
        print('Correct BLAT password!')
    else:
        sys.exit('Wrong BLAT password!!!\n')

    # Build BLAT command
    cmd = _build_blat_command(options)
    print('\nBLAT command to be executed:\n ' + ' '.join(cmd))

    # Execution of BLAT
    print('Executing BLAT...')
    log = open(options.logfile, 'w') if options.logfile else sys.stdout
    try:
        # Flush our own messages first so they precede blat's output when
        # both end up on the same stream.
        sys.stdout.flush()
        # Argument list + no shell: paths are passed to blat untouched.
        subprocess.check_call(cmd, stdout=log)
    finally:
        if log is not sys.stdout:
            log.close()
    print('BLAT executed!')


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_wrapper.xml Fri Sep 06 11:34:34 2013 -0400 @@ -0,0 +1,211 @@ +<tool id="blat_wrapper" name="BLAT" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.5">blat</requirement> + </requirements> + <command interpreter="python"> + blat_wrapper.py --database $database --query $query --t=$databaseType --q=$queryType + #if str($tileSize) + --tileSize=$tileSize + #end if + #if str($stepSize) + --stepSize=$stepSize + #end if + #if $oneOff + --oneOff + #end if + #if str($minMatch) + --minMatch=$minMatch + #end if + #if str($minScore) + --minScore=$minScore + #end if + #if str($minIdentity) + --minIdentity=$minIdentity + #end if + #if str($maxGap) + --maxGap=$maxGap + #end if + #if $noHead + --noHead + #end if + #if str($repMatch) + --repMatch=$repMatch + #end if + #if $maskType + --maskType=$maskType + #end if + #if $qMaskType + --qMaskType=$qMaskType + #end if + #if $repeatsType + --repeatsType=$repeatsType + #end if + #if str($minRepDivergence) + --minRepDivergence=$minRepDivergence + #end if + #if str($dots) + --dots=$dots + #end if + #if $trimT + --trimT + #end if + #if $noTrimA + --noTrimA + #end if + #if $trimHardA + --trimHardA + #end if + #if $fastMap + --fastMap + #end if + #if $outType + --outType=$outType + #end if + #if $fine + --fine + #end if + #if str($maxIntron) + --maxIntron=$maxIntron + #end if + #if $extendThroughN + --extendThroughN + #end if + --blat_password "$blat_password" + --logfile $logfile --output $output + </command> + + <inputs> + <param name="database" type="data" format="fasta,twobit" label="Database" help="FASTA or 2bit format" /> + <param name="query" type="data" format="fasta,twobit" label="Query" help="FASTA or 2bit format" /> + + <param name="databaseType" type="select" label="Select the database type (-t)"> + <option value="dna" selected="true">DNA sequence</option> + <option value="prot">Protein sequence</option> + <option 
value="dnax">DNA sequence translated in six frames to protein</option> + </param> + + <param name="queryType" type="select" label="Select the query type (-q)"> + <option value="dna" selected="true">DNA sequence</option> + <option value="rna">RNA sequence</option> + <option value="prot">Protein sequence</option> + <option value="dnax">DNA sequence translated in six frames to protein</option> + <option value="rnax">DNA sequence translated in three frames to protein</option> + </param> + +<!-- <param name="ooc" type="data" format="ooc" optional="true" label="Over-occurring N-mers file (-ooc) produced with blat -makeOoc" help="Use N as tileSize below." /> This should wait for a makeOoc wrapper --> + + <param name="tileSize" type="integer" value="" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Usually between 8 and 12. Default is 11 for DNA and 5 for protein." /> + + <param name="stepSize" type="integer" value="" optional="true" label="Spacing between tiles (-stepSize)" help="Default is tileSize." /> + + <param name="oneOff" type="boolean" checked="false" label="If set, this allows one mismatch in tile and still triggers an alignment (-oneOff)" /> + + <param name="minMatch" type="integer" value="" optional="true" label="Number of tile matches (-minMatch)" help="Default is 2 for nucleotide, 1 for protein." /> + + <param name="minScore" type="integer" value="" optional="true" label="Minimum score (-minScore)" help="It is the matches minus the mismatches minus some sort of gap penalty. Default is 30." /> + + <param name="minIdentity" type="integer" value="" optional="true" label="Minimum sequence identity (%) (-minIdentity)" help="Default is 90 for nucleotide searches, 25 for protein or translated protein searches." /> + + <param name="maxGap" type="integer" value="" optional="true" label="Maximum gap between tiles in a clump (-maxGap)" help="Usually set from 0 to 3. Default is 2. Only relevant for minMatch > 1." 
/> + + <param name="noHead" type="boolean" checked="false" label="Suppress .psl header (-noHead)" help="If selected, the output is a tab-separated value file." /> + +<!-- <param name="makeOoc" type="boolean" checked="false" label="Make overused tile file N.ooc (-makeOoc)" help="Target needs to be a complete genome." /> This should go in a separate wrapper since after making the ooc file, Blat exits --> + + <param name="repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused (-repMatch)" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate." /> + + <param name="maskType" optional="true" type="select" label="Select how to mask out repeats in database sequences (-mask)" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored completely in protein or translated searches"> + <option value="">...</option> + <option value="lower">Mask out lower cased sequence</option> + <option value="upper">Mask out upper cased sequence</option> +<!-- <option value="out">Mask out according to database.out RepeatMasker.out file</option> + <option value="file.out">Mask database according to RepeatMasker file.out</option>--> + </param> + + <param name="qMaskType" optional="true" type="select" label="Select how to mask out repeats in query sequences (-qMask)"> + <option value="">...</option> + <option value="lower">Mask out lower cased sequence</option> + <option value="upper">Mask out upper cased sequence</option> +<!-- <option value="out">Mask out according to database.out RepeatMasker .out file</option> + <option value="file.out">Mask database according to RepeatMasker file.out</option>--> + </param> + + <param name="repeatsType" optional="true" type="select" label="Select repeat type if matches in repeat areas should be reported 
separately from matches in other areas (-repeats)" help="If mask type is selected, this should be left unselected. If mask type is unselected, then this can be selected"> + <option value="">...</option> + <option value="lower">Mask out lower cased sequence</option> + <option value="upper">Mask out upper cased sequence</option> +<!-- <option value="out">Mask out according to database.out RepeatMasker.out file</option> + <option value="file.out">Mask database according to RepeatMasker file.out</option>--> + </param> + + <param name="minRepDivergence" type="integer" optional="true" value="" label="Minimum percent divergence of repeats to allow them to be unmasked (-minRepDivergence)" help="Default is 15. Only relevant for masking using RepeatMasker .out files." /> + + <param name="dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log (-dots)" help="Dots show program's progress." /> + + <param name="trimT" type="boolean" checked="false" label="Trim leading poly-T (-trimT)" /> + + <param name="noTrimA" type="boolean" checked="false" label="Don't trim trailing poly-A (-noTrimA)" /> + + <param name="trimHardA" type="boolean" checked="false" label="Remove poly-A tail from qSize and alignments in .psl output (-trimHardA)" help="" /> + + <param name="fastMap" type="boolean" checked="false" label="Run for fast DNA/DNA remapping (-fastMap)" help="It does not allow introns and requires high %ID." 
/> + + <param name="outType" optional="true" type="select" label="Select output file format (-out)"> + <option value="">...</option> + <option value="psl">Tab separated format, no sequence</option> + <option value="pslx">Tab separated format with sequence</option> + <option value="axt">Blastz-associated axt format</option> + <option value="maf">Multiz-associated maf format</option> + <option value="sim4">Similar to sim4 format</option> + <option value="wublast">Similar to wublast format</option> + <option value="blast">Similar to NCBI blast format</option> + <option value="blast8">NCBI blast tabular format</option> + <option value="blast9">NCBI blast tabular format with comments</option> + </param> + + <param name="fine" type="boolean" checked="false" label="Refine search for small initial and terminal exons (-fine)" help="For high-quality mRNAs. Not recommended for ESTs." /> + + <param name="maxIntron" type="integer" value="" optional="true" label="Maximum intron size (-maxIntron)" help="Default is 750000." /> + + <param name="extendThroughN" type="boolean" checked="false" label="Allow extension of alignment through large blocks of N's (-extendThroughN)" help="" /> + + <param name="blat_password" type="text" value="" label="Insert password to run BLAT"> + <validator type="empty_field" /> + </param> + </inputs> + + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="output" format="tabular" label="${tool.name} on ${on_string}: alignment" /> + </outputs> + + <tests> + + </tests> + <help> +**What it does** + +BLAT produces two major classes of alignments: + +- at the DNA level between two sequences that are of 95% or greater identity, but which may include large inserts; +- at the protein or translated DNA level between sequences that are of 80% or greater identity and may also include large inserts. + +The output of BLAT is flexible. 
By default it is a simple tab-delimited file which describes the alignment, but which does not include the sequence of the alignment itself. Optionally it can produce BLAST and WU-BLAST compatible output as well as a number of other formats. + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite the website http://orione.crs4.it/ + +This tool uses `BLAT`_, which is licensed separately. Please cite |Kent2002|_. + +.. _BLAT: http://genome.ucsc.edu/FAQ/FAQblat.html +.. |Kent2002| replace:: Kent, W. J. (2002) BLAT – The BLAST-Like Alignment Tool. *Genome Res.* 12(4), 656-664 +.. _Kent2002: http://genome.cshlp.org/content/12/4/656 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Sep 06 11:34:34 2013 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="blat" version="3.5"> + <repository changeset_revision="509545f4884e" name="package_blat_35x1" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>