changeset 0:de3662385a52 draft

Imported from capsule None
author devteam
date Thu, 23 Jan 2014 12:32:07 -0500
parents
children ce785326df6e
files rgClustalw.py rgClustalw.xml test-data/rgClustal_testin.fasta test-data/rgClustal_testout.fasta test-data/rgClustal_testout.log tool_dependencies.xml
diffstat 6 files changed, 414 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgClustalw.py	Thu Jan 23 12:32:07 2014 -0500
@@ -0,0 +1,60 @@
+"""
+rgclustalw.py
+Wrapper for clustalw, needed because clustalw derives the output path of its .dnd guide tree file from the input file name, so the input is copied into the working directory first.
+Copyright ross lazarus march 2011
+All rights reserved
+Licensed under the LGPL
+"""
+
+import sys,optparse,os,subprocess,tempfile,shutil
+
+class Clustrunner:
+    """
+    """
+    def __init__(self,opts=None):
+        self.opts = opts
+        self.iname = 'infile_copy'
+        shutil.copy(self.opts.input,self.iname) 
+
+    def run(self):
+        tlf = open(self.opts.outlog,'w')
+        cl = ['clustalw2 -INFILE=%s -OUTFILE=%s -OUTORDER=%s -TYPE=%s -OUTPUT=%s' % (self.iname,self.opts.output,self.opts.out_order,self.opts.dnarna,self.opts.outform)]
+        if self.opts.seq_range_end <> None and self.opts.seq_range_start <> None:
+            cl.append('-RANGE=%s,%s' % (self.opts.seq_range_start,self.opts.seq_range_end))
+        if self.opts.outform=='CLUSTAL' and self.opts.outseqnos <> None:
+            cl.append('-SEQNOS=ON')
+        process = subprocess.Popen(' '.join(cl), shell=True, stderr=tlf, stdout=tlf)
+        rval = process.wait()
+        dndf = '%s.dnd' % self.iname
+        if os.path.exists(dndf):
+            tlf.write('\nClustal created the following dnd file for your information:\n')
+            dnds = open('%s.dnd' % self.iname,'r').readlines()
+	    for row in dnds:
+                tlf.write(row)
+            tlf.write('\n')
+        tlf.close()
+        os.unlink(self.iname)
+    
+
+
+if __name__ == "__main__":
+    op = optparse.OptionParser()
+    op.add_option('-i', '--input', default=None)
+    op.add_option('-o', '--output', default=None)
+    op.add_option('-t', '--outname', default="rgClustal")
+    op.add_option('-s', '--out_order', default='ALIGNMENT')
+    op.add_option('-f', '--outform', default='CLUSTAL')
+    op.add_option('-e', '--seq_range_end',default=None)
+    op.add_option('-b', '--seq_range_start',default=None)
+    op.add_option('-l','--outlog',default='rgClustalw.log')
+    op.add_option('-q', '--outseqnos',default=None)    
+    op.add_option('-d', '--dnarna',default='DNA')    
+    
+    opts, args = op.parse_args()
+    assert opts.input <> None
+    assert os.path.isfile(opts.input)
+    c = Clustrunner(opts)
+    c.run()
+    
+            
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgClustalw.xml	Thu Jan 23 12:32:07 2014 -0500
@@ -0,0 +1,131 @@
+<tool id="clustalw" name="ClustalW" version="0.1">
+   <requirements>
+      <requirement type="package" version="2.1">clustalw2</requirement>
+   </requirements>
+   <description>multiple sequence alignment program for DNA or proteins</description>
+   <command interpreter="python"> 
+    rgClustalw.py -i "$input" -o "$output" -s "$out_order" -l "$outlog" -t "$outname" -d "$dnarna"
+    #if   ($range.mode=="part")
+-b "$range.seq_range_start" -e "$range.seq_range_end"
+    #end if
+    #if ($outcontrol.outform=="clustal")
+-f "CLUSTAL"
+    #if ($outcontrol.out_seqnos=="ON")
+-q "ON"
+    #end if
+    #end if
+    #if ($outcontrol.outform=="phylip")
+-f "PHYLIP"
+    #end if
+    #if ($outcontrol.outform=="fasta")
+-f "FASTA"
+    #end if
+   </command>
+  <inputs>
+   <page>
+    <param format="fasta" name="input" type="data" label="Fasta File" />
+    <param name="outname" label="Name for output files to make it easy to remember what you did" type="text" size="50" value="Clustal_run" />
+    <param name="dnarna" type="select" label="Data Type">
+      <option value="DNA" selected="True">DNA nucleotide sequences</option>
+      <option value="PROTEIN">Protein sequences</option>
+    </param>
+    <conditional name="outcontrol">
+      <param name="outform" type="select" label="Output alignment format">
+        <option value="clustal" selected="True">Native Clustal output format</option>
+        <option value="phylip">Phylip format</option>
+        <option value="fasta">Fasta format</option>
+      </param>
+      <when value="fasta" />
+      <when value="phylip" />
+      <when value="clustal">
+       <param name="out_seqnos" type="select" label="Show residue numbers in clustal format output">
+         <option value="ON">yes</option>
+         <option value="OFF" selected="true">no</option>
+       </param>
+      </when>
+    </conditional>
+    <param name="out_order" type="select" label="Output Order">
+      <option value="ALIGNED">aligned</option>
+      <option value="INPUT">same order as input file</option>
+    </param>
+
+    <conditional name="range">
+        <param name="mode" type="select" label="Output complete alignment (or specify part to output)">
+          <option value="complete">complete alignment</option>
+          <option value="part">only part of the alignment</option>
+        </param>
+        <when value="complete">
+        </when>
+        <when value="part">    
+           <param name="seq_range_start" size="5" type="integer" value="1" label="start point" help="sequence range to write">
+           </param>
+           <param name="seq_range_end" size="5" type="integer" value="99999" label="end point" >
+           </param> 
+        </when>
+    </conditional>
+   </page>
+  </inputs>
+  <outputs>
+    <data format="clustal" name="output"  label="${outname}_output.${outcontrol.outform}">
+       <change_format>
+           <when input="outcontrol.outform" value="phylip" format="phylip" />
+           <when input="outcontrol.outform" value="fasta" format="fasta" />
+       </change_format>
+    </data>
+    <data format="txt" name="outlog"  label="${outname}_clustal_log.txt"/>
+  </outputs>
+  <tests>
+     <test>
+      <param name="input" value="rgClustal_testin.fasta" />
+      <param name="outname" value="" />
+      <param name="outform" value="fasta" />
+      <param name="dnarna" value="DNA" />
+      <param name="mode" value="complete" />
+      <param name="out_order" value="ALIGNED" />
+      <output name="output" file="rgClustal_testout.fasta" ftype="fasta" />
+      <output name="outlog" file="rgClustal_testout.log" ftype="txt" lines_diff="5" />
+     </test>
+  </tests>
+  <help>
+
+**Note**
+
+This tool allows you to run a multiple sequence alignment with ClustalW2 (see Clustsrc_) using the default options.
+ 
+For a tutorial introduction, see ClustalW2_
+
+You can align DNA or protein sequences. The input must be a fasta file containing the multiple sequences to be aligned.
+
+A log will be output to your history showing the output Clustal would normally write to standard output.
+
+The alignments will appear as a clustal format file or optionally, as phylip or fasta format files in your history. If you choose fasta as 
+the output format, you can create a 'Logo' image using the Sequence Logo tool.
+
+If Clustal format is chosen, you have the option of adding residue numbers to the output.
+
+A subsequence of the alignment can be output by setting the "Output complete alignment" parameter to "only part of the alignment" and defining the start and end points of the subsequence to be output.
+
+----
+
+**Attribution**
+
+Clustal attribution and associated documentation are available at Clustsrc_
+
+The first iteration of this Galaxy wrapper was written by Hans-Rudolf Hotz - see Clustfirst_
+
+It was modified by Ross Lazarus for the rgenetics project - tests and some additional parameters were added
+
+This wrapper is released licensed under the LGPL_
+
+.. _ClustalW2: http://www.ebi.ac.uk/2can/tutorials/protein/clustalw.html  
+
+.. _Clustsrc: http://www.clustal.org
+
+.. _Clustfirst: http://lists.bx.psu.edu/pipermail/galaxy-dev/2010-November/003732.html
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+
+  </help>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rgClustal_testin.fasta	Thu Jan 23 12:32:07 2014 -0500
@@ -0,0 +1,25 @@
+>c_briggsae-chrII(+)/43862-46313
+ATGAGCTTCCACAAAAGCATGAGCTTTCTCAGCTTCTGCCACATCAGCATTCAAATGATC
+>c_remanei-Crem_Contig172(-)/123228-124941
+ATGAGCCTCTACAACCGCATGATTCTTTTCAGCCTCTGCCACGTCCGCATTCAAATGCTC
+>c_brenneri-Cbre_Contig60(+)/627772-630087
+ATGAGCCTCCACAACAGCATGATTTTTCTCGGCTTCCGCCACATCCGCATTCAAATGATC
+>c_elegans-II(+)/9706834-9708803
+ATGAGCCTCTACTACAGCATGATTCTTCTCAGCTTCTGCAACGTCAGCATTCAGATGATC
+>c_briggsae-chrIfooI(+)/43862-46313
+CGCACAAATATGATGCACAAATCCACAACCTAAAGCATCTCCGATAACGTTGACCGAAGT
+>c_remanei-Crem_Contig172foo(-)/123228-124941
+AGCACAAATGTAATGAACGAATCCGCATCCCAACGCATCGCCAATCACATTCACAGATGT
+>c_brenneri-Cbre_Contig60gak(+)/627772-630087
+CGCACAAATGTAGTGGACAAATCCGCATCCCAAAGCGTCTCCGATAACATTTACCGAAGT
+>c_elegans-II(+)more/9706834-9708803
+TGCACAAATGTGATGAACGAATCCACATCCCAATGCATCACCGATCACATTGACAGATGT
+>c_briggsae-chrII(+)bar/43862-46313
+CCGGAGTCGATCCCTGAAT-----------------------------------------
+>c_remanei-Crem_Contig172zot(-)/123228-124941
+ACGAAGTCGGTCCCTATAAGGTATGATTTTATATGA----TGTACCATAAGGAAATAGTC
+>c_brenneri-Cbre_Contig60fee(+)/627772-630087
+ACGAAGTCGATCCCTGAAA---------TCAGATGAGCGGTTGACCA---GAGAACAACC
+>c_elegans-II(+)meh/9706834-9708803
+ACGAAGTCGGTCCCTGAAC--AATTATTT----TGA----TATA---GAAAGAAACGGTA
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rgClustal_testout.fasta	Thu Jan 23 12:32:07 2014 -0500
@@ -0,0 +1,48 @@
+>c_briggsae-chrII_+_
+---ATGAGCTTCCACAAAAGCATGAGCTTT
+CTCAGCTTCTGCCACATCAGCATTCAAATG
+ATC
+>c_brenneri-Cbre_Contig60_+_
+---ATGAGCCTCCACAACAGCATGATTTTT
+CTCGGCTTCCGCCACATCCGCATTCAAATG
+ATC
+>c_remanei-Crem_Contig172_-_
+---ATGAGCCTCTACAACCGCATGATTCTT
+TTCAGCCTCTGCCACGTCCGCATTCAAATG
+CTC
+>c_elegans-II_+_
+---ATGAGCCTCTACTACAGCATGATTCTT
+CTCAGCTTCTGCAACGTCAGCATTCAGATG
+ATC
+>c_briggsae-chrII_+_bar
+---CCGGAGTCGATCCCTGAAT--------
+------------------------------
+---
+>c_brenneri-Cbre_Contig60fee_+_
+---ACGAAGTCGATCCCTGAAA--------
+-TCAGATGAGCGGTTGACCA---GAGAACA
+ACC
+>c_remanei-Crem_Contig172zot_-_
+---ACGAAGTCGGTCCCTATAAGGTATGAT
+TTTATATGA----TGTACCATAAGGAAATA
+GTC
+>c_elegans-II_+_meh
+---ACGAAGTCGGTCCCTGAAC--AATTAT
+TT----TGA----TATA---GAAAGAAACG
+GTA
+>c_briggsae-chrIfooI_+_
+CGCACAAATATGATGCACAAATCCACAACC
+TAAAGCATCTCCGATAACGTTGACCGAAGT
+---
+>c_brenneri-Cbre_Contig60gak_+_
+CGCACAAATGTAGTGGACAAATCCGCATCC
+CAAAGCGTCTCCGATAACATTTACCGAAGT
+---
+>c_remanei-Crem_Contig172foo_-_
+AGCACAAATGTAATGAACGAATCCGCATCC
+CAACGCATCGCCAATCACATTCACAGATGT
+---
+>c_elegans-II_+_more
+TGCACAAATGTGATGAACGAATCCACATCC
+CAATGCATCACCGATCACATTGACAGATGT
+---
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rgClustal_testout.log	Thu Jan 23 12:32:07 2014 -0500
@@ -0,0 +1,144 @@
+
+
+
+ CLUSTAL 2.1 Multiple Sequence Alignments
+
+
+Sequence type explicitly set to DNA
+Sequence format is Pearson
+Sequence 1: c_briggsae-chrII_+_/43862-46313                 60 bp
+Sequence 2: c_remanei-Crem_Contig172_-_/123228-124941       60 bp
+Sequence 3: c_brenneri-Cbre_Contig60_+_/627772-630087       60 bp
+Sequence 4: c_elegans-II_+_/9706834-9708803                 60 bp
+Sequence 5: c_briggsae-chrIfooI_+_/43862-46313              60 bp
+Sequence 6: c_remanei-Crem_Contig172foo_-_/123228-124941    60 bp
+Sequence 7: c_brenneri-Cbre_Contig60gak_+_/627772-630087    60 bp
+Sequence 8: c_elegans-II_+_more/9706834-9708803             60 bp
+Sequence 9: c_briggsae-chrII_+_bar/43862-46313              60 bp
+Sequence 10: c_remanei-Crem_Contig172zot_-_/123228-124941    60 bp
+Sequence 11: c_brenneri-Cbre_Contig60fee_+_/627772-630087    60 bp
+Sequence 12: c_elegans-II_+_meh/9706834-9708803              60 bp
+Start of Pairwise alignments
+Aligning...
+
+Sequences (1:2) Aligned. Score:  80
+Sequences (1:3) Aligned. Score:  88
+Sequences (1:4) Aligned. Score:  83
+Sequences (1:5) Aligned. Score:  21
+Sequences (1:6) Aligned. Score:  20
+Sequences (1:7) Aligned. Score:  23
+Sequences (1:8) Aligned. Score:  18
+Sequences (1:9) Aligned. Score:  21
+Sequences (1:10) Aligned. Score:  16
+Sequences (1:11) Aligned. Score:  25
+Sequences (1:12) Aligned. Score:  10
+Sequences (2:3) Aligned. Score:  85
+Sequences (2:4) Aligned. Score:  86
+Sequences (2:5) Aligned. Score:  21
+Sequences (2:6) Aligned. Score:  20
+Sequences (2:7) Aligned. Score:  25
+Sequences (2:8) Aligned. Score:  20
+Sequences (2:9) Aligned. Score:  36
+Sequences (2:10) Aligned. Score:  16
+Sequences (2:11) Aligned. Score:  22
+Sequences (2:12) Aligned. Score:  17
+Sequences (3:4) Aligned. Score:  85
+Sequences (3:5) Aligned. Score:  13
+Sequences (3:6) Aligned. Score:  20
+Sequences (3:7) Aligned. Score:  25
+Sequences (3:8) Aligned. Score:  20
+Sequences (3:9) Aligned. Score:  36
+Sequences (3:10) Aligned. Score:  16
+Sequences (3:11) Aligned. Score:  18
+Sequences (3:12) Aligned. Score:  25
+Sequences (4:5) Aligned. Score:  13
+Sequences (4:6) Aligned. Score:  11
+Sequences (4:7) Aligned. Score:  20
+Sequences (4:8) Aligned. Score:  10
+Sequences (4:9) Aligned. Score:  31
+Sequences (4:10) Aligned. Score:  17
+Sequences (4:11) Aligned. Score:  29
+Sequences (4:12) Aligned. Score:  14
+Sequences (5:6) Aligned. Score:  73
+Sequences (5:7) Aligned. Score:  83
+Sequences (5:8) Aligned. Score:  80
+Sequences (5:9) Aligned. Score:  31
+Sequences (5:10) Aligned. Score:  14
+Sequences (5:11) Aligned. Score:  14
+Sequences (5:12) Aligned. Score:  12
+Sequences (6:7) Aligned. Score:  80
+Sequences (6:8) Aligned. Score:  88
+Sequences (6:9) Aligned. Score:  26
+Sequences (6:10) Aligned. Score:  16
+Sequences (6:11) Aligned. Score:  25
+Sequences (6:12) Aligned. Score:  12
+Sequences (7:8) Aligned. Score:  78
+Sequences (7:9) Aligned. Score:  31
+Sequences (7:10) Aligned. Score:  10
+Sequences (7:11) Aligned. Score:  12
+Sequences (7:12) Aligned. Score:  12
+Sequences (8:9) Aligned. Score:  31
+Sequences (8:10) Aligned. Score:  10
+Sequences (8:11) Aligned. Score:  14
+Sequences (8:12) Aligned. Score:  12
+Sequences (9:10) Aligned. Score:  63
+Sequences (9:11) Aligned. Score:  84
+Sequences (9:12) Aligned. Score:  78
+Sequences (10:11) Aligned. Score:  64
+Sequences (10:12) Aligned. Score:  76
+Sequences (11:12) Aligned. Score:  46
+Guide tree file created:   [infile_copy.dnd]
+
+There are 11 groups
+Start of Multiple Alignment
+
+Aligning...
+Group 1: Sequences:   2      Score:1045
+Group 2: Sequences:   2      Score:1016
+Group 3: Sequences:   4      Score:1001
+Group 4: Sequences:   2      Score:313
+Group 5: Sequences:   2      Score:731
+Group 6: Sequences:   4      Score:516
+Group 7: Sequences:   8      Score:344
+Group 8: Sequences:   2      Score:1016
+Group 9: Sequences:   2      Score:1054
+Group 10: Sequences:   4      Score:945
+Group 11: Sequences:  12      Score:380
+Alignment Score 6283
+
+CLUSTAL-Alignment file created  [/share/shared/galaxy/database/files/002/dataset_2801.dat]
+
+
+Clustal created the following dnd file for your information:
+(
+(
+c_briggsae-chrII_+_/43862-46313:0.07349,
+c_brenneri-Cbre_Contig60_+_/627772-630087:0.04317)
+:0.02387,
+(
+c_remanei-Crem_Contig172_-_/123228-124941:0.06114,
+c_elegans-II_+_/9706834-9708803:0.07219)
+:0.01779,
+(
+(
+(
+c_briggsae-chrIfooI_+_/43862-46313:0.10368,
+c_brenneri-Cbre_Contig60gak_+_/627772-630087:0.06298)
+:0.01654,
+(
+c_remanei-Crem_Contig172foo_-_/123228-124941:0.05765,
+c_elegans-II_+_more/9706834-9708803:0.05902)
+:0.06262)
+:0.31533,
+(
+(
+c_briggsae-chrII_+_bar/43862-46313:0.02327,
+c_brenneri-Cbre_Contig60fee_+_/627772-630087:0.13463)
+:0.05016,
+(
+c_remanei-Crem_Contig172zot_-_/123228-124941:0.11667,
+c_elegans-II_+_meh/9706834-9708803:0.11737)
+:0.12013)
+:0.20951)
+:0.30133);
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jan 23 12:32:07 2014 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="clustalw2" version="2.1">
+      <repository changeset_revision="2f10e85b56ab" name="package_clustalw_2_1" owner="devteam" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>