comparison rgWebLogo3.py @ 3:13045f6015cb draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/weblogo3 commit d0f167e74e705788adcd9c846db011aee490074b
author iuc
date Fri, 17 Nov 2017 09:38:19 -0500
parents ced02f5334a0
children
comparison
equal deleted inserted replaced
2:ced02f5334a0 3:13045f6015cb
1 """
2 # modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion 1 # modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion
3 # rgWebLogo3.py 2 # rgWebLogo3.py
4 # wrapper to check that all fasta files are same length 3 # wrapper to check that all fasta files are same length
5 4
6 """ 5 from __future__ import print_function
7 import optparse, os, sys, subprocess, tempfile
8 6
9 WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it? 7 import optparse
8 import os
9 import subprocess
10 import sys
11 import tempfile
12
13 WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?
14
10 15
11 class WL3: 16 class WL3:
12 """ 17 """
13 simple wrapper class to check fasta sequence lengths are all identical 18 simple wrapper class to check fasta sequence lengths are all identical
14 """ 19 """
15 FASTASTARTSYM = '>' 20 FASTASTARTSYM = '>'
16 badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully' 21 badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
17 22
18 def __init__(self,opts=None): 23 def __init__(self, opts=None):
19 assert opts<>None,'WL3 class needs opts passed in - got None' 24 assert opts is not None, 'WL3 class needs opts passed in - got None'
20 self.opts = opts 25 self.opts = opts
21 self.fastaf = file(self.opts.input,'r') 26 self.fastaf = open(self.opts.input, 'r')
22 self.clparams = {} 27 self.clparams = {}
23 28
24 def whereis(self,program): 29 def whereis(self, program):
25 for path in os.environ.get('PATH', '').split(':'): 30 for path in os.environ.get('PATH', '').split(':'):
26 if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)): 31 if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)):
27 return os.path.join(path, program) 32 return os.path.join(path, program)
28 return None 33 return None
29 34
30 def runCL(self): 35 def runCL(self):
31 """ construct and run a command line 36 """ construct and run a command line
32 """ 37 """
33 wl = self.whereis(WEBLOGO) 38 wl = self.whereis(WEBLOGO)
34 if not wl: 39 if not wl:
35 print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO 40 print('## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % (WEBLOGO), file=sys.stderr)
36 print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo' 41 print('## Please ensure it is installed and working from https://github.com/weblogo/weblogo', file=sys.stderr)
37 sys.exit(1) 42 sys.exit(1)
38 cll = [WEBLOGO,] 43 cll = [WEBLOGO, ]
39 cll += [' '.join(it) for it in list(self.clparams.items())] 44 cll += [' '.join(it) for it in list(self.clparams.items())]
40 cl = ' '.join(cll) 45 cl = ' '.join(cll)
41 assert cl > '', 'runCL needs a command line as clparms' 46 assert cl > '', 'runCL needs a command line as clparms'
42 fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt') 47 fd, templog = tempfile.mkstemp(suffix='rgtempRun.txt')
43 tlf = open(templog,'w') 48 tlf = open(templog, 'w')
44 process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf) 49 process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf)
45 rval = process.wait() 50 rval = process.wait()
46 tlf.close() 51 tlf.close()
47 tlogs = ''.join(open(templog,'r').readlines()) 52 tlogs = ''.join(open(templog, 'r').readlines())
48 if len(tlogs) > 1: 53 if len(tlogs) > 1:
49 s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs) 54 s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl, rval, tlogs)
50 else: 55 else:
51 s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval) 56 s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl, rval)
52 os.unlink(templog) # always 57 os.unlink(templog) # always
53 if rval <> 0: 58 if rval != 0:
54 print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval) 59 print('## rgWebLogo3.py error - executing %s returned error code %d' % (cl, rval), file=sys.stderr)
55 print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO 60 print('## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO, file=sys.stderr)
56 print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO 61 print('## Please ensure %s is correctly installed and working on the command line -see https://github.com/weblogo/weblogo' % WEBLOGO, file=sys.stderr)
57 sys.exit(1) 62 sys.exit(1)
58 return s 63 return s
59 64
60
61 def iter_fasta(self): 65 def iter_fasta(self):
62 """ 66 """
63 generator for fasta sequences from a file 67 generator for fasta sequences from a file
64 """ 68 """
65 aseq = [] 69 aseq = []
66 seqname = None 70 seqname = None
67 for i,row in enumerate(self.fastaf): 71 for i, row in enumerate(self.fastaf):
68 if row.startswith(self.FASTASTARTSYM): 72 if row.startswith(self.FASTASTARTSYM):
69 if seqname <> None: # already in a sequence 73 if seqname is not None: # already in a sequence
70 s = ''.join(aseq) 74 s = ''.join(aseq)
71 l = len(s) 75 ls = len(s)
72 yield (seqname,l) 76 yield (seqname, ls)
73 seqname = row[1:].strip() 77 seqname = row[1:].strip()
74 aseq = [] 78 aseq = []
75 else: 79 else:
76 if i > 0: 80 if i > 0:
77 print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM) 81 print('Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input, self.FASTASTARTSYM), file=sys.stderr)
78 sys.exit(1) 82 sys.exit(1)
79 else: 83 else:
80 seqname = row[1:].strip() 84 seqname = row[1:].strip()
81 else: # sequence row 85 else: # sequence row
82 if seqname == None: 86 if seqname is None:
83 print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM) 87 print('Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input, self.FASTASTARTSYM), file=sys.stderr)
84 sys.exit(1) 88 sys.exit(1)
85 else: 89 else:
86 aseq.append(row.strip()) 90 aseq.append(row.strip())
87 91
88 if seqname <> None: # last one 92 if seqname is not None: # last one
89 l = len(''.join(aseq)) 93 las = len(''.join(aseq))
90 yield (seqname,l) 94 yield (seqname, las)
91 95
92
93 def fcheck(self): 96 def fcheck(self):
94 """ are all fasta sequence same length? 97 """ are all fasta sequence same length?
95 might be mongo big 98 might be mongo big
96 """ 99 """
97 flen = None 100 flen = None
98 lasti = None 101 lasti = None
99 f = self.iter_fasta() 102 f = self.iter_fasta()
100 for i,(seqname,seqlen) in enumerate(f): 103 for i, (seqname, seqlen) in enumerate(f):
101 lasti = i 104 lasti = i
102 if i == 0: 105 if i == 0:
103 flen = seqlen 106 flen = seqlen
104 else: 107 else:
105 if seqlen <> flen: 108 if seqlen != flen:
106 print >> sys.stderr,self.badseq % self.opts.input 109 print(self.badseq % self.opts.input, file=sys.stderr)
107 sys.exit(1) 110 sys.exit(1)
108 return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, lasti+1, flen) 111 return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, lasti + 1, flen)
109
110 112
111 def run(self): 113 def run(self):
112 check = self.fcheck() 114 check = self.fcheck()
113 self.clparams['-f'] = self.opts.input 115 self.clparams['-f'] = self.opts.input
114 self.clparams['-o'] = self.opts.output 116 self.clparams['-o'] = self.opts.output
115 self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string 117 self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string
116 self.clparams['-F'] = self.opts.outformat 118 self.clparams['-F'] = self.opts.outformat
117 if self.opts.size <> None: 119 if self.opts.size is not None:
118 self.clparams['-s'] = self.opts.size 120 self.clparams['-s'] = self.opts.size
119 if self.opts.lower <> None: 121 if self.opts.lower is not None:
120 self.clparams['-l'] = self.opts.lower 122 self.clparams['-l'] = self.opts.lower
121 if self.opts.upper <> None: 123 if self.opts.upper is not None:
122 self.clparams['-u'] = self.opts.upper 124 self.clparams['-u'] = self.opts.upper
123 if self.opts.colours <> None: 125 if self.opts.colours is not None:
124 self.clparams['-c'] = self.opts.colours 126 self.clparams['-c'] = self.opts.colours
125 if self.opts.units <> None: 127 if self.opts.units is not None:
126 self.clparams['-U'] = self.opts.units 128 self.clparams['-U'] = self.opts.units
127 s = self.runCL() 129 s = self.runCL()
128 return check,s 130 return check, s
129 131
130 132
131 if __name__ == '__main__': 133 if __name__ == '__main__':
132 '''
133 called as
134 <command interpreter="python">
135 rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
136 #if $range.mode == 'part'
137 -l "$range.seqstart" -u "$range.seqend"
138 #end if
139 </command>
140
141 '''
142 op = optparse.OptionParser() 134 op = optparse.OptionParser()
143 op.add_option('-i', '--input', default=None) 135 op.add_option('-i', '--input', default=None)
144 op.add_option('-F', '--outformat', default='png') 136 op.add_option('-F', '--outformat', default='png')
145 op.add_option('-s', '--size', default=None) 137 op.add_option('-s', '--size', default=None)
146 op.add_option('-o', '--output', default='rgWebLogo3') 138 op.add_option('-o', '--output', default='rgWebLogo3')
147 op.add_option('-t', '--logoname', default='rgWebLogo3') 139 op.add_option('-t', '--logoname', default='rgWebLogo3')
148 op.add_option('-c', '--colours', default=None) 140 op.add_option('-c', '--colours', default=None)
149 op.add_option('-l', '--lower', default=None) 141 op.add_option('-l', '--lower', default=None)
150 op.add_option('-u', '--upper', default=None) 142 op.add_option('-u', '--upper', default=None)
151 op.add_option('-U', '--units', default=None) 143 op.add_option('-U', '--units', default=None)
152 opts, args = op.parse_args() 144 opts, args = op.parse_args()
153 assert opts.input <> None,'weblogo3 needs a -i parameter with a fasta input file - cannot open' 145 assert opts.input is not None, 'weblogo3 needs a -i parameter with a fasta input file - cannot open'
154 assert os.path.isfile(opts.input),'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input 146 assert os.path.isfile(opts.input), 'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input
155 w = WL3(opts) 147 w = WL3(opts)
156 checks,s = w.run() 148 checks, s = w.run()
157 print >> sys.stdout, checks # for info 149 print(checks, file=sys.stdout) # for info