Mercurial > repos > devteam > weblogo3
comparison rgWebLogo3.py @ 3:13045f6015cb draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/weblogo3 commit d0f167e74e705788adcd9c846db011aee490074b
author | iuc |
---|---|
date | Fri, 17 Nov 2017 09:38:19 -0500 |
parents | ced02f5334a0 |
children |
comparison
equal
deleted
inserted
replaced
2:ced02f5334a0 | 3:13045f6015cb |
---|---|
1 """ | |
2 # modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion | 1 # modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion |
3 # rgWebLogo3.py | 2 # rgWebLogo3.py |
4 # wrapper to check that all fasta files are same length | 3 # wrapper to check that all fasta files are same length |
5 | 4 |
6 """ | 5 from __future__ import print_function |
7 import optparse, os, sys, subprocess, tempfile | |
8 | 6 |
9 WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it? | 7 import optparse |
| | 8 import os |
| | 9 import subprocess |
| | 10 import sys |
| | 11 import tempfile |
| | 12 |
| | 13 WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it? |
| | 14 |
10 | 15 |
11 class WL3: | 16 class WL3: |
12 """ | 17 """ |
13 simple wrapper class to check fasta sequence lengths are all identical | 18 simple wrapper class to check fasta sequence lengths are all identical |
14 """ | 19 """ |
15 FASTASTARTSYM = '>' | 20 FASTASTARTSYM = '>' |
16 badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully' | 21 badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully' |
17 | 22 |
18 def __init__(self,opts=None): | 23 def __init__(self, opts=None): |
19 assert opts<>None,'WL3 class needs opts passed in - got None' | 24 assert opts is not None, 'WL3 class needs opts passed in - got None' |
20 self.opts = opts | 25 self.opts = opts |
21 self.fastaf = file(self.opts.input,'r') | 26 self.fastaf = open(self.opts.input, 'r') |
22 self.clparams = {} | 27 self.clparams = {} |
23 | 28 |
24 def whereis(self,program): | 29 def whereis(self, program): |
25 for path in os.environ.get('PATH', '').split(':'): | 30 for path in os.environ.get('PATH', '').split(':'): |
26 if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)): | 31 if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)): |
27 return os.path.join(path, program) | 32 return os.path.join(path, program) |
28 return None | 33 return None |
29 | 34 |
30 def runCL(self): | 35 def runCL(self): |
31 """ construct and run a command line | 36 """ construct and run a command line |
32 """ | 37 """ |
33 wl = self.whereis(WEBLOGO) | 38 wl = self.whereis(WEBLOGO) |
34 if not wl: | 39 if not wl: |
35 print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO | 40 print('## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % (WEBLOGO), file=sys.stderr) |
36 print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo' | 41 print('## Please ensure it is installed and working from https://github.com/weblogo/weblogo', file=sys.stderr) |
37 sys.exit(1) | 42 sys.exit(1) |
38 cll = [WEBLOGO,] | 43 cll = [WEBLOGO, ] |
39 cll += [' '.join(it) for it in list(self.clparams.items())] | 44 cll += [' '.join(it) for it in list(self.clparams.items())] |
40 cl = ' '.join(cll) | 45 cl = ' '.join(cll) |
41 assert cl > '', 'runCL needs a command line as clparms' | 46 assert cl > '', 'runCL needs a command line as clparms' |
42 fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt') | 47 fd, templog = tempfile.mkstemp(suffix='rgtempRun.txt') |
43 tlf = open(templog,'w') | 48 tlf = open(templog, 'w') |
44 process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf) | 49 process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf) |
45 rval = process.wait() | 50 rval = process.wait() |
46 tlf.close() | 51 tlf.close() |
47 tlogs = ''.join(open(templog,'r').readlines()) | 52 tlogs = ''.join(open(templog, 'r').readlines()) |
48 if len(tlogs) > 1: | 53 if len(tlogs) > 1: |
49 s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs) | 54 s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl, rval, tlogs) |
50 else: | 55 else: |
51 s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval) | 56 s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl, rval) |
52 os.unlink(templog) # always | 57 os.unlink(templog) # always |
53 if rval <> 0: | 58 if rval != 0: |
54 print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval) | 59 print('## rgWebLogo3.py error - executing %s returned error code %d' % (cl, rval), file=sys.stderr) |
55 print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO | 60 print('## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO, file=sys.stderr) |
56 print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO | 61 print('## Please ensure %s is correctly installed and working on the command line -see https://github.com/weblogo/weblogo' % WEBLOGO, file=sys.stderr) |
57 sys.exit(1) | 62 sys.exit(1) |
58 return s | 63 return s |
59 | 64 |
60 | |
61 def iter_fasta(self): | 65 def iter_fasta(self): |
62 """ | 66 """ |
63 generator for fasta sequences from a file | 67 generator for fasta sequences from a file |
64 """ | 68 """ |
65 aseq = [] | 69 aseq = [] |
66 seqname = None | 70 seqname = None |
67 for i,row in enumerate(self.fastaf): | 71 for i, row in enumerate(self.fastaf): |
68 if row.startswith(self.FASTASTARTSYM): | 72 if row.startswith(self.FASTASTARTSYM): |
69 if seqname <> None: # already in a sequence | 73 if seqname is not None: # already in a sequence |
70 s = ''.join(aseq) | 74 s = ''.join(aseq) |
71 l = len(s) | 75 ls = len(s) |
72 yield (seqname,l) | 76 yield (seqname, ls) |
73 seqname = row[1:].strip() | 77 seqname = row[1:].strip() |
74 aseq = [] | 78 aseq = [] |
75 else: | 79 else: |
76 if i > 0: | 80 if i > 0: |
77 print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM) | 81 print('Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input, self.FASTASTARTSYM), file=sys.stderr) |
78 sys.exit(1) | 82 sys.exit(1) |
79 else: | 83 else: |
80 seqname = row[1:].strip() | 84 seqname = row[1:].strip() |
81 else: # sequence row | 85 else: # sequence row |
82 if seqname == None: | 86 if seqname is None: |
83 print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM) | 87 print('Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input, self.FASTASTARTSYM), file=sys.stderr) |
84 sys.exit(1) | 88 sys.exit(1) |
85 else: | 89 else: |
86 aseq.append(row.strip()) | 90 aseq.append(row.strip()) |
87 | 91 |
88 if seqname <> None: # last one | 92 if seqname is not None: # last one |
89 l = len(''.join(aseq)) | 93 las = len(''.join(aseq)) |
90 yield (seqname,l) | 94 yield (seqname, las) |
91 | 95 |
92 | |
93 def fcheck(self): | 96 def fcheck(self): |
94 """ are all fasta sequence same length? | 97 """ are all fasta sequence same length? |
95 might be mongo big | 98 might be mongo big |
96 """ | 99 """ |
97 flen = None | 100 flen = None |
98 lasti = None | 101 lasti = None |
99 f = self.iter_fasta() | 102 f = self.iter_fasta() |
100 for i,(seqname,seqlen) in enumerate(f): | 103 for i, (seqname, seqlen) in enumerate(f): |
101 lasti = i | 104 lasti = i |
102 if i == 0: | 105 if i == 0: |
103 flen = seqlen | 106 flen = seqlen |
104 else: | 107 else: |
105 if seqlen <> flen: | 108 if seqlen != flen: |
106 print >> sys.stderr,self.badseq % self.opts.input | 109 print(self.badseq % self.opts.input, file=sys.stderr) |
107 sys.exit(1) | 110 sys.exit(1) |
108 return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, lasti+1, flen) | 111 return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, lasti + 1, flen) |
109 | |
110 | 112 |
111 def run(self): | 113 def run(self): |
112 check = self.fcheck() | 114 check = self.fcheck() |
113 self.clparams['-f'] = self.opts.input | 115 self.clparams['-f'] = self.opts.input |
114 self.clparams['-o'] = self.opts.output | 116 self.clparams['-o'] = self.opts.output |
115 self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string | 117 self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string |
116 self.clparams['-F'] = self.opts.outformat | 118 self.clparams['-F'] = self.opts.outformat |
117 if self.opts.size <> None: | 119 if self.opts.size is not None: |
118 self.clparams['-s'] = self.opts.size | 120 self.clparams['-s'] = self.opts.size |
119 if self.opts.lower <> None: | 121 if self.opts.lower is not None: |
120 self.clparams['-l'] = self.opts.lower | 122 self.clparams['-l'] = self.opts.lower |
121 if self.opts.upper <> None: | 123 if self.opts.upper is not None: |
122 self.clparams['-u'] = self.opts.upper | 124 self.clparams['-u'] = self.opts.upper |
123 if self.opts.colours <> None: | 125 if self.opts.colours is not None: |
124 self.clparams['-c'] = self.opts.colours | 126 self.clparams['-c'] = self.opts.colours |
125 if self.opts.units <> None: | 127 if self.opts.units is not None: |
126 self.clparams['-U'] = self.opts.units | 128 self.clparams['-U'] = self.opts.units |
127 s = self.runCL() | 129 s = self.runCL() |
128 return check,s | 130 return check, s |
129 | 131 |
130 | 132 |
131 if __name__ == '__main__': | 133 if __name__ == '__main__': |
132 ''' | |
133 called as | |
134 <command interpreter="python"> | |
135 rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours" | |
136 #if $range.mode == 'part' | |
137 -l "$range.seqstart" -u "$range.seqend" | |
138 #end if | |
139 </command> | |
140 | |
141 ''' | |
142 op = optparse.OptionParser() | 134 op = optparse.OptionParser() |
143 op.add_option('-i', '--input', default=None) | 135 op.add_option('-i', '--input', default=None) |
144 op.add_option('-F', '--outformat', default='png') | 136 op.add_option('-F', '--outformat', default='png') |
145 op.add_option('-s', '--size', default=None) | 137 op.add_option('-s', '--size', default=None) |
146 op.add_option('-o', '--output', default='rgWebLogo3') | 138 op.add_option('-o', '--output', default='rgWebLogo3') |
147 op.add_option('-t', '--logoname', default='rgWebLogo3') | 139 op.add_option('-t', '--logoname', default='rgWebLogo3') |
148 op.add_option('-c', '--colours', default=None) | 140 op.add_option('-c', '--colours', default=None) |
149 op.add_option('-l', '--lower', default=None) | 141 op.add_option('-l', '--lower', default=None) |
150 op.add_option('-u', '--upper', default=None) | 142 op.add_option('-u', '--upper', default=None) |
151 op.add_option('-U', '--units', default=None) | 143 op.add_option('-U', '--units', default=None) |
152 opts, args = op.parse_args() | 144 opts, args = op.parse_args() |
153 assert opts.input <> None,'weblogo3 needs a -i parameter with a fasta input file - cannot open' | 145 assert opts.input is not None, 'weblogo3 needs a -i parameter with a fasta input file - cannot open' |
154 assert os.path.isfile(opts.input),'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input | 146 assert os.path.isfile(opts.input), 'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input |
155 w = WL3(opts) | 147 w = WL3(opts) |
156 checks,s = w.run() | 148 checks, s = w.run() |
157 print >> sys.stdout, checks # for info | 149 print(checks, file=sys.stdout) # for info |