comparison tools/protein_analysis/promoter2.py @ 26:20139cb4c844 draft

planemo upload for repository https://github.com/peterjc/pico_galaxy/tools/protein_analysis commit 221d4187992cbb993e02dc3ea0ef0150c7916a4a-dirty
author peterjc
date Wed, 13 May 2015 06:14:42 -0400
parents 5e62aefb2918
children 22e71e53f534
comparison
Legend: equal | deleted | inserted | replaced
25:41a42022f815 26:20139cb4c844
28 """ 28 """
29 import sys 29 import sys
30 import os 30 import os
31 import commands 31 import commands
32 import tempfile 32 import tempfile
33 from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count 33 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
34 34
35 FASTA_CHUNK = 500 35 FASTA_CHUNK = 500
36 36
37 if len(sys.argv) != 4: 37 if len(sys.argv) != 4:
38 stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " 38 sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. "
39 "Got %i arguments." % (len(sys.argv)-1)) 39 "Got %i arguments." % (len(sys.argv)-1))
40 40
41 num_threads = thread_count(sys.argv[3],default=4) 41 num_threads = thread_count(sys.argv[3],default=4)
42 fasta_file = os.path.abspath(sys.argv[2]) 42 fasta_file = os.path.abspath(sys.argv[2])
43 tabular_file = os.path.abspath(sys.argv[3]) 43 tabular_file = os.path.abspath(sys.argv[3])
46 46
47 def get_path_and_binary(): 47 def get_path_and_binary():
48 platform = commands.getoutput("uname") #e.g. Linux 48 platform = commands.getoutput("uname") #e.g. Linux
49 shell_script = commands.getoutput("which promoter") 49 shell_script = commands.getoutput("which promoter")
50 if not os.path.isfile(shell_script): 50 if not os.path.isfile(shell_script):
51 stop_err("ERROR: Missing promoter executable shell script") 51 sys_exit("ERROR: Missing promoter executable shell script")
52 path = None 52 path = None
53 for line in open(shell_script): 53 for line in open(shell_script):
54 if line.startswith("setenv"): #could then be tab or space! 54 if line.startswith("setenv"): #could then be tab or space!
55 parts = line.rstrip().split(None, 2) 55 parts = line.rstrip().split(None, 2)
56 if parts[0] == "setenv" and parts[1] == "PROM": 56 if parts[0] == "setenv" and parts[1] == "PROM":
57 path = parts[2] 57 path = parts[2]
58 if not path: 58 if not path:
59 stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script) 59 sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script)
60 if not os.path.isdir(path): 60 if not os.path.isdir(path):
61 stop_error("ERROR: %r is not a directory" % path) 61 sys_exit("ERROR: %r is not a directory" % path)
62 bin = "%s/bin/promoter_%s" % (path, platform) 62 bin = "%s/bin/promoter_%s" % (path, platform)
63 if not os.path.isfile(bin): 63 if not os.path.isfile(bin):
64 stop_err("ERROR: Missing promoter binary %r" % bin) 64 sys_exit("ERROR: Missing promoter binary %r" % bin)
65 return path, bin 65 return path, bin
66 66
67 def make_tabular(raw_handle, out_handle): 67 def make_tabular(raw_handle, out_handle):
68 """Parse text output into tabular, return query count.""" 68 """Parse text output into tabular, return query count."""
69 identifier = None 69 identifier = None
84 try: 84 try:
85 position, score, likelihood = line.strip().split(None,2) 85 position, score, likelihood = line.strip().split(None,2)
86 except ValueError: 86 except ValueError:
87 print "WARNING: Problem with line: %r" % line 87 print "WARNING: Problem with line: %r" % line
88 continue 88 continue
89 #stop_err("ERROR: Problem with line: %r" % line) 89 #sys_exit("ERROR: Problem with line: %r" % line)
90 if likelihood not in ["ignored", 90 if likelihood not in ["ignored",
91 "Marginal prediction", 91 "Marginal prediction",
92 "Medium likely prediction", 92 "Medium likely prediction",
93 "Highly likely prediction"]: 93 "Highly likely prediction"]:
94 stop_err("ERROR: Problem with line: %r" % line) 94 sys_exit("ERROR: Problem with line: %r" % line)
95 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) 95 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood))
96 return queries 96 return queries
97 97
98 working_dir, bin = get_path_and_binary() 98 working_dir, bin = get_path_and_binary()
99 99
100 if not os.path.isfile(fasta_file): 100 if not os.path.isfile(fasta_file):
101 stop_err("ERROR: Missing input FASTA file %r" % fasta_file) 101 sys_exit("ERROR: Missing input FASTA file %r" % fasta_file)
102 102
103 #Note that if the input FASTA file contains no sequences, 103 #Note that if the input FASTA file contains no sequences,
104 #split_fasta returns an empty list (i.e. zero temp files). 104 #split_fasta returns an empty list (i.e. zero temp files).
105 #We deliberately omit the FASTA descriptions to avoid a 105 #We deliberately omit the FASTA descriptions to avoid a
106 #bug in promoter2 with descriptions over 200 characters. 106 #bug in promoter2 with descriptions over 200 characters.
131 try: 131 try:
132 output = open(temp).readline() 132 output = open(temp).readline()
133 except IOError: 133 except IOError:
134 output = "" 134 output = ""
135 clean_up(fasta_files + temp_files) 135 clean_up(fasta_files + temp_files)
136 stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), 136 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
137 error_level) 137 error_level)
138 138
139 del results 139 del results
140 del jobs 140 del jobs
141 141
146 data_handle = open(temp) 146 data_handle = open(temp)
147 count = make_tabular(data_handle, out_handle) 147 count = make_tabular(data_handle, out_handle)
148 data_handle.close() 148 data_handle.close()
149 if not count: 149 if not count:
150 clean_up(fasta_files + temp_files) 150 clean_up(fasta_files + temp_files)
151 stop_err("No output from promoter2") 151 sys_exit("No output from promoter2")
152 queries += count 152 queries += count
153 out_handle.close() 153 out_handle.close()
154 154
155 clean_up(fasta_files + temp_files) 155 clean_up(fasta_files + temp_files)
156 print "Results for %i queries" % queries 156 print "Results for %i queries" % queries