Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/promoter2.py @ 26:20139cb4c844 draft
planemo upload for repository https://github.com/peterjc/pico_galaxy/tools/protein_analysis commit 221d4187992cbb993e02dc3ea0ef0150c7916a4a-dirty
author | peterjc |
---|---|
date | Wed, 13 May 2015 06:14:42 -0400 |
parents | 5e62aefb2918 |
children | 22e71e53f534 |
comparison
equal
deleted
inserted
replaced
25:41a42022f815 | 26:20139cb4c844 |
---|---|
28 """ | 28 """ |
29 import sys | 29 import sys |
30 import os | 30 import os |
31 import commands | 31 import commands |
32 import tempfile | 32 import tempfile |
33 from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count | 33 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count |
34 | 34 |
35 FASTA_CHUNK = 500 | 35 FASTA_CHUNK = 500 |
36 | 36 |
37 if len(sys.argv) != 4: | 37 if len(sys.argv) != 4: |
38 stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " | 38 sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " |
39 "Got %i arguments." % (len(sys.argv)-1)) | 39 "Got %i arguments." % (len(sys.argv)-1)) |
40 | 40 |
41 num_threads = thread_count(sys.argv[1],default=4) | 41 num_threads = thread_count(sys.argv[1],default=4) |
42 fasta_file = os.path.abspath(sys.argv[2]) | 42 fasta_file = os.path.abspath(sys.argv[2]) |
43 tabular_file = os.path.abspath(sys.argv[3]) | 43 tabular_file = os.path.abspath(sys.argv[3]) |
46 | 46 |
47 def get_path_and_binary(): | 47 def get_path_and_binary(): |
48 platform = commands.getoutput("uname") #e.g. Linux | 48 platform = commands.getoutput("uname") #e.g. Linux |
49 shell_script = commands.getoutput("which promoter") | 49 shell_script = commands.getoutput("which promoter") |
50 if not os.path.isfile(shell_script): | 50 if not os.path.isfile(shell_script): |
51 stop_err("ERROR: Missing promoter executable shell script") | 51 sys_exit("ERROR: Missing promoter executable shell script") |
52 path = None | 52 path = None |
53 for line in open(shell_script): | 53 for line in open(shell_script): |
54 if line.startswith("setenv"): #could then be tab or space! | 54 if line.startswith("setenv"): #could then be tab or space! |
55 parts = line.rstrip().split(None, 2) | 55 parts = line.rstrip().split(None, 2) |
56 if parts[0] == "setenv" and parts[1] == "PROM": | 56 if parts[0] == "setenv" and parts[1] == "PROM": |
57 path = parts[2] | 57 path = parts[2] |
58 if not path: | 58 if not path: |
59 stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script) | 59 sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script) |
60 if not os.path.isdir(path): | 60 if not os.path.isdir(path): |
61 stop_error("ERROR: %r is not a directory" % path) | 61 sys_exit("ERROR: %r is not a directory" % path) |
62 bin = "%s/bin/promoter_%s" % (path, platform) | 62 bin = "%s/bin/promoter_%s" % (path, platform) |
63 if not os.path.isfile(bin): | 63 if not os.path.isfile(bin): |
64 stop_err("ERROR: Missing promoter binary %r" % bin) | 64 sys_exit("ERROR: Missing promoter binary %r" % bin) |
65 return path, bin | 65 return path, bin |
66 | 66 |
67 def make_tabular(raw_handle, out_handle): | 67 def make_tabular(raw_handle, out_handle): |
68 """Parse text output into tabular, return query count.""" | 68 """Parse text output into tabular, return query count.""" |
69 identifier = None | 69 identifier = None |
84 try: | 84 try: |
85 position, score, likelihood = line.strip().split(None,2) | 85 position, score, likelihood = line.strip().split(None,2) |
86 except ValueError: | 86 except ValueError: |
87 print "WARNING: Problem with line: %r" % line | 87 print "WARNING: Problem with line: %r" % line |
88 continue | 88 continue |
89 #stop_err("ERROR: Problem with line: %r" % line) | 89 #sys_exit("ERROR: Problem with line: %r" % line) |
90 if likelihood not in ["ignored", | 90 if likelihood not in ["ignored", |
91 "Marginal prediction", | 91 "Marginal prediction", |
92 "Medium likely prediction", | 92 "Medium likely prediction", |
93 "Highly likely prediction"]: | 93 "Highly likely prediction"]: |
94 stop_err("ERROR: Problem with line: %r" % line) | 94 sys_exit("ERROR: Problem with line: %r" % line) |
95 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) | 95 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) |
96 return queries | 96 return queries |
97 | 97 |
98 working_dir, bin = get_path_and_binary() | 98 working_dir, bin = get_path_and_binary() |
99 | 99 |
100 if not os.path.isfile(fasta_file): | 100 if not os.path.isfile(fasta_file): |
101 stop_err("ERROR: Missing input FASTA file %r" % fasta_file) | 101 sys_exit("ERROR: Missing input FASTA file %r" % fasta_file) |
102 | 102 |
103 #Note that if the input FASTA file contains no sequences, | 103 #Note that if the input FASTA file contains no sequences, |
104 #split_fasta returns an empty list (i.e. zero temp files). | 104 #split_fasta returns an empty list (i.e. zero temp files). |
105 #We deliberately omit the FASTA descriptions to avoid a | 105 #We deliberately omit the FASTA descriptions to avoid a |
106 #bug in promoter2 with descriptions over 200 characters. | 106 #bug in promoter2 with descriptions over 200 characters. |
131 try: | 131 try: |
132 output = open(temp).readline() | 132 output = open(temp).readline() |
133 except IOError: | 133 except IOError: |
134 output = "" | 134 output = "" |
135 clean_up(fasta_files + temp_files) | 135 clean_up(fasta_files + temp_files) |
136 stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 136 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), |
137 error_level) | 137 error_level) |
138 | 138 |
139 del results | 139 del results |
140 del jobs | 140 del jobs |
141 | 141 |
146 data_handle = open(temp) | 146 data_handle = open(temp) |
147 count = make_tabular(data_handle, out_handle) | 147 count = make_tabular(data_handle, out_handle) |
148 data_handle.close() | 148 data_handle.close() |
149 if not count: | 149 if not count: |
150 clean_up(fasta_files + temp_files) | 150 clean_up(fasta_files + temp_files) |
151 stop_err("No output from promoter2") | 151 sys_exit("No output from promoter2") |
152 queries += count | 152 queries += count |
153 out_handle.close() | 153 out_handle.close() |
154 | 154 |
155 clean_up(fasta_files + temp_files) | 155 clean_up(fasta_files + temp_files) |
156 print "Results for %i queries" % queries | 156 print "Results for %i queries" % queries |