Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/signalp3.py @ 8:391a142c1e60 draft
Uploaded
author | peterjc |
---|---|
date | Tue, 26 Mar 2013 14:27:44 -0400 |
parents | 5e62aefb2918 |
children | 20139cb4c844 |
line wrap: on
line diff
--- a/tools/protein_analysis/signalp3.py Tue Mar 26 14:24:56 2013 -0400
+++ b/tools/protein_analysis/signalp3.py Tue Mar 26 14:27:44 2013 -0400
@@ -63,34 +63,34 @@
 MAX_LEN = 6000 #Found by trial and error
 
 if len(sys.argv) not in [6,8]:
-    stop_err("Require five (or 7) arguments, organism, truncate, threads, "
-             "input protein FASTA file & output tabular file (plus "
-             "optionally cut method and GFF3 output file). "
-             "Got %i arguments." % (len(sys.argv)-1))
+    stop_err("Require five (or 7) arguments, organism, truncate, threads, "
+             "input protein FASTA file & output tabular file (plus "
+             "optionally cut method and GFF3 output file). "
+             "Got %i arguments." % (len(sys.argv)-1))
 
 organism = sys.argv[1]
 if organism not in ["euk", "gram+", "gram-"]:
-    stop_err("Organism argument %s is not one of euk, gram+ or gram-" % organism)
+    stop_err("Organism argument %s is not one of euk, gram+ or gram-" % organism)
 
 try:
-    truncate = int(sys.argv[2])
+    truncate = int(sys.argv[2])
 except:
-    truncate = 0
+    truncate = 0
 if truncate < 0:
-    stop_err("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2])
+    stop_err("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2])
 
 num_threads = thread_count(sys.argv[3], default=4)
 fasta_file = sys.argv[4]
 tabular_file = sys.argv[5]
 
 if len(sys.argv) == 8:
-    cut_method = sys.argv[6]
-    if cut_method not in ["NN_Cmax", "NN_Ymax", "NN_Smax", "HMM_Cmax"]:
-        stop_err("Invalid cut method %r" % cut_method)
-    gff3_file = sys.argv[7]
+    cut_method = sys.argv[6]
+    if cut_method not in ["NN_Cmax", "NN_Ymax", "NN_Smax", "HMM_Cmax"]:
+        stop_err("Invalid cut method %r" % cut_method)
+    gff3_file = sys.argv[7]
 else:
-    cut_method = None
-    gff3_file = None
+    cut_method = None
+    gff3_file = None
 
 tmp_dir = tempfile.mkdtemp()
 
@@ -98,18 +98,19 @@
 def clean_tabular(raw_handle, out_handle, gff_handle=None, cut_method=None):
     """Clean up SignalP output to make it tabular."""
     if cut_method:
-        cut_col = {"NN_Cmax" : 2,
-                   "NN_Ymax" : 5,
-                   "NN_Smax" : 8,
-                   "HMM_Cmax" : 16}[cut_method]
+        cut_col = {"NN_Cmax" : 2,
+                   "NN_Ymax" : 5,
+                   "NN_Smax" : 8,
+                   "HMM_Cmax" : 16}[cut_method]
     else:
-        cut_col = None
+        cut_col = None
     for line in raw_handle:
         if not line or line.startswith("#"):
             continue
         parts = line.rstrip("\r\n").split()
         assert len(parts)==21, repr(line)
-        assert parts[14].startswith(parts[0])
+        assert parts[14].startswith(parts[0]), \
+            "Bad entry in SignalP output, ID miss-match:\n%r" % line
         #Remove redundant truncated name column (col 0)
         #and put full name at start (col 14)
         parts = parts[14:15] + parts[1:14] + parts[15:]
@@ -218,6 +219,6 @@
 
 #GFF3:
 if cut_method:
-    make_gff(fasta_file, tabular_file, gff3_file, cut_method)
+    make_gff(fasta_file, tabular_file, gff3_file, cut_method)
 
 clean_up(fasta_files + temp_files)