# HG changeset patch # User peterjc # Date 1431512082 14400 # Node ID 20139cb4c844c5e6474222824de48533d06e8d14 # Parent 41a42022f815e62bf7609a0db63623884f04e012 planemo upload for repository https://github.com/peterjc/pico_galaxy/tools/protein_analysis commit 221d4187992cbb993e02dc3ea0ef0150c7916a4a-dirty diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/README.rst --- a/tools/protein_analysis/README.rst Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/README.rst Wed May 13 06:14:42 2015 -0400 @@ -14,7 +14,7 @@ To use these Galaxy wrappers you must first install the command line tools. At the time of writing they are all free for academic use, or open source. -These wrappers are copyright 2010-2013 by Peter Cock, James Hutton Institute +These wrappers are copyright 2010-2015 by Peter Cock, James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. Contributions/revisions copyright 2011 Konrad Paszkiewicz. All rights reserved. See the included LICENCE file for details (MIT open source licence). @@ -174,6 +174,9 @@ v0.2.6 - Use the new ``$GALAXY_SLOTS`` environment variable for thread count. - Updated the ``suite_config.xml`` file (overdue). - Tool definition now embeds citation information. +v0.2.7 - Style cleanup in Python scripts. +v0.2.8 - Reorder XML elements (internal change only). + - Planemo for Tool Shed upload (``.shed.yml``, internal change only). ======= ====================================================================== @@ -187,10 +190,61 @@ Development has now moved to a dedicated GitHub repository: https://github.com/peterjc/pico_galaxy/tree/master/tools -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use -the following command from the Galaxy root folder:: + +For pushing a release to the test or main "Galaxy Tool Shed", use the following +Planemo commands (which requires you have set your Tool Shed access details in +``~/.planemo.yml`` and that you have access rights on the Tool Shed):: + + $ planemo shed_upload --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/protein_analysis/ + ... + +or:: + + $ planemo shed_upload --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/protein_analysis/ + ... + +To just build and check the tar ball, use:: - $ ./tools/protein_analysis/make_tmhmm_and_signalp.sh + $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/protein_analysis/ + ... + $ tar -tzf shed_upload.tar.gz + test-data/Adenovirus.fasta + test-data/Adenovirus.promoter2.tabular + test-data/empty.fasta + test-data/empty_promoter2.tabular + test-data/empty_psortb_terse.tabular + test-data/empty_rxlr.Bhattacharjee2006.tabular + test-data/empty_rxlr.Whisson2007.tabular + test-data/empty_rxlr.Win2007.tabular + test-data/empty_signalp3.tabular + test-data/empty_tmhmm2.tabular + test-data/empty_wolf_psort.tabular + test-data/four_human_proteins.fasta + test-data/four_human_proteins.signalp3.tabular + test-data/four_human_proteins.tmhmm2.tabular + test-data/four_human_proteins.wolf_psort.tabular + test-data/k12_ten_proteins.fasta + test-data/k12_ten_proteins_psortb_p_terse.tabular + test-data/rxlr_win_et_al_2007.fasta + test-data/rxlr_win_et_al_2007.tabular + test-data/rxlr_win_et_al_2007_sp3.tabular + tools/protein_analysis/LICENSE.txt + tools/protein_analysis/README.rst + tools/protein_analysis/promoter2.py + tools/protein_analysis/promoter2.xml + tools/protein_analysis/psortb.py + tools/protein_analysis/psortb.xml + tools/protein_analysis/rxlr_motifs.py + tools/protein_analysis/rxlr_motifs.xml + tools/protein_analysis/seq_analysis_utils.py + tools/protein_analysis/signalp3.py + tools/protein_analysis/signalp3.xml + tools/protein_analysis/suite_config.xml + tools/protein_analysis/tmhmm2.py + tools/protein_analysis/tmhmm2.xml + tools/protein_analysis/whisson_et_al_rxlr_eer_cropped.hmm + tools/protein_analysis/wolf_psort.py + tools/protein_analysis/wolf_psort.xml This simplifies ensuring a consistent set of files is bundled each time, including all the relevant test files. diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/promoter2.py --- a/tools/protein_analysis/promoter2.py Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/promoter2.py Wed May 13 06:14:42 2015 -0400 @@ -30,12 +30,12 @@ import os import commands import tempfile -from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count +from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count FASTA_CHUNK = 500 if len(sys.argv) != 4: - stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " + sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " "Got %i arguments." % (len(sys.argv)-1)) num_threads = thread_count(sys.argv[3],default=4) @@ -48,7 +48,7 @@ platform = commands.getoutput("uname") #e.g. Linux shell_script = commands.getoutput("which promoter") if not os.path.isfile(shell_script): - stop_err("ERROR: Missing promoter executable shell script") + sys_exit("ERROR: Missing promoter executable shell script") path = None for line in open(shell_script): if line.startswith("setenv"): #could then be tab or space! @@ -56,12 +56,12 @@ if parts[0] == "setenv" and parts[1] == "PROM": path = parts[2] if not path: - stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script) + sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script) if not os.path.isdir(path): - stop_error("ERROR: %r is not a directory" % path) + sys_exit("ERROR: %r is not a directory" % path) bin = "%s/bin/promoter_%s" % (path, platform) if not os.path.isfile(bin): - stop_err("ERROR: Missing promoter binary %r" % bin) + sys_exit("ERROR: Missing promoter binary %r" % bin) return path, bin def make_tabular(raw_handle, out_handle): @@ -86,19 +86,19 @@ except ValueError: print "WARNING: Problem with line: %r" % line continue - #stop_err("ERROR: Problem with line: %r" % line) + #sys_exit("ERROR: Problem with line: %r" % line) if likelihood not in ["ignored", "Marginal prediction", "Medium likely prediction", "Highly likely prediction"]: - stop_err("ERROR: Problem with line: %r" % line) + sys_exit("ERROR: Problem with line: %r" % line) out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) return queries working_dir, bin = get_path_and_binary() if not os.path.isfile(fasta_file): - stop_err("ERROR: Missing input FASTA file %r" % fasta_file) + sys_exit("ERROR: Missing input FASTA file %r" % fasta_file) #Note that if the input FASTA file contains no sequences, #split_fasta returns an empty list (i.e. zero temp files). @@ -133,7 +133,7 @@ except IOError: output = "" clean_up(fasta_files + temp_files) - stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), + sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), error_level) del results @@ -148,7 +148,7 @@ data_handle.close() if not count: clean_up(fasta_files + temp_files) - stop_err("No output from promoter2") + sys_exit("No output from promoter2") queries += count out_handle.close() diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/promoter2.xml --- a/tools/protein_analysis/promoter2.xml Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/promoter2.xml Wed May 13 06:14:42 2015 -0400 @@ -1,27 +1,28 @@ - + Find eukaryotic PolII promoters in DNA sequences + + promoter + promoter + + + + + + promoter2.py "\$GALAXY_SLOTS" "$fasta_file" "$tabular_file" ##If the environment variable isn't set, get "", and the python wrapper ##defaults to four threads. - - - - - - - promoter - diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/psortb.py --- a/tools/protein_analysis/psortb.py Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/psortb.py Wed May 13 06:14:42 2015 -0400 @@ -24,7 +24,7 @@ import sys import os import tempfile -from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count +from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count FASTA_CHUNK = 500 @@ -33,7 +33,7 @@ sys.exit(os.system("psort --version")) if len(sys.argv) != 8: - stop_err("Require 7 arguments, number of threads (int), type (e.g. archaea), " + sys_exit("Require 7 arguments, number of threads (int), type (e.g. archaea), " "output (e.g. terse/normal/long), cutoff, divergent, input protein " "FASTA file & output tabular file") @@ -56,7 +56,7 @@ if out_type == "terse": header = ['SeqID', 'Localization', 'Score'] elif out_type == "normal": - stop_err("Normal output not implemented yet, sorry.") + sys_exit("Normal output not implemented yet, sorry.") elif out_type == "long": if org_type == "-n": #Gram negative bacteria @@ -93,9 +93,9 @@ 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', 'Secondary_Localization', 'PSortb_Version'] else: - stop_err("Expected -n, -p or -a for the organism type, not %r" % org_type) + sys_exit("Expected -n, -p or -a for the organism type, not %r" % org_type) else: - stop_err("Expected terse, normal or long for the output type, not %r" % out_type) + sys_exit("Expected terse, normal or long for the output type, not %r" % out_type) tmp_dir = tempfile.mkdtemp() @@ -149,7 +149,7 @@ except IOError: output = "" clean_up(fasta_files + temp_files) - stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), + sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), error_level) del results del jobs @@ -163,7 +163,7 @@ data_handle.close() if not count: clean_up(fasta_files + temp_files) - stop_err("No output from psortb") + sys_exit("No output from psortb") out_handle.close() print "%i records" % count diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/psortb.xml --- a/tools/protein_analysis/psortb.xml Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/psortb.xml Wed May 13 06:14:42 2015 -0400 @@ -1,61 +1,62 @@ - - Determines sub-cellular localisation of bacterial/archaeal protein sequences - - - - psortb.py --version - - psortb.py "\$GALAXY_SLOTS" "$type" "$long" "$cutoff" "$divergent" "$sequence" "$outfile" - ##If the environment variable isn't set, get "", and python wrapper - ##defaults to four threads. - - - - - - - - - - - - - - - - - - - - - - - - - - psort - - - - - - - - - - - - - - + + Determines sub-cellular localisation of bacterial/archaeal protein sequences + + + + + psort + psort + + + + + + + psortb.py --version + +psortb.py "\$GALAXY_SLOTS" "$type" "$long" "$cutoff" "$divergent" "$sequence" "$outfile" +##If the environment variable isn't set, get "", and python wrapper +##defaults to four threads. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + **What it does** @@ -99,9 +100,9 @@ This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/tmhmm_and_signalp + 10.7717/peerj.167 10.1093/bioinformatics/btq249 - diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/rxlr_motifs.py --- a/tools/protein_analysis/rxlr_motifs.py Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/rxlr_motifs.py Wed May 13 06:14:42 2015 -0400 @@ -40,10 +40,14 @@ import sys import re import subprocess -from seq_analysis_utils import stop_err, fasta_iterator +from seq_analysis_utils import sys_exit, fasta_iterator + +if "-v" in sys.argv: + print("RXLR Motifs v0.0.10") + sys.exit(0) if len(sys.argv) != 5: - stop_err("Requires four arguments: protein FASTA filename, threads, model, and output filename") + sys_exit("Requires four arguments: protein FASTA filename, threads, model, and output filename") fasta_file, threads, model, tabular_file = sys.argv[1:] hmm_output_file = tabular_file + ".hmm.tmp" @@ -53,36 +57,36 @@ hmmer_search = "hmmsearch2" if model == "Bhattacharjee2006": - signalp_trunc = 70 - re_rxlr = re.compile("R.LR") - min_sp = 10 - max_sp = 40 - max_sp_rxlr = 100 - min_rxlr_start = 1 - #Allow signal peptide to be at most 40aa, and want RXLR to be - #within 100aa, therefore for the prescreen the max start is 140: - max_rxlr_start = max_sp + max_sp_rxlr + signalp_trunc = 70 + re_rxlr = re.compile("R.LR") + min_sp = 10 + max_sp = 40 + max_sp_rxlr = 100 + min_rxlr_start = 1 + # Allow signal peptide to be at most 40aa, and want RXLR to be + # within 100aa, therefore for the prescreen the max start is 140: + max_rxlr_start = max_sp + max_sp_rxlr elif model == "Win2007": - signalp_trunc = 70 - re_rxlr = re.compile("R.LR") - min_sp = 10 - max_sp = 40 - min_rxlr_start = 30 - max_rxlr_start = 60 - #No explicit limit on separation of signal peptide clevage - #and RXLR, but shortest signal peptide is 10, and furthest - #away RXLR is 60, so effectively limit is 50. - max_sp_rxlr = max_rxlr_start - min_sp + 1 + signalp_trunc = 70 + re_rxlr = re.compile("R.LR") + min_sp = 10 + max_sp = 40 + min_rxlr_start = 30 + max_rxlr_start = 60 + # No explicit limit on separation of signal peptide clevage + # and RXLR, but shortest signal peptide is 10, and furthest + # away RXLR is 60, so effectively limit is 50. + max_sp_rxlr = max_rxlr_start - min_sp + 1 elif model == "Whisson2007": - signalp_trunc = 0 #zero for no truncation - re_rxlr = re.compile("R.LR.{,40}[ED][ED][KR]") - min_sp = 10 - max_sp = 40 - max_sp_rxlr = 100 - min_rxlr_start = 1 - max_rxlr_start = max_sp + max_sp_rxlr + signalp_trunc = 0 # zero for no truncation + re_rxlr = re.compile("R.LR.{,40}[ED][ED][KR]") + min_sp = 10 + max_sp = 40 + max_sp_rxlr = 100 + min_rxlr_start = 1 + max_rxlr_start = max_sp + max_sp_rxlr else: - stop_err("Did not recognise the model name %r\n" + sys_exit("Did not recognise the model name %r\n" "Use Bhattacharjee2006, Win2007, or Whisson2007" % model) @@ -108,49 +112,49 @@ hmm_file = os.path.join(os.path.split(sys.argv[0])[0], "whisson_et_al_rxlr_eer_cropped.hmm") if not os.path.isfile(hmm_file): - stop_err("Missing HMM file for Whisson et al. (2007)") + sys_exit("Missing HMM file for Whisson et al. (2007)") if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"): - stop_err("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_searcher) + sys_exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search) hmm_hits = set() valid_ids = set() for title, seq in fasta_iterator(fasta_file): name = title.split(None,1)[0] if name in valid_ids: - stop_err("Duplicated identifier %r" % name) + sys_exit("Duplicated identifier %r" % name) else: valid_ids.add(name) if not valid_ids: - #Special case, don't need to run HMMER if there are no sequences + # Special case, don't need to run HMMER if there are no sequences pass else: - #I've left the code to handle HMMER 3 in situ, in case - #we revisit the choice to insist on HMMER 2. + # I've left the code to handle HMMER 3 in situ, in case + # we revisit the choice to insist on HMMER 2. hmmer3 = (3 == get_hmmer_version(hmmer_search)) - #Using zero (or 5.6?) for bitscore threshold + # Using zero (or 5.6?) for bitscore threshold if hmmer3: - #The HMMER3 table output is easy to parse - #In HMMER3 can't use both -T and -E + # The HMMER3 table output is easy to parse + # In HMMER3 can't use both -T and -E cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \ % (hmmer_search, hmm_output_file, hmm_file, fasta_file) else: - #For HMMER2 we are stuck with parsing stdout - #Put 1e6 to effectively have no expectation threshold (otherwise - #HMMER defaults to 10 and the calculated e-value depends on the - #input FASTA file, and we can loose hits of interest). + # For HMMER2 we are stuck with parsing stdout + # Put 1e6 to effectively have no expectation threshold (otherwise + # HMMER defaults to 10 and the calculated e-value depends on the + # input FASTA file, and we can loose hits of interest). cmd = "%s -T 0 -E 1e6 %s %s > %s" \ % (hmmer_search, hmm_file, fasta_file, hmm_output_file) return_code = os.system(cmd) if return_code: - stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code) + sys_exit("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code) handle = open(hmm_output_file) for line in handle: if not line.strip(): - #We expect blank lines in the HMMER2 stdout + # We expect blank lines in the HMMER2 stdout continue elif line.startswith("#"): - #Header + # Header continue else: name = line.split(None,1)[0] @@ -159,18 +163,18 @@ if name in valid_ids: hmm_hits.add(name) elif hmmer3: - stop_err("Unexpected identifer %r in hmmsearch output" % name) + sys_exit("Unexpected identifer %r in hmmsearch output" % name) handle.close() - #if hmmer3: - # print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids)) - #else: - # print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids)) - #print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids)) + # if hmmer3: + # print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids)) + # else: + # print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids)) + # print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids)) os.remove(hmm_output_file) del valid_ids -#Prepare short list of candidates containing RXLR to pass to SignalP +# Prepare short list of candidates containing RXLR to pass to SignalP assert min_rxlr_start > 0, "Min value one, since zero based counting" count = 0 total = 0 @@ -180,27 +184,28 @@ name = title.split(None,1)[0] match = re_rxlr.search(seq[min_rxlr_start-1:].upper()) if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start: - #This is a potential RXLR, depending on the SignalP results. - #Might as well truncate the sequence now, makes the temp file smaller + # This is a potential RXLR, depending on the SignalP results. + # Might as well truncate the sequence now, makes the temp file smaller if signalp_trunc: handle.write(">%s (truncated)\n%s\n" % (name, seq[:signalp_trunc])) else: - #Does it matter we don't line wrap? + # Does it matter we don't line wrap? handle.write(">%s\n%s\n" % (name, seq)) count += 1 handle.close() -#print "Running SignalP on %i/%i potentials." % (count, total) +# print "Running SignalP on %i/%i potentials." % (count, total) -#Run SignalP (using our wrapper script to get multi-core support etc) +# Run SignalP (using our wrapper script to get multi-core support etc) signalp_script = os.path.join(os.path.split(sys.argv[0])[0], "signalp3.py") if not os.path.isfile(signalp_script): - stop_err("Error - missing signalp3.py script") + sys_exit("Error - missing signalp3.py script") cmd = "python %s euk %i %s %s %s" % (signalp_script, signalp_trunc, threads, signalp_input_file, signalp_output_file) return_code = os.system(cmd) if return_code: - stop_err("Error %i from SignalP:\n%s" % (return_code, cmd)) -#print "SignalP done" + sys_exit("Error %i from SignalP:\n%s" % (return_code, cmd)) +# print "SignalP done" + def parse_signalp(filename): """Parse SignalP output, yield tuples of ID, HMM_Sprob_score and NN predicted signal peptide length. @@ -217,7 +222,7 @@ handle.close() -#Parse SignalP results and apply the strict RXLR criteria +# Parse SignalP results and apply the strict RXLR criteria total = 0 tally = dict() handle = open(tabular_file, "w") @@ -230,26 +235,26 @@ match = re_rxlr.search(seq[min_rxlr_start-1:].upper()) if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start: del match - #This was the criteria for calling SignalP, + # This was the criteria for calling SignalP, #so it will be in the SignalP results. sp_id, sp_hmm_score, sp_nn_len = signalp_results.next() assert name == sp_id, "%s vs %s" % (name, sp_id) if sp_hmm_score >= min_signalp_hmm and min_sp <= sp_nn_len <= max_sp: match = re_rxlr.search(seq[sp_nn_len:].upper()) - if match and match.start() + 1 <= max_sp_rxlr: #1-based counting + if match and match.start() + 1 <= max_sp_rxlr: # 1-based counting rxlr_start = sp_nn_len + match.start() + 1 if min_rxlr_start <= rxlr_start <= max_rxlr_start: rxlr = "Y" if model == "Whisson2007": - #Combine the signalp with regular expression heuristic and the HMM + # Combine the signalp with regular expression heuristic and the HMM if name in hmm_hits and rxlr == "N": - rxlr = "hmm" #HMM only + rxlr = "hmm" # HMM only elif rxlr == "N": - rxlr = "neither" #Don't use N (no) + rxlr = "neither" # Don't use N (no) elif name not in hmm_hits and rxlr == "Y": - rxlr = "re" #Heuristic only - #Now have a four way classifier: Y, hmm, re, neither - #and count is the number of Y results (both HMM and heuristic) + rxlr = "re" # Heuristic only + # Now have a four way classifier: Y, hmm, re, neither + # and count is the number of Y results (both HMM and heuristic) handle.write("%s\t%s\n" % (name, rxlr)) try: tally[rxlr] += 1 @@ -258,17 +263,17 @@ handle.close() assert sum(tally.values()) == total -#Check the iterator is finished +# Check the iterator is finished try: signalp_results.next() assert False, "Unexpected data in SignalP output" except StopIteration: pass -#Cleanup +# Cleanup os.remove(signalp_input_file) os.remove(signalp_output_file) -#Short summary to stdout for Galaxy's info display +# Short summary to stdout for Galaxy's info display print "%s for %i sequences:" % (model, total) print ", ".join("%s = %i" % kv for kv in sorted(tally.iteritems())) diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/rxlr_motifs.xml --- a/tools/protein_analysis/rxlr_motifs.xml Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/rxlr_motifs.xml Wed May 13 06:14:42 2015 -0400 @@ -1,13 +1,22 @@ - + Find RXLR Effectors of Plant Pathogenic Oomycetes - - rxlr_motifs.py "$fasta_file" "\$GALAXY_SLOTS" $model "$tabular_file" - + + + signalp + signalp + + hmmsearch + hmmsearch + + rxlr_motifs.py -v + + rxlr_motifs.py "$fasta_file" "\$GALAXY_SLOTS" $model "$tabular_file" + @@ -19,12 +28,6 @@ - - - signalp - - hmmsearch - diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/seq_analysis_utils.py --- a/tools/protein_analysis/seq_analysis_utils.py Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/seq_analysis_utils.py Wed May 13 06:14:42 2015 -0400 @@ -14,7 +14,7 @@ __version__ = "0.0.1" -def stop_err(msg, error_level=1): +def sys_exit(msg, error_level=1): """Print error message to stdout and quit with given error level.""" sys.stderr.write("%s\n" % msg) sys.exit(error_level) @@ -57,7 +57,7 @@ except: num = default if num < 1: - stop_err("Threads argument %r is not a positive integer" % command_line_arg) + sys_exit("Threads argument %r is not a positive integer" % command_line_arg) #Cap this with the pysical limit of the machine, try: num = min(num, cpu_count()) diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/signalp3.py --- a/tools/protein_analysis/signalp3.py Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/signalp3.py Wed May 13 06:14:42 2015 -0400 @@ -56,28 +56,28 @@ import sys import os import tempfile -from seq_analysis_utils import stop_err, split_fasta, fasta_iterator +from seq_analysis_utils import sys_exit, split_fasta, fasta_iterator from seq_analysis_utils import run_jobs, thread_count FASTA_CHUNK = 500 MAX_LEN = 6000 #Found by trial and error if len(sys.argv) not in [6,8]: - stop_err("Require five (or 7) arguments, organism, truncate, threads, " + sys_exit("Require five (or 7) arguments, organism, truncate, threads, " "input protein FASTA file & output tabular file (plus " "optionally cut method and GFF3 output file). " "Got %i arguments." % (len(sys.argv)-1)) organism = sys.argv[1] if organism not in ["euk", "gram+", "gram-"]: - stop_err("Organism argument %s is not one of euk, gram+ or gram-" % organism) + sys_exit("Organism argument %s is not one of euk, gram+ or gram-" % organism) try: truncate = int(sys.argv[2]) except: truncate = 0 if truncate < 0: - stop_err("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2]) + sys_exit("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2]) num_threads = thread_count(sys.argv[3], default=4) fasta_file = sys.argv[4] @@ -86,7 +86,7 @@ if len(sys.argv) == 8: cut_method = sys.argv[6] if cut_method not in ["NN_Cmax", "NN_Ymax", "NN_Smax", "HMM_Cmax"]: - stop_err("Invalid cut method %r" % cut_method) + sys_exit("Invalid cut method %r" % cut_method) gff3_file = sys.argv[7] else: cut_method = None @@ -197,7 +197,7 @@ output = "(no output)" if error_level or output.lower().startswith("error running"): clean_up(fasta_files + temp_files) - stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), + sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), error_level) del results diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/signalp3.xml --- a/tools/protein_analysis/signalp3.xml Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/signalp3.xml Wed May 13 06:14:42 2015 -0400 @@ -1,18 +1,22 @@ - + Find signal peptides in protein sequences + + signalp + signalp + + + + + + signalp3.py $organism $truncate "\$GALAXY_SLOTS" $fasta_file $tabular_file ##If the environment variable isn't set, get "", and the python wrapper ##defaults to four threads. - - - - - @@ -27,9 +31,6 @@ - - signalp - diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/suite_config.xml --- a/tools/protein_analysis/suite_config.xml Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/suite_config.xml Wed May 13 06:14:42 2015 -0400 @@ -1,21 +1,21 @@ - + TMHMM, SignalP, RXLR motifs, WoLF PSORT - + Find transmembrane domains in protein sequences - + Find signal peptides in protein sequences - + Find eukaryotic PolII promoters in DNA sequences - + Bacteria/archaea protein subcellular localization prediction - + Eukaryote protein subcellular localization prediction - + Find RXLR Effectors of Plant Pathogenic Oomycetes diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/tmhmm2.py --- a/tools/protein_analysis/tmhmm2.py Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/tmhmm2.py Wed May 13 06:14:42 2015 -0400 @@ -43,12 +43,12 @@ import sys import os import tempfile -from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count +from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count FASTA_CHUNK = 500 if len(sys.argv) != 4: - stop_err("Require three arguments, number of threads (int), input protein FASTA file & output tabular file") + sys_exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file") num_threads = thread_count(sys.argv[1], default=4) fasta_file = sys.argv[2] @@ -68,7 +68,7 @@ identifier, length, expAA, first60, predhel, topology = parts except: assert len(parts)!=6 - stop_err("Bad line: %r" % line) + sys_exit("Bad line: %r" % line) assert length.startswith("len="), line length = length[4:] assert expAA.startswith("ExpAA="), line @@ -112,7 +112,7 @@ except IOError: output = "" clean_up(fasta_files + temp_files) - stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), + sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), error_level) del results del jobs @@ -125,7 +125,7 @@ data_handle.close() if not count: clean_up(fasta_files + temp_files) - stop_err("No output from tmhmm2") + sys_exit("No output from tmhmm2") out_handle.close() clean_up(fasta_files + temp_files) diff -r 41a42022f815 -r 20139cb4c844 tools/protein_analysis/tmhmm2.xml --- a/tools/protein_analysis/tmhmm2.xml Fri Nov 21 08:17:36 2014 -0500 +++ b/tools/protein_analysis/tmhmm2.xml Wed May 13 06:14:42 2015 -0400 @@ -1,18 +1,22 @@ - + Find transmembrane domains in protein sequences + + tmhmm + tmhmm + + + + + + tmhmm2.py "\$GALAXY_SLOTS" $fasta_file $tabular_file ##If the environment variable isn't set, get "", and the python wrapper ##defaults to four threads. - - - - - + + + wolf_psort.py $organism "\$GALAXY_SLOTS" "$fasta_file" "$tabular_file" ##If the environment variable isn't set, get "", and python wrapper ##defaults to four threads. - - - - - @@ -21,9 +25,6 @@ - - runWolfPsortSummary -