# HG changeset patch # User peterjc # Date 1400244875 14400 # Node ID 8aa6385b9a05b2d04f6a0cf8ac46472385f48d35 # Parent a68f4e5789d7c4a887d24d607e3a00769303a448 Uploaded v0.1.0b, more verbose output diff -r a68f4e5789d7 -r 8aa6385b9a05 test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular --- a/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Thu May 15 13:11:03 2014 -0400 +++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Fri May 16 08:54:35 2014 -0400 @@ -1,2 +1,2 @@ -#A_id B_id A_vs_B B_vs_A -ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 1514 +#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp +ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 92.07 86 1514 92.07 100 diff -r a68f4e5789d7 -r 8aa6385b9a05 test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular --- a/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Thu May 15 13:11:03 2014 -0400 +++ b/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Fri May 16 08:54:35 2014 -0400 @@ -1,2 +1,2 @@ -#A_id B_id A_vs_B B_vs_A -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 701 701 +#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 701 96.55 100 701 96.55 100 diff -r a68f4e5789d7 -r 8aa6385b9a05 test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular --- a/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Thu May 15 13:11:03 2014 -0400 +++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Fri May 16 08:54:35 2014 -0400 @@ -1,2 +1,2 @@ -#A_id B_id A_vs_B B_vs_A -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 1474 +#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 92.07 100 1474 92.07 86 diff -r a68f4e5789d7 -r 8aa6385b9a05 tools/blast_rbh/blast_rbh.py --- a/tools/blast_rbh/blast_rbh.py Thu May 15 13:11:03 2014 -0400 +++ b/tools/blast_rbh/blast_rbh.py Fri May 16 08:54:35 2014 -0400 @@ -11,6 +11,8 @@ 7. Output filename """ +# TODO - Output more columns, e.g. pident, qcovs, descriptions? + import os import sys import tempfile @@ -69,17 +71,14 @@ blast_exe = "blastp" else: stop_err("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type) -makeblastdb_exe = "makeblastdb" -def run(cmd): - return_code = os.system(cmd) - if return_code: - stop_err("Error %i from: %s" % (return_code, cmd)) +try: + threads = int(os.environ.get("GALAXY_SLOTS", "1")) +except: + threads = 1 +assert 1 <= threads, threads -def makeblastdb(fasta_file, dbtype, output_stem): - cmd = "%s -dbtype %s -in %s -out %s" % (makeblastdb_exe, dbtype, fasta_file, output_stem) - return run(cmd) - +makeblastdb_exe = "makeblastdb" base_path = tempfile.mkdtemp() db_a = os.path.join(base_path, "SpeciesA") @@ -100,13 +99,13 @@ run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_a, db_a, log)) run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_b, db_b, log)) print("BLAST databases prepared.") -run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s"' % (blast_exe, blast_type, fasta_a, db_b, a_vs_b, cols)) +run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' + % (blast_exe, blast_type, fasta_a, db_b, a_vs_b, cols, threads)) print("BLAST species A vs species B done.") -run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s"' % (blast_exe, blast_type, fasta_b, db_a, b_vs_a, cols)) +run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' + % (blast_exe, blast_type, fasta_b, db_a, b_vs_a, cols, threads)) print("BLAST species B vs species A done.") -#TODO - Include identity and coverage filters... - best_a_vs_b = dict() for line in open(a_vs_b): if line.startswith("#"): continue @@ -117,8 +116,9 @@ continue score = float(parts[c_score]) if a not in best_a_vs_b or score > best_a_vs_b[a][1]: - best_a_vs_b[a] = (b, score, parts[c_score]) -b_short_list = set(b for (b,score, score_str) in best_a_vs_b.values()) + # Store bitscore as a float for sorting, original string for output + best_a_vs_b[a] = (b, score, parts[c_score], parts[c_identity], parts[c_coverage]) +b_short_list = set(v[0] for v in best_a_vs_b.values()) best_b_vs_a = dict() for line in open(b_vs_a): @@ -134,22 +134,23 @@ continue score = float(parts[c_score]) if b not in best_b_vs_a or score > best_b_vs_a[b][1]: - best_b_vs_a[b] = (a, score, parts[c_score]) + # Store bitscore as a float for sorting, original string for output + best_b_vs_a[b] = (a, score, parts[c_score], parts[c_identity], parts[c_coverage]) #TODO - Preserve order from A vs B? -a_short_list = sorted(set(a for (a,score,score_str) in best_b_vs_a.values())) +a_short_list = sorted(set(v[0] for v in best_b_vs_a.values())) count = 0 outfile = open(out_file, 'w') -outfile.write("#A_id\tB_id\tA_vs_B\tB_vs_A\n") +outfile.write("#A_id\tB_id\tA_vs_B bitscore\tA_vs_B pident\tA_vs_B qcovhsp\tB_vs_A bitscore\tB_vs_A pident\tB_vs_A qcovhsp\n") for a in a_short_list: b = best_a_vs_b[a][0] if b in best_b_vs_a and a == best_b_vs_a[b][0]: - outfile.write("%s\t%s\t%s\t%s\n" % (a, b, best_a_vs_b[a][2], best_b_vs_a[b][2])) + #Output the original string versions of the scores + values = [a, b] + list(best_a_vs_b[a][2:]) + list(best_b_vs_a[b][2:]) + outfile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % tuple(values)) count += 1 outfile.close() print "Done, %i RBH found" % count - #Remove temp files... shutil.rmtree(base_path) - diff -r a68f4e5789d7 -r 8aa6385b9a05 tools/blast_rbh/blast_rbh.xml --- a/tools/blast_rbh/blast_rbh.xml Thu May 15 13:11:03 2014 -0400 +++ b/tools/blast_rbh/blast_rbh.xml Fri May 16 08:54:35 2014 -0400 @@ -119,21 +119,28 @@ **What it does** -Takes two FASTA files (species *A* and species *B*), builds a BLAST database +Takes two FASTA files (*species A* and *species B*), builds a BLAST database for each, runs reciprocal BLAST searchs (*A vs B*, and *B vs A*), optionally -filters these, and then compiles a list of the reciprocal best hits (RBH). +filters the HSPs, and then compiles a list of the reciprocal best hits (RBH). -The output from this tool is a tabular file containing four columns, with -the order taken from input file A: +The output from this tool is a tabular file containing eight columns, with +information about the BLAST matches used: -====== ====================== +====== ================================= Column Description ------- ---------------------- - 1 ID from species *A* - 2 ID from species *B* +------ --------------------------------- + 1 ID from *species A* + 2 ID from *species B* 3 Bitscore from *A vs B* - 4 Bitscore from *B vs A* -====== ====================== + 4 Percentage identity from *A vs B* + 5 Query coverage from *A vs B* + 6 Bitscore from *B vs A* + 7 Percentage identity from *B vs A* + 8 Query coverage from *B vs A* +====== ================================= + +These values correspond to the ``bitscore``, ``pident`` and ``qcovhsp`` +values in the BLAST+ tabular output. .. class:: warningmark