# HG changeset patch # User peterjc # Date 1400521195 14400 # Node ID 57245c11b8cb28e67db8d440f5ac5e740e2eb8b5 # Parent 347eb6bfabd7ca0be06d2d6663979d26569c78c6 Uploaded v0.1.0d, TBLASTX support; changed output columns diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular --- a/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Fri May 16 11:16:02 2014 -0400 +++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Mon May 19 13:39:55 2014 -0400 @@ -1,2 +1,2 @@ -#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp -ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 92.07 86 1514 92.07 100 +#A_id B_id bitscore pident A qcovhsp B qcovhsp +ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 92.07 86 100 diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular --- a/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Fri May 16 11:16:02 2014 -0400 +++ b/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Mon May 19 13:39:55 2014 -0400 @@ -1,2 +1,2 @@ -#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 701 96.55 100 701 96.55 100 +#A_id B_id bitscore pident A qcovhsp B qcovhsp +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 701 96.55 100 100 diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular --- a/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Fri May 16 11:16:02 2014 -0400 +++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Mon May 19 13:39:55 2014 -0400 @@ -1,2 +1,2 @@ -#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 92.07 100 1474 92.07 86 +#A_id B_id bitscore pident A qcovhsp B qcovhsp +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 92.07 100 86 diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_none.tabular --- a/test-data/rbh_none.tabular Fri May 16 11:16:02 2014 -0400 +++ b/test-data/rbh_none.tabular Mon May 19 13:39:55 2014 -0400 @@ -1,1 +1,1 @@ -#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp +#A_id B_id bitscore pident A qcovhsp B qcovhsp diff -r 347eb6bfabd7 -r 57245c11b8cb tools/blast_rbh/blast_rbh.py --- a/tools/blast_rbh/blast_rbh.py Fri May 16 11:16:02 2014 -0400 +++ b/tools/blast_rbh/blast_rbh.py Mon May 19 13:39:55 2014 -0400 @@ -62,13 +62,16 @@ if dbtype == "nucl": - if blast_type not in ["megablast", "blastn", "blastn-short", "dc-megablast"]: + if blast_type in ["megablast", "blastn", "blastn-short", "dc-megablast"]: + blast_cmd = "blastn -task %s" % blast_type + elif blast_type == "tblastx": + blast_cmd = "tblastx" + else: stop_err("Invalid BLAST type for BLASTN: %r" % blast_type) - blast_exe = "blastn" elif dbtype == "prot": if blast_type not in ["blastp", "blastp-short"]: stop_err("Invalid BLAST type for BLASTP: %r" % blast_type) - blast_exe = "blastp" + blast_cmd = "blastp -task %s" % blast_type else: stop_err("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type) @@ -94,17 +97,17 @@ c_identity = 3 c_coverage = 4 -print("Starting...") +#print("Starting...") #TODO - Report log in case of error? run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_a, db_a, log)) run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_b, db_b, log)) -print("BLAST databases prepared.") -run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' - % (blast_exe, blast_type, fasta_a, db_b, a_vs_b, cols, threads)) -print("BLAST species A vs species B done.") -run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' - % (blast_exe, blast_type, fasta_b, db_a, b_vs_a, cols, threads)) -print("BLAST species B vs species A done.") +#print("BLAST databases prepared.") +run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' + % (blast_cmd, fasta_a, db_b, a_vs_b, cols, threads)) +#print("BLAST species A vs species B done.") +run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' + % (blast_cmd, fasta_b, db_a, b_vs_a, cols, threads)) +#print("BLAST species B vs species A done.") best_a_vs_b = dict() for line in open(a_vs_b): @@ -141,13 +144,28 @@ count = 0 outfile = open(out_file, 'w') -outfile.write("#A_id\tB_id\tA_vs_B bitscore\tA_vs_B pident\tA_vs_B qcovhsp\tB_vs_A bitscore\tB_vs_A pident\tB_vs_A qcovhsp\n") +outfile.write("#A_id\tB_id\tbitscore\tpident\tA qcovhsp\tB qcovhsp\n") for a in a_short_list: - b = best_a_vs_b[a][0] - if b in best_b_vs_a and a == best_b_vs_a[b][0]: + a_values = best_a_vs_b[a] + b = a_values[0] + b_values = best_b_vs_a[b] + if b in best_b_vs_a and a == b_values[0]: #Output the original string versions of the scores - values = [a, b] + list(best_a_vs_b[a][2:]) + list(best_b_vs_a[b][2:]) - outfile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % tuple(values)) + values = [a,b] + #[1] = bitscore as float, [2] = bitscore as original string + if a_values[1] < b_values[1]: + values.append(a_values[2]) + else: + values.append(b_values[2]) + #[3] = identity as string + if float(a_values[3]) < float(b_values[3]): + values.append(a_values[3]) + else: + values.append(b_values[3]) + #[4] = coverage + values.append(a_values[4]) + values.append(b_values[4]) + outfile.write("%s\t%s\t%s\t%s\t%s\t%s\n" % tuple(values)) count += 1 outfile.close() print "Done, %i RBH found" % count diff -r 347eb6bfabd7 -r 57245c11b8cb tools/blast_rbh/blast_rbh.xml --- a/tools/blast_rbh/blast_rbh.xml Fri May 16 11:16:02 2014 -0400 +++ b/tools/blast_rbh/blast_rbh.xml Mon May 19 13:39:55 2014 -0400 @@ -43,6 +43,7 @@ + @@ -131,16 +132,16 @@ ------ --------------------------------- 1 ID from *species A* 2 ID from *species B* - 3 Bitscore from *A vs B* - 4 Percentage identity from *A vs B* + 3 Bitscore + 4 Percentage identity 5 Query coverage from *A vs B* - 6 Bitscore from *B vs A* - 7 Percentage identity from *B vs A* - 8 Query coverage from *B vs A* + 6 Query coverage from *B vs A* ====== ================================= These values correspond to the ``bitscore``, ``pident`` and ``qcovhsp`` -values in the BLAST+ tabular output. +values in the BLAST+ tabular output. For the bitscore and percentage identity +the values for *A vs B* and *B vs A* are typically the same, so their minimum +is shown. The coverage values differ according to the query lengths. .. class:: warningmark