# HG changeset patch
# User peterjc
# Date 1400521195 14400
# Node ID 57245c11b8cb28e67db8d440f5ac5e740e2eb8b5
# Parent 347eb6bfabd7ca0be06d2d6663979d26569c78c6
Uploaded v0.1.0d, TBLASTX support; changed output columns
diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular
--- a/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Fri May 16 11:16:02 2014 -0400
+++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Mon May 19 13:39:55 2014 -0400
@@ -1,2 +1,2 @@
-#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp
-ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 92.07 86 1514 92.07 100
+#A_id B_id bitscore pident A qcovhsp B qcovhsp
+ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1514 92.07 86 100
diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular
--- a/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Fri May 16 11:16:02 2014 -0400
+++ b/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Mon May 19 13:39:55 2014 -0400
@@ -1,2 +1,2 @@
-#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp
-sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 701 96.55 100 701 96.55 100
+#A_id B_id bitscore pident A qcovhsp B qcovhsp
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 701 96.55 100 100
diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular
--- a/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Fri May 16 11:16:02 2014 -0400
+++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Mon May 19 13:39:55 2014 -0400
@@ -1,2 +1,2 @@
-#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 92.07 100 1474 92.07 86
+#A_id B_id bitscore pident A qcovhsp B qcovhsp
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1474 92.07 100 86
diff -r 347eb6bfabd7 -r 57245c11b8cb test-data/rbh_none.tabular
--- a/test-data/rbh_none.tabular Fri May 16 11:16:02 2014 -0400
+++ b/test-data/rbh_none.tabular Mon May 19 13:39:55 2014 -0400
@@ -1,1 +1,1 @@
-#A_id B_id A_vs_B bitscore A_vs_B pident A_vs_B qcovhsp B_vs_A bitscore B_vs_A pident B_vs_A qcovhsp
+#A_id B_id bitscore pident A qcovhsp B qcovhsp
diff -r 347eb6bfabd7 -r 57245c11b8cb tools/blast_rbh/blast_rbh.py
--- a/tools/blast_rbh/blast_rbh.py Fri May 16 11:16:02 2014 -0400
+++ b/tools/blast_rbh/blast_rbh.py Mon May 19 13:39:55 2014 -0400
@@ -62,13 +62,16 @@
if dbtype == "nucl":
- if blast_type not in ["megablast", "blastn", "blastn-short", "dc-megablast"]:
+ if blast_type in ["megablast", "blastn", "blastn-short", "dc-megablast"]:
+ blast_cmd = "blastn -task %s" % blast_type
+ elif blast_type == "tblastx":
+ blast_cmd = "tblastx"
+ else:
stop_err("Invalid BLAST type for BLASTN: %r" % blast_type)
- blast_exe = "blastn"
elif dbtype == "prot":
if blast_type not in ["blastp", "blastp-short"]:
stop_err("Invalid BLAST type for BLASTP: %r" % blast_type)
- blast_exe = "blastp"
+ blast_cmd = "blastp -task %s" % blast_type
else:
stop_err("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type)
@@ -94,17 +97,17 @@
c_identity = 3
c_coverage = 4
-print("Starting...")
+#print("Starting...")
#TODO - Report log in case of error?
run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_a, db_a, log))
run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_b, db_b, log))
-print("BLAST databases prepared.")
-run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i'
- % (blast_exe, blast_type, fasta_a, db_b, a_vs_b, cols, threads))
-print("BLAST species A vs species B done.")
-run('%s -task %s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i'
- % (blast_exe, blast_type, fasta_b, db_a, b_vs_a, cols, threads))
-print("BLAST species B vs species A done.")
+#print("BLAST databases prepared.")
+run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i'
+ % (blast_cmd, fasta_a, db_b, a_vs_b, cols, threads))
+#print("BLAST species A vs species B done.")
+run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i'
+ % (blast_cmd, fasta_b, db_a, b_vs_a, cols, threads))
+#print("BLAST species B vs species A done.")
best_a_vs_b = dict()
for line in open(a_vs_b):
@@ -141,13 +144,28 @@
count = 0
outfile = open(out_file, 'w')
-outfile.write("#A_id\tB_id\tA_vs_B bitscore\tA_vs_B pident\tA_vs_B qcovhsp\tB_vs_A bitscore\tB_vs_A pident\tB_vs_A qcovhsp\n")
+outfile.write("#A_id\tB_id\tbitscore\tpident\tA qcovhsp\tB qcovhsp\n")
for a in a_short_list:
- b = best_a_vs_b[a][0]
- if b in best_b_vs_a and a == best_b_vs_a[b][0]:
+ a_values = best_a_vs_b[a]
+ b = a_values[0]
+ b_values = best_b_vs_a[b]
+ if b in best_b_vs_a and a == b_values[0]:
#Output the original string versions of the scores
- values = [a, b] + list(best_a_vs_b[a][2:]) + list(best_b_vs_a[b][2:])
- outfile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % tuple(values))
+ values = [a,b]
+ #[1] = bitscore as float, [2] = bitscore as original string
+ if a_values[1] < b_values[1]:
+ values.append(a_values[2])
+ else:
+ values.append(b_values[2])
+ #[3] = identity as string
+ if float(a_values[3]) < float(b_values[3]):
+ values.append(a_values[3])
+ else:
+ values.append(b_values[3])
+ #[4] = coverage
+ values.append(a_values[4])
+ values.append(b_values[4])
+ outfile.write("%s\t%s\t%s\t%s\t%s\t%s\n" % tuple(values))
count += 1
outfile.close()
print "Done, %i RBH found" % count
diff -r 347eb6bfabd7 -r 57245c11b8cb tools/blast_rbh/blast_rbh.xml
--- a/tools/blast_rbh/blast_rbh.xml Fri May 16 11:16:02 2014 -0400
+++ b/tools/blast_rbh/blast_rbh.xml Mon May 19 13:39:55 2014 -0400
@@ -43,6 +43,7 @@
+
@@ -131,16 +132,16 @@
------ ---------------------------------
1 ID from *species A*
2 ID from *species B*
- 3 Bitscore from *A vs B*
- 4 Percentage identity from *A vs B*
+ 3 Bitscore
+ 4 Percentage identity
5 Query coverage from *A vs B*
- 6 Bitscore from *B vs A*
- 7 Percentage identity from *B vs A*
- 8 Query coverage from *B vs A*
+ 6 Query coverage from *B vs A*
====== =================================
These values correspond to the ``bitscore``, ``pident`` and ``qcovhsp``
-values in the BLAST+ tabular output.
+values in the BLAST+ tabular output. For the bitscore and percentage identity
+the values for *A vs B* and *B vs A* are typically the same, so their minimum
+is shown. The coverage values differ according to the query lengths.
.. class:: warningmark