Previous changeset 45:abc41ef6c6d4 (2014-03-11) Next changeset 47:d0de6862cda1 (2014-10-31) |
Commit message:
Uploaded v0.1.01, preview 1, adds makeprofiledb etc |
modified:
tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/ncbi_macros.xml tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml tools/ncbi_blast_plus/repository_dependencies.xml |
added:
test-data/cd00003.smp test-data/cd00003_and_cd00008.aux test-data/cd00003_and_cd00008.freq test-data/cd00003_and_cd00008.loo test-data/cd00003_and_cd00008.phr test-data/cd00003_and_cd00008.pin test-data/cd00003_and_cd00008.psd test-data/cd00003_and_cd00008.psi test-data/cd00003_and_cd00008.psq test-data/cd00003_and_cd00008.rps test-data/cd00008.smp test-data/empty_file.dat tools/ncbi_blast_plus/ncbi_makeprofiledb.xml |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003.smp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00003.smp Wed Mar 19 10:51:45 2014 -0400 |
b |
b'@@ -0,0 +1,19957 @@\n+PssmWithParameters ::= {\n+ pssm {\n+ isProtein TRUE,\n+ numRows 28,\n+ numColumns 234,\n+ byRow FALSE,\n+ query seq {\n+ id {\n+ general {\n+ db "CDD",\n+ tag id 237977\n+ }\n+ },\n+ descr {\n+ title "cd00003, PNPsynthase, Pyridoxine 5\'-phosphate (PNP) synthase\n+ domain; pyridoxal 5\'-phosphate is the active form of vitamin B6 that acts as\n+ an essential, ubiquitous coenzyme in amino acid metabolism. In bacteria,\n+ formation of pyridoxine 5\'-phosphate is a step in the biosynthesis of vitamin\n+ B6. PNP synthase, a homooctameric enzyme, catalyzes the final step in PNP\n+ biosynthesis, the condensation of 1-amino-acetone 3-phosphate and\n+ 1-deoxy-D-xylulose 5-phosphate. PNP synthase adopts a TIM barrel topology,\n+ intersubunit contacts are mediated by three \'\'extra\'\' helices, generating a\n+ tetramer of symmetric dimers with shared active sites; the open state has\n+ been proposed to accept substrates and to release products, while most of the\n+ catalytic events are likely to occur in the closed state; a hydrophilic\n+ channel running through the center of the barrel was identified as the\n+ essential structural feature that enables PNP synthase to release water\n+ molecules produced during the reaction from the closed, solvent-shielded\n+ active site."\n+ },\n+ inst {\n+ repr raw,\n+ mol aa,\n+ length 234,\n+ seq-data ncbieaa "RLGVNIDHVATLRNARGTNYPDPVEAALLAEKAGADGITVHLREDRRHIQDR\n+DVRLLRELVRTELNLEMAPTEEMLEIALEVKPHQVTLVPEKREELTTEGGLDVAGQAEKLKPIIERLKDAGIRVSLFI\n+DPDPEQIEAAKEVGADRVELHTGPYANAYDKAEREAELERIAKAAKLARELGLGVNAGHGLNYENVKPIAKIPGIAEL\n+NIGHAIISRALFVGLEEAVREMKDLI"\n+ }\n+ },\n+ intermediateData {\n+ weightedResFreqsPerPos {\n+ { 0, 10, 0 },\n+ { 575880368388257, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 663632240275508, 10, -16 },\n+ { 18974024208621, 10, -15 },\n+ { 435260939968393, 10, -16 },\n+ { 0, 10, 0 },\n+ { 231579235547631, 10, -16 },\n+ { 217943087911858, 10, -16 },\n+ { 197347688751049, 10, -15 },\n+ { 
149575209809135, 10, -15 },\n+ { 0, 10, 0 },\n+ { 373639929584932, 10, -16 },\n+ { 0, 10, 0 },\n+ { 454596950735964, 10, -16 },\n+ { 280508357667036, 10, -15 },\n+ { 0, 10, 0 },\n+ { 276040598748792, 10, -16 },\n+ { 187780737233597, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 119593107246649, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 355209907721631, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 925910073190528, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 385689360373084, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 526473206870828, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 499042712921408, 10, -15 },\n+ { 160105971725447, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 217340080528288, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 38877105237495, 10, -14 },\n+ { 0, 10, 0 },\n+ { 217943087911858, 10, -16 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ '..b'-414,\n+ -399,\n+ -460,\n+ 5,\n+ 352,\n+ 16,\n+ -587,\n+ -100,\n+ -494,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -195,\n+ -32768,\n+ -409,\n+ -650,\n+ -589,\n+ -382,\n+ -651,\n+ -647,\n+ 402,\n+ -562,\n+ -63,\n+ -221,\n+ -629,\n+ -572,\n+ -558,\n+ -589,\n+ -496,\n+ -335,\n+ 665,\n+ -607,\n+ -100,\n+ -446,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -24,\n+ -32768,\n+ -591,\n+ -46,\n+ -272,\n+ -576,\n+ -277,\n+ -37,\n+ -291,\n+ 283,\n+ -138,\n+ -402,\n+ -350,\n+ -488,\n+ 369,\n+ 578,\n+ -203,\n+ 
-174,\n+ -491,\n+ -587,\n+ -100,\n+ -483,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ 7,\n+ -32768,\n+ -616,\n+ 241,\n+ 528,\n+ -606,\n+ -496,\n+ -367,\n+ -230,\n+ 196,\n+ -343,\n+ -105,\n+ -331,\n+ -454,\n+ 43,\n+ 272,\n+ -194,\n+ -392,\n+ -502,\n+ -619,\n+ -100,\n+ -523,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -446,\n+ -32768,\n+ -500,\n+ -647,\n+ -538,\n+ 178,\n+ -619,\n+ -325,\n+ -106,\n+ -485,\n+ -175,\n+ 921,\n+ -550,\n+ -602,\n+ -404,\n+ -481,\n+ -486,\n+ -420,\n+ -288,\n+ -277,\n+ -100,\n+ 627,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -165,\n+ -32768,\n+ -534,\n+ -503,\n+ -358,\n+ -453,\n+ -555,\n+ -452,\n+ -144,\n+ 482,\n+ 287,\n+ -280,\n+ -440,\n+ -508,\n+ -91,\n+ 361,\n+ -409,\n+ -396,\n+ 122,\n+ -575,\n+ -100,\n+ -472,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ 149,\n+ -32768,\n+ -567,\n+ 385,\n+ 307,\n+ -606,\n+ -460,\n+ -383,\n+ -292,\n+ 51,\n+ -396,\n+ -460,\n+ -145,\n+ -458,\n+ 207,\n+ 304,\n+ -90,\n+ -107,\n+ -277,\n+ -625,\n+ -100,\n+ -526,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ 176,\n+ -32768,\n+ 31,\n+ -605,\n+ -520,\n+ -357,\n+ -572,\n+ -562,\n+ 483,\n+ -473,\n+ 369,\n+ -197,\n+ -576,\n+ -553,\n+ -44,\n+ -69,\n+ -433,\n+ -185,\n+ 8,\n+ -549,\n+ -100,\n+ -456,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -154,\n+ -32768,\n+ 387,\n+ -646,\n+ -594,\n+ -334,\n+ -636,\n+ -593,\n+ 485,\n+ -545,\n+ 343,\n+ 529,\n+ -604,\n+ -586,\n+ -513,\n+ -556,\n+ -479,\n+ -56,\n+ 41,\n+ -533,\n+ -100,\n+ -450,\n+ -32768,\n+ -32768,\n+ -400,\n+ -32768,\n+ -32768\n+ },\n+ lambda { 267, 10, -3 },\n+ kappa { 695502437462053, 10, -16 },\n+ h { 14, 10, -2 },\n+ scalingFactor 100,\n+ lambdaUngapped { 315181590957692, 10, -15 },\n+ kappaUngapped { 22723615854819, 10, -14 },\n+ hUngapped { 852942415611443, 10, -15 }\n+ }\n+ },\n+ params {\n+ pseudocount 10,\n+ rpsdbparams {\n+ matrixName "BLOSUM62"\n+ }\n+ }\n+}\n' |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.aux --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00003_and_cd00008.aux Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -0,0 +1,12 @@ +BLOSUM62 +11 +1 +0.000000e+00 +0.000000e+00 +0 +0 +100.000000 +234 +6.955024e-02 +160 +4.862535e-02 |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.freq |
b |
Binary file test-data/cd00003_and_cd00008.freq has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.loo |
b |
Binary file test-data/cd00003_and_cd00008.loo has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.phr |
b |
Binary file test-data/cd00003_and_cd00008.phr has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.pin |
b |
Binary file test-data/cd00003_and_cd00008.pin has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.psd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00003_and_cd00008.psd Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -0,0 +1,2 @@ +gnl|cdd|1890191 +gnl|cdd|2379770 |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.psi |
b |
Binary file test-data/cd00003_and_cd00008.psi has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.psq |
b |
Binary file test-data/cd00003_and_cd00008.psq has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00003_and_cd00008.rps |
b |
Binary file test-data/cd00003_and_cd00008.rps has changed |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb test-data/cd00008.smp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd00008.smp Wed Mar 19 10:51:45 2014 -0400 |
b |
b'@@ -0,0 +1,13679 @@\n+PssmWithParameters ::= {\n+ pssm {\n+ isProtein TRUE,\n+ numRows 28,\n+ numColumns 160,\n+ byRow FALSE,\n+ query seq {\n+ id {\n+ general {\n+ db "CDD",\n+ tag id 189019\n+ }\n+ },\n+ descr {\n+ title "cd00008, PIN_53EXO-like, PIN domains of the 5\'-3\' exonucleases\n+ of DNA polymerase I, bacteriophage T4 RNase H and T5-5\' nucleases, and\n+ homologs. PIN (PilT N terminus) domains of the 5\'-3\' exonucleases (53EXO) of\n+ mutli-domain DNA polymerase I and single domain protein homologs, as well as,\n+ the PIN domains of bacteriophage T5-5\'nuclease (T5FEN or 5\'-3\'exonuclease),\n+ bacteriophage T4 RNase H (T4FEN), bacteriophage T3 (T3 phage\n+ exodeoxyribonuclease) and other similar nucleases are included in this\n+ family. The 53EXO of DNA polymerase I recognizes and endonucleolytically\n+ cleaves a structure-specific DNA substrate that has a bifurcated downstream\n+ duplex and an upstream template-primer duplex that overlaps the downstream\n+ duplex by 1 bp. The T5-5\'nuclease is a 5\'-3\'exodeoxyribonuclease that also\n+ exhibits endonucleolytic activity on flap structures (branched duplex DNA\n+ containing a free single-stranded 5\'end). T4 RNase H, which removes the RNA\n+ primers that initiate lagging strand fragments, has 5\'- 3\'exonuclease\n+ activity on DNA/DNA and RNA/DNA duplexes and has endonuclease activity on\n+ flap or forked DNA structures. These nucleases are members of the\n+ structure-specific, 5\' nuclease family that catalyzes hydrolysis of DNA\n+ duplex-containing nucleic acid structures during DNA replication, repair, and\n+ recombination. They contain a PIN domain with a helical arch/clamp region (I\n+ domain) of variable length (approximately 16 to 30 residues in 53EXO-like PIN\n+ domains) and a H3TH (helix-3-turn-helix) domain, an atypical\n+ helix-hairpin-helix-2-like region. Both the H3TH domain (not included here)\n+ and the helical arch/clamp region are involved in DNA binding. 
The active\n+ site of the 53EXO of Taq DNA polymerase I includes a set of conserved acidic\n+ residues that are essential for binding three divalent metal ions (two Mn2+\n+ ions and one Zn2+ ion) required for nuclease activity. T5-5\'nuclease requires\n+ at least two bound divalent metal ions for nuclease activity and is reported\n+ to be able to use Mg2+, Mn2+ or Co2+ as co-factors."\n+ },\n+ inst {\n+ repr raw,\n+ mol aa,\n+ length 160,\n+ seq-data ncbieaa "LMLVDGTNLAFRTKHNNSKKKEKINLSPFASSYVSSIQSLAKSYSARTTIVL\n+GDKGKSVFRLEHLPEYKGNRDEKYAEEKALDEQFFEYLKDAFELCKATTFPTFTIRGYEADDMAAYLVKKIGHEGDHV\n+WIISTDGDWDQLLTDKVSRFSPTTRREYHL"\n+ }\n+ },\n+ intermediateData {\n+ weightedResFreqsPerPos {\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 338020833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 328645833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 333333333333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 338020833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 333333333333333, 10, -15 },\n+ { 328645833333333, 10, -15 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0, 10, 0 },\n+ { 0'..b' -307,\n+ 704,\n+ -250,\n+ -309,\n+ -192,\n+ -207,\n+ -249,\n+ -197,\n+ -100,\n+ -12,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -64,\n+ -32768,\n+ -122,\n+ -145,\n+ -113,\n+ -102,\n+ -157,\n+ -140,\n+ -20,\n+ -97,\n+ 17,\n+ 497,\n+ -93,\n+ -116,\n+ -65,\n+ -112,\n+ -12,\n+ 327,\n+ -19,\n+ -149,\n+ -100,\n+ 
-121,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -142,\n+ -32768,\n+ -302,\n+ -51,\n+ 332,\n+ -282,\n+ -264,\n+ 602,\n+ -290,\n+ -70,\n+ -294,\n+ -205,\n+ -45,\n+ -211,\n+ 22,\n+ -107,\n+ -28,\n+ 303,\n+ -228,\n+ -332,\n+ -100,\n+ -69,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -121,\n+ -32768,\n+ -379,\n+ -186,\n+ -84,\n+ -383,\n+ 340,\n+ -157,\n+ -394,\n+ 351,\n+ -349,\n+ -246,\n+ -95,\n+ -242,\n+ -9,\n+ 403,\n+ -102,\n+ -177,\n+ -341,\n+ -352,\n+ -100,\n+ -289,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -196,\n+ -32768,\n+ -375,\n+ -234,\n+ -82,\n+ -9,\n+ -299,\n+ -9,\n+ -301,\n+ 349,\n+ -256,\n+ -192,\n+ -143,\n+ -264,\n+ -2,\n+ 405,\n+ -146,\n+ -178,\n+ -268,\n+ -72,\n+ -100,\n+ 490,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -226,\n+ -32768,\n+ -301,\n+ -157,\n+ 297,\n+ -71,\n+ -349,\n+ -222,\n+ -123,\n+ -172,\n+ 207,\n+ -46,\n+ -277,\n+ -314,\n+ -71,\n+ -214,\n+ -226,\n+ -213,\n+ -167,\n+ 856,\n+ -100,\n+ -25,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -167,\n+ -32768,\n+ -210,\n+ -396,\n+ -331,\n+ 65,\n+ -415,\n+ -122,\n+ 350,\n+ -300,\n+ 9,\n+ -12,\n+ -350,\n+ -347,\n+ -284,\n+ -313,\n+ -261,\n+ -135,\n+ 320,\n+ -71,\n+ -100,\n+ 490,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -136,\n+ -32768,\n+ -287,\n+ -161,\n+ -56,\n+ -281,\n+ -243,\n+ 591,\n+ -274,\n+ 318,\n+ -281,\n+ -184,\n+ -43,\n+ -206,\n+ -7,\n+ 7,\n+ -29,\n+ 308,\n+ -222,\n+ -337,\n+ -100,\n+ -69,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768,\n+ -32768,\n+ -187,\n+ -32768,\n+ -240,\n+ -339,\n+ -301,\n+ -129,\n+ -391,\n+ -351,\n+ 323,\n+ -269,\n+ 251,\n+ 24,\n+ -362,\n+ 495,\n+ -275,\n+ -318,\n+ -249,\n+ -165,\n+ 48,\n+ -331,\n+ -100,\n+ -238,\n+ -32768,\n+ -32768,\n+ -399,\n+ -32768,\n+ -32768\n+ },\n+ lambda { 267, 10, -3 },\n+ kappa { 486253485452101, 10, -16 },\n+ h { 14, 10, -2 },\n+ scalingFactor 100,\n+ lambdaUngapped { 318588052238909, 10, 
-15 },\n+ kappaUngapped { 158869858915243, 10, -15 },\n+ hUngapped { 43477934178065, 10, -14 }\n+ }\n+ },\n+ params {\n+ pseudocount 10,\n+ rpsdbparams {\n+ matrixName "BLOSUM62"\n+ }\n+ }\n+}\n' |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Tue Mar 11 11:31:38 2014 -0400 +++ b/tools/ncbi_blast_plus/README.rst Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -21,9 +21,9 @@ ====================== Galaxy should be able to automatically install the dependencies, i.e. the -``blast_datatypes`` repository which defines the BLAST XML file format -(``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and -``blastdbn``). +BLAST+ binaries and the ``blast_datatypes`` repository which defines the +BLAST XML file format (``blastxml``), protein and nucleotide BLAST databases +(``blastdbp`` and ``blastdbn``), and so on. See the configuration notes below. @@ -47,6 +47,7 @@ <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" /> <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" /> <tool file="ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml" /> + <tool file="ncbi_blast_plus/ncbi_makeprofiledb.xml" /> <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" /> </section> @@ -61,7 +62,7 @@ files. You must install the NCBI BLAST+ standalone tools somewhere on the system -path. Currently the unit tests are written using "BLAST 2.2.28+". +path. Currently the unit tests are written using "BLAST 2.2.29+". Run the functional tests (adjusting the section identifier to match your ``tool_conf.xml.sample`` file):: @@ -91,9 +92,9 @@ will appear as ``N/A`` in the tabular output. The BLAST+ binaries support multi-threaded operation, which is handled via the -$GALAXY_SLOTS environment variable. This should be set automatically by Galaxy -via your job runner settings, which allows you to (for example) allocate four -cores to each BLAST job. +``$GALAXY_SLOTS`` environment variable. This should be set automatically by +Galaxy via your job runner settings, which allows you to (for example) allocate +four cores to each BLAST job. 
In addition, the BLAST+ wrappers also support high level parallelism by task splitting if ``use_tasked_jobs = True`` is enabled in your ``universe_wsgi.ini`` @@ -111,19 +112,19 @@ v0.0.11 - Final revision as part of the Galaxy main repository, and the first release via the Tool Shed v0.0.12 - Implements genetic code option for translation searches. - - Changes <parallelism> to 1000 sequences at a time (to cope with + - Changes ``<parallelism>`` to 1000 sequences at a time (to cope with very large sets of queries where BLAST+ can become memory hungry) - Include warning that BLAST+ with subject FASTA gives pairwise e-values v0.0.13 - Use the new error handling options in Galaxy (the previously - bundled hide_stderr.py script is no longer needed). + bundled ``hide_stderr.py`` script is no longer needed). v0.0.14 - Support for makeblastdb and blastdbinfo with local BLAST databases in the history (using work from Edward Kirton), requires v0.0.14 - of the 'blast_datatypes' repository from the Tool Shed. + of the ``blast_datatypes`` repository from the Tool Shed. v0.0.15 - Stronger warning in help text against searching against subject FASTA files (better looking e-values than you might be expecting). v0.0.16 - Added repository_dependencies.xml for automates installation of the - 'blast_datatypes' repository from the Tool Shed. + ``blast_datatypes`` repository from the Tool Shed. v0.0.17 - The BLAST+ search tools now default to extended tabular output (all too often our users where having to re-run searches just to get one of the missing columns like query or subject length) @@ -138,36 +139,44 @@ - Added percentage identity option to BLASTN. - Fallback on ElementTree if cElementTree missing in XML to tabular. - Link to Tool Shed added to help text and this documentation. - - Tweak dependency on blast_datatypes to also work on Test Tool Shed. - - Dependency on new package_blast_plus_2_2_26 in Tool Shed. 
+ - Tweak dependency on ``blast_datatypes`` to also work on Test Tool Shed. + - Dependency on new ``package_blast_plus_2_2_26`` in Tool Shed. - Adopted standard MIT License. - Development moved to GitHub, https://github.com/peterjc/galaxy_blast - Updated citation information (Cock et al. 2013). -v0.0.21 - Use macros to simplify the XML wrappers. +v0.0.21 - Use macros to simplify the XML wrappers (by John Chilton). - Added wrapper for dustmasker. - - Enabled masking for makeblastdb. - - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes. - defined in updated blast_datatypes on Galaxy ToolShed. + - Enabled masking for makeblastdb (Nicola Soranzo). + - Requires ``maskinfo-asn1`` and ``maskinfo-asn1-binary`` datatypes, + defined in ``blast_datatypes`` v0.0.17 on Galaxy ToolShed. - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26. - - Now depends on package_blast_plus_2_2_27 in ToolShed. -v0.0.22 - More use macros to simplify the wrappers. - - Set number of threads via $GALAXY_SLOTS environment variable. + - Now depends on ``package_blast_plus_2_2_27`` in ToolShed. +v0.0.22 - More use of macros to simplify the wrappers. + - Set number of threads via ``$GALAXY_SLOTS`` environment variable. - More descriptive default output names. - - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18). + - Tests require updated BLAST DB definitions (``blast_datatypes`` v0.0.18). - Pre-check for duplicate identifiers in makeblastdb wrapper. - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. - - Now depends on package_blast_plus_2_2_28 in ToolShed. + - Now depends on ``package_blast_plus_2_2_28`` in ToolShed. - Extended tabular output includes 'salltitles' as column 25. -v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed. - - Tabular output now includes option to pick specific columns, - including previously unavailable taxonomy columns. - - BLAST XML to tabular tool supports multiple input files. 
+v0.1.00 - Now depends on ``package_blast_plus_2_2_29`` in ToolShed. + - Tabular output now includes option to pick specific columns + (based on contribution from Jim Johnson), including previously + unavailable taxonomy columns. + - BLAST XML to tabular tool supports multiple input files + (based on contribution from Jim Johnson). - More detailed descriptions for BLASTN and BLASTP task option. - - Wrappers for segmasker, dustmasker and convert2blastmask. + - Wrappers for segmasker, dustmasker and convert2blastmask + (contribution from Bjoern Gruening). - Supports using maskinfo with makeblastdb wrapper. - Supports setting a taxonomy ID in makeblastdb wrapper. - Subtle changes like new conditional settings will require some old - workflows be updated to cope. + workflows be updated to cope. +v0.1.01 - Requires ``blastdbd`` datatype (``blast_datatypes`` v0.0.19). + - Wrapper for makeprofiledb added to create protein domain databases + (based on contribution from Bjoern Gruening). + - The RPS-BLAST and RPS-TBLASTN wrappers support using a protein + domain database from the user's history. ======= ====================================================================== |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb tools/ncbi_blast_plus/ncbi_macros.xml --- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Mar 11 11:31:38 2014 -0400 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -228,10 +228,8 @@ <xml name="input_conditional_pssm"> <conditional name="db_opts"> <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)"> - <option value="db" selected="True">Locally installed BLAST database</option> - <!-- TODO - define new datatype + <option value="db" selected="True">Locally installed BLAST protein domain database</option> <option value="histdb">BLAST protein domain database from your history</option> - --> </param> <when value="db"> <param name="database" type="select" label="Protein domain database"> @@ -244,13 +242,11 @@ <param name="histdb" type="hidden" value="" /> <param name="subject" type="hidden" value="" /> </when> - <!-- TODO - define new datatype <when value="histdb"> <param name="database" type="hidden" value="" /> <param name="histdb" type="data" format="blastdbd" label="Protein domain database" /> <param name="subject" type="hidden" value="" /> </when> - --> </conditional> </xml> <xml name="input_conditional_choose_db_type"> |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb tools/ncbi_blast_plus/ncbi_makeprofiledb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -0,0 +1,127 @@ +<tool id="ncbi_makeprofiledb" name="NCBI BLAST+ makeprofiledb" version="0.1.01"> + <description>Make profile database</description> + <macros> + <token name="@BINARY@">makeprofiledb</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +##Unlike makeblastdb, makeprofiledb needs directory to exist already: +mkdir -p "$outfile.extra_files_path" && +makeprofiledb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" + +##We turn $input_file into $infiles with a configfile entry defined below +-in "$infiles" + +#if $title: +-title "$title" +#else: +##Would default to being based on the cryptic Galaxy filenames, which is unhelpful +-title "Profile Database" +#end if + +-threshold $threshold + +#if str($contain_pssm_scores.contain_pssm_scores_type) == 'no': + -gapopen $contain_pssm_scores.gapopen + -gapextend $contain_pssm_scores.gapextend + -scale $contain_pssm_scores.scale + -matrix $contain_pssm_scores.matrix +#end if + +-obsr_threshold $obsr_threshold +-exclude_invalid $exclude_invalid + +-logfile "$outfile" + </command> + <expand macro="stdio" /> + <inputs> + <param name="input_file" type="data" multiple="true" optional="false" format="pssm-asn1" + label="Input PSSM file(s)" + help="One or more NCBI PSSM ASN.1 format scoremat files (often named *.smp)" /> + <param name="infile_list" type="data" multiple="true" format="pssm-asn1" /> + + <param name="title" type="text" value="" label="Title for the profile database" help="This is the database name shown in BLAST search output" /> + <param name="threshold" type="float" size="5" value="9.82" label="Minimum word score to add a word to the lookup table" /> + + <!-- output options --> + <!-- Initially we're only offering the default, RPS databases for use with rpsblast and rpstblastn + <param name="dbtype" type="select" display="radio" label="Type of database"> + <option value="cobalt">Cobalt</option> + <option value="delta">Delta</option> + <option 
value="rps" selected="true">RPS</option> + </param> + --> + + <conditional name="contain_pssm_scores"> + <param name="contain_pssm_scores_type" type="select" label="Does your input file contain PSSM scores?"> + <option value="yes" selected="True">Yes</option> + <option value="no">No</option> + </param> + <when value="yes" /> + <when value="no"> + <param name="gapopen" type="integer" size="5" value="" label="Cost to open a gap" /> + <param name="gapextend" type="integer" size="5" value="" label="Cost to extend a gap" /> + <param name="scale" type="float" size="5" value="" label="PSSM scale factor" /> + <expand macro="input_scoring_matrix" /> + </when> + </conditional> + + <!-- Delta Blast Options --> + <param name="exclude_invalid" type="boolean" truevalue="true" falsevalue="false" checked="true" + label="Exclude invalid domains?" + help="Exclude domains that do not pass validation test" /> + <param name="obsr_threshold" type="float" size="5" value="6.0" + label="Observation threshold" + help="Exclude domains with maximum number of independent observations below this threshold" /> + </inputs> + <configfiles> + <configfile name="infiles"> +#for $infile in $input_file +${infile} +#end for + </configfile> + </configfiles> + <outputs> + <data name="outfile" format="blastdbd" label="RPS database from ${on_string}" /> + </outputs> + <tests> + <test> + <param name="input_file" value="cd00003.smp,cd00008.smp" ftype="pssm-asn1" /> + <param name="title" value="Just 2 PSSM matrices" /> + <param name="contain_pssm_scores_type" value="yes" /> + <output name="outfile" file="empty_file.dat" ftype="blastdbd" > + <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" /> + <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" /> + <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" /> + <extra_files type="file" 
value="cd00003_and_cd00008.loo" name="blastdb.loo" /> + <extra_files type="file" value="cd00003_and_cd00008.psd" name="blastdb.psd" /> + <extra_files type="file" value="cd00003_and_cd00008.psi" name="blastdb.psi" /> + <extra_files type="file" value="cd00003_and_cd00008.rps" name="blastdb.rps" /> + <extra_files type="file" value="cd00003_and_cd00008.aux" name="blastdb.aux" /> + </output> + </test> + </tests> + <help> +**What it does** + +Make a protein domain profile database (for use with RPS-BLAST or RPS-TBLASTN) +from one or more Position Specific Scoring Matrices (PSSM) files in the NCBI +"scoremat" ASN.1 format (usually named ``*.smp``). + +This is a wrapper for the NCBI BLAST+ tool 'makeprofiledb'. + +More information about makeprofiledb can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> +</tool> |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Tue Mar 11 11:31:38 2014 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.00"> +<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.01"> <description>Search protein domain database (PSSMs) with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" /> |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Tue Mar 11 11:31:38 2014 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.00"> +<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.01"> <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> |
b |
diff -r abc41ef6c6d4 -r 148eceb80cbb tools/ncbi_blast_plus/repository_dependencies.xml --- a/tools/ncbi_blast_plus/repository_dependencies.xml Tue Mar 11 11:31:38 2014 -0400 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Wed Mar 19 10:51:45 2014 -0400 |
b |
@@ -1,4 +1,4 @@ <?xml version="1.0"?> <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format)."> - <repository changeset_revision="e36c60d13c94" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="939a600f45e9" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </repositories> |