Mercurial > repos > bgruening > trna_prediction
changeset 3:f4416f1a674a draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit cfb19d75629f02e0dea4475c16c016ed5510eb44
author | bgruening |
---|---|
date | Wed, 26 Jul 2017 10:13:35 -0400 |
parents | 6d97da269ee2 |
children | |
files | aragorn.xml tRNAscan.py tRNAscan.xml test-data/aragorn_tansl-table-1_tmRNA_tRNA.txt tool_dependencies.xml trna_prediction.tar.gz |
diffstat | 6 files changed, 223 insertions(+), 132 deletions(-) [+] |
line wrap: on
line diff
--- a/aragorn.xml Thu Sep 17 16:49:26 2015 -0400 +++ b/aragorn.xml Wed Jul 26 10:13:35 2017 -0400 @@ -1,25 +1,27 @@ -<tool id="aragorn_trna" name="tRNA and tmRNA" version="0.5"> +<tool id="aragorn_trna" name="tRNA and tmRNA" version="0.6"> <description>prediction (Aragorn)</description> <requirements> <requirement type="package" version="1.2.36">aragorn</requirement> + <requirement type="package" version="2.7">python</requirement> </requirements> - <command> -<![CDATA[ - aragorn - $input - -gc$genbank_gencode - $tmRNA - $tRNA - $mtRNA - $mam_mtRNA - $topology - -o $output - $secondary_structure - $introns; - + <command><![CDATA[ + #if not $gff3_output: + aragorn + '$input' + -gc$genbank_gencode + $tmRNA + $tRNA + $mtRNA + $mam_mtRNA + $topology + -o '$output' + $secondary_structure + $introns + #end if + #if $gff3_output: aragorn - $input + '$input' -gc$genbank_gencode $tmRNA $tRNA @@ -28,14 +30,14 @@ $topology $introns -w - | python $__tool_directory__/aragorn_out_to_gff3.py $gff3_model > $gff3_output_file; + | python '$__tool_directory__/aragorn_out_to_gff3.py' $gff3_model > '$gff3_output_file' #end if -]]> + ]]> </command> <inputs> <param name="input" type="data" format="fasta" label="Genome Sequence"/> <param name="genbank_gencode" type="select" label="Genetic code"> - <option value="1" select="True">1. Standard</option> + <option value="1" selected="true">1. Standard</option> <option value="2">2. Vertebrate Mitochondrial</option> <option value="3">3. Yeast Mitochondrial</option> <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> @@ -76,7 +78,7 @@ <outputs> <data name="output" format="fasta"> <change_format> - <when input="secondary_structure" value="true" format="text"/> + <when input="secondary_structure" value="-fasta" format="txt"/> </change_format> </data> <data format="gff3" name="gff3_output_file" > @@ -93,9 +95,34 @@ <param name="mtRNA" value="False" /> <param name="mam_mtRNA" value="False" /> <param name="introns" value="False" /> - <param name="secondary_structure" value="False" /> + <param name="secondary_structure" value="-fon" /> + <param name="gff3_output" value="false" /> + <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.fasta" ftype="fasta" /> + </test> + + <test> + <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" /> + <param name="genbank_gencode" value="1" /> + <param name="topology" value="-c" /> + <param name="tmRNA" value="True" /> + <param name="tRNA" value="True" /> + <param name="mtRNA" value="False" /> + <param name="mam_mtRNA" value="False" /> + <param name="introns" value="False" /> + <param name="secondary_structure" value="-fasta" /> + <param name="gff3_output" value="false" /> + <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.txt" ftype="txt" lines_diff="2" /> + </test> + <test> + <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" /> + <param name="genbank_gencode" value="1" /> + <param name="topology" value="-c" /> + <param name="tmRNA" value="True" /> + <param name="tRNA" value="True" /> + <param name="mtRNA" value="False" /> + <param name="mam_mtRNA" value="False" /> + <param name="introns" value="False" /> <param name="gff3_output" value="True" /> - <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.fasta" ftype="fasta" /> <output name="gff3_output_file" file="aragorn_tansl-table-1_tmRNA_tRNA.gff3" ftype="gff3" /> </test> <test>
--- a/tRNAscan.py Thu Sep 17 16:49:26 2015 -0400 +++ b/tRNAscan.py Wed Jul 26 10:13:35 2017 -0400 @@ -8,13 +8,12 @@ from Bio.SeqRecord import SeqRecord import subprocess - def main(args): """ Call from galaxy: tRNAscan.py $organism $mode $showPrimSecondOpt $disablePseudo $showCodons $tabular_output $inputfile $fasta_output - tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -d -Q -y -q -b -o $tabular_output $inputfile; + tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -Q -y -q -b -o $tabular_output $inputfile; """ cmd = """tRNAscan-SE -Q -y -q -b %s""" % ' '.join( args[:-1] ) child = subprocess.Popen(cmd.split(),
--- a/tRNAscan.xml Thu Sep 17 16:49:26 2015 -0400 +++ b/tRNAscan.xml Wed Jul 26 10:13:35 2017 -0400 @@ -1,38 +1,50 @@ -<tool id="trnascan" name="tRNA prediction" version="0.3"> +<tool id="trnascan" name="tRNA prediction" version="0.4"> <description>(tRNAscan)</description> <requirements> - <requirement type="package" version="1.3.1">tRNAscan-SE</requirement> - <requirement type="package" version="1.61">biopython</requirement> + <requirement type="package" version="1.3.1">trnascan-se</requirement> + <requirement type="package" version="1.0.2">infernal</requirement> + <requirement type="package" version="1.70">biopython</requirement> + <requirement type="package" version="2.7">python</requirement> </requirements> - <command interpreter="python"> -<![CDATA[ - tRNAscan.py - $organism - $mode - $showPrimSecondOpt - $disablePseudo - $showCodons - -o - $tabular_output - $inputfile - $fasta_output -]]> + <command> + <![CDATA[ +python '$__tool_directory__/tRNAscan.py' +#if $organism + $organism +#end if +#if $mode + $mode +#end if +#if $showPrimSecondOpt + $showPrimSecondOpt +#end if +#if $disablePseudo + $disablePseudo +#end if +#if $showCodons + $showCodons +#end if +-o +'$tabular_output' +'$inputfile' +'$fasta_output' + ]]> </command> <inputs> <param name="inputfile" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? See TIP below"/> <param name="organism" type="select" label="Select Organism"> - <option value="">Eukaryotic</option> + <option value="" selected="true">Eukaryotic</option> <option value="-G">general tRNA model</option> <option value="-B">Bacterial</option> <option value="-A">Archaeal</option> <option value="-O">Mitochondrial/Chloroplast</option> </param> <param name="mode" type="select" label="Select Mode"> - <option value="">Default</option> + <option value="" selected="true">Default</option> <option value="-C">Covariance model analysis only (slow)</option> <option value="-T">tRNAscan only</option> <option value="-E">EufindtRNA only</option> - <option value="--infernal">Infernal cm analysis (max sensitivity, very slow)</option> + <option value="--infernal">Infernal cm analysis (max sensitivity, very slow)</option> <option value="--newscan">Infernal and new cm models</option> </param> <param name="disablePseudo" type="boolean" label="Disable pseudogene checking" truevalue="-D" falsevalue="" /> @@ -45,14 +57,14 @@ </outputs> <tests> <test> - <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" /> + <param name="inputfile" value="trna_arabidopsis.fasta" ftype="fasta" /> <param name="organism" value="" /> - <param name="mode" value="--infernal" /> + <param name="mode" value="--infernal" /> <!-- Infernal test not working due to cmsearch error--> <param name="disablePseudo" value="" /> <param name="showPrimSecondOpt" value="" /> <param name="showCodons" value="" /> <output name="fasta_output" file="tRNAscan_eukaryotic_infernal.fasta" ftype="fasta" /> - <output name="fasta_output" file="tRNAscan_eukaryotic_infernal.tabular" ftype="tabular" /> + <output name="tabular_output" file="tRNAscan_eukaryotic_infernal.tabular" ftype="tabular" /> </test> </tests> <help> @@ -82,41 +94,41 @@ - use general tRNA model: - This option selects the general tRNA covariance model that was trained - on tRNAs from all three phylogenetic domains (Archaea, Bacteria, and - Eukarya). This mode can be used when analyzing a mixed collection of - sequences from more than one phylogenetic domain, with only slight - loss of sensitivity and selectivity. The original publication - describing this program and tRNAscan-SE version 1.0 used this general - tRNA model exclusively. If you wish to compare scores to those found - in the paper or scans using v1.0, use this option. Use of this option - is compatible with all other search mode options described in this - section. + This option selects the general tRNA covariance model that was trained + on tRNAs from all three phylogenetic domains (Archaea, Bacteria, and + Eukarya). This mode can be used when analyzing a mixed collection of + sequences from more than one phylogenetic domain, with only slight + loss of sensitivity and selectivity. The original publication + describing this program and tRNAscan-SE version 1.0 used this general + tRNA model exclusively. If you wish to compare scores to those found + in the paper or scans using v1.0, use this option. Use of this option + is compatible with all other search mode options described in this + section. - search for bacterial tRNAs - This option selects the bacterial covariance model for tRNA analysis, - and loosens the search parameters for EufindtRNA to improve detection - of bacterial tRNAs. Use of this mode with bacterial sequences - will also improve bounds prediction of the 3' end (the terminal CAA - triplet). + This option selects the bacterial covariance model for tRNA analysis, + and loosens the search parameters for EufindtRNA to improve detection + of bacterial tRNAs. Use of this mode with bacterial sequences + will also improve bounds prediction of the 3' end (the terminal CAA + triplet). - search for archaeal tRNAs - This option selects an archaeal-specific covariance model for tRNA - analysis, as well as slightly loosening the EufindtRNA search - cutoffs. + This option selects an archaeal-specific covariance model for tRNA + analysis, as well as slightly loosening the EufindtRNA search + cutoffs. - search for organellar (mitochondrial/chloroplast) tRNAs - This parameter bypasses the fast first-pass scanners that are poor at - detecting organellar tRNAs and runs Cove analysis only. Since true - organellar tRNAs have been found to have Cove scores between 15 and 20 - bits, the search cutoff is lowered from 20 to 15 bits. Also, - pseudogene checking is disabled since it is only applicable to - eukaryotic cytoplasmic tRNA pseudogenes. Since Cove-only mode is - used, searches will be very slow (see -C option below) relative to the - default mode. + This parameter bypasses the fast first-pass scanners that are poor at + detecting organellar tRNAs and runs Cove analysis only. Since true + organellar tRNAs have been found to have Cove scores between 15 and 20 + bits, the search cutoff is lowered from 20 to 15 bits. Also, + pseudogene checking is disabled since it is only applicable to + eukaryotic cytoplasmic tRNA pseudogenes. Since Cove-only mode is + used, searches will be very slow (see -C option below) relative to the + default mode. @@ -124,29 +136,29 @@ - search using Cove analysis only (max sensitivity, slow) - Directs tRNAscan-SE to analyze sequences using Cove analysis only. - This option allows a slightly more sensitive search than the default - tRNAscan + EufindtRNA -> Cove mode, but is much slower (by approx. 250 - to 3,000 fold). Output format and other program defaults are - otherwise identical to the normal analysis. + Directs tRNAscan-SE to analyze sequences using Cove analysis only. + This option allows a slightly more sensitive search than the default + tRNAscan + EufindtRNA -> Cove mode, but is much slower (by approx. 250 + to 3,000 fold). Output format and other program defaults are + otherwise identical to the normal analysis. - search using Eukaryotic tRNA finder (EufindtRNA) only: - This option runs EufindtRNA alone to search for tRNAs. Since Cove is - not being used as a secondary filter to remove false positives, this - run mode defaults to "Normal" parameters which more closely - approximates the sensitivity and selectivity of the original algorithm - describe by Pavesi and colleagues. + This option runs EufindtRNA alone to search for tRNAs. Since Cove is + not being used as a secondary filter to remove false positives, this + run mode defaults to "Normal" parameters which more closely + approximates the sensitivity and selectivity of the original algorithm + describe by Pavesi and colleagues. - search using tRNAscan only (defaults to strict search parameters) - Directs tRNAscan-SE to use only tRNAscan to analyze sequences. This - mode will cause tRNAscan to default to using "strict" parameters - (similar to tRNAscan version 1.3 operation). This mode of operation - is faster (about 3-5 times faster than default mode analysis), but - will result in approximately 0.2 to 0.6 false positive tRNAs per Mbp, - decreased sensitivity, and less reliable prediction of anticodons, - tRNA isotype, and introns. + Directs tRNAscan-SE to use only tRNAscan to analyze sequences. This + mode will cause tRNAscan to default to using "strict" parameters + (similar to tRNAscan version 1.3 operation). This mode of operation + is faster (about 3-5 times faster than default mode analysis), but + will result in approximately 0.2 to 0.6 false positive tRNAs per Mbp, + decreased sensitivity, and less reliable prediction of anticodons, + tRNA isotype, and introns. - search using Infernal cm analysis only (max sensitivity, very slow) @@ -157,32 +169,32 @@ **disable pseudogene checking** - Manually disable checking tRNAs for poor primary or secondary - structure scores often indicative of eukaryotic pseudogenes. This - will slightly speed the program and may be necessary for non-eukaryotic - sequences that are flagged as possible pseudogenes but are known to be - functional tRNAs. + Manually disable checking tRNAs for poor primary or secondary + structure scores often indicative of eukaryotic pseudogenes. This + will slightly speed the program and may be necessary for non-eukaryotic + sequences that are flagged as possible pseudogenes but are known to be + functional tRNAs. **Show both primary and secondary structure score components to covariance model bit scores** - This option displays the breakdown of the two components of the - covariance model bit score. Since tRNA pseudogenes often have one - very low component (good secondary structure but poor primary sequence - similarity to the tRNA model, or vice versa), this information may be - useful in deciding whether a low-scoring tRNA is likely to be a - pseudogene. The heuristic pseudogene detection filter uses this - information to flag possible pseudogenes -- use this option to see why - a hit is marked as a possible pseudogene. The user may wish to - examine score breakdowns from known tRNAs in the organism of interest - to get a frame of reference. + This option displays the breakdown of the two components of the + covariance model bit score. Since tRNA pseudogenes often have one + very low component (good secondary structure but poor primary sequence + similarity to the tRNA model, or vice versa), this information may be + useful in deciding whether a low-scoring tRNA is likely to be a + pseudogene. The heuristic pseudogene detection filter uses this + information to flag possible pseudogenes -- use this option to see why + a hit is marked as a possible pseudogene. The user may wish to + examine score breakdowns from known tRNAs in the organism of interest + to get a frame of reference. **Show codons instead of tRNA anticodons** - This option causes tRNAscan-SE to output a tRNA's corresponding codon - in place of its anticodon. + This option causes tRNAscan-SE to output a tRNA's corresponding codon + in place of its anticodon. @@ -190,15 +202,15 @@ **input** - >CELF22B7 C.aenorhabditis elegans (Bristol N2) cosmid F22B7 - GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT - GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT - TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT - TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC - GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA - ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG - AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA - ..... + >CELF22B7 C.aenorhabditis elegans (Bristol N2) cosmid F22B7 + GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT + GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT + TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT + TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC + GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA + ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG + AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA + ..... **output** @@ -217,14 +229,9 @@ ======== ====== ===== ====== ==== ========== ====== ====== ========== ========== - - - ]]> </help> - <citations> <citation type="doi">10.1093/nar/25.5.0955</citation> </citations> - </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/aragorn_tansl-table-1_tmRNA_tRNA.txt Wed Jul 26 10:13:35 2017 -0400 @@ -0,0 +1,70 @@ +------------------------------ +ARAGORN v1.2.36 Dean Laslett +------------------------------ + +Please reference the following paper if you use this +program as part of any published research. + +Laslett, D. and Canback, B. (2004) ARAGORN, a +program for the detection of transfer RNA and +transfer-messenger RNA genes in nucleotide sequences. +Nucleic Acids Research, 32;11-16. + + +Searching for tRNA genes with no introns +Searching for tmRNA genes +Assuming circular topology, search wraps around ends +Searching both strands +Using standard genetic code + + +gi|240255695:23036500-23037000 Arabidopsis thaliana chromosome 3, complete sequence +501 nucleotides in sequence +Mean G+C content = 43.1% + +1. + + + + a + g-c + g-c + g+t + g-c + a-t + t-a + g-c tt + t gtccc a + ta a !!!!! g + a ctcg caggg c + t !!!! a tt + g gagc c + gta g g + c-gag + t-a + c-g + g-c + c-g + t t + t a + tgc + + + + tRNA-Ala(tgc) + 73 bases, %GC = 56.2 + Sequence [381,453] + + + +>tRNA-Ala(tgc) [381,453] +ggggatgtagctcatatggtagagcgctcgctttgcatgcgagaggcaca +gggttcgattccctgcatctcca + + + + +Number of tmRNA genes = 0 + + +Configuration: aragorn /tmp/tmpx1qAPk/files/000/dataset_3.dat -gc1 -m -t -c -o /tmp/tmpx1qAPk/files/000/dataset_4.dat -fasta
--- a/tool_dependencies.xml Thu Sep 17 16:49:26 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="biopython" version="1.61"> - <repository changeset_revision="2f6c871cfa35" name="package_biopython_1_61" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="aragorn" version="1.2.36"> - <repository changeset_revision="d561a0a9f601" name="package_aragorn_1_2_36" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="tRNAscan-SE" version="1.3.1"> - <repository changeset_revision="b754416c3030" name="package_trnascan_1_3_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>