Mercurial > repos > jasper > align_back_trans

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/align_back_trans.xml	Fri Feb 03 12:50:11 2017 -0500
@@ -0,0 +1,129 @@
+<tool id="align_back_trans" name="Thread nucleotides onto a protein alignment (back-translation)" version="0.0.6">
+    <description>Gives a codon aware alignment</description>
+    <requirements>
+        <requirement type="package" version="1.63">biopython</requirement>
+        <requirement type="python-module">Bio</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <version_command interpreter="python">align_back_trans.py --version</version_command>
+    <command interpreter="python">
+align_back_trans.py $prot_align.ext "$prot_align" "$nuc_file" "$out_nuc_align" "$table"
+    </command>
+    <inputs>
+        <param name="prot_align" type="data" format="fasta,muscle,clustal" label="Aligned protein file" help="Mutliple sequence file in FASTA, ClustalW or PHYLIP format." />
+        <param name="table" type="select" label="Genetic code" help="Tables from the NCBI, these determine the start and stop codons">
+            <option value="1">1. Standard</option>
+            <option value="2">2. Vertebrate Mitochondrial</option>
+            <option value="3">3. Yeast Mitochondrial</option>
+            <option value="4">4. Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
+            <option value="5">5. Invertebrate Mitochondrial</option>
+            <option value="6">6. Ciliate Macronuclear and Dasycladacean</option>
+            <option value="9">9. Echinoderm Mitochondrial</option>
+            <option value="10">10. Euplotid Nuclear</option>
+            <option value="11">11. Bacterial</option>
+            <option value="12">12. Alternative Yeast Nuclear</option>
+            <option value="13">13. Ascidian Mitochondrial</option>
+            <option value="14">14. Flatworm Mitochondrial</option>
+            <option value="15">15. Blepharisma Macronuclear</option>
+            <option value="16">16. Chlorophycean Mitochondrial</option>
+            <option value="21">21. Trematode Mitochondrial</option>
+            <option value="22">22. Scenedesmus obliquus</option>
+            <option value="23">23. Thraustochytrium Mitochondrial</option>
+            <option value="0">Don't check the translation</option>
+        </param>
+        <param name="nuc_file" type="data" format="fasta" label="Unaligned nucleotide sequences" help="FASTA format, using same identifiers as your protein alignment" />
+    </inputs>
+    <outputs>
+        <data name="out_nuc_align" format_source="prot_align" metadata_source="prot_align" label="${prot_align.name} (back-translated)"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="prot_align" value="demo_prot_align.fasta" />
+            <param name="nuc_file" value="demo_nucs.fasta" />
+            <param name="table" value="0" />
+            <output name="out_nuc_align" file="demo_nuc_align.fasta" />
+        </test>
+        <test>
+            <param name="prot_align" value="demo_prot_align.fasta" />
+            <param name="nuc_file" value="demo_nucs_trailing_stop.fasta" />
+            <param name="table" value="11" />
+            <output name="out_nuc_align" file="demo_nuc_align.fasta" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Takes an input file of aligned protein sequences (typically FASTA or Clustal
+format), and a matching file of unaligned nucleotide sequences (FASTA format,
+using the same identifiers), and threads the nucleotide sequences onto the
+protein alignment to produce a codon aware nucleotide alignment - which can
+be viewed as a back translation.
+
+If you specify one of the standard NCBI genetic codes (recommended), then the
+translation is verified. This will allow fuzzy matching if stop codons in the
+protein sequence have been reprented as X, and will allow for a trailing stop
+codon present in the nucleotide sequences but not the protein.
+
+Note - the protein and nucleotide sequences must use the same identifers.
+
+Note - If no translation table is specified, the provided nucleotide sequences
+should be exactly three times the length of the protein sequences (exluding the gaps).
+
+Note - the nucleotide FASTA file may contain extra sequences not in the
+protein alignment, they will be ignored. This can be useful if for example
+you have a nucleotide FASTA file containing all the genes in an organism,
+while the protein alignment is for a specific gene family.
+
+**Example**
+
+Given this protein alignment in FASTA format::
+
+    >Alpha
+    DEER
+    >Beta
+    DE-R
+    >Gamma
+    D--R
+
+and this matching unaligned nucleotide FASTA file::
+
+    >Alpha
+    GATGAGGAACGA
+    >Beta
+    GATGAGCGU
+    >Gamma
+    GATCGG
+
+the tool would return this nucleotide alignment::
+
+    >Alpha
+    GATGAGGAACGA
+    >Beta
+    GATGAG---CGU
+    >Gamma
+    GAT------CGG
+
+Notice that all the gaps are multiples of three in length.
+
+
+**Citation**
+
+This tool uses Biopython, so if you use this Galaxy tool in work leading to a
+scientific publication please cite the following paper:
+
+Cock et al (2009). Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+This tool is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/align_back_trans
+    </help>
+    <citations>
+        <citation type="doi">10.7717/peerj.167</citation>
+        <citation type="doi">10.1093/bioinformatics/btp163</citation>
+    </citations>
+</tool>