annotate align_back_trans.xml @ 6:f43f1f9b4866 draft

Uploaded
author jasper
date Fri, 03 Feb 2017 12:53:31 -0500
parents 851b9da82fb0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
1 <tool id="align_back_trans" name="Thread nucleotides onto a protein alignment (back-translation)" version="0.0.6">
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
2 <description>Gives a codon aware alignment</description>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
3 <requirements>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
4 <requirement type="package" version="1.63">biopython</requirement>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
5 <requirement type="python-module">Bio</requirement>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
6 </requirements>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
7 <stdio>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
8 <!-- Anything other than zero is an error -->
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
9 <exit_code range="1:" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
10 <exit_code range=":-1" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
11 </stdio>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
12 <version_command interpreter="python">align_back_trans.py --version</version_command>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
13 <command interpreter="python">
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
14 align_back_trans.py $prot_align.ext "$prot_align" "$nuc_file" "$out_nuc_align" "$table"
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
15 </command>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
16 <inputs>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
17 <param name="prot_align" type="data" format="fasta,muscle,clustal" label="Aligned protein file" help="Multiple sequence file in FASTA, ClustalW or PHYLIP format." />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
18 <param name="table" type="select" label="Genetic code" help="Tables from the NCBI, these determine the start and stop codons">
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
19 <option value="1">1. Standard</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
20 <option value="2">2. Vertebrate Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
21 <option value="3">3. Yeast Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
22 <option value="4">4. Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
23 <option value="5">5. Invertebrate Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
24 <option value="6">6. Ciliate Macronuclear and Dasycladacean</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
25 <option value="9">9. Echinoderm Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
26 <option value="10">10. Euplotid Nuclear</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
27 <option value="11">11. Bacterial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
28 <option value="12">12. Alternative Yeast Nuclear</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
29 <option value="13">13. Ascidian Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
30 <option value="14">14. Flatworm Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
31 <option value="15">15. Blepharisma Macronuclear</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
32 <option value="16">16. Chlorophycean Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
33 <option value="21">21. Trematode Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
34 <option value="22">22. Scenedesmus obliquus</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
35 <option value="23">23. Thraustochytrium Mitochondrial</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
36 <option value="0">Don't check the translation</option>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
37 </param>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
38 <param name="nuc_file" type="data" format="fasta" label="Unaligned nucleotide sequences" help="FASTA format, using same identifiers as your protein alignment" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
39 </inputs>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
40 <outputs>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
41 <data name="out_nuc_align" format_source="prot_align" metadata_source="prot_align" label="${prot_align.name} (back-translated)"/>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
42 </outputs>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
43 <tests>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
44 <test>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
45 <param name="prot_align" value="demo_prot_align.fasta" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
46 <param name="nuc_file" value="demo_nucs.fasta" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
47 <param name="table" value="0" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
48 <output name="out_nuc_align" file="demo_nuc_align.fasta" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
49 </test>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
50 <test>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
51 <param name="prot_align" value="demo_prot_align.fasta" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
52 <param name="nuc_file" value="demo_nucs_trailing_stop.fasta" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
53 <param name="table" value="11" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
54 <output name="out_nuc_align" file="demo_nuc_align.fasta" />
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
55 </test>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
56 </tests>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
57 <help>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
58 **What it does**
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
59
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
60 Takes an input file of aligned protein sequences (typically FASTA or Clustal
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
61 format), and a matching file of unaligned nucleotide sequences (FASTA format,
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
62 using the same identifiers), and threads the nucleotide sequences onto the
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
63 protein alignment to produce a codon aware nucleotide alignment - which can
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
64 be viewed as a back translation.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
65
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
66 If you specify one of the standard NCBI genetic codes (recommended), then the
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
67 translation is verified. This will allow fuzzy matching if stop codons in the
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
68 protein sequence have been represented as X, and will allow for a trailing stop
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
69 codon present in the nucleotide sequences but not the protein.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
70
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
71 Note - the protein and nucleotide sequences must use the same identifiers.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
72
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
73 Note - If no translation table is specified, the provided nucleotide sequences
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
74 should be exactly three times the length of the protein sequences (excluding the gaps).
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
75
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
76 Note - the nucleotide FASTA file may contain extra sequences not in the
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
77 protein alignment, they will be ignored. This can be useful if for example
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
78 you have a nucleotide FASTA file containing all the genes in an organism,
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
79 while the protein alignment is for a specific gene family.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
80
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
81 **Example**
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
82
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
83 Given this protein alignment in FASTA format::
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
84
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
85 >Alpha
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
86 DEER
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
87 >Beta
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
88 DE-R
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
89 >Gamma
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
90 D--R
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
91
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
92 and this matching unaligned nucleotide FASTA file::
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
93
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
94 >Alpha
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
95 GATGAGGAACGA
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
96 >Beta
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
97 GATGAGCGU
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
98 >Gamma
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
99 GATCGG
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
100
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
101 the tool would return this nucleotide alignment::
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
102
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
103 >Alpha
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
104 GATGAGGAACGA
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
105 >Beta
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
106 GATGAG---CGU
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
107 >Gamma
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
108 GAT------CGG
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
109
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
110 Notice that all the gaps are multiples of three in length.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
111
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
112
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
113 **Citation**
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
114
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
115 This tool uses Biopython, so if you use this Galaxy tool in work leading to a
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
116 scientific publication please cite the following paper:
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
117
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
118 Cock et al (2009). Biopython: freely available Python tools for computational
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
119 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
120 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
121
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
122 This tool is available to install into other Galaxy Instances via the Galaxy
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
123 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/align_back_trans
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
124 </help>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
125 <citations>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
126 <citation type="doi">10.7717/peerj.167</citation>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
127 <citation type="doi">10.1093/bioinformatics/btp163</citation>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
128 </citations>
851b9da82fb0 Uploaded
jasper
parents:
diff changeset
129 </tool>