Mercurial > repos > abims-sbr > pairwise
comparison pairwise.xml @ 0:e95d4b20c62d draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122
| author | abims-sbr |
|---|---|
| date | Thu, 13 Apr 2017 05:46:29 -0400 |
| parents | |
| children | 5f68b2fc02c1 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e95d4b20c62d |
|---|---|
| 1 <?xml version="1.0"?> | |
| 2 | |
| 3 <tool name="Pairwise" id="pairwise" version="1.0"> | |
| 4 | |
| 5 <description> | |
| 6 Run reciprocal tblastx pairwise | |
| 7 </description> | |
| 8 | |
| 9 <macros> | |
| 10 <import>macros.xml</import> | |
| 11 </macros> | |
| 12 | |
| 13 <requirements> | |
| 14 <expand macro="python_required" /> | |
| 15 <requirement type="package" version="2.2.22">blast-legacy</requirement> | |
| 16 <requirement type="package" version="1.3.1">samtools</requirement> | |
| 17 </requirements> | |
| 18 | |
| 19 <command> | |
| 20 <![CDATA[ | |
| 21 ln -s $__tool_directory__/scripts/S02_xxx_patron_pipeline.sh . | |
| 22 && | |
| 23 ln -s $__tool_directory__/scripts/S03_run_blast_with_k_filter.sh . | |
| 24 && | |
| 25 ln -s $__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh . | |
| 26 && | |
| 27 ln -s $__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py . | |
| 28 && | |
| 29 ln -s $__tool_directory__/scripts/S06_post_processing_of_pairwise.py . | |
| 30 && | |
| 31 ln -s $__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py . | |
| 32 && | |
| 33 ln -s $__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py . | |
| 34 && | |
| 35 ln -s $__tool_directory__/scripts/S09_post_processing_of_pairwise.py . | |
| 36 && | |
| 37 ln -s $__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py . | |
| 38 && | |
| 39 ln -s $__tool_directory__/scripts/S11_post_processing_of_pairwise.py . | |
| 40 && | |
| 41 ln -s $__tool_directory__/scripts/S12_prot2dna.py . | |
| 42 && | |
| 43 ln -s $__tool_directory__/scripts/S13_zip.py . | |
| 44 && | |
| 45 python $__tool_directory__/scripts/S01_organize_rbh.py 8 ${e_value} ${zip} | |
| 46 > ${output}; | |
| 47 ]]> | |
| 48 </command> | |
| 49 | |
| 50 <inputs> | |
| 51 <param name="zip" type="data" format="no_unzip.zip,zip" multiple="true" label="Choose your ZIP file" help="Contains the output of the filter tool" /> | |
| 52 <param name="e_value" type="float" value="1e-5" label="e_value" help="By default, it's 1e-5. You can write the e-value like this: 1e-5" /> | |
| 53 </inputs> | |
| 54 | |
| 55 <outputs> | |
| 56 <data format="txt" name="output" label="Pairwise" /> | |
| 57 <data format="no_unzip.zip" name="output_zip_DNA" label="Pairwise_zip_DNA" from_work_dir="output_file_DNA.zip" /> | |
| 58 <data format="no_unzip.zip" name="output_zip_PROT" label="Pairwise_zip_PROT" from_work_dir="output_file_PROT.zip" /> | |
| 59 </outputs> | |
| 60 | |
| 61 <tests> | |
| 62 <test> | |
| 63 <param name="zip" ftype="zip" value="test_02_input_pairwise.zip" /> | |
| 64 <param name="e_value" value="1e-5" /> | |
| 65 <output name="output" > | |
| 66 <assert_contents> | |
| 67 <has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/> | |
| 68 <has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/> | |
| 69 <has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> | |
| 70 <has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/> | |
| 71 <has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> | |
| 72 <has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> | |
| 73 | |
| 74 <has_text text="Number of pairwises parsed = 2" /> | |
| 75 <has_text text="Number of pairwises parsed = 3" /> | |
| 76 <has_text text="Number of pairwises parsed = 0" /> | |
| 77 <has_text text="Number of pairwises parsed = 5" /> | |
| 78 <has_text text="Number of pairwises parsed = 1" /> | |
| 79 | |
| 80 </assert_contents> | |
| 81 </output> | |
| 82 </test> | |
| 83 </tests> | |
| 84 | |
| 85 <help> | |
| 86 | |
| 87 ============ | |
| 88 What it does | |
| 89 ============ | |
| 90 | |
| 91 | This tool takes a zip archive containing nucleic fasta sequence files and searches different homologous genes from pairwise comparisons. | |
| 92 | There are 3 outputs. | |
| 93 | | |
| 94 | The run blast was written by the **NCBI**. | |
| 95 | The script was written by **Eric Fontanillas**. | |
| 96 | The wrapper was written by **Julie Baffard**. | |
| 97 | |
| 98 -------- | |
| 99 | |
| 100 ========== | |
| 101 Parameters | |
| 102 ========== | |
| 103 | |
| 104 The choice of parameters is possible : | |
| 105 | |
| 106 **-e** : | |
| 107 | is the option for the choice of the e-value. | |
| 108 | By default it's 1e-5. | |
| 109 | | |
| 110 | |
| 111 -------- | |
| 112 | |
| 113 ======= | |
| 114 Outputs | |
| 115 ======= | |
| 116 | |
| 117 This tool produces the following files : | |
| 118 | |
| 119 **Pairwise** : | |
| 120 | is the general output. It gives the information about what the tool is doing (for each pairwise). | |
| 121 | | |
| 122 | |
| 123 **Pairwise_zip_DNA.zip** : | |
| 124 | is the output which contains nucleic sequences (of the pairwise) that are homologous. The sequences are given as nucleotides. Shows : | |
| 125 | the name of the query sequence | |
| 126 | the part of the sequence in nucleotides | |
| 127 | the name of the match sequence | |
| 128 | the part of the sequence in nucleotides | |
| 129 | | |
| 130 | |
| 131 **Pairwise_zip_PROT.zip** : | |
| 132 | is the output which contains protein sequences (of the pairwise) that are homologous. The sequences are given as protein. Shows : | |
| 133 | the name of the query sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence) | |
| 134 | the part of the sequence in protein | |
| 135 | the name of the match sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence) | |
| 136 | the part of the sequence in protein | |
| 137 | |
| 138 .. class:: warningmark | |
| 139 | |
| 140 The two zip outputs have to be downloaded (and their files extracted with file archiver software); you cannot visualize them with the "eye icon" through the interface. | |
| 141 | |
| 142 -------- | |
| 143 | |
| 144 =============== | |
| 145 Working Example | |
| 146 =============== | |
| 147 | |
| 148 --------------------------- | |
| 149 The input files and options | |
| 150 --------------------------- | |
| 151 | |
| 152 **Input files** | |
| 153 | 3 files with 200 nucleic sequences each : Ap.fasta, Ac.fasta and Pp.fasta | |
| 154 | | |
| 155 **Parameters** | |
| 156 | e-value = 1e-20 | |
| 157 | | |
| 158 | |
| 159 ---------------- | |
| 160 The output files | |
| 161 ---------------- | |
| 162 | |
| 163 **Pairwise** | |
| 164 | |
| 165 | -------------------- Pairwise Pp_Ap -------------------- | |
| 166 | | |
| 167 | database : Pp.fasta | |
| 168 | query file : Ap.fasta | |
| 169 | | |
| 170 | ***** START run BLAST ***** | |
| 171 | ***** END run BLAST ***** | |
| 172 | | |
| 173 | | |
| 174 | database : Ap.fasta | |
| 175 | query file : only the sequences of Pp.fasta who matched during the last BLAST | |
| 176 | | |
| 177 | ***** START run BLAST ***** | |
| 178 | ***** END run BLAST ***** | |
| 179 | | |
| 180 | [3/5] Get pairs of sequences ... | |
| 181 | Get list of fasta name involved in RBH | |
| 182 | Number of pairwises parsed = 15 | |
| 183 | Get subset of Alvinella db | |
| 184 | Get subset of Paralvinella db | |
| 185 | | |
| 186 | -------------------- Pairwise Pp_Ac -------------------- | |
| 187 | | |
| 188 | database : Pp.fasta | |
| 189 | query file : Ac.fasta | |
| 190 | | |
| 191 | ***** START run BLAST ***** | |
| 192 | ***** END run BLAST ***** | |
| 193 | | |
| 194 | | |
| 195 | database : Ac.fasta | |
| 196 | query file : only the sequences of Pp.fasta who matched during the last BLAST | |
| 197 | | |
| 198 | ***** START run BLAST ***** | |
| 199 | ***** END run BLAST ***** | |
| 200 | | |
| 201 | [3/5] Get pairs of sequences ... | |
| 202 | Get list of fasta name involved in RBH | |
| 203 | Number of pairwises parsed = 13 | |
| 204 | Get subset of Alvinella db | |
| 205 | Get subset of Paralvinella db | |
| 206 | | |
| 207 | | |
| 208 | -------------------- Pairwise Ap_Ac -------------------- | |
| 209 | | |
| 210 | database : Ap.fasta | |
| 211 | query file : Ac.fasta | |
| 212 | | |
| 213 | ***** START run BLAST ***** | |
| 214 | ***** END run BLAST ***** | |
| 215 | | |
| 216 | | |
| 217 | database : Ac.fasta | |
| 218 | query file : only the sequences of Ap.fasta who matched during the last BLAST | |
| 219 | | |
| 220 | ***** START run BLAST ***** | |
| 221 | ***** END run BLAST ***** | |
| 222 | | |
| 223 | [3/5] Get pairs of sequences ... | |
| 224 | Get list of fasta name involved in RBH | |
| 225 | Number of pairwises parsed = 24 | |
| 226 | Get subset of Alvinella db | |
| 227 | Get subset of Paralvinella db | |
| 228 | | |
| 229 | | |
| 230 | |
| 231 **Pairwise_output_file_PROT** | |
| 232 | |
| 233 | Save as *Galaxy{number}-[Pairwise_output_file_PROT].zip* | |
| 234 | If you unzip the file, a number of files appear (number of pairwise) : 19_ReciprocalBestHits_{name_of_pairwise}.fasta | |
| 235 | For example the 4 last sequences of the file 19_ReciprocalBestHits_Pp_Ap.fasta | |
| 236 | | |
| 237 | >Ap123_1/1_1.000_748||254...478||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0 | |
| 238 | FVRITVGDEMSRRPKFAMITWVGPEVSPMKRAKVSTDKAFVKQIFQNFAKEIQTSERSELEEEYVRQEVMKAGGA | |
| 239 | >Pp_146_1/2_1.000_713||259...483||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0 | |
| 240 | FAYIRCTNEESKRSKFAMITWIGQGVEAMKRAKVSMDKQFLKEIFQNFAREFQTSEKSELDEVCIKHALAIDDGA | |
| 241 | >Ap66_1/1_1.000_400||192...398||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0 | |
| 242 | LSTSLLNWRKHTLCF*GMKLILIILLISFIIPAILFLLSIFTTMRMPESREKFRPYECGFDPNHSARTP | |
| 243 | >Pp_201_2/2_1.000_691||14...220||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0 | |
| 244 | LSTSLLN*RKQPFASEEMKLLILLLFISALIPRILIILSIFTSIRTPKNREKSSPYECGFDPNHSARTP | |
| 245 | | |
| 246 | | |
| 247 | |
| 248 **Pairwise_output_file_DNA** | |
| 249 | |
| 250 | Save as *Galaxy{number}-[Pairwise_output_file_DNA].zip* | |
| 251 | If you unzip the file, a number of files appear (number of pairwise) : 25_DNAalignment_corresponding_to_protein_from_19_RBH_{name_of_pairwise}.fasta | |
| 252 | For example the 4 last sequences of the file 25_DNAalignement_corresponding_to_protein_from_19_RBH_Pp_Ap.fasta | |
| 253 | | |
| 254 | >Ap123_1/1_1.000_748 | |
| 255 | CCAGTAACAAGCCGCCACGGGTCCGTCGTGTCTTCTCTTCAAGGAAAGGTTGACAGATTCTCGTACGCTAGACGTCGCCACCTACTCGTCCTGGACTCCGGTGCCGTAGGTGGCGCCACCTGCTTTCATCACTTCCTGCCTA | |
| 256 | ACGTACTCCTCTTCTAGCTCCGATCTCTCGCTCGTCTGGATCTCTTTGGCAAAGTTCTGGAATATCTGCTTGACGAACGCCTTGTCCGTGCTGACTTTGGCGCGCTTCATTGGGCTCACTTCCGGTCCGACCCACGTGATCA | |
| 257 | TGGCGAACTTCGGTCTTCTGCTCATTTCGTCCCCGACGGTAATACGGACAAAGGCGAACGCCCGCTGGTCATCTTGTAGTTTTGATAACAGATCCTCGTATTCGGTTCCTGTAGAGTCCAGTATAATATTGTCGCCATCATA | |
| 258 | CGTCACAAACGCCCAGTTTGTCTCCGTCGCGTCGCTCCTGACGTCTTCGTAAGCCTGTCCGATAGCCTCTCTGTCGATGTCTGCCATGCTGCTGGTCCCGCTCTCGACGCTAATGAGCCAATCACGACTTCTGACAGACGAG | |
| 259 | TAGACATGCAGACAGCCAGACGGACTGACGGACTGACG | |
| 260 | >Pp_146_1/2_1.000_713 | |
| 261 | CATTAATTGTGTGTCTGGTTGTGGGTGTGTGTTATAAGAGACATCACTTAGTGTATACTGATGTCCACGTGGTAGTTGACCAGCATGTCGAATATGGATAGGGACTCGATCTTGAATGGCTATGAGGAGGTTCGCAACGACGA | |
| 262 | CTCGGACATTAACTGGGCTTTCGTAACGTATTCACCTGACAACAAACTAGTACTTGATTCAACTGGCACAGACTACTTCCAGCTCCAGGAGAAATATCAAGATGATATGCGAGGATTTGCTTACATCCGGTGCACTAACGAGG | |
| 263 | AGAGTAAACGTTCTAAATTTGCCATGATTACCTGGATTGGACAAGGAGTGGAAGCAATGAAGCGTGCCAAGGTCAGCATGGACAAACAGTTCCTAAAGGAAATCTTCCAGAATTTCGCAAGAGAATTTCAGACGAGTGAAAAG | |
| 264 | TCAGAGCTTGATGAGGTCTGTATTAAACACGCGCTTGCCATTGACGATGGAGCTGGTTGCAAAGTGGAAAGCGAGGACACGAGAAAAGGGGCCTTTCTCAGGAAAGAGGATGACACTGAAGTGGAAAGGGAAACTAATGTCAA | |
| 265 | CAATGTCTCCGGTGTCGTGGAAGAAGATGATGACGCAAAAAATGCAAATGATTTTAATTACGAAGAGGACTGTAACAATGAATAGGTGCATGTCGATGATTTATATAGAGAACTAGACTTCGCACTCGCTAGGTGGTTGAT | |
| 266 | >Ap66_1/1_1.000_400 | |
| 267 | TGATCGTCTTATAAACCTAACTTGAAAAACCTTCCTACCATTTAGGGCTAGCAGCCCTATTAATTATCACACCTATCGCAGCGCTCTCACTATAATTATAAGTATTGCGCCGGGTTTGAACGGATAGCTCTGATGCTGCTAATT | |
| 268 | ACGGGACCTAATAATCCCCAATACTTTATCCTTAGAGAGCTGTACCTCTTAGCACCAGTCTTTTAAACTGGCGAAAGCACACTTTATGCTTCTAAGGAATGAAACTAATTCTTATAATCCTACTAATCTCTTTTATCATCCCCG | |
| 269 | CCATTCTATTTTTACTCTCGATCTTTACTACTATGCGCATGCCAGAGAGCCGTGAAAAATTTAGGCCCTACGAGTGCGGGTTTGACCCCAATCACTCGGCCCGAACCCCATT | |
| 270 | >Pp_201_2/2_1.000_691 | |
| 271 | ATCGTAGGGAAAAAGGTGTTCGTGCAGAATGATTGGGGTCAAATCCACATTCGTAGGGGCTAGATTTTTCACGGTTTTTAGGTGTACGAATAGAGGTGAAGATTGATAGGATGATTAAAATTCTTGGGATTAATGCTGAAATAAA | |
| 272 | GAGAAGTAGGATTAAAAGTTTCATTTCCTCAGAAGCAAAGGGTTGCTTTCGTCAGTTTAAAAGACTGGTGCTAAGTAGGTACAGCTCTCTAAGGG | |
| 273 | |
| 274 </help> | |
| 275 | |
| 276 <expand macro="citations" /> | |
| 277 | |
| 278 </tool> |
