Mercurial > repos > abims-sbr > pairwise
changeset 0:e95d4b20c62d draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122
| author | abims-sbr |
|---|---|
| date | Thu, 13 Apr 2017 05:46:29 -0400 |
| parents | |
| children | c8af52875b0f |
| files | CHANGELOG.md macros.xml pairwise.xml test-data/test_02_input_pairwise.zip test-data/test_03.out test-data/test_03_output_Pairwise_PROT_inputPOGS..zip |
| diffstat | 6 files changed, 421 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHANGELOG.md Thu Apr 13 05:46:29 2017 -0400 @@ -0,0 +1,7 @@ +Changelog + +Version 1.0 - 13/04/2017 + + - Add functional test with planemo + - Planemo test with conda dependencies for blast, samtools and python + - Scripts renamed + symlinks to the directory 'scripts'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Apr 13 05:46:29 2017 -0400 @@ -0,0 +1,16 @@ +<macros> + + <xml name="python_required"> + <requirement type="package" version="2.7">python</requirement> + </xml> + + <xml name="citations"> + <citations> + <citation type="bibtex">Credits : ABIMS team, Roscoff Marine Station</citation> + <citation type="bibtex">Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool.</citation> + <citation type="bibtex">Version 1 : Scripts by Eric Fontanillas -- Galaxy integration by Julie Baffard</citation> + <citation type="bibtex">Version 2 : improvments by Victor Mataigne, Gildas le Corguillé, Misharl Monsoor</citation> + </citations> + </xml> + +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pairwise.xml Thu Apr 13 05:46:29 2017 -0400 @@ -0,0 +1,278 @@ +<?xml version="1.0"?> + +<tool name="Pairwise" id="pairwise" version="1.0"> + + <description> + Run reciproque tblastx pairwise + </description> + + <macros> + <import>macros.xml</import> + </macros> + + <requirements> + <expand macro="python_required" /> + <requirement type="package" version="2.2.22">blast-legacy</requirement> + <requirement type="package" version="1.3.1">samtools</requirement> + </requirements> + + <command> + <![CDATA[ + ln -s $__tool_directory__/scripts/S02_xxx_patron_pipeline.sh . + && + ln -s $__tool_directory__/scripts/S03_run_blast_with_k_filter.sh . + && + ln -s $__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh . + && + ln -s $__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py . + && + ln -s $__tool_directory__/scripts/S06_post_processing_of_pairwise.py . + && + ln -s $__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py . + && + ln -s $__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py . + && + ln -s $__tool_directory__/scripts/S09_post_processing_of_pairwise.py . + && + ln -s $__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py . + && + ln -s $__tool_directory__/scripts/S11_post_processing_of_pairwise.py . + && + ln -s $__tool_directory__/scripts/S12_prot2dna.py . + && + ln -s $__tool_directory__/scripts/S13_zip.py . + && + python $__tool_directory__/scripts/S01_organize_rbh.py 8 ${e_value} ${zip} + > ${output}; + ]]> + </command> + + <inputs> + <param name="zip" type="data" format="no_unzip.zip,zip" multiple="true" label="Choose your ZIP file" help="Contains the output of the filter tool" /> + <param name="e_value" type="float" value="1e-5" label="e_value" help="By default, it's 1e-5. you can write the e-value like this: 1e-5" /> + </inputs> + + <outputs> + <data format="txt" name="output" label="Pairwise" /> + <data format="no_unzip.zip" name="output_zip_DNA" label="Pairwise_zip_DNA" from_work_dir="output_file_DNA.zip" /> + <data format="no_unzip.zip" name="output_zip_PROT" label="Pairwise_zip_PROT" from_work_dir="output_file_PROT.zip" /> + </outputs> + + <tests> + <test> + <param name="zip" ftype="zip" value="test_02_input_pairwise.zip" /> + <param name="e-value" value="1e-5" /> + <output name="output" > + <assert_contents> + <has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/> + <has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/> + <has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> + <has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/> + <has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> + <has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> + + <has_text text="Number of pairwises parsed = 2" /> + <has_text text="Number of pairwises parsed = 3" /> + <has_text text="Number of pairwises parsed = 0" /> + <has_text text="Number of pairwises parsed = 5" /> + <has_text text="Number of pairwises parsed = 1" /> + + </assert_contents> + </output> + </test> + </tests> + + <help> + +============ +What it does +============ + +| This tool takes a zip archive containing nucleic fasta sequence files and searches different homologous genes from pairwise comparaisons. +| There are 3 outputs. +| +| The run blast was written by the **NCBI**. +| The script was written by **Eric Fontanillas**. +| The wrapper was written by **Julie Baffard**. + +-------- + +========== +Parameters +========== + +The choice of parameters is possible : + +**-e** : + | is the option for the choice of the e-value. + | By default it's 10. + | + +-------- + +======= +Outputs +======= + +This tool, produces the following files : + +**Pairwise** : + | is the general output. It gives the information about what the tool is doing (for each pairwise). + | + +**Pairwise_zip_DNA.zip** : + | is the output wich contains nucleic sequences (of the pairwise) that are homologues. The sequences are with nucleotides. Shows : + | the name of the query sequence + | the part of the sequence in nucleotides + | the name of the match sequence + | the part of the sequence in nucleotides + | + +**Pairwise_zip_PROT.zip** : + | is the output wich contains proteic sequences (of the pairwise) that are homologues. The sequences are with protein. Shows : + | the name of the query sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence) + | the part of the sequence in protein + | the name of the match sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence) + | the part of the sequence in protein + +.. class:: warningmark + +The two zip outputs have to be downloaded (and extracts the files with a file archiver software), you cannot visualize them with the "eye icon" through the interface. + +-------- + +=============== +Working Example +=============== + +--------------------------- +The input files and options +--------------------------- + +**Input files** + | 3 files with 200 nucleic sequences each : Ap.fasta, Ac.fasta et Pp.fasta + | +**Parameters** + | e-value = 1e-20 + | + +---------------- +The output files +---------------- + +**Pairwise** + +| -------------------- Pairwise Pp_Ap -------------------- +| +| database : Pp.fasta +| query file : Ap.fasta +| +| ***** START run BLAST ***** +| ***** END run BLAST ***** +| +| +| database : Ap.fasta +| query file : only the sequences of Pp.fasta who matched during the last BLAST +| +| ***** START run BLAST ***** +| ***** END run BLAST ***** +| +| [3/5] Get pairs of sequences ... +| Get list of fasta name involved in RBH +| Number of pairwises parsed = 15 +| Get subset of Alvinella db +| Get subset of Paralvinella db +| +| -------------------- Pairwise Pp_Ac -------------------- +| +| database : Pp.fasta +| query file : Ac.fasta +| +| ***** START run BLAST ***** +| ***** END run BLAST ***** +| +| +| database : Ac.fasta +| query file : only the sequences of Pp.fasta who matched during the last BLAST +| +| ***** START run BLAST ***** +| ***** END run BLAST ***** +| +| [3/5] Get pairs of sequences ... +| Get list of fasta name involved in RBH +| Number of pairwises parsed = 13 +| Get subset of Alvinella db +| Get subset of Paralvinella db +| +| +| -------------------- Pairwise Ap_Ac -------------------- +| +| database : Ap.fasta +| query file : Ac.fasta +| +| ***** START run BLAST ***** +| ***** END run BLAST ***** +| +| +| database : Ac.fasta +| query file : only the sequences of Ap.fasta who matched during the last BLAST +| +| ***** START run BLAST ***** +| ***** END run BLAST ***** +| +| [3/5] Get pairs of sequences ... +| Get list of fasta name involved in RBH +| Number of pairwises parsed = 24 +| Get subset of Alvinella db +| Get subset of Paralvinella db +| +| + +**Pairwise_output_file_PROT** + +| Save as *Galaxy{number}-[Pairwise_output_file_PROT].zip* +| If you unzip the file, a number of file appears (number of pairwise) : 19_ReciprocalBestHits_{name_of_pairwise}.fasta +| For example the 4 last sequences of the file 19_ReciprocalBestHits_Pp_Ap.fasta +| +| >Ap123_1/1_1.000_748||254...478||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0 +| FVRITVGDEMSRRPKFAMITWVGPEVSPMKRAKVSTDKAFVKQIFQNFAKEIQTSERSELEEEYVRQEVMKAGGA +| >Pp_146_1/2_1.000_713||259...483||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0 +| FAYIRCTNEESKRSKFAMITWIGQGVEAMKRAKVSMDKQFLKEIFQNFAREFQTSEKSELDEVCIKHALAIDDGA +| >Ap66_1/1_1.000_400||192...398||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0 +| LSTSLLNWRKHTLCF*GMKLILIILLISFIIPAILFLLSIFTTMRMPESREKFRPYECGFDPNHSARTP +| >Pp_201_2/2_1.000_691||14...220||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0 +| LSTSLLN*RKQPFASEEMKLLILLLFISALIPRILIILSIFTSIRTPKNREKSSPYECGFDPNHSARTP +| +| + +**Pairwise_output_file_DNA** + +| Save as *Galaxy{number}-[Pairwise_output_file_DNA].zip* +| If you unzip the file, a number of file appears (number of pairwise) : 25_DNAalignment_corresponding_to_protein_from_19_RBH_{name_of_pairwise}.fasta +| For example the 4 last sequences of the file 25_DNAalignement_corresponding_to_protein_from_19_RBH_Pp_Ap.fasta +| +| >Ap123_1/1_1.000_748 +| CCAGTAACAAGCCGCCACGGGTCCGTCGTGTCTTCTCTTCAAGGAAAGGTTGACAGATTCTCGTACGCTAGACGTCGCCACCTACTCGTCCTGGACTCCGGTGCCGTAGGTGGCGCCACCTGCTTTCATCACTTCCTGCCTA +| ACGTACTCCTCTTCTAGCTCCGATCTCTCGCTCGTCTGGATCTCTTTGGCAAAGTTCTGGAATATCTGCTTGACGAACGCCTTGTCCGTGCTGACTTTGGCGCGCTTCATTGGGCTCACTTCCGGTCCGACCCACGTGATCA +| TGGCGAACTTCGGTCTTCTGCTCATTTCGTCCCCGACGGTAATACGGACAAAGGCGAACGCCCGCTGGTCATCTTGTAGTTTTGATAACAGATCCTCGTATTCGGTTCCTGTAGAGTCCAGTATAATATTGTCGCCATCATA +| CGTCACAAACGCCCAGTTTGTCTCCGTCGCGTCGCTCCTGACGTCTTCGTAAGCCTGTCCGATAGCCTCTCTGTCGATGTCTGCCATGCTGCTGGTCCCGCTCTCGACGCTAATGAGCCAATCACGACTTCTGACAGACGAG +| TAGACATGCAGACAGCCAGACGGACTGACGGACTGACG +| >Pp_146_1/2_1.000_713 +| CATTAATTGTGTGTCTGGTTGTGGGTGTGTGTTATAAGAGACATCACTTAGTGTATACTGATGTCCACGTGGTAGTTGACCAGCATGTCGAATATGGATAGGGACTCGATCTTGAATGGCTATGAGGAGGTTCGCAACGACGA +| CTCGGACATTAACTGGGCTTTCGTAACGTATTCACCTGACAACAAACTAGTACTTGATTCAACTGGCACAGACTACTTCCAGCTCCAGGAGAAATATCAAGATGATATGCGAGGATTTGCTTACATCCGGTGCACTAACGAGG +| AGAGTAAACGTTCTAAATTTGCCATGATTACCTGGATTGGACAAGGAGTGGAAGCAATGAAGCGTGCCAAGGTCAGCATGGACAAACAGTTCCTAAAGGAAATCTTCCAGAATTTCGCAAGAGAATTTCAGACGAGTGAAAAG +| TCAGAGCTTGATGAGGTCTGTATTAAACACGCGCTTGCCATTGACGATGGAGCTGGTTGCAAAGTGGAAAGCGAGGACACGAGAAAAGGGGCCTTTCTCAGGAAAGAGGATGACACTGAAGTGGAAAGGGAAACTAATGTCAA +| CAATGTCTCCGGTGTCGTGGAAGAAGATGATGACGCAAAAAATGCAAATGATTTTAATTACGAAGAGGACTGTAACAATGAATAGGTGCATGTCGATGATTTATATAGAGAACTAGACTTCGCACTCGCTAGGTGGTTGAT +| >Ap66_1/1_1.000_400 +| TGATCGTCTTATAAACCTAACTTGAAAAACCTTCCTACCATTTAGGGCTAGCAGCCCTATTAATTATCACACCTATCGCAGCGCTCTCACTATAATTATAAGTATTGCGCCGGGTTTGAACGGATAGCTCTGATGCTGCTAATT +| ACGGGACCTAATAATCCCCAATACTTTATCCTTAGAGAGCTGTACCTCTTAGCACCAGTCTTTTAAACTGGCGAAAGCACACTTTATGCTTCTAAGGAATGAAACTAATTCTTATAATCCTACTAATCTCTTTTATCATCCCCG +| CCATTCTATTTTTACTCTCGATCTTTACTACTATGCGCATGCCAGAGAGCCGTGAAAAATTTAGGCCCTACGAGTGCGGGTTTGACCCCAATCACTCGGCCCGAACCCCATT +| >Pp_201_2/2_1.000_691 +| ATCGTAGGGAAAAAGGTGTTCGTGCAGAATGATTGGGGTCAAATCCACATTCGTAGGGGCTAGATTTTTCACGGTTTTTAGGTGTACGAATAGAGGTGAAGATTGATAGGATGATTAAAATTCTTGGGATTAATGCTGAAATAAA +| GAGAAGTAGGATTAAAAGTTTCATTTCCTCAGAAGCAAAGGGTTGCTTTCGTCAGTTTAAAAGACTGGTGCTAAGTAGGTACAGCTCTCTAAGGG + + </help> + + <expand macro="citations" /> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_03.out Thu Apr 13 05:46:29 2017 -0400 @@ -0,0 +1,120 @@ +Pair of species: +('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta') +Pair of species: +('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta') +Pair of species: +('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta') +Pair of species: +('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta') +Pair of species: +('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta') +Pair of species: +('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta') +-------------------- Pairwise PfPfiji_AmAmphi -------------------- + +database : PfPfiji_Trinity.fasta +query file : AmAmphi_Trinity.fasta + +***** RUN FIRST BLAST ***** + + +database : AmAmphi_Trinity.fasta +query file : only the sequences of PfPfiji_Trinity.fasta which matched during the last BLAST + +***** RUN SECOND BLAST ***** + + +Number of pairwises parsed = 2 + + + +-------------------- Pairwise AmAmphi_AcAcaud -------------------- + +database : AmAmphi_Trinity.fasta +query file : AcAcaud_Trinity.fasta + +***** RUN FIRST BLAST ***** + + +database : AcAcaud_Trinity.fasta +query file : only the sequences of AmAmphi_Trinity.fasta which matched during the last BLAST + +***** RUN SECOND BLAST ***** + + +Number of pairwises parsed = 3 + + + +-------------------- Pairwise ApApomp_AmAmphi -------------------- + +database : ApApomp_Trinity.fasta +query file : AmAmphi_Trinity.fasta + +***** RUN FIRST BLAST ***** + + +database : AmAmphi_Trinity.fasta +query file : only the sequences of ApApomp_Trinity.fasta which matched during the last BLAST + +***** RUN SECOND BLAST ***** + + +Number of pairwises parsed = 0 + + + +-------------------- Pairwise PfPfiji_ApApomp -------------------- + +database : PfPfiji_Trinity.fasta +query file : ApApomp_Trinity.fasta + +***** RUN FIRST BLAST ***** + + +database : ApApomp_Trinity.fasta +query file : only the sequences of PfPfiji_Trinity.fasta which matched during the last BLAST + +***** RUN SECOND BLAST ***** + + +Number of pairwises parsed = 5 + + + +-------------------- Pairwise PfPfiji_AcAcaud -------------------- + +database : PfPfiji_Trinity.fasta +query file : AcAcaud_Trinity.fasta + +***** RUN FIRST BLAST ***** + + +database : AcAcaud_Trinity.fasta +query file : only the sequences of PfPfiji_Trinity.fasta which matched during the last BLAST + +***** RUN SECOND BLAST ***** + + +Number of pairwises parsed = 2 + + + +-------------------- Pairwise ApApomp_AcAcaud -------------------- + +database : ApApomp_Trinity.fasta +query file : AcAcaud_Trinity.fasta + +***** RUN FIRST BLAST ***** + + +database : AcAcaud_Trinity.fasta +query file : only the sequences of ApApomp_Trinity.fasta which matched during the last BLAST + +***** RUN SECOND BLAST ***** + + +Number of pairwises parsed = 1 + + +
