changeset 0:e95d4b20c62d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122
author abims-sbr
date Thu, 13 Apr 2017 05:46:29 -0400
parents
children c8af52875b0f
files CHANGELOG.md macros.xml pairwise.xml test-data/test_02_input_pairwise.zip test-data/test_03.out test-data/test_03_output_Pairwise_PROT_inputPOGS..zip
diffstat 6 files changed, 421 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CHANGELOG.md	Thu Apr 13 05:46:29 2017 -0400
@@ -0,0 +1,7 @@
+Changelog
+
+Version 1.0 - 13/04/2017
+
+  - Add functional test with planemo
+  - Planemo test with conda dependencies for blast, samtools and python
+  - Scripts renamed + symlinks to the directory 'scripts'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Apr 13 05:46:29 2017 -0400
@@ -0,0 +1,16 @@
+<macros>
+	<!-- Shared macros imported by pairwise.xml: the Python requirement and the tool credits. -->
+	<xml name="python_required">
+		<requirement type="package" version="2.7">python</requirement>
+	</xml>
+	<!-- NOTE(review): the citations below are typed "bibtex" but hold free text rather than BibTeX entries; confirm Galaxy renders them as intended. -->
+	<xml name="citations">
+		<citations>
+			<citation type="bibtex">Credits : ABIMS team, Roscoff Marine Station</citation>
+			<citation type="bibtex">Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool.</citation>
+			<citation type="bibtex">Version 1 : Scripts by Eric Fontanillas -- Galaxy integration by Julie Baffard</citation>
+			<citation type="bibtex">Version 2 : improvements by Victor Mataigne, Gildas le Corguillé, Misharl Monsoor</citation>
+		</citations>
+	</xml>
+
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pairwise.xml	Thu Apr 13 05:46:29 2017 -0400
@@ -0,0 +1,278 @@
+<?xml version="1.0"?>
+
+<tool name="Pairwise" id="pairwise" version="1.0">
+
+	<description>
+		Run reciprocal tblastx pairwise
+	</description>
+
+	<macros>
+		<import>macros.xml</import>
+	</macros>
+
+	<requirements> <!-- Python 2.7 is pulled in through the python_required macro from macros.xml -->
+		<expand macro="python_required" />
+		<requirement type="package" version="2.2.22">blast-legacy</requirement>
+		<requirement type="package" version="1.3.1">samtools</requirement>
+	</requirements>
+
+	<command><![CDATA[
+		## Link the helper scripts into the job working directory; presumably S01_organize_rbh.py calls them by relative name (TODO confirm).
+		ln -s '$__tool_directory__/scripts/S02_xxx_patron_pipeline.sh' .
+		&&
+		ln -s '$__tool_directory__/scripts/S03_run_blast_with_k_filter.sh' .
+		&&
+		ln -s '$__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh' .
+		&&
+		ln -s '$__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S06_post_processing_of_pairwise.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S09_post_processing_of_pairwise.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S11_post_processing_of_pairwise.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S12_prot2dna.py' .
+		&&
+		ln -s '$__tool_directory__/scripts/S13_zip.py' .
+		&&
+		## Paths and parameters are single-quoted so the shell never word-splits them. NOTE(review): the meaning of the literal 8 is not visible in this file.
+		python '$__tool_directory__/scripts/S01_organize_rbh.py' 8 '${e_value}' '${zip}'
+		> '${output}';
+	]]></command>
+
+	<inputs> <!-- Both parameters are passed to S01_organize_rbh.py on the command line -->
+		<param name="zip" type="data" format="no_unzip.zip,zip" multiple="true" label="Choose your ZIP file" help="Contains the output of the filter tool" />
+		<param name="e_value" type="float" value="1e-5" label="e_value" help="By default, it's 1e-5. you can write the e-value like this: 1e-5" />
+	</inputs>
+
+	<outputs> <!-- "output" receives the redirected stdout of the command; the two archives are collected from the job working directory -->
+		<data format="txt" name="output" label="Pairwise" />
+		<data format="no_unzip.zip" name="output_zip_DNA" label="Pairwise_zip_DNA" from_work_dir="output_file_DNA.zip" />
+		<data format="no_unzip.zip" name="output_zip_PROT" label="Pairwise_zip_PROT" from_work_dir="output_file_PROT.zip" />
+	</outputs>
+
+	<tests>
+		<test>
+			<!-- Test parameter names must match the names declared in the inputs section (zip, e_value). -->
+			<param name="zip" ftype="zip" value="test_02_input_pairwise.zip" />
+			<param name="e_value" value="1e-5" />
+			<output name="output">
+				<assert_contents>
+					<has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/>
+					<has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
+					<has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
+					<has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
+					<has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
+					<has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
+					<!-- Substring checks on the per-pair result lines; each distinct count is asserted once. -->
+					<has_text text="Number of pairwises parsed = 2" />
+					<has_text text="Number of pairwises parsed = 3" />
+					<has_text text="Number of pairwises parsed = 0" />
+					<has_text text="Number of pairwises parsed = 5" />
+					<has_text text="Number of pairwises parsed = 1" />
+				</assert_contents>
+			</output>
+		</test>
+	</tests>
+
+	<help>
+
+============
+What it does
+============
+
+| This tool takes a zip archive containing nucleic fasta sequence files and searches for homologous genes from pairwise comparisons.
+| There are 3 outputs.
+|
+| The BLAST program was written by the **NCBI**.
+| The script was written by **Eric Fontanillas**.
+| The wrapper was written by **Julie Baffard**.
+
+--------
+
+==========
+Parameters
+==========
+
+The choice of parameters is possible : 
+
+**-e** :
+	|  is the option for the choice of the e-value. 
+	| By default it's 1e-5.
+	| 
+
+--------
+
+=======
+Outputs
+=======
+
+This tool produces the following files :
+
+**Pairwise** :
+	| is the general output. It gives the information about what the tool is doing (for each pairwise).
+	| 
+
+**Pairwise_zip_DNA.zip** :
+	| is the output which contains the nucleic sequences (of the pairwise) that are homologous. The sequences are given as nucleotides. Shows :
+	| the name of the query sequence
+	| the part of the sequence in nucleotides
+	| the name of the match sequence
+	| the part of the sequence in nucleotides
+	| 
+
+**Pairwise_zip_PROT.zip** :
+	| is the output which contains the protein sequences (of the pairwise) that are homologous. The sequences are given as amino acids. Shows :
+	| the name of the query sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
+	| the part of the sequence in protein
+	| the name of the match sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
+	| the part of the sequence in protein
+
+.. class:: warningmark
+
+The two zip outputs have to be downloaded and extracted with file archiver software; you cannot visualize them with the "eye icon" through the interface.
+
+--------
+
+===============
+Working Example
+===============
+
+---------------------------
+The input files and options
+---------------------------
+
+**Input files**
+	| 3 files with 200 nucleic sequences each : Ap.fasta, Ac.fasta and Pp.fasta
+	| 
+**Parameters**
+	| e-value = 1e-20
+	| 
+
+----------------
+The output files
+----------------
+
+**Pairwise**
+
+| -------------------- Pairwise Pp_Ap --------------------
+|  
+| database :  Pp.fasta
+| query file :  Ap.fasta
+|  
+| ***** START run BLAST *****
+| ***** END run BLAST *****
+|  
+| 
+| database :  Ap.fasta
+| query file : only the sequences of Pp.fasta who matched during the last BLAST
+| 
+| ***** START run BLAST *****
+| ***** END run BLAST *****
+|  
+| [3/5] Get pairs of sequences ...
+| Get list of fasta name involved in RBH
+| Number of pairwises parsed = 15
+| Get subset of Alvinella db
+| Get subset of Paralvinella db 
+| 
+| -------------------- Pairwise Pp_Ac --------------------
+|  
+| database :  Pp.fasta
+| query file :  Ac.fasta
+|  
+| ***** START run BLAST *****
+| ***** END run BLAST *****
+|  
+| 
+| database :  Ac.fasta
+| query file : only the sequences of Pp.fasta who matched during the last BLAST
+|  
+| ***** START run BLAST *****
+| ***** END run BLAST *****
+|  
+| [3/5] Get pairs of sequences ...
+| Get list of fasta name involved in RBH
+| Number of pairwises parsed = 13
+| Get subset of Alvinella db
+| Get subset of Paralvinella db 
+| 
+| 
+| -------------------- Pairwise Ap_Ac --------------------
+|  
+| database :  Ap.fasta
+| query file :  Ac.fasta
+|  
+| ***** START run BLAST *****
+| ***** END run BLAST *****
+|  
+| 
+| database :  Ac.fasta
+| query file : only the sequences of Ap.fasta who matched during the last BLAST
+|  
+| ***** START run BLAST *****
+| ***** END run BLAST *****
+|  
+| [3/5] Get pairs of sequences ...
+| Get list of fasta name involved in RBH
+| Number of pairwises parsed = 24
+| Get subset of Alvinella db
+| Get subset of Paralvinella db 
+| 
+| 
+
+**Pairwise_output_file_PROT**
+
+| Save as *Galaxy{number}-[Pairwise_output_file_PROT].zip*
+| If you unzip the file, a number of files appear (number of pairwise) : 19_ReciprocalBestHits_{name_of_pairwise}.fasta
+| For example the 4 last sequences of the file 19_ReciprocalBestHits_Pp_Ap.fasta
+| 
+| &gt;Ap123_1/1_1.000_748||254...478||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
+| FVRITVGDEMSRRPKFAMITWVGPEVSPMKRAKVSTDKAFVKQIFQNFAKEIQTSERSELEEEYVRQEVMKAGGA
+| &gt;Pp_146_1/2_1.000_713||259...483||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
+| FAYIRCTNEESKRSKFAMITWIGQGVEAMKRAKVSMDKQFLKEIFQNFAREFQTSEKSELDEVCIKHALAIDDGA
+| &gt;Ap66_1/1_1.000_400||192...398||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
+| LSTSLLNWRKHTLCF*GMKLILIILLISFIIPAILFLLSIFTTMRMPESREKFRPYECGFDPNHSARTP
+| &gt;Pp_201_2/2_1.000_691||14...220||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
+| LSTSLLN*RKQPFASEEMKLLILLLFISALIPRILIILSIFTSIRTPKNREKSSPYECGFDPNHSARTP
+| 
+| 
+
+**Pairwise_output_file_DNA**
+																				                  	              
+| Save as *Galaxy{number}-[Pairwise_output_file_DNA].zip*																		      	
+| If you unzip the file, a number of files appear (number of pairwise) : 25_DNAalignment_corresponding_to_protein_from_19_RBH_{name_of_pairwise}.fasta
+| For example the 4 last sequences of the file 25_DNAalignement_corresponding_to_protein_from_19_RBH_Pp_Ap.fasta
+| 
+| &gt;Ap123_1/1_1.000_748
+| CCAGTAACAAGCCGCCACGGGTCCGTCGTGTCTTCTCTTCAAGGAAAGGTTGACAGATTCTCGTACGCTAGACGTCGCCACCTACTCGTCCTGGACTCCGGTGCCGTAGGTGGCGCCACCTGCTTTCATCACTTCCTGCCTA
+| ACGTACTCCTCTTCTAGCTCCGATCTCTCGCTCGTCTGGATCTCTTTGGCAAAGTTCTGGAATATCTGCTTGACGAACGCCTTGTCCGTGCTGACTTTGGCGCGCTTCATTGGGCTCACTTCCGGTCCGACCCACGTGATCA
+| TGGCGAACTTCGGTCTTCTGCTCATTTCGTCCCCGACGGTAATACGGACAAAGGCGAACGCCCGCTGGTCATCTTGTAGTTTTGATAACAGATCCTCGTATTCGGTTCCTGTAGAGTCCAGTATAATATTGTCGCCATCATA
+| CGTCACAAACGCCCAGTTTGTCTCCGTCGCGTCGCTCCTGACGTCTTCGTAAGCCTGTCCGATAGCCTCTCTGTCGATGTCTGCCATGCTGCTGGTCCCGCTCTCGACGCTAATGAGCCAATCACGACTTCTGACAGACGAG
+| TAGACATGCAGACAGCCAGACGGACTGACGGACTGACG
+| &gt;Pp_146_1/2_1.000_713																						      
+| CATTAATTGTGTGTCTGGTTGTGGGTGTGTGTTATAAGAGACATCACTTAGTGTATACTGATGTCCACGTGGTAGTTGACCAGCATGTCGAATATGGATAGGGACTCGATCTTGAATGGCTATGAGGAGGTTCGCAACGACGA
+| CTCGGACATTAACTGGGCTTTCGTAACGTATTCACCTGACAACAAACTAGTACTTGATTCAACTGGCACAGACTACTTCCAGCTCCAGGAGAAATATCAAGATGATATGCGAGGATTTGCTTACATCCGGTGCACTAACGAGG
+| AGAGTAAACGTTCTAAATTTGCCATGATTACCTGGATTGGACAAGGAGTGGAAGCAATGAAGCGTGCCAAGGTCAGCATGGACAAACAGTTCCTAAAGGAAATCTTCCAGAATTTCGCAAGAGAATTTCAGACGAGTGAAAAG
+| TCAGAGCTTGATGAGGTCTGTATTAAACACGCGCTTGCCATTGACGATGGAGCTGGTTGCAAAGTGGAAAGCGAGGACACGAGAAAAGGGGCCTTTCTCAGGAAAGAGGATGACACTGAAGTGGAAAGGGAAACTAATGTCAA
+| CAATGTCTCCGGTGTCGTGGAAGAAGATGATGACGCAAAAAATGCAAATGATTTTAATTACGAAGAGGACTGTAACAATGAATAGGTGCATGTCGATGATTTATATAGAGAACTAGACTTCGCACTCGCTAGGTGGTTGAT 
+| &gt;Ap66_1/1_1.000_400
+| TGATCGTCTTATAAACCTAACTTGAAAAACCTTCCTACCATTTAGGGCTAGCAGCCCTATTAATTATCACACCTATCGCAGCGCTCTCACTATAATTATAAGTATTGCGCCGGGTTTGAACGGATAGCTCTGATGCTGCTAATT
+| ACGGGACCTAATAATCCCCAATACTTTATCCTTAGAGAGCTGTACCTCTTAGCACCAGTCTTTTAAACTGGCGAAAGCACACTTTATGCTTCTAAGGAATGAAACTAATTCTTATAATCCTACTAATCTCTTTTATCATCCCCG
+| CCATTCTATTTTTACTCTCGATCTTTACTACTATGCGCATGCCAGAGAGCCGTGAAAAATTTAGGCCCTACGAGTGCGGGTTTGACCCCAATCACTCGGCCCGAACCCCATT
+| &gt;Pp_201_2/2_1.000_691
+| ATCGTAGGGAAAAAGGTGTTCGTGCAGAATGATTGGGGTCAAATCCACATTCGTAGGGGCTAGATTTTTCACGGTTTTTAGGTGTACGAATAGAGGTGAAGATTGATAGGATGATTAAAATTCTTGGGATTAATGCTGAAATAAA
+| GAGAAGTAGGATTAAAAGTTTCATTTCCTCAGAAGCAAAGGGTTGCTTTCGTCAGTTTAAAAGACTGGTGCTAAGTAGGTACAGCTCTCTAAGGG
+
+    </help>
+
+    <expand macro="citations" />
+
+</tool>
Binary file test-data/test_02_input_pairwise.zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_03.out	Thu Apr 13 05:46:29 2017 -0400
@@ -0,0 +1,120 @@
+Pair of species:
+('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')
+Pair of species:
+('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')
+Pair of species:
+('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')
+Pair of species:
+('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')
+Pair of species:
+('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')
+Pair of species:
+('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')
+-------------------- Pairwise PfPfiji_AmAmphi --------------------
+
+database : PfPfiji_Trinity.fasta
+query file : AmAmphi_Trinity.fasta
+
+***** RUN FIRST BLAST *****
+
+
+database : AmAmphi_Trinity.fasta
+query file : only the sequences of PfPfiji_Trinity.fasta which matched during the last BLAST
+
+***** RUN SECOND BLAST *****
+
+
+Number of pairwises parsed = 2 
+
+
+
+-------------------- Pairwise AmAmphi_AcAcaud --------------------
+
+database : AmAmphi_Trinity.fasta
+query file : AcAcaud_Trinity.fasta
+
+***** RUN FIRST BLAST *****
+
+
+database : AcAcaud_Trinity.fasta
+query file : only the sequences of AmAmphi_Trinity.fasta which matched during the last BLAST
+
+***** RUN SECOND BLAST *****
+
+
+Number of pairwises parsed = 3 
+
+
+
+-------------------- Pairwise ApApomp_AmAmphi --------------------
+
+database : ApApomp_Trinity.fasta
+query file : AmAmphi_Trinity.fasta
+
+***** RUN FIRST BLAST *****
+
+
+database : AmAmphi_Trinity.fasta
+query file : only the sequences of ApApomp_Trinity.fasta which matched during the last BLAST
+
+***** RUN SECOND BLAST *****
+
+
+Number of pairwises parsed = 0 
+
+
+
+-------------------- Pairwise PfPfiji_ApApomp --------------------
+
+database : PfPfiji_Trinity.fasta
+query file : ApApomp_Trinity.fasta
+
+***** RUN FIRST BLAST *****
+
+
+database : ApApomp_Trinity.fasta
+query file : only the sequences of PfPfiji_Trinity.fasta which matched during the last BLAST
+
+***** RUN SECOND BLAST *****
+
+
+Number of pairwises parsed = 5 
+
+
+
+-------------------- Pairwise PfPfiji_AcAcaud --------------------
+
+database : PfPfiji_Trinity.fasta
+query file : AcAcaud_Trinity.fasta
+
+***** RUN FIRST BLAST *****
+
+
+database : AcAcaud_Trinity.fasta
+query file : only the sequences of PfPfiji_Trinity.fasta which matched during the last BLAST
+
+***** RUN SECOND BLAST *****
+
+
+Number of pairwises parsed = 2 
+
+
+
+-------------------- Pairwise ApApomp_AcAcaud --------------------
+
+database : ApApomp_Trinity.fasta
+query file : AcAcaud_Trinity.fasta
+
+***** RUN FIRST BLAST *****
+
+
+database : AcAcaud_Trinity.fasta
+query file : only the sequences of ApApomp_Trinity.fasta which matched during the last BLAST
+
+***** RUN SECOND BLAST *****
+
+
+Number of pairwises parsed = 1 
+
+
+
Binary file test-data/test_03_output_Pairwise_PROT_inputPOGS..zip has changed