view pairwise.xml @ 0:e95d4b20c62d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122
author abims-sbr
date Thu, 13 Apr 2017 05:46:29 -0400
parents
children 5f68b2fc02c1
line wrap: on
line source

<?xml version="1.0"?>

<tool name="Pairwise" id="pairwise" version="1.0">

	<description>
		Run reciprocal pairwise tblastx comparisons
	</description>

	<!-- Shared macro definitions; macros.xml supplies the macros referenced via <expand> in this tool. -->
	<macros>
		<import>macros.xml</import>
	</macros>

	<!-- Software the tool needs at run time: the Python requirement comes from the
	     "python_required" macro, the two packages below are pinned explicitly. -->
	<requirements>
		<expand macro="python_required" />
		<requirement version="2.2.22" type="package">blast-legacy</requirement>
		<requirement version="1.3.1" type="package">samtools</requirement>
	</requirements>

  	<!-- Symlinks the helper scripts S02-S13 from the tool directory into the job
  	     working directory, then runs S01_organize_rbh.py and redirects its standard
  	     output into the "output" dataset.
  	     Every Cheetah substitution is single-quoted so that paths containing
  	     whitespace or shell metacharacters reach the shell as one argument.
  	     NOTE(review): the symlinks are presumably needed because S01_organize_rbh.py
  	     calls the other scripts by relative name; confirm against the script.
  	     NOTE(review): the literal 8 is a fixed first argument of S01_organize_rbh.py;
  	     its meaning is not visible from this wrapper. -->
  	<command>
		<![CDATA[
		ln -s '$__tool_directory__/scripts/S02_xxx_patron_pipeline.sh' .
		&&
		ln -s '$__tool_directory__/scripts/S03_run_blast_with_k_filter.sh' .
		&&
		ln -s '$__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh' .
		&&
		ln -s '$__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py' .
		&&
		ln -s '$__tool_directory__/scripts/S06_post_processing_of_pairwise.py' .
		&&
		ln -s '$__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py' .
		&&
		ln -s '$__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py' .
		&&
		ln -s '$__tool_directory__/scripts/S09_post_processing_of_pairwise.py' .
		&&
		ln -s '$__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py' .
		&&
		ln -s '$__tool_directory__/scripts/S11_post_processing_of_pairwise.py' .
		&&
		ln -s '$__tool_directory__/scripts/S12_prot2dna.py' .
		&&
		ln -s '$__tool_directory__/scripts/S13_zip.py' .
		&&
		python '$__tool_directory__/scripts/S01_organize_rbh.py' 8 '${e_value}' '${zip}'
		> '${output}'
		]]>
  	</command>

 	<inputs>
		<!-- ZIP dataset(s) handed to S01_organize_rbh.py as ${zip}; several can be selected. -->
		<param
			name="zip"
			type="data"
			format="no_unzip.zip,zip"
			multiple="true"
			label="Choose your ZIP file"
			help="Contains the output of the filter tool" />
		<!-- E-value handed to S01_organize_rbh.py as ${e_value}; the form default is 1e-5. -->
		<param
			name="e_value"
			type="float"
			value="1e-5"
			label="e_value"
			help="By default, it's 1e-5. you can write the e-value like this: 1e-5" />
	</inputs>

	<outputs>
		<!-- Receives the standard output of S01_organize_rbh.py (redirected in the command). -->
		<data name="output" format="txt" label="Pairwise" />
		<!-- The two archives are picked up from files written in the job working directory. -->
		<data name="output_zip_DNA" format="no_unzip.zip" from_work_dir="output_file_DNA.zip" label="Pairwise_zip_DNA" />
		<data name="output_zip_PROT" format="no_unzip.zip" from_work_dir="output_file_PROT.zip" label="Pairwise_zip_PROT" />
	</outputs>

	<!-- Functional test: runs the tool on the bundled archive and checks that the log
	     lists every pair of input files and the expected pair counts. -->
	<tests>
		<test>
			<param name="zip" ftype="zip" value="test_02_input_pairwise.zip" />
			<!-- The name must match the declared input "e_value"; "e-value" is not a parameter of this tool. -->
			<param name="e_value" value="1e-5" />
			<output name="output">
				<assert_contents>
					<has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/>
					<has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
					<has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
					<has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
					<has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
					<has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>

					<has_text text="Number of pairwises parsed = 2" />
					<has_text text="Number of pairwises parsed = 3" />
					<has_text text="Number of pairwises parsed = 0" />
					<has_text text="Number of pairwises parsed = 5" />
					<has_text text="Number of pairwises parsed = 1" />

				</assert_contents>
			</output>
		</test>
	</tests>

	<help>

============
What it does
============

| This tool takes a zip archive containing nucleic fasta sequence files and searches for homologous genes through pairwise comparisons.
| There are 3 outputs.
|
| BLAST was written by the **NCBI**.
| The script was written by **Eric Fontanillas**.
| The wrapper was written by **Julie Baffard**.

--------

==========
Parameters
==========

The following parameter can be set:

**-e** :
	| is the option for the choice of the e-value.
	| By default it's 1e-5.
	| 

--------

=======
Outputs
=======

This tool produces the following files:

**Pairwise** :
	| is the general output. It gives the information about what the tool is doing (for each pairwise).
	| 

**Pairwise_zip_DNA.zip** :
	| is the output which contains the homologous nucleic sequences of each pairwise comparison. The sequences are given as nucleotides. It shows:
	| the name of the query sequence
	| the part of the sequence in nucleotides
	| the name of the match sequence
	| the part of the sequence in nucleotides
	| 

**Pairwise_zip_PROT.zip** :
	| is the output which contains the homologous protein sequences of each pairwise comparison. The sequences are given as amino acids. It shows:
	| the name of the query sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
	| the part of the sequence in protein
	| the name of the match sequence (the name of the sequence || the position (Start and End) of the homologous sequences || divergence || number of gaps || real divergence || the length of the homologous sequence)
	| the part of the sequence in protein

.. class:: warningmark

The two zip outputs have to be downloaded and extracted with file archiver software; you cannot view them with the "eye icon" in the interface.

--------

===============
Working Example
===============

---------------------------
The input files and options
---------------------------

**Input files**
	| 3 files with 200 nucleic sequences each: Ap.fasta, Ac.fasta and Pp.fasta
	| 
**Parameters**
	| e-value = 1e-20
	| 

----------------
The output files
----------------

**Pairwise**

| -------------------- Pairwise Pp_Ap --------------------
|  
| database :  Pp.fasta
| query file :  Ap.fasta
|  
| ***** START run BLAST *****
| ***** END run BLAST *****
|  
| 
| database :  Ap.fasta
| query file : only the sequences of Pp.fasta who matched during the last BLAST
| 
| ***** START run BLAST *****
| ***** END run BLAST *****
|  
| [3/5] Get pairs of sequences ...
| Get list of fasta name involved in RBH
| Number of pairwises parsed = 15
| Get subset of Alvinella db
| Get subset of Paralvinella db 
| 
| -------------------- Pairwise Pp_Ac --------------------
|  
| database :  Pp.fasta
| query file :  Ac.fasta
|  
| ***** START run BLAST *****
| ***** END run BLAST *****
|  
| 
| database :  Ac.fasta
| query file : only the sequences of Pp.fasta who matched during the last BLAST
|  
| ***** START run BLAST *****
| ***** END run BLAST *****
|  
| [3/5] Get pairs of sequences ...
| Get list of fasta name involved in RBH
| Number of pairwises parsed = 13
| Get subset of Alvinella db
| Get subset of Paralvinella db 
| 
| 
| -------------------- Pairwise Ap_Ac --------------------
|  
| database :  Ap.fasta
| query file :  Ac.fasta
|  
| ***** START run BLAST *****
| ***** END run BLAST *****
|  
| 
| database :  Ac.fasta
| query file : only the sequences of Ap.fasta who matched during the last BLAST
|  
| ***** START run BLAST *****
| ***** END run BLAST *****
|  
| [3/5] Get pairs of sequences ...
| Get list of fasta name involved in RBH
| Number of pairwises parsed = 24
| Get subset of Alvinella db
| Get subset of Paralvinella db 
| 
| 

**Pairwise_output_file_PROT**

| Save as *Galaxy{number}-[Pairwise_output_file_PROT].zip*
| If you unzip the file, several files appear (one per pairwise comparison): 19_ReciprocalBestHits_{name_of_pairwise}.fasta
| For example, the last 4 sequences of the file 19_ReciprocalBestHits_Pp_Ap.fasta
| 
| &gt;Ap123_1/1_1.000_748||254...478||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
| FVRITVGDEMSRRPKFAMITWVGPEVSPMKRAKVSTDKAFVKQIFQNFAKEIQTSERSELEEEYVRQEVMKAGGA
| &gt;Pp_146_1/2_1.000_713||259...483||[[1/1]][[1/6]]||29.3333333333||0||29.3333333333||75.0
| FAYIRCTNEESKRSKFAMITWIGQGVEAMKRAKVSMDKQFLKEIFQNFAREFQTSEKSELDEVCIKHALAIDDGA
| &gt;Ap66_1/1_1.000_400||192...398||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
| LSTSLLNWRKHTLCF*GMKLILIILLISFIIPAILFLLSIFTTMRMPESREKFRPYECGFDPNHSARTP
| &gt;Pp_201_2/2_1.000_691||14...220||[[1/1]][[1/6]]||21.7391304348||0||21.7391304348||69.0
| LSTSLLN*RKQPFASEEMKLLILLLFISALIPRILIILSIFTSIRTPKNREKSSPYECGFDPNHSARTP
| 
| 

**Pairwise_output_file_DNA**
																				                  	              
| Save as *Galaxy{number}-[Pairwise_output_file_DNA].zip*																		      	
| If you unzip the file, several files appear (one per pairwise comparison): 25_DNAalignment_corresponding_to_protein_from_19_RBH_{name_of_pairwise}.fasta
| For example, the last 4 sequences of the file 25_DNAalignement_corresponding_to_protein_from_19_RBH_Pp_Ap.fasta
| 
| &gt;Ap123_1/1_1.000_748
| CCAGTAACAAGCCGCCACGGGTCCGTCGTGTCTTCTCTTCAAGGAAAGGTTGACAGATTCTCGTACGCTAGACGTCGCCACCTACTCGTCCTGGACTCCGGTGCCGTAGGTGGCGCCACCTGCTTTCATCACTTCCTGCCTA
| ACGTACTCCTCTTCTAGCTCCGATCTCTCGCTCGTCTGGATCTCTTTGGCAAAGTTCTGGAATATCTGCTTGACGAACGCCTTGTCCGTGCTGACTTTGGCGCGCTTCATTGGGCTCACTTCCGGTCCGACCCACGTGATCA
| TGGCGAACTTCGGTCTTCTGCTCATTTCGTCCCCGACGGTAATACGGACAAAGGCGAACGCCCGCTGGTCATCTTGTAGTTTTGATAACAGATCCTCGTATTCGGTTCCTGTAGAGTCCAGTATAATATTGTCGCCATCATA
| CGTCACAAACGCCCAGTTTGTCTCCGTCGCGTCGCTCCTGACGTCTTCGTAAGCCTGTCCGATAGCCTCTCTGTCGATGTCTGCCATGCTGCTGGTCCCGCTCTCGACGCTAATGAGCCAATCACGACTTCTGACAGACGAG
| TAGACATGCAGACAGCCAGACGGACTGACGGACTGACG
| &gt;Pp_146_1/2_1.000_713																						      
| CATTAATTGTGTGTCTGGTTGTGGGTGTGTGTTATAAGAGACATCACTTAGTGTATACTGATGTCCACGTGGTAGTTGACCAGCATGTCGAATATGGATAGGGACTCGATCTTGAATGGCTATGAGGAGGTTCGCAACGACGA
| CTCGGACATTAACTGGGCTTTCGTAACGTATTCACCTGACAACAAACTAGTACTTGATTCAACTGGCACAGACTACTTCCAGCTCCAGGAGAAATATCAAGATGATATGCGAGGATTTGCTTACATCCGGTGCACTAACGAGG
| AGAGTAAACGTTCTAAATTTGCCATGATTACCTGGATTGGACAAGGAGTGGAAGCAATGAAGCGTGCCAAGGTCAGCATGGACAAACAGTTCCTAAAGGAAATCTTCCAGAATTTCGCAAGAGAATTTCAGACGAGTGAAAAG
| TCAGAGCTTGATGAGGTCTGTATTAAACACGCGCTTGCCATTGACGATGGAGCTGGTTGCAAAGTGGAAAGCGAGGACACGAGAAAAGGGGCCTTTCTCAGGAAAGAGGATGACACTGAAGTGGAAAGGGAAACTAATGTCAA
| CAATGTCTCCGGTGTCGTGGAAGAAGATGATGACGCAAAAAATGCAAATGATTTTAATTACGAAGAGGACTGTAACAATGAATAGGTGCATGTCGATGATTTATATAGAGAACTAGACTTCGCACTCGCTAGGTGGTTGAT 
| &gt;Ap66_1/1_1.000_400
| TGATCGTCTTATAAACCTAACTTGAAAAACCTTCCTACCATTTAGGGCTAGCAGCCCTATTAATTATCACACCTATCGCAGCGCTCTCACTATAATTATAAGTATTGCGCCGGGTTTGAACGGATAGCTCTGATGCTGCTAATT
| ACGGGACCTAATAATCCCCAATACTTTATCCTTAGAGAGCTGTACCTCTTAGCACCAGTCTTTTAAACTGGCGAAAGCACACTTTATGCTTCTAAGGAATGAAACTAATTCTTATAATCCTACTAATCTCTTTTATCATCCCCG
| CCATTCTATTTTTACTCTCGATCTTTACTACTATGCGCATGCCAGAGAGCCGTGAAAAATTTAGGCCCTACGAGTGCGGGTTTGACCCCAATCACTCGGCCCGAACCCCATT
| &gt;Pp_201_2/2_1.000_691
| ATCGTAGGGAAAAAGGTGTTCGTGCAGAATGATTGGGGTCAAATCCACATTCGTAGGGGCTAGATTTTTCACGGTTTTTAGGTGTACGAATAGAGGTGAAGATTGATAGGATGATTAAAATTCTTGGGATTAATGCTGAAATAAA
| GAGAAGTAGGATTAAAAGTTTCATTTCCTCAGAAGCAAAGGGTTGCTTTCGTCAGTTTAAAAGACTGGTGCTAAGTAGGTACAGCTCTCTAAGGG

    </help>

    <!-- Citation entries come from the "citations" macro (expected to be defined in the imported macros.xml). -->
    <expand macro="citations" />

</tool>