Mercurial > repos > abims-sbr > pogs
diff POGs.xml @ 5:dad7053ba20e draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
| author | abims-sbr |
|---|---|
| date | Wed, 27 Sep 2017 10:02:20 -0400 |
| parents | b7bd93fc0ac9 |
| children | b19ed7395dcc |
line wrap: on
line diff
--- a/POGs.xml Mon May 29 07:45:07 2017 -0400 +++ b/POGs.xml Wed Sep 27 10:02:20 2017 -0400 @@ -1,30 +1,44 @@ -<tool name="POGs" id="POGs" version="1.0"> +<tool name="POGs" id="POGs" version="2.0"> <description> Find orthologous groups </description> - + <macros> <import>macros.xml</import> </macros> <requirements> - <expand macro="python_required" /> + <expand macro="python_required" /> </requirements> - + <command> <![CDATA[ - python $__tool_directory__/scripts/S01_get_locus_orthologs_part1_v2.py ${zip_file} - > ${output} && + #set $infiles_from_filter_assembly = "" + #for $input_from_filter_assembly in $inputs_from_filter_assembly + ln -s '$input_from_filter_assembly' '$input_from_filter_assembly.element_identifier'; + #set $infiles_from_filter_assembly = $infiles_from_filter_assembly + $input_from_filter_assembly.element_identifier + "," + #end for + #set $infiles_from_filter_assembly = $infiles_from_filter_assembly[:-1] - python $__tool_directory__/scripts/S02_get_locus_orthologs_part2_v2.py ${zip} ${minseq} ${paralogs} - >> ${output}; + #set $infiles_from_paiwise_prot = "" + #for $input_from_paiwise_prot in $inputs_from_paiwise_prot + ln -s '$input_from_paiwise_prot' '$input_from_paiwise_prot.element_identifier'; + #set $infiles_from_paiwise_prot = $infiles_from_paiwise_prot + $input_from_paiwise_prot.element_identifier + "," + #end for + #set $infiles_from_paiwise_prot = $infiles_from_paiwise_prot[:-1] + + python '$__tool_directory__/scripts/S01_get_locus_orthologs_part1.py' '$infiles_from_paiwise_prot' + > ${log} && + + python '$__tool_directory__/scripts/S02_get_locus_orthologs_part2.py' '$infiles_from_filter_assembly' $minseq $paralogs + >> ${log}; ]]> </command> <inputs> - <param name="zip" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="Contains the files filter after the tool oase" /> - <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file " help="Contains homologous sequences in PROTEIC format, ONLY A ZIP FILE" /> + <param name="inputs_from_filter_assembly" type="data" format="fasta" multiple="true" label="Input files from Filter assemblies" /> + <param name="inputs_from_paiwise_prot" type="data" format="fasta" multiple="true" label="Input files from Pairwise Prot" /> <param name="minseq" type="integer" value="3" label="Drop orthogroups with less than n species" /> <param name="paralogs" type="select" label="Paralogs savage removal" help="Yes : orthogroups with paralogs will be fully removed. No : paralogs sequences will be (naively) filtered to keep only one sequence." > <option value="yes">Yes</option> @@ -33,51 +47,73 @@ </inputs> <outputs> - <data format="txt" name="output" label="POGs" /> - <data format="no_unzip.zip" name="output_zip" label="POGs_locus_orthologs_unaligned" from_work_dir="POGs_locus_orthologs_unaligned.zip" /> + <data format="txt" name="log" label="POGs" /> + <collection name="output" type="list" label="POGs locus orthologs unaligned"> + <discover_datasets pattern="__name_and_ext__" directory="outputs" /> + </collection> </outputs> <tests> <test> - <param name="zip" ftype="zip" value="outputfilter.zip" /> - <param name="zip_file" ftype="zip" value="outputpairwise.zip" /> + <param name="inputs_from_filter_assembly" ftype="fasta" value="inputs_from_filter_assembly/PfPfiji_Trinity.fasta,inputs_from_filter_assembly/ApApomp_Trinity.fasta,inputs_from_filter_assembly/AmAmphi_Trinity.fasta,inputs_from_filter_assembly/AcAcaud_Trinity.fasta" /> + <param name="inputs_from_paiwise_prot" ftype="fasta" value="inputs_from_paiwise_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" /> <param name="minseq" value="2" /> <param name="paralogs" value="no" /> - <output name="output" value="test_03.out" /> + <output name="log" value="pogs.output" /> + <output_collection name="output" type="list"> + <element name="locus1_sp2" value="outputs_pogs_no/locus1_sp2.fasta" /> + <element name="locus1_sp3" value="outputs_pogs_no/locus1_sp3.fasta" /> + <element name="locus2_sp2" value="outputs_pogs_no/locus2_sp2.fasta" /> + <element name="locus3_sp2" value="outputs_pogs_no/locus3_sp2.fasta" /> + <element name="locus4_sp2" value="outputs_pogs_no/locus4_sp2.fasta" /> + <element name="locus5_sp2" value="outputs_pogs_no/locus5_sp2.fasta" /> + <element name="locus6_sp2" value="outputs_pogs_no/locus6_sp2.fasta" /> + <element name="locus7_sp2" value="outputs_pogs_no/locus7_sp2.fasta" /> + <element name="locus8_sp2" value="outputs_pogs_no/locus8_sp2.fasta" /> + <element name="locus9_sp2" value="outputs_pogs_no/locus9_sp2.fasta" /> + <element name="locus10_sp2" value="outputs_pogs_no/locus10_sp2.fasta" /> + </output_collection> </test> <test> - <param name="zip" ftype="zip" value="outputfilter.zip" /> - <param name="zip_file" ftype="zip" value="outputpairwise.zip" /> + <param name="inputs_from_filter_assembly" ftype="fasta" value="inputs_from_filter_assembly/PfPfiji_Trinity.fasta,inputs_from_filter_assembly/ApApomp_Trinity.fasta,inputs_from_filter_assembly/AmAmphi_Trinity.fasta,inputs_from_filter_assembly/AcAcaud_Trinity.fasta" /> + <param name="inputs_from_paiwise_prot" ftype="fasta" value="inputs_from_paiwise_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta,inputs_from_paiwise_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" /> <param name="minseq" value="2" /> <param name="paralogs" value="yes" /> - <output name="output" value="test_03b.out" /> - </test> + <output name="log" value="pogs_para.output" /> + <output_collection name="output" type="list"> + <element name="locus1_sp2" value="outputs_pogs_yes/locus1_sp2.fasta" /> + <element name="locus1_sp3" value="outputs_pogs_yes/locus1_sp3.fasta" /> + <element name="locus2_sp2" value="outputs_pogs_yes/locus2_sp2.fasta" /> + <element name="locus3_sp2" value="outputs_pogs_yes/locus3_sp2.fasta" /> + <element name="locus4_sp2" value="outputs_pogs_yes/locus4_sp2.fasta" /> + <element name="locus5_sp2" value="outputs_pogs_yes/locus5_sp2.fasta" /> + <element name="locus6_sp2" value="outputs_pogs_yes/locus6_sp2.fasta" /> + <element name="locus7_sp2" value="outputs_pogs_yes/locus7_sp2.fasta" /> + <element name="locus8_sp2" value="outputs_pogs_yes/locus8_sp2.fasta" /> + <element name="locus9_sp2" value="outputs_pogs_yes/locus9_sp2.fasta" /> + <element name="locus10_sp2" value="outputs_pogs_yes/locus10_sp2.fasta" /> + </output_collection> + </test> </tests> <help> + +@HELP_AUTHORS@ + ============ What it does ============ -| This tool takes a zip archive containing nucleic fasta sequence files and a zip archive containing a file per pairwise with the homologous sequences (in proteic format). It searches for orthologous sequences. +| This tool parses homologous sequences obtained by pairwise and gather sequences into groups of orthologous sequences. | There are 2 outputs. -| -| The script was written by **Eric Fontanillas**. -| The wrapper was written by **Julie Baffard**. -| Last improvments (paralogous filtering and code factoring) made by Victor Mataigne. - -------- ====== Inputs ====== - -option **Select a zip file containing the input files** : - -| the input zip file must have the extension .ort.zip -| At the beginning, when you upload your input, you have to change the extension .zip to .ort.zip - +| A dataset collection with the the pairwise with the homologous sequences (obtained from the tool "Pairwise"). +| A dataset collection with the fasta sequences of each species (obtained from the tool "Filter_Assemblies") -------- @@ -89,22 +125,81 @@ **POGs** : -| is the general output. It gives different informations : number of locus, each locus, ... -| it gives for example, all the locus containing 2 species : -| 2_sp : [[name_of_sequence,name_of_squence],...,[name_of_sequence, name_of_sequence]] -| it's the same for locus containing 3 species, 4 species, ... -| +| is the general output. It gives the number of sequences at each filtering step and then count +| the number of groups. **POGs_locus_orthologs_unaligned** : -| is the output (in zip format) which contains each locus. -| one file corresponding to one locus. +| is the output which contains the groups of orthologous sequences, +| one file corresponding to one group. + +the sequences of each group are in nucleic format. + + +=============== +Working Example +=============== + +--------------- +The input files +--------------- + +| 4 simulated files with a few nucleic sequences each : Ac.fasta Am.fasta Ap.fasta Pf.fasta. +| 6 files containing the homologous sequences of each pairwise. + +---------------- +The output files +---------------- + +**POGs** + + +| Number of locus before removeRedondancy = 11 + + +| Number of locus = 11 + + +| NUMBER OF REMAINING LOCUS AFTER INTRA LOCUS TREATMENT [REMOVE GROUPS WITH PARALOGS] = 11 +| NUMBER OF REMAINING LOCUS AFTER 2ND TREATMENT [INTER LOCUS] = 11 + -the sequences of each locus are in nucleic format. +| REMOVAL OF LOCUS WITH LESS THAN 2 SEQUENCES + +| Number of species in the locus : 4 + Number of locus : 0 + +| Number of species in the locus : 3 + Number of locus : 1 + +| Number of species in the locus : 2 + Number of locus : 10 + +**POGs_locus_orthologs_unaligned** -.. class:: warningmark +| For example the file locus1_sp2.fasta : +| >Ac7_1/1_1.000_160 +| GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGGCATGTGGTACCATTGGGAACCGTCAAGCGTTTCCTCAGCCCTGTGGCAGCTGCCCGTCTGCGCCCGTGTTTGACCTTGAGCACCAAG +| >Am3_1/1_1.000_160 + GCACCTAGAATTACCCGAAGCTTCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGGTGGAAGAAGGCATGTGGTACCATTGGGGTGGTACAAGCGTTTCCTCAGCCCTGTGGCAGCTGCCCGTCTGCGGAGCAGTTTGACCTTGAGCACCAAG +--------------------------------------------------- + +Changelog +--------- + +**Version 2.0 - 11/07/2017** -The zip output has to be downloaded (and extracts the files with a file archiver software), you cannot visualize them with the "eye icon" through the interface. + - Replace the zip between tools by Dataset Collection + - Add option : paralogs filtering + - Added a second functional test (to test the new option) + - Code factoring + - Removed zip format of input and output files ; replaced by dataset collections + + +**Version 1.0 - 13/04/2017** + + - TEST: Add funtional test with planemo + - IMPROVEMENT: Use conda dependencies for python </help>
