Mercurial > repos > abims-sbr > pogs
diff POGs.xml @ 2:be2128ad0030 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 7e96bd705e9f8e81a04b9553aacddc61e4c2b5bf
| author | abims-sbr |
|---|---|
| date | Fri, 05 May 2017 11:01:05 -0400 |
| parents | b2895c835ea8 |
| children | b7bd93fc0ac9 |
line wrap: on
line diff
--- a/POGs.xml Thu Apr 13 09:47:11 2017 -0400 +++ b/POGs.xml Fri May 05 11:01:05 2017 -0400 @@ -9,16 +9,15 @@ </macros> <requirements> - <expand macro="python_required" /> - <!-- <requirement type="package" version="1.3.1">samtools</requirement> --> + <expand macro="python_required" /> </requirements> <command> <![CDATA[ - python $__tool_directory__/scripts/S01_get_locus_ortholog_part1.py ${zip_file} + python $__tool_directory__/scripts/S01_get_locus_orthologs_part1_v2.py ${zip_file} > ${output} && - python $__tool_directory__/scripts/S02_get_locus_ortholog_part2.py ${zip} + python $__tool_directory__/scripts/S02_get_locus_orthologs_part2_v2.py ${zip} ${minseq} ${paralogs} >> ${output}; ]]> </command> @@ -26,6 +25,11 @@ <inputs> <param name="zip" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="Contains the files filter after the tool oase" /> <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file " help="Contains homologous sequences in PROTEIC format, ONLY A ZIP FILE" /> + <param name="minseq" type="integer" value="3" label="Drop orthogroups with less than n species" /> + <param name="paralogs" type="select" label="Paralogs filtering" help="Yes : paralogs sequences will be (naively) filtered to keep only one sequence. No : the whole orthogroup will be removed." 
> + <option value="yes">Yes</option> + <option value="no">No</option> + </param> </inputs> <outputs> @@ -37,8 +41,17 @@ <test> <param name="zip" ftype="zip" value="filter_oase_results_for_pogs.zip" /> <param name="zip_file" ftype="zip" value="test_03_output_Pairwise_PROT_inputPOGS.zip" /> + <param name="minseq" value="2" /> + <param name="paralogs" value="no" /> <output name="output" value="test_03.out" /> </test> + <test> + <param name="zip" ftype="zip" value="filter_oase_results_for_pogs.zip" /> + <param name="zip_file" ftype="zip" value="test_03_output_Pairwise_PROT_inputPOGS.zip" /> + <param name="minseq" value="2" /> + <param name="paralogs" value="yes" /> + <output name="output" value="test_03b.out" /> + </test> </tests> <help> @@ -51,6 +64,7 @@ | | The script was written by **Eric Fontanillas**. | The wrapper was written by **Julie Baffard**. +| Last improvements (paralogous filtering and code factoring) made by Victor Mataigne. -------- @@ -92,112 +106,6 @@ The zip output has to be downloaded (and its files extracted with file archiver software); you cannot visualize them with the "eye icon" through the interface. --------- - -=============== -Working Example -=============== - ---------------- -The input files ---------------- - - -| 3 files with 200 nucleic sequences each : Ac.fasta Ap.fasta Pf.fasta. -| a zip file with 3 files containing the homologous sequences of each pairwise. 
-| - ----------------- -The output files ----------------- - -**POGs** - -| Number of locus = 56 -| -| NUMBER OF REMAINING LOCUS AFTER 1RST TREATMENT [INTRA LOCUS] = 55 - -NUMBER OF REMAINING LOCUS AFTER 2ND TREATMENT [INTER LOCUS] = 53 - -| 2_sp : [['>Ac111_5/5_0.688_714', '>Ap70_3/8_0.688_1127'], ['>Ac99_4/4_0.571_7188', '>Ap74_6/9_0.406_9467'], ['>Ac200_7/8_0.694_1836', '>Ap53_1/1_1.000_938'], -| ['>Ac181_6/10_0.432_8240', '>Ap103_3/10_0.420_9102'], ['>Ac4_2/2_1.000_230', '>Ap27_4/6_0.556_8404'], ['>Ac189_9/10_0.839_7194', '>Ap97_3/4_0.452_3023'], -| ['>Ac145_4/5_0.538_1334', '>Ap168_5/6_0.824_2007'], ['>Ac178_1/1_1.000_758', '>Ap202_3/3_0.905_2656'], ['>Ac100_1/2_1.000_2338', '>Ap77_5/8_0.405_866'], -| ['>Ac86_3/5_0.780_4131', '>Ap41_5/10_0.362_5218'], ['>Ac140_4/4_0.886_637', '>Ap62_1/3_0.667_482'], ['>Ac144_1/1_1.000_4240', '>Ap162_1/1_1.000_4925'], -| ['>Ac34_3/8_0.548_7600', '>Ap144_6/10_0.690_9671'], ['>Ac187_1/7_0.618_12870', '>Ap73_7/10_0.343_2424'], ['>Ac120_1/1_1.000_522', '>Pf74_1/1_1.000_506'], -| ['>Ac108_2/10_0.395_1982', '>Pf26_1/1_1.000_1717'], ['>Ac88_4/4_0.833_906', '>Pf102_1/1_1.000_1876'], ['>Ac202_1/1_1.000_247', '>Pf36_2/3_0.800_1482'], -| ['>Ac121_1/1_1.000_659', '>Pf7_1/1_1.000_2255'], ['>Ac129_1/5_0.636_1798', '>Pf100_2/4_0.625_1698'], ['>Ac172_1/1_1.000_348', '>Pf118_1/1_1.000_1108'], -| ['>Ac66_3/4_0.462_1465', '>Pf229_1/1_1.000_1541'], ['>Ac47_2/7_0.412_1703', '>Pf1_1/2_1.000_1890'], ['>Ac171_3/3_0.926_338', '>Pf4_1/1_1.000_1337'], -| ['>Ac194_1/5_0.875_275', '>Pf106_1/2_1.000_994'], ['>Ap2_1/2_1.000_264', '>Pf19_1/1_1.000_1879'], ['>Ap35_1/1_1.000_2505', '>Pf70_1/1_1.000_1462'], -| ['>Ap116_3/4_0.600_1380', '>Pf78_1/1_1.000_1409'], ['>Ap193_8/9_0.614_1707', '>Pf21_1/8_0.630_1314'], ['>Ap51_1/2_1.000_4292', '>Pf164_1/1_1.000_1021'], -| ['>Ap170_1/3_0.778_1303', '>Pf87_2/4_0.700_773'], ['>Ap186_6/10_0.521_4506', '>Pf72_1/1_1.000_2344'],['>Ap114_1/2_1.000_859', '>Pf192_1/1_1.000_957'], -| ['>Ap65_3/7_0.278_914', '>Pf94_1/1_1.000_1135'], 
['>Ap164_4/5_0.560_219', '>Pf163_4/4_0.556_866'], ['>Ap199_3/3_0.846_958', '>Pf58_1/3_0.667_1387'], -| ['>Ap183_7/8_0.326_2702', '>Pf166_1/2_1.000_2528'], ['>Ap89_3/3_0.905_211', '>Pf3_4/4_0.688_1776'], ['>Ap34_1/1_1.000_2528', '>Pf76_3/3_0.500_1714'], -| ['>Ap16_1/3_0.714_734', '>Pf139_1/1_1.000_508'], ['>Ap119_9/9_0.595_3402', '>Pf108_1/1_1.000_1932'], ['>Ap120_5/10_0.223_8988', '>Pf57_1/1_1.000_2461'], -| ['>Ap43_1/3_0.778_644', '>Pf2_1/1_1.000_582']] -| -| 3_sp : [['>Ac112_1/2_1.000_2711', '>Ap90_7/10_0.397_2604', '>Pf79_1/1_1.000_4515'], ['>Ac148_1/1_1.000_596', '>Ap32_1/1_1.000_664', '>Pf180_1/2_1.000_2336'], -| ['>Ac177_1/2_1.000_1664', '>Ap147_1/2_1.000_1728', '>Pf178_1/1_1.000_1671'], ['>Ac124_3/3_0.375_1572', '>Ap177_1/3_0.667_625', '>Pf115_1/1_1.000_557'], -| ['>Ac53_3/4_0.375_999', '>Ap40_1/1_1.000_1011', '>Pf201_1/1_1.000_908'], ['>Ac65_1/2_0.812_306', '>Ap59_7/10_0.227_2379', '>Pf32_3/3_0.818_1014'], -| ['>Ac190_7/10_0.319_2573', '>Ap166_7/10_0.279_1859', '>Pf13_7/7_0.514_1779'], ['>Ac1_1/2_1.000_6683', '>Ap55_1/9_1.000_267', '>Pf6_1/2_1.000_3095'], -| ['>Ac91_2/2_0.696_4298', '>Ap75_5/5_0.810_1434', '>Pf69_1/1_1.000_1712'], ['>Ac29_6/9_0.639_3877', '>Ap124_4/8_0.267_1557', '>Pf18_43/95_1.000_2422']] -| -| - -**POGs_locus_orthologs_unaligned** - -| Save as *Galaxy{number}-[POGs_locus_orthologs_unaligned].zip* -| If you unzip the file, a number of files are extracted (depends on the number of locus) : locus{nb}_sp{nb_of_species}.fasta -| For example the file locus1_sp2.fasta : -| -| >Ac111_5/5_0.688_714 -| ATCCGGAATGACCTTCGGCGGAGCCAGACACGCTGCTGACATCCGGGCAGACGACATGCACCAGTCGAGTGCCGGAGTGATCAACCTGCAGATGGGCACCAACCAGGGAGCGACCCAAAGCGGCATGTCCATGGGAGGTCGTCGGGACATCAGTT -| AAAAGGGGGCGTTTCACATTTCGTCCAGGATTCTGGAACTGTCCGGATCACAGTGCCACTATCCGTGTTTGATCAATTCAAATCGCTAACTTAAAACGGCCTGTATATAGCTACACTTCATTAACAAAACATTTAGCAATGCCTTATTCTATAACAAGCTACTG -| 
TATCAGCATAATTATGACTTTCCAATACTTCTTTTTAGCAGATCTTTAATTCTTATTCTTTCTTGTTCGCTGCGTACACTTTTTAACAATAACGTAACATTCTGTATTCATTGATCTATGCAGTTTTATATATTTGGAGTACTTGTTAACTTTAAATATCTGTTGAATGTTA -| CTTTTGAAATGTTGTCTCTAAATATTCGCATTAATTAACGTCGTGTTCTTCATCACAGTCAATGTTAACAGCAGCCATAGCACTTTAATTTATTTTGTTACTTTGTTTATTACGCATGTTTCTAAAATTATTCTTTCATTAAAGCTATTGTTCGTTGCACGTTGATAAGT - -ATTGTTGTTGCGTTTCGTGATTCTATACATAATATATTCAAAACAATTAAAAA - -| >Ap70_3/8_0.688_1127 -| GCAGGCCGGACCAACAAGGGCGCTAGCCAATCCGGAATGACCTTCGGCGGAGCCAGACACGCTGCTGACATCCGGGCAGACGACATGCACCAGTCGAGTGCCGGAGTGATCAACCTACAGATGGGCACCAACCAGGGGGCGTCCCAAAGCGGCA -| TGTCCATGGGAGGCCGTCGGGACATCAGTTAAAAGGGGGCGTTTCACATTTCGTGCCAGCGTTCTGGAACTGTCCGGATCACAGTGCCACCATCCGAGTGTTTGGTCAATTCAAATCGCTAACTTAAAACGGCCTGTATATAGCTACATTTCATTAACAAA -| ACATTTAGCGATGCCTTGTTCTATAACAAATTATTGTATCAGCATAATTATTACTTTCCAGTACTTCTTTATAGCAGATCTTTAATTATTGTTCTTTCTTGTTCGCTGCGTACTTTTTAAAAAGAATACAACATTCTGTATTCATTGATCTATGCAGTTTTATATATTTGGA - -GTACTTGTTAACTTTAAATATATGTTAAATGTTACTTTTGAAATGTTGTCTCTAAATATTCGCATTAATTAACGTCGTGCTCTTCATCACAGTCAAAGTTAACATTTAACATATATTTAAAGTTAACAAGTACTCCAAATATATAAAACTGCATAGATCAATGAATACAGA -ATGTTGTATTCTTTTTAAAAAGTACGCAGCGAACAAGAAAGAACAATAATTAAAGATCTGCTATAAAGAAGTACTGGAAAGTAATAATTATGCTGATACAATAATTTGTTATAGAACAAGGCATCGCTAAATGTTTTGTTAATGAAATGTAGCTATATACAGGCCGTTT -TAAGTTAGCGATTTGAATTGACCAAACACTCGGATGGTGGCACTGTGATCCGGACAGTTCCAGAACGCTGGCACGAAATGTGAAACGCCCCCTTTTAACTGATGTCCCGACGGCCTCCCATGGACATGCCGCTTTGGGACGCCCCCTGGTTGGTGCCCA -TCTGTAGGTTGATCACTCCGGCACTCGACTGGTGCATGTCGTCTGCCCGGATGTCAGCAGCGTGTCTGGCTCCGCCGAAGGTCATTCCGGATTGGCTAGCGCCCTTGTTGGATCCAGCTTGCAGACCGATACGGCCTGGCC - -| -| An other example, the file locus_3_sp_5.fasta -| - ->Ac53_3/4_0.375_999 -CCGATCTTGTCTGGTGTTTTGCTGCCCCCTAGCGAGTGACGACAAACTCGTTGGTACCTTGCTTAGCGCGATAGAAGGGTTGAACATGTTGGCTGCTGGGCTAAGAACTCTAAAAAGCCTTGCCCCTCGGGGTTGCGTAGCGTGGTCGTGTACTTCGGT -GCATGCCAAGCATACCCTACCAGACTTACCATATGATTACAATGCCCTGGAGCCACACATCAGTGCTGAAATCATGCTGCTGCATCACACCAAGCATCACCAGACGTATGTCAACAACCTGAATGTTGCAGAGGAGAAGTTTCATGAGGCTACAGAGAAA 
-GGTGATGTAACCACAGCAGTATCACTGATGCCAGCCCTAAGATTTAATGGTGGTGGACACATCAACCATACTATATTTTGGAAGAACATGTCACCAAATGGTGGTGGAGAGCCATCTGGCGAACTGATGGAGGCCATCAAACGTGACTTTGGCTCATTTGAA -AACATGAAGAACATGTTGAGTACATCAACCACTGCAGTGCAAGGCTCTGGCTGGGGATGGCTTGGGTACAATAAAAAGATGAAGAAACTAGAAATTGCCACCTGTGCCAACCAAGATCCATTAGAGGGCACTACAGGTCTCGTCCCGTTGTTTGGCATAG -ATGTCTGGGAACATGCCTATTACTTGCAGTACAAGAATGTTCGTCCAGACTATGTAAAGGCTATTTGGAATGTGGCCAACTGGGATGACATCATGGAACGTTACAACAATGCCAGAAAATAAACTGTTAAACAAATAATTAATATATTAATGTGTTGCAATTTTTG -TCAATTGGTACATACACAATTTTGTTCATAAAGAAAATTGTGATTACTTTCTGGCAACTAGTTCCCAGTGAATACAGCAATTGTTCTGCCAAGGATGCATTTGGATTGAGAAGGCACCCAGAATGGGTTATCCGTGAATGTCTATTGGAATGTGGCACCATACG -ATGTTTACTGTATTAGTTACAATTAAAA - -| >Ap40_1/1_1.000_1011 -| CTGATGTGTGGCCCCCTAGCGAGTGAAGACAAACTCGATAGCACCCAGACAGTTCTGTTGGTTAGATAAAAGGGAGAAACATGCTGGCTGCTGGGCTAAGAACTCTAAAAAGCCTTGCTCCTCGTGGTGGTCTAGCTTGGTCTTGTACCTCGGTACATGC -| CAAACACACACTGCCAGACTTGCCGTATGATTATAATGCTCTGGAGCCACACATCAGTGCTGAAATCATGTTGCTGCATCACACAAAACATCACCAGACGTATGTGAACAACCTGAATATTGCAGAGGAGAAGTTTCATGAGGCTACCGAGAAAGGCGATG -| TGACCACAGCAGTATCACTGATGCCAGCCCTAAGATTTAATGGTGGTGGACATATCAACCATACTATATTTTGGAAGAACATGTCACCAAATGGTGGTGGAGAACCATCTGGCGAACTGATGGAGGCCATCAAACGTGACTTTGGTTCATTTGAAAACATGA -| AGAACATGCTGAGTACAGCAACCACTGCAGTACAAGGCTCTGGCTGGGGATGGCTTGGGTACAATAAAAAGATGAAGAAACTAGAAATTGCCACCTGTGCCAACCAAGATCCATTAGAGGGCACTACAGGTCTTGTCCCACTGTTTGGTATCGATGTCTG -| GGAACATGCCTATTATTTACAGTACAAGAATGTTCGTCCAGACTATGTAAAGGCTATTTGGAATGTGGCTAACTGGGATGATATCATGGAGCGTTACAACAATGCCAGAAAATAAACTTAAATACATCATTATTTAGTTAATGTGCGACTTTTGTTTGTTAATCAGT -| TCACACACCATTCTATTCACAAAGAAAATGGTGTATTTGCTTTCTGTTCAACTGGTTCCCGGTGAATACAGCAGTTGTTCTGCCAAGGATGTATTTGGATTGAGAAGGCACCAAGAATGGGCTGTCACTGAATGTCAATTGGAATGTAGCCTCAATGTTTACT - -GTATTACCTACAATTAAAATGATTATGATATAACCAAG - -| >Pf201_1/1_1.000_908 -| CTTCTGGAACGATGTTGGCTGGGTATAGAACTTTGGCGAGGGTTATCCCACGTGGTGTCAGCTCCGCTTGGGCAAGTACAATAGTACATAACAAGCACACCTTGCCAGATTTACCATATGATTATAATGCCTTGGAACCACACATCAGTGCTGAAATAATGC - 
-TCCTTCATCATACAAAGCATCACCAGACATATGTGAACAATCTGAATGTAGCTGAAGAAAAGTTTCATGAAGCCACGGAGAAAGGTGATGTCACTACAGCTGTGTCGCTAATGCCAGCACTAAGATTTAATGGCGGAGGACACATCAATCACACCATTTTCT -GGAAGAACATGTCTCCTAATGGCGGAGGAGAGCCTTCTGGCGAGTTGATGGAAGCCATTAAACGTGATTTTGGTTCATTTGAGAATATGAAAAACATGTTAAGTACAGCTACAACAGCTGTCCAAGGATCTGGCTGGGGATGGCTTGGTTATAACAAAAAG -ATGAAAAAGCTCGAGATAGCCACTTGTGCCAACCAGGATCCACTGGAAGGAACAACAGGATTAATTCCACTGTTTGGTATTGACGTCTGGGAGCATGCTTACTATCTGCAATATAAAAATGTACGTCCAGATTATGTTAAAGCTATCTGGAATGTGGCCAACT -GGGATGATATTACAGAGCGCTACAACAATGCGAAGAAATAGATTTGCTGGGACATATGAATAGTGCTTGGCAGAGCCTAATGTTGTGTTTATTTCTGTGTTTCTTATCAATCAGTTGATGTATTAGCCCGGTGTATTGACATGAAGAATTGGACAATGATTTCAAA -TACATTTGAACAATAAATGTATCATGATCAAGCATTATTTTGTTAGTTGTCTCCTCCACTGAGAAGACCAAGTTTAATAAACAACAAACACAGA </help> <expand macro="citations" />
