Mercurial > repos > abims-sbr > cds_search
changeset 2:0d2f72caea10 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
line wrap: on
line diff
--- a/CDS_search.xml Thu Apr 13 09:47:57 2017 -0400 +++ b/CDS_search.xml Wed Sep 27 10:03:05 2017 -0400 @@ -1,6 +1,4 @@ -<?xml version="1.0"?> - -<tool name="CDS_search" id="cds_search" version="1.0"> +<tool name="CDS_search" id="cds_search" version="2.0"> <description> ORF and CDS search @@ -11,82 +9,65 @@ </macros> <requirements> - <expand macro="python_required" /> + <expand macro="python_required" /> </requirements> - <command> - <![CDATA[ - python $__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py ${input_file} $__tool_directory__/scripts/code_universel_modified.txt - #if $lenght.lenght_CDS == "yes" : - ${lenght.min_lenght_seq} - #else : - 50 - #end if - >${output}; + <command><![CDATA[ + #set $infiles = "" + #for $input in $inputs + ln -s '$input' '$input.element_identifier'; + #set $infiles = $infiles + $input.element_identifier + "," + #end for + #set $infiles = $infiles[:-1] - python $__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py ${nb_species_keep} - #if $methionine.value == "yes" : - oui - #else : - non - #end if - #if $lenght.lenght_CDS == "yes" : - ${lenght.min_lenght_seq} ${lenght.min_lenght_subseq} - #else : - 50 15 - #end if - >>${output}; + python $__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py + $infiles + $__tool_directory__/scripts/code_universel_modified.txt + $length.min_length_seq + > '$log' && - python $__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py ${nb_species_keep} - #if $lenght.lenght_CDS == "yes" : - ${lenght.min_lenght_nuc} - #else : - 50 - #end if - >>${output}; - ]]> - </command> + python $__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py + $nb_species_keep + $methionine + $length.min_length_seq + $length.min_length_subseq + >> '$log' && + + python $__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py + $nb_species_keep + $length.min_length_nuc + >> '$log'; + ]]></command> <inputs> - <param 
name="input_file" type="data" format="no_unzip.zip,zip" label="Choose your file" help="Only a fasta file with nucleic align sequences" /> + <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" help="Only a fasta file with nucleic align sequences" /> <!-- <param name="code_file" type="data" format="txt" label="Choose your file containing the universal code (codons and their amino acids)" /> --> <param name="nb_species_keep" type="integer" value="10" min="2" label="Minimal number of species in each locus" help="If you want to remove all the indels the maximum number of species is required" /> - <param name="methionine" type="select" label="Do you want to consider the Methionine in the search of CDS ? "> - <option value="yes">Yes</option> - <option value="no">No</option> - </param> + <param name="methionine" type="boolean" checked="true" truevalue="oui" falsevalue="non" label="Do you want to consider the Methionine in the search of CDS? " /> - <conditional name="lenght"> - <param name="lenght_CDS" type="select" label="Do you want to choose the minimum length oh the CDS ? 
"> - <option value="no">No</option> - <option value="yes">Yes</option> - </param> - <when value="yes"> - <param name="min_lenght_seq" type="integer" value="50" min="0" label="Minimal lenght of the CDS, in proteic" help="By default it's 50" /> - <param name="min_lenght_subseq" type="integer" value="15" min="0" label="Minimal lenght of the subsequence, in proteic between two series of indels" help="By default it's 15" /> - <param name="min_lenght_nuc" type="integer" value="50" min="0" label="Minimal lenght of the CDS, in nucleic without the indel" help="By default it's 50" /> - </when> - <when value="no"> - </when> - </conditional> + <section name="length" title="Do you want to choose the minimum length of the CDS?"> + <param name="min_length_seq" type="integer" value="50" min="0" label="Minimal length of the CDS, in proteic" help="By default it's 50" /> + <param name="min_length_subseq" type="integer" value="15" min="0" label="Minimal length of the subsequence, in proteic between two series of indels" help="By default it's 15" /> + <param name="min_length_nuc" type="integer" value="50" min="0" label="Minimal length of the CDS, in nucleic without the indel" help="By default it's 50" /> + </section> - <param name="out_BESTORF" type="select" label="Do you want the output zip containing files with the BEST ORF ? "> + <param name="out_BESTORF" type="select" label="Do you want the outputs (dataset collection list) containing files with the BEST ORF? "> <option value="no">No</option> <option value="aa">Yes, with the proteic format</option> <option value="nuc">Yes, with the nucleic format</option> <option value="both">Yes, with the proteic and nucleic format</option> </param> - <param name="out_CDS" type="select" label="Do you want the output zip containing files with CDS ? "> + <param name="out_CDS" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS? 
"> <option value="no">No</option> <option value="aa">Yes, with the proteic format</option> <option value="nuc">Yes, with the nucleic format</option> <option value="both">Yes, with the proteic and nucleic format</option> </param> - <param name="out_CDS_filter" type="select" label="Do you want the output zip containing files with CDS without indel ? "> + <param name="out_CDS_filter" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS without indel? "> <option value="no">No</option> <option value="aa">Yes, with the proteic format</option> <option value="nuc">Yes, with the nucleic format</option> @@ -95,63 +76,122 @@ </inputs> <outputs> - <data format="txt" name="output" label="ORF_Search" /> - - <data format="no_unzip.zip" name="output_BESTORF_aa" label="ORF_Search_Best_ORF_aa" from_work_dir="ORF_Search_bestORF_aa.zip"> - <filter>out_BESTORF == "aa" or out_BESTORF == "both"</filter> - </data> + <data format="txt" name="log" label="ORF_Search" /> + <collection name="output_BESTORF_aa" type="list" label="ORF_Search_Best_ORF_aa"> + <filter>out_BESTORF in ["aa","both"]</filter> + <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_aa" /> + </collection> - <data format="no_unzip.zip" name="output_BESTORF_nuc" label="ORF_Search_Best_ORF_nuc" from_work_dir="ORF_Search_bestORF_nuc.zip"> - <filter>out_BESTORF == "nuc" or out_BESTORF == "both"</filter> - </data> + <collection name="output_BESTORF_nuc" type="list" label="ORF_Search_Best_ORF_nuc"> + <filter>out_BESTORF in ["nuc","both"]</filter> + <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_nuc" /> + </collection> - <data format="no_unzip.zip" name="output_CDS_aa" label="ORF_Search_CDS_aa" from_work_dir="ORF_Search_CDS_aa.zip"> - <filter>(out_CDS == "aa" and methionine == "no") or (out_CDS == "both" and methionine == "no")</filter> - </data> + <collection name="output_CDS_aa" type="list" label="ORF_Search_CDS_aa"> + <filter>out_CDS in 
["aa","both"] and not methionine</filter> + <discover_datasets pattern="__name_and_ext__" directory="05_CDS_aa" /> + </collection> - <data format="no_unzip.zip" name="output_CDS_nuc" label="ORF_Search_CDS_nuc" from_work_dir="ORF_Search_CDS_nuc.zip"> - <filter>(out_CDS == "nuc" and methionine == "no") or (out_CDS == "both" and methionine == "no")</filter> - </data> + <collection name="output_CDS_nuc" type="list" label="ORF_Search_CDS_nuc"> + <filter>out_CDS in ["nuc","both"] and not methionine</filter> + <discover_datasets pattern="__name_and_ext__" directory="05_CDS_nuc" /> + </collection> - <data format="no_unzip.zip" name="output_CDS_M_aa" label="ORF_Search_CDS_with_M_aa" from_work_dir="ORF_Search_CDSM_aa.zip"> - <filter>(out_CDS == "aa" and methionine == "yes") or (out_CDS == "both" and methionine == "yes")</filter> - </data> + <collection name="output_CDS_M_aa" type="list" label="ORF_Search_CDS_with_M_aa"> + <filter>(out_CDS == "aa" and methionine) or (out_CDS == "both" and methionine)</filter> + <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_aa" /> + </collection> - <data format="no_unzip.zip" name="output_CDS_M_nuc" label="ORF_Search_CDS_with_M_nuc" from_work_dir="ORF_Search_CDSM_nuc.zip"> - <filter>(out_BESTORF == "nuc" and methionine == "yes") or (out_CDS == "both" and methionine == "yes")</filter> - </data> + <collection name="output_CDS_M_nuc" type="list" label="ORF_Search_CDS_with_M_nuc"> + <filter>(out_CDS == "nuc" and methionine) or (out_CDS == "both" and methionine)</filter> + <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_nuc" /> + </collection> - <data format="no_unzip.zip" name="output_filter_aa" label="ORF_Search_CDS_without_indel_aa" from_work_dir="ORF_Search_CDS_without_indel_aa.zip"> - <filter>out_CDS_filter == "aa" or out_CDS_filter == "both"</filter> - </data> + <collection name="output_filter_aa" type="list" label="ORF_Search_CDS_without_indel_aa"> + <filter>out_CDS_filter in 
["aa","both"]</filter> + <discover_datasets pattern="__name_and_ext__" directory="08_CDS_aa_MINIMUM_MISSING_SEQUENCES" /> + </collection> - <data format="no_unzip.zip" name="output_filter_nuc" label="ORF_Search_CDS_without_indel_nuc" from_work_dir="ORF_Search_CDS_without_indel_nuc.zip"> - <filter>out_CDS_filter == "nuc" or out_CDS_filter == "both"</filter> - </data> + <collection name="output_filter_nuc" type="list" label="ORF_Search_CDS_without_indel_nuc"> + <filter>out_CDS_filter in ["nuc","both"]</filter> + <discover_datasets pattern="__name_and_ext__" directory="08_CDS_nuc_MINIMUM_MISSING_SEQUENCES" /> + </collection> </outputs> <tests> <test> - <param name="input_file" ftype="zip" value="test_05_input_CDS_Search.no_unzip.zip" /> + <param name="inputs" ftype="fasta" value="inputs/locus1_sp5.fasta,inputs/locus1_sp7.fasta,inputs/locus2_sp4.fasta,inputs/locus2_sp6.fasta" /> + <param name="nb_species_keep" value="2" /> + <param name="methionine" value="no" /> + <section name="length"> + <param name="min_length_seq" value="50" /> + <param name="min_length_subseq" value="15" /> + <param name="min_length_nuc" value="50" /> + </section> + <param name="out_BESTORF" value="both" /> + <param name="out_CDS" value="both" /> + <param name="out_CDS_filter" value="both" /> + <output name="log" value="cds_search.log" /> + <output_collection name="output_BESTORF_aa" type="list"> + <element name="locus1_sp7" value="outputs_ORF_Search_Best_ORF_aa/locus1_sp7.fasta" /> + <element name="locus2_sp6" value="outputs_ORF_Search_Best_ORF_aa/locus2_sp6.fasta" /> + </output_collection> + <output_collection name="output_BESTORF_nuc" type="list"> + <element name="locus1_sp7" value="outputs_ORF_Search_Best_ORF_nuc/locus1_sp7.fasta" /> + <element name="locus2_sp6" value="outputs_ORF_Search_Best_ORF_nuc/locus2_sp6.fasta" /> + </output_collection> + <output_collection name="output_CDS_aa" type="list"> + <element name="locus1_sp7" value="outputs_ORF_Search_CDS_aa/locus1_sp7.fasta" /> + <element 
name="locus2_sp6" value="outputs_ORF_Search_CDS_aa/locus2_sp6.fasta" /> + </output_collection> + <output_collection name="output_CDS_nuc" type="list"> + <element name="locus1_sp7" value="outputs_ORF_Search_CDS_nuc/locus1_sp7.fasta" /> + <element name="locus2_sp6" value="outputs_ORF_Search_CDS_nuc/locus2_sp6.fasta" /> + </output_collection> + <output_collection name="output_filter_aa" type="list"> + <element name="locus2_sp4_sp4" value="outputs_ORF_Search_CDS_without_indel_aa/locus2_sp4_sp4.fasta" /> + </output_collection> + <output_collection name="output_filter_nuc" type="list"> + <element name="locus2_sp4_sp4" value="outputs_ORF_Search_CDS_without_indel_nuc/locus2_sp4_sp4.fasta" /> + </output_collection> + </test> + <test> + <param name="inputs" ftype="fasta" value="inputs/locus2_sp6.fasta" /> <param name="nb_species_keep" value="2" /> <param name="methionine" value="yes" /> - <param name="lenght_CDS" value="no" /> - <param name="out_BESTORF" value="no" /> - <param name="out_CDS" value="no" /> - <param name="out_CDS_filter" value="nuc" /> - <output name="output" value="test_08.out" /> + <section name="length"> + <param name="min_length_seq" value="50" /> + <param name="min_length_subseq" value="15" /> + <param name="min_length_nuc" value="50" /> + </section> + <param name="out_BESTORF" value="both" /> + <param name="out_CDS" value="both" /> + <param name="out_CDS_filter" value="both" /> + <output name="log" value="cds_search_methionine.log" /> + <output_collection name="output_BESTORF_aa" type="list"> + <element name="locus2_sp6" value="outputs_ORF_Search_Best_ORF_aa/locus2_sp6.fasta" /> + </output_collection> + <output_collection name="output_BESTORF_nuc" type="list"> + <element name="locus2_sp6" value="outputs_ORF_Search_Best_ORF_nuc/locus2_sp6.fasta" /> + </output_collection> + <output_collection name="output_CDS_M_aa" type="list"> + <element name="locus2_sp6" value="outputs_ORF_Search_CDS_with_M_aa/locus2_sp6.fasta" /> + </output_collection> + 
<output_collection name="output_CDS_M_nuc" type="list"> + <element name="locus2_sp6" value="outputs_ORF_Search_CDS_with_M_nuc/locus2_sp6.fasta" /> + </output_collection> </test> </tests> + <help> - <help> +@HELP_AUTHORS@ + ============ What it does ============ -| This tool takes **zip file containing fasta files with nucleic aligned sequences** and search the ORF and the CDS -| -| The script in python was written by **Eric Fontanillas**. -| The wrapper was written by **Julie Baffard**. +| This tool takes **'dataset collection list' containing fasta files with nucleic aligned sequences** and search the ORF and the CDS +| -------- @@ -162,7 +202,7 @@ The choice of several parameters is possible. **min_length_seq** - | minimal length of the sequence in the proteic format + | minimal length of the sequence in the proteic format | when the removal of the indel is done, the minimal length equals :previous length less20 | for example if you choose 50 for the minimal length, the actual length equals 30 | @@ -171,11 +211,11 @@ | minimal length of the subsequence in the proteic format | subsequence means the part of the original sequence between 2 sets of indels | an indel set is composed by more than 2 indels, if not the set is considered as unknown amino acid - | + | **min_length_nuc** | Minimal length of the sequence in the nucleic format - | + | -------- @@ -213,48 +253,45 @@ **ORF_Search** | is the output with important informations (mainly statistics about the tools). - | + | **ORF_Search_Best_ORF_aa** | is the output with the best ORF in the proteic format. - | + | **ORF_Search_Best_ORF_nuc** | is the output with the best ORF in the nucleic format. - | + | **ORF_Search_CDS_aa** | is the output with the CDS (regardless the Methionine) in the proteic format. - | + | **ORF_Search_CDS_nuc** | is the output with the CDS (regardless the Methionine) in the nucleic format. 
- | + | **ORF_Search_CDS_with_M_aa** | is the output with the CDS (considering the Methionine) in proteic format. - | the rule : they must have a methionine before the minimale length of the sequence. + | the rule : they must have a methionine before the minimale length of the sequence. | for example before the 30 last amino acid. - | + | **ORF_Search_CDS_with_M_nuc** | is the output with the CDS (considering the Methionine) in nucleic format. | the rule : they must have a methionine before the minimale length of the sequence. | for example before the 30 last amino acid. - | + | **ORF_Search_CDS_without_indel_aa** | is the output with the CDS without indel in proteic format. | considering the Methionine or not : according to the option chosen. - | + | **ORF_Search_CDS_without_indel_nuc** | is the output with the CDS without indel in proteic format. | considering the Methionine or not : according to the option chosen. -.. class:: warningmark - -The zip outputs have to be downloaded (and extracts the files with a file archiver software), you cannot visualize them with the "eye icon" through the interface. 
-------- @@ -268,9 +305,9 @@ **ORF_Search** -| a zip file containing 47 files with 1, 2 or 3 sequences inside +| a 'dataset collection list' containing 47 files with 1, 2 or 3 sequences inside | for example the file : locus5_sp3.fasta which contains 3 species : -| +| | >Ac533/40375 @@ -306,38 +343,38 @@ **ORF_Search_Best_ORF_aa** -| +| | *************** CDS detection *************** -| +| | Files processed: 47 -| +| | Files with CDS: 32 | Files with CDS plus M (codon start): 20 | Files without CDS: 15 -| -| In locus with CDS considering Methionine : -| +| +| In locus with CDS considering Methionine : +| | *************** 1st filter : selection of the locus *************** -| - +| + Total number of locus recorded = 20 | Number of locus with 1 species : 1 | Number of locus with 2 species : 16 | Number of locus with 3 species : 3 -| +| | Number of locus excluded (exclude if not at least 3 species in the alignment)= 17 | | *************** 2nd Filter : removal of the indel *************** | | Total number of locus recorded = 3 -| +| Total number of locus with no indels (SAVED) = 3 Total number of locus with indels (EXCLUDED) = 0 -| +| **ORF_Search_Best_ORF_aa** @@ -359,10 +396,7 @@ .. class:: infomark -| Save as *Galaxy{number}-[ORF_Search_Best_ORF_aa].zip* -| If you unzip the file, a number of files are extracted (depends on the number of locus keeped) : locus{nb}_sp{nb_sp}.fasta | for example : locus5_sp3.fasta -| | **ORF_Search_Best_ORF_nuc** @@ -392,11 +426,8 @@ .. class:: infomark -| Save as *Galaxy{number}-[ORF_Search_Best_ORF_nuc].zip* -| If you unzip the file, a number of files are extracted (depends on the number of locus keeped) : locus{nb}_sp{nb_sp}.fasta | for example : locus5_sp3.fasta -| -| +| **ORF_Search_CDS_with_M_aa** @@ -417,13 +448,10 @@ .. 
class:: infomark -| Save as *Galaxy{number}-[ORF_Search_CDS_with_M_aa].zip* -| If you unzip the file, a number of files are extracted (depends on the number of locus keeped) : locus{nb}_sp{nb_sp}.fasta | for example : locus5_sp3.fasta | It's the same for the option : regardless Methionine -| -| +| **ORF_Search_CDS_with_M_nuc** @@ -449,13 +477,11 @@ .. class:: infomark -| Save as *Galaxy{number}-[ORF_Search_CDS_nuc].zip* -| if you unzip the file, a number of files are extracted (depends on the number of locus keeped) : locus{nb}_sp{nb_sp}.fasta | for example : locus5_sp3.fasta +| | It's the same for the option : regardless Methionine -| -| +| **ORF_Search_CDS_without_indel_aa** @@ -476,11 +502,8 @@ .. class:: infomark -| Save as *Galaxy{number}-[ORF_Search_CDS_without_indel_aa].zip* -| If you unzip the file, a number of files are extracted (depends on the number of locus keeped) : locus{nb}_sp{old_nb_sp}_sp_{new_nb_sp}.fasta | for example locus5_sp3_sp3.fasta -| -| +|| **ORF_Search_CDS_without_indel_nuc** @@ -504,9 +527,28 @@ .. class:: infomark -| Save as *Galaxy{number}-[ORF_Search_CDS_without_indel_nuc].zip* -| If you unzip the file, a number of files are extracted (depends on the number of locus keeped) : locus{nb}_sp{old_nb_sp}_sp_{new_nb_sp}.fasta | for example : locus5_sp3_sp3.fasta +| + +--------------------------------------------------- + +Changelog +--------- + +**Version 2.0 - 05/07/2017** + + - NEW: Replace the zip between tools by Dataset Collection + + +**Version 1.0 - 13/04/2017** + + - Add funtional test with planemo + + - planemo test with conda dependency for python + + - Scripts renamed + symlinks to the directory 'scripts' + + </help> <expand macro="citations" />
--- a/README.md Thu Apr 13 09:47:57 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -Changelog - -Version 1.0 - 13/04/2017 - - - Add funtional test with planemo - - planemo test with conda dependency for python - - Scripts renamed + symlinks to the directory 'scripts'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,14 @@ +Changelog + +**Version 2.0 - 05/07/2017** + + - NEW: Replace the zip between tools by Dataset Collection + + +**Version 1.0 - 13/04/2017** + + - Add functional test with planemo + + - planemo test with conda dependency for python + + - Scripts renamed + symlinks to the directory 'scripts'
--- a/macros.xml Thu Apr 13 09:47:57 2017 -0400 +++ b/macros.xml Wed Sep 27 10:03:05 2017 -0400 @@ -1,9 +1,24 @@ <macros> - <xml name="python_required"> - <requirement type="package" version="2.7">python</requirement> + <xml name="python_required"> + <requirement type="package" version="2.7">python</requirement> </xml> + <token name="@HELP_AUTHORS@"> +.. class:: infomark + +**Authors** Eric Fontanillas created the scripts of this pipeline. + +.. class:: infomark + +**Galaxy integration** ABiMS TEAM + + | Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool. + +--------------------------------------------------- + + </token> + <xml name="citations"> <citations> <citation type="bibtex">Credits : ABIMS team, Roscoff Marine Station</citation>
--- a/scripts/S01_find_orf_on_multiple_alignment.py Thu Apr 13 09:47:57 2017 -0400 +++ b/scripts/S01_find_orf_on_multiple_alignment.py Wed Sep 27 10:03:05 2017 -0400 @@ -59,19 +59,19 @@ ###################################################################################################################### -##### DEF 3 : Test if the sequence is a multiple of 3, and if not correct the sequence to become a multiple of 3 ##### +##### DEF 3 : Test if the sequence is a multiple of 3, and if not correct the sequence to become a multiple of 3 ##### ###################################################################################################################### ### WEAKNESS OF THAT APPROACH = I remove extra base(s) at the end of the sequence ==> I can lost a codon, when I test ORF (as I will decay the ORF) def multiple3(seq): leng = len(seq) modulo = leng%3 if modulo == 0: # the results of dividing leng per 3 is an integer - new_seq = seq + new_seq = seq elif modulo == 1: # means 1 extra nc (nucleotid) needs to be removed (the remaining of modulo indicate the part which is non-dividable per 3) new_seq = seq[:-1] # remove the last nc elif modulo == 2: # means 2 extra nc (nucleotid) needs to be removed (the remaining of modulo indicate the part which is non-dividable per 3) new_seq = seq[:-2] # remove the 2 last nc - len1 = len(new_seq) + len1 = len(new_seq) return(new_seq, modulo) ########################################################## @@ -96,13 +96,13 @@ base2 = string.capitalize(base2) base3 = seq_dna[i+2] base3 = string.capitalize(base3) - + codon = base1+base2+base3 codon = string.replace(codon, "T", "U") if codon in bash_codeUniversel.keys(): aa = bash_codeUniversel[codon] - seq_aa = seq_aa + aa + seq_aa = seq_aa + aa else: seq_aa = seq_aa +"?" 
### Take account for gap "-" and "N" i = i + 3 @@ -113,8 +113,8 @@ ###### DEF 4 - Part 2 - ###### ############################## -def find_good_ORF_criteria_3(bash_aligned_nc_seq, bash_codeUniversel): - +def find_good_ORF_criteria_3(bash_aligned_nc_seq, bash_codeUniversel): + ## 1 ## Get the list of aligned aa seq for the 3 ORF: bash_of_aligned_aa_seq_3ORF = {} bash_of_aligned_nuc_seq_3ORF = {} @@ -125,7 +125,7 @@ ## 1.2. ## Check whether the sequence is multiple of 3, and correct it if not: new_sequence_nc, modulo = multiple3(sequence_nc) ### DEF 3 ### - + ## 1.3. ## Get the 3 ORFs (nuc) for each sequence seq_nuc_ORF1 = new_sequence_nc seq_nuc_ORF2 = new_sequence_nc[1:-2] @@ -134,7 +134,7 @@ seq_nuc_ORF4=seq_reversed seq_nuc_ORF5=seq_reversed[1:-2] seq_nuc_ORF6=seq_reversed[2:-1] - + LIST_6_ORF_nuc = [seq_nuc_ORF1, seq_nuc_ORF2, seq_nuc_ORF3,seq_nuc_ORF4,seq_nuc_ORF5,seq_nuc_ORF6] bash_of_aligned_nuc_seq_3ORF[fasta_name] = LIST_6_ORF_nuc ### For each seq of the multialignment => give the 6 ORFs (in nuc) @@ -142,30 +142,30 @@ seq_prot_ORF1 = simply_get_ORF(seq_nuc_ORF1,bash_codeUniversel) ### DEF 4 - Part 1 - ## seq_prot_ORF2 = simply_get_ORF(seq_nuc_ORF2,bash_codeUniversel) ### DEF 4 - Part 1 - ## seq_prot_ORF3 = simply_get_ORF(seq_nuc_ORF3,bash_codeUniversel) ### DEF 4 - Part 1 - ## - seq_prot_ORF4 = simply_get_ORF(seq_nuc_ORF4,bash_codeUniversel) ### DEF 4 - Part 1 - ## + seq_prot_ORF4 = simply_get_ORF(seq_nuc_ORF4,bash_codeUniversel) ### DEF 4 - Part 1 - ## seq_prot_ORF5 = simply_get_ORF(seq_nuc_ORF5,bash_codeUniversel) ### DEF 4 - Part 1 - ## seq_prot_ORF6 = simply_get_ORF(seq_nuc_ORF6,bash_codeUniversel) ### DEF 4 - Part 1 - ## LIST_6_ORF_aa = [seq_prot_ORF1, seq_prot_ORF2, seq_prot_ORF3,seq_prot_ORF4,seq_prot_ORF5,seq_prot_ORF6] - bash_of_aligned_aa_seq_3ORF[fasta_name] = LIST_6_ORF_aa ### For each seq of the multialignment => give the 6 ORFs (in aa) + bash_of_aligned_aa_seq_3ORF[fasta_name] = LIST_6_ORF_aa ### For each seq of the multialignment => give the 
6 ORFs (in aa) ## 2 ## Test for the best ORF (Get the longuest segment in the alignment with no codon stop ... for each ORF ... the longuest should give the ORF) BEST_MAX = 0 - for i in [0,1,2,3,4,5]: ### Test the 6 ORFs + for i in [0,1,2,3,4,5]: ### Test the 6 ORFs ORF_Aligned_aa = [] ORF_Aligned_nuc = [] - + ## 2.1 ## Get the alignment of sequence for a given ORF ## Compare the 1rst ORF between all sequence => list them in ORF_Aligned_aa // them do the same for the second ORF, and them the 3rd - for fasta_name in bash_of_aligned_aa_seq_3ORF.keys(): + for fasta_name in bash_of_aligned_aa_seq_3ORF.keys(): ORFsequence = bash_of_aligned_aa_seq_3ORF[fasta_name][i] aa_length = len(ORFsequence) ORF_Aligned_aa.append(ORFsequence) ### List of all sequences in the ORF nb "i" = n = i+1 - - for fasta_name in bash_of_aligned_nuc_seq_3ORF.keys(): + + for fasta_name in bash_of_aligned_nuc_seq_3ORF.keys(): ORFsequence = bash_of_aligned_nuc_seq_3ORF[fasta_name][i] nuc_length = len(ORFsequence) ORF_Aligned_nuc.append(ORFsequence) ### List of all sequences in the ORF nb "i" = @@ -175,11 +175,11 @@ ## Next step is to get the longuest subsequence whithout stop ## We will explore the presence of stop "*" in each column of the alignment, and get the positions of the segments between the positions with "*" MAX_LENGTH = 0 - LONGUEST_SEGMENT_UNSTOPPED = "" - j = 0 # Start from first position in alignment + LONGUEST_SEGMENT_UNSTOPPED = "" + j = 0 # Start from first position in alignment List_of_List_subsequences = [] List_positions_subsequence = [] - while j < aa_length: + while j < aa_length: column = [] for seq in ORF_Aligned_aa: column.append(seq[j]) @@ -189,7 +189,7 @@ List_positions_subsequence = [] ## Re-initialyse list of positions else: List_positions_subsequence.append(j) - + ## 2.3 ## Among all the sublists (separated by column with codon stop "*"), get the longuest one (BETTER SEGMENT for a given ORF) LONGUEST_SUBSEQUENCE_LIST_POSITION = [] MAX=0 @@ -197,7 +197,7 @@ if 
len(sublist) > MAX and len(sublist) > MINIMAL_CDS_LENGTH: MAX = len(sublist) LONGUEST_SUBSEQUENCE_LIST_POSITION = sublist - + ## 2.4. ## Test if the longuest subsequence start exactly at the beginning of the original sequence (i.e. means the ORF maybe truncated) if LONGUEST_SUBSEQUENCE_LIST_POSITION != []: if LONGUEST_SUBSEQUENCE_LIST_POSITION[0] == 0: @@ -206,7 +206,7 @@ CDS_maybe_truncated = 0 else: CDS_maybe_truncated = 0 - + ## 2.5 ## Test if this BETTER SEGMENT for a given ORF, is the better than the one for the other ORF (GET THE BEST ORF) ## Test whether it is the better ORF @@ -239,7 +239,7 @@ pos_MAX_nuc = pos_MAX_aa * 3 BESTORF_bash_aligned_nc_seq = {} - BESTORF_bash_aligned_nc_seq_CODING = {} + BESTORF_bash_aligned_nc_seq_CODING = {} for fasta_name in bash_aligned_nc_seq.keys(): seq = bash_of_aligned_nuc_seq_3ORF[fasta_name][index_BEST_ORF] seq_coding = seq[pos_MIN_nuc:pos_MAX_nuc] @@ -259,7 +259,7 @@ BESTORF_bash_of_aligned_aa_seq_CDS_with_M = {} BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = {} - + Ortho = 0 for fasta_name in BESTORF_bash_of_aligned_aa_seq_CODING.keys(): seq_aa = BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] @@ -271,14 +271,14 @@ BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING ## CASE 2: in case the CDS is truncated, so the "M" is maybe missing: - if Ortho == 0 and CDS_maybe_truncated == 1: + if Ortho == 0 and CDS_maybe_truncated == 1: BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING ## CASE 3: CDS not truncated AND no "M" found in good position (i.e. 
before the last 50 aa): ## => the 2 bash "CDS_with_M" are left empty ("{}") - - return(BESTORF_bash_aligned_nc_seq, BESTORF_bash_aligned_nc_seq_CODING, BESTORF_bash_of_aligned_nuc_seq_CDS_with_M, BESTORF_bash_of_aligned_aa_seq, BESTORF_bash_of_aligned_aa_seq_CODING, BESTORF_bash_of_aligned_aa_seq_CDS_with_M) + + return(BESTORF_bash_aligned_nc_seq, BESTORF_bash_aligned_nc_seq_CODING, BESTORF_bash_of_aligned_nuc_seq_CDS_with_M, BESTORF_bash_of_aligned_aa_seq, BESTORF_bash_of_aligned_aa_seq_CODING, BESTORF_bash_of_aligned_aa_seq_CDS_with_M) ########################################################## @@ -297,7 +297,7 @@ ############################################################ -###### DEF 6 : Detect if methionin in the aa sequence ###### +###### DEF 6 : Detect if methionin in the aa sequence ###### ############################################################ def detect_Methionine(seq_aa, Ortho): @@ -305,13 +305,13 @@ nbre = sys.argv[2] CUTOFF_Last_50aa = ln - MINIMAL_CDS_LENGTH #Ortho = 0 ## means orthologs not found - + ## Find all indices of occurances of "M" in a string of aa list_indices = allindices(seq_aa, "M") ### DEF5 ### - + ## If some "M" are present, find whether the first "M" found is not in the 50 last aa (indice < CUTOFF_Last_50aa) ==> in this case: maybenot a CDS if list_indices != []: - first_M = list_indices[0] + first_M = list_indices[0] if first_M < CUTOFF_Last_50aa: Ortho = 1 ## means orthologs found @@ -324,7 +324,7 @@ ############################################################ -###### DEF 7 : Reverse complement DNA sequence ###### +###### DEF 7 : Reverse complement DNA sequence ###### ###### Reference: http://crazyhottommy.blogspot.fr/2013/10/python-code-for-getting-reverse.html ############################################################ @@ -344,16 +344,16 @@ ####################### import string, os, time, re, zipfile, sys +infiles = sys.argv[1] MINIMAL_CDS_LENGTH = int(sys.argv[3]) ## in aa number ## INPUT / OUTPUT -list_file = [] -zfile = 
zipfile.ZipFile(sys.argv[1]) -for name in zfile.namelist() : - list_file.append(name) - zfile.extract(name, "./") - +list_file = str.split(infiles,",") + +### Get Universal Code F2 = open(sys.argv[2], 'r') +bash_codeUniversel = code_universel(F2) ### DEF2 ### +F2.close() os.mkdir("04_BEST_ORF_nuc") Path_OUT1 = "04_BEST_ORF_nuc" @@ -371,9 +371,7 @@ Path_OUT6 = "06_CDS_with_M_aa" -### Get Universal Code -bash_codeUniversel = code_universel(F2) ### DEF2 ### -F2.close() + ### Get the Bash corresponding to an alignment file in fasta format count_file_processed = 0 @@ -382,10 +380,10 @@ count_file_with_CDS_plus_M = 0 for file in list_file: - count_file_processed = count_file_processed + 1 + count_file_processed = count_file_processed + 1 fasta_file_path = "./%s" %file bash_fasta = dico(fasta_file_path) ### DEF 1 ### - BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel) ### DEF 4 - PART 2 - ### + BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel) ### DEF 4 - PART 2 - ### ## a ## OUTPUT BESTORF_nuc if BESTORF_nuc != {}: @@ -398,8 +396,8 @@ OUT1.close() else: count_file_without_CDS = count_file_without_CDS + 1 - - + + ## b ## OUTPUT BESTORF_nuc_CODING ===> THE MOST INTERESTING!!! 
if BESTORF_aa != {}: OUT2 = open("%s/%s" %(Path_OUT2,file), "w") @@ -407,7 +405,7 @@ seq = BESTORF_aa[fasta_name] OUT2.write("%s\n" %fasta_name) OUT2.write("%s\n" %seq) - OUT2.close() + OUT2.close() ## c ## OUTPUT BESTORF_aa if BESTORF_nuc_CODING != {}: @@ -425,7 +423,7 @@ seq = BESTORF_aa_CODING[fasta_name] OUT4.write("%s\n" %fasta_name) OUT4.write("%s\n" %seq) - OUT4.close() + OUT4.close() ## e ## OUTPUT BESTORF_nuc_CDS_with_M if BESTORF_nuc_CDS_with_M != {}: @@ -435,7 +433,7 @@ seq = BESTORF_nuc_CDS_with_M[fasta_name] OUT5.write("%s\n" %fasta_name) OUT5.write("%s\n" %seq) - OUT5.close() + OUT5.close() ## f ## OUTPUT BESTORF_aa_CDS_with_M if BESTORF_aa_CDS_with_M != {}: @@ -446,50 +444,12 @@ OUT6.write("%s\n" %seq) OUT6.close() - os.system("rm -rf %s" %file) + os.system("rm -rf %s" %file) -## Print +## Print print "*************** CDS detection ***************" print "\nFiles processed: %d" %count_file_processed print "\tFiles with CDS: %d" %count_file_with_CDS print "\t\tFiles with CDS plus M (codon start): %d" %count_file_with_CDS_plus_M print "\tFiles without CDS: %d \n" %count_file_without_CDS print "" - -## Zipfile -f_bestORF_nuc = zipfile.ZipFile("ORF_Search_bestORF_nuc.zip", "w") -f_bestORF_aa = zipfile.ZipFile("ORF_Search_bestORF_aa.zip", "w") -f_CDS_nuc = zipfile.ZipFile("ORF_Search_CDS_nuc.zip", "w") -f_CDS_aa = zipfile.ZipFile("ORF_Search_CDS_aa.zip", "w") -f_CDSM_nuc = zipfile.ZipFile("ORF_Search_CDSM_nuc.zip", "w") -f_CDSM_aa = zipfile.ZipFile("ORF_Search_CDSM_aa.zip", "w") - -os.chdir("%s" %Path_OUT1) -folder = os.listdir("./") -for i in folder : - f_bestORF_nuc.write("./%s" %i) - -os.chdir("../%s" %Path_OUT2) -folder = os.listdir("./") -for i in folder : - f_bestORF_aa.write("./%s" %i) - -os.chdir("../%s" %Path_OUT3) -folder = os.listdir("./") -for i in folder : - f_CDS_nuc.write("./%s" %i) - -os.chdir("../%s" %Path_OUT4) -folder = os.listdir("./") -for i in folder : - f_CDS_aa.write("./%s" %i) - -os.chdir("../%s" %Path_OUT5) -folder = 
os.listdir("./") -for i in folder : - f_CDSM_nuc.write("./%s" %i) - -os.chdir("../%s" %Path_OUT6) -folder = os.listdir("./") -for i in folder : - f_CDSM_aa.write("./%s" %i)
--- a/scripts/S03_remove_site_with_not_enough_species_represented.py Thu Apr 13 09:47:57 2017 -0400 +++ b/scripts/S03_remove_site_with_not_enough_species_represented.py Wed Sep 27 10:03:05 2017 -0400 @@ -61,7 +61,7 @@ LIST_POSITION_KEEPED_nuc.append(position1) LIST_POSITION_KEEPED_nuc.append(position2) LIST_POSITION_KEEPED_nuc.append(position3) - + ## 4 ## Create entries for "filtered_bash" for aa & nuc filtered_bash_aa = {} filtered_bash_nuc = {} @@ -69,7 +69,7 @@ filtered_bash_aa[fasta_name] = "" for fasta_name in bash_nuc.keys(): filtered_bash_nuc[fasta_name] = "" - + ## 5 ## Write "filtered_bash" for aa j=0 while j < ln_aa: @@ -87,8 +87,8 @@ seq = filtered_bash_aa[name] if seq == '': del filtered_bash_aa[name] - - + + ## 7 ## Write "filtered_bash" for nuc j=0 while j < ln_nuc: @@ -101,13 +101,13 @@ seq = seq + pos filtered_bash_nuc[fasta_name] = seq j = j + 1 - + ## 8 ## Remove empty sequence for name in filtered_bash_nuc.keys(): seq = filtered_bash_nuc[name] if seq == '': del filtered_bash_nuc[name] - + return(filtered_bash_aa, filtered_bash_nuc) #################################### @@ -118,7 +118,7 @@ import string, os, time, re, sys, zipfile ### 0 ### PARAMETERS -MIN_SPECIES_NB = int(sys.argv[1]) +MIN_SPECIES_NB = int(sys.argv[1]) MAX_sp = MIN_SPECIES_NB MIN_LENGTH_FINAL_ALIGNMENT_NUC = int(sys.argv[2]) n0 = 0 @@ -167,7 +167,7 @@ ## 4.1 ## REMOVE POSITIONS WITH TOO MUCH MISSING DATA (i.e. 
not enough taxa represented at each position in the alignment) filtered_bash_aa, filtered_bash_nuc = remove_position_with_too_much_missing_data(dico_aa, dico_nuc, MIN_SPECIES_NB) ### DEF 2 ### - k = filtered_bash_nuc.keys() + k = filtered_bash_nuc.keys() new_leng_nuc = 0 if k != []: k0 = k[0] @@ -185,7 +185,7 @@ ln_aa = len(filtered_bash_aa.keys()) nb = "sp%d" %ln_aa new_name = LS[0] + "_" + nb + "_" + LS[1] - n0+=1 + n0+=1 ## 4.5 ## Write filtered alignment in OUTPUTs ## aa @@ -204,28 +204,14 @@ seq_nuc = filtered_bash_nuc[fasta_name] OUTnuc.write("%s\n" %fasta_name) OUTnuc.write("%s\n" %seq_nuc) - OUTnuc.close() + OUTnuc.close() else: bad+=1 - + ## 5 ## Print print "*************** 2nd Filter : removal of the indel ***************" print "\nTotal number of locus recorded = %d" %n0 print "\tTotal number of locus with no indels (SAVED) = %d" %good print "\tTotal number of locus, when removing indel, wich are empty (EXCLUDED) = %d" %bad print "" - -## ZipFile -f_filter_aa = zipfile.ZipFile("ORF_Search_CDS_without_indel_aa.zip", "w") -f_filter_nuc = zipfile.ZipFile("ORF_Search_CDS_without_indel_nuc.zip", "w") - -os.chdir("%s" %path_OUT1) -folder = os.listdir("./") -for i in folder : - f_filter_aa.write("./%s" %i) - -os.chdir("../%s" %path_OUT2) -folder = os.listdir("./") -for i in folder : - f_filter_nuc.write("./%s" %i)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cds_search.log Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,24 @@ +*************** CDS detection *************** + +Files processed: 4 + Files with CDS: 2 + Files with CDS plus M (codon start): 1 + Files without CDS: 2 + + + +In locus with CDS regardless of the Methionine : + +*************** 1st filter : selection of the locus *************** + +Total number of locus recorded = 2 + Number of locus with 1 species : 1 + Number of locus with 2 species : 0 +Number of locus excluded (exclude if not at least 2 species in the alignment)= 1 + +*************** 2nd Filter : removal of the indel *************** + +Total number of locus recorded = 1 + Total number of locus with no indels (SAVED) = 1 + Total number of locus, when removing indel, wich are empty (EXCLUDED) = 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cds_search_methionine.log Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,24 @@ +*************** CDS detection *************** + +Files processed: 1 + Files with CDS: 1 + Files with CDS plus M (codon start): 1 + Files without CDS: 0 + + + +In locus with CDS considering Methionine : + +*************** 1st filter : selection of the locus *************** + +Total number of locus recorded = 1 + Number of locus with 1 species : 0 + Number of locus with 2 species : 0 +Number of locus excluded (exclude if not at least 1 species in the alignment)= 0 + +*************** 2nd Filter : removal of the indel *************** + +Total number of locus recorded = 1 + Total number of locus with no indels (SAVED) = 1 + Total number of locus, when removing indel, wich are empty (EXCLUDED) = 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/locus1_sp5.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,10 @@ +>Ac45601/110 +------------------------------------------------------------------------------------------------------------------------ +>Am32051/110 +------------------------------------------------------------------------------------------------------------------------ +>Ap54541/110 +------------------------------------------------------------------------------------------------------------------------ +>Pf28561/110 +agtgacggtgacgtcatcatcttggtgatcgtcacgctgagctacgtggatcaggcgtcgaacttctacgtcacttccgttctgccgcacggcttgacgaacgtactcttcgtcgctcta +>Ph75241/110 +------------------------------------ctgagctatctggatctggcttggaacttctacgt-------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/locus1_sp7.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,14 @@ +>Ac72851/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +>Am31481/110 +caaaacccgacaaaaatcaaaatgattcctccaaacgcatctctggtgaaatatgacaatcctgttcttgttagcaggaatacagacaagaaaacaccaagggcgcgtgctctcaaggtggctacacgtctagacgtagtcaatctacaggaggaactggatagacgtctacagcagagacaggcccgggaaacaggaatctgtccggtcagacgagagctctattcgcaatgctttgacgagttgattcgccaagtgaccattaactgtgctgagcgaggtttgttgttactacgtgttcgtgatgagatccgtatgacgatcgctgcctaccagaccctgtatgagagctctgtagcttttggtatgaggaaagccttacaagcagaacaaggcaaggccgacatggagagaaagatc +>Ap26441/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +>Pf20921/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------ctacagcaaagactcgcccgagaaaccggcatctgttcaataagaagagaactctattcacagtgctttgatgaactgatccgtcaggtgacaatcaactgcgctgagcgagg-----------------------------------------------------------tgtatgagagcagcgtcgcttttggaatgaggaaagc------------------------------------------- +>Pu7631/1100 
+---------------------atgattcctccgaaatcctctctggttaaatatgataatcc---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Te17661/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +>Th39471/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/locus2_sp4.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,8 @@ +>Ac38641/110 +------------------------------------------tatgagaacatccgtaagagtcggcttgatgaccacatcagtgcccattctggcataaagaatcatgtgtgtgcagtgtgtgggaaggcctttgccgggcggaagcacatgcagagacatgaaaaaacacacctggaagtgaagccccttaagtgtgaacaatgtgaatatgccacaacacgaagagacaagttacgtgatcacattagaaagcatcacaaagccatggccatcagcttgggtatccttgatccaaa- +>Am55481/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +>Ap59291/110 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +>Pg58451/110 +cataatggcaagaaattccggtgtacttttcctggctgtgcatatgagaacatacggaagagtcgtctcgatgatcacatcagtgctcacactggaattaagaatcatgtgtgctccatatgtggaaaggcatttgcaggacgcaaacatatgcagagacatgaaaagacacatttggaaattaagccgttaaagtgtgagcactgtgaatacgcaactacgcgcagagataaattacgtgatcacataagaaaacatcacagaaatattgccatcagcatgggtattcttgacccaaat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Ac28061/110 +aaacacgctgctttagtcactgctctctacatttactgtctgggtacctttgaatgtggtgacaggtacataggacaccaaggtgtaaagcttgtgaggcgagtcctcatcctcattcctcttacgagccaatcgaactcgtactctgaatggaacatttctaattcccttggaccaaatgtgtttgttcagtcttgtctctatgcgtacatcttcagttcccatctgcttgtaggcaaattgtctgatagcctttatagcccgtggcgccctcttcttgaaccctattccatggattcgcttgtggatgttgatggtgtattcccgtgtcaccacctcgttggtgacagacttcttcttgcctttctcgcggcgtgtcatcgcttccgg +>Am30661/110 +----------ctttattcactgctctccacattcacggtctgg---------------------------------------------------------------------------------------------------------------------------cccttggaccagacgtgcttgttgagtcgggtctcgattcggacatcctctgtgcccatctgcttgaaggcaaactctcggatggctttcaccgcgcgaggcgccctcctcttgaagccaatgccatgtatacgcttgtggatgttgatggtgtattctcgggtcacgacctcctgggtcacggacttctt--------------------------------- +>Pg28571/110 +---------------gtcactactctccacatttactgtctgtgtacctttgaatgttgtaacaggtacataggagaccaaggtgtaaagtttgtgtggcgagtcttcatcttcattccttttacgagccaatcttacacgcacacggaatgggacatttctaattccctttgaccaaatgtgtttgttgagtctcgtctctatacgcacatcctcagtgcccatctgtttaaaggcaaactgcctgatagccttgatcgcacgaggtgccctcttcttgaaacctattccatgaattcttttgtggatattcactgtgtattctctagttaccacctcattggtaacagacttcttcttgcccttctcccggcgtgccatggcttc--- +>Ph73091/110 +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aggcaaactgtctgatggctttgatggcacgtggcgccctcttcttgaacccaattccgtgaattcgtttatggatgtttatggtgtattcccttgttaccacttcattggtaacggacttcttctttcccttctcacggcgtg------------- +>Te26821/110 
+-------------------------------------------------------------------acataggtcactaatgtataaagtttgtgcggtgaatcttcatcctcatt--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Th44731/110 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------cccatctgcttgttggcaaa------------------------------------------------------------------------------------------------------------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_Best_ORF_aa/locus1_sp7.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,14 @@ +>Ac72851/110 +??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? +>Te17661/110 +??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? +>Ap26441/110 +??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? +>Th39471/110 +??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? +>Pf20921/110 +??????????????LSSFQKRRCSHT????????????????????LAQRS*LSPDGSVHQSTVNRVLFLLNRCRFLGRVFAV???????????????????????????????????????????????????????? +>Pu7631/1100 +???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????DYHI*PERISEES??????? +>Am31481/110 +SFSPCRPCLVLLVRLSSYQKLQSSHTGSGRQRSSYGSHHEHVVTTNLAQHS*WSLGESTRQSIANRALV*PDRFLFPGPVSAVDVYPVPPVD*LRLDV*PP*EHAPLVFSCLYSC*QEQDCHISPEMRLEESF*FLSGF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_Best_ORF_aa/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Th44731/110 +??????????????????????????????????????????????????FANKQM?????????????????????????????????????????????????????????????????????????? +>Te26821/110 +???????????????????????????????????????????????????????????????????????????????????????????NEDEDSPHKLYTLVTY??????????????????????? +>Ph73091/110 +?????RREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFA?????????????????????????????????????????????????????????????????????????????? +>Am30661/110 +???????????KKSVTQEVVTREYTINIHKRIHGIGFKRRAPRAVKAIREFAFKQMGTEDVRIETRLNKHVWSK??????????????????????????????????????????QTVNVESSE*???? +>Ac28061/110 +PEAMTRREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFAYKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD*SSVF +>Pg28571/110 +?EAMARREKGKKKSVTNEVVTREYTVNIHKRIHGIGFKKRAPRAIKAIRQFAFKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD?????
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_Best_ORF_nuc/locus1_sp7.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,14 @@ +>Ac72851/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Te17661/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Ap26441/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Th39471/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Pf20921/110 
+-----------------------------------------gctttcctcattccaaaagcgacgctgctctcataca-----------------------------------------------------------cctcgctcagcgcagttgattgtcacctgacggatcagttcatcaaagcactgtgaatagagttctcttcttattgaacagatgccggtttctcgggcgagtctttgctgtag----------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Pu7631/1100 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ggattatcatatttaaccagagaggatttcggaggaatcat-------------------- +>Am31481/110 +tctttctctccatgtcggccttgccttgttctgcttgtaaggctttcctcataccaaaagctacagagctctcatacagggtctggtaggcagcgatcgtcatacggatctcatcacgaacacgtagtaacaacaaacctcgctcagcacagttaatggtcacttggcgaatcaactcgtcaaagcattgcgaatagagctctcgtctgaccggacagattcctgtttcccgggcctgtctctgctgtagacgtctatccagttcctcctgtagattgactacgtctagacgtgtagccaccttgagagcacgcgcccttggtgttttcttgtctgtattcctgctaacaagaacaggattgtcatatttcaccagagatgcgtttggaggaatcattttgatttttgtcgggtttt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_Best_ORF_nuc/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Th44731/110 +------------------------------------------------------------------------------------------------------------------------------------------------------tttgccaacaagcagatggg---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Te26821/110 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aatgaggatgaagattcaccgcacaaactttatacattagtgacctatgt------------------------------------------------------------------- +>Ph73091/110 +-------------cacgccgtgagaagggaaagaagaagtccgttaccaatgaagtggtaacaagggaatacaccataaacatccataaacgaattcacggaattgggttcaagaagagggcgccacgtgccatcaaagccatcagacagtttgcct----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Am30661/110 +---------------------------------aagaagtccgtgacccaggaggtcgtgacccgagaatacaccatcaacatccacaagcgtatacatggcattggcttcaagaggagggcgcctcgcgcggtgaaagccatccgagagtttgccttcaagcagatgggcacagaggatgtccgaatcgagacccgactcaacaagcacgtctggtccaaggg---------------------------------------------------------------------------------------------------------------------------ccagaccgtgaatgtggagagcagtgaataaag---------- +>Ac28061/110 
+ccggaagcgatgacacgccgcgagaaaggcaagaagaagtctgtcaccaacgaggtggtgacacgggaatacaccatcaacatccacaagcgaatccatggaatagggttcaagaagagggcgccacgggctataaaggctatcagacaatttgcctacaagcagatgggaactgaagatgtacgcatagagacaagactgaacaaacacatttggtccaagggaattagaaatgttccattcagagtacgagttcgattggctcgtaagaggaatgaggatgaggactcgcctcacaagctttacaccttggtgtcctatgtacctgtcaccacattcaaaggtacccagacagtaaatgtagagagcagtgactaaagcagcgtgttt +>Pg28571/110 +---gaagccatggcacgccgggagaagggcaagaagaagtctgttaccaatgaggtggtaactagagaatacacagtgaatatccacaaaagaattcatggaataggtttcaagaagagggcacctcgtgcgatcaaggctatcaggcagtttgcctttaaacagatgggcactgaggatgtgcgtatagagacgagactcaacaaacacatttggtcaaagggaattagaaatgtcccattccgtgtgcgtgtaagattggctcgtaaaaggaatgaagatgaagactcgccacacaaactttacaccttggtctcctatgtacctgttacaacattcaaaggtacacagacagtaaatgtggagagtagtgac---------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_aa/locus1_sp7.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,14 @@ +>Ac72851/110 +??????????????????????????????????????????????????? +>Te17661/110 +??????????????????????????????????????????????????? +>Ap26441/110 +??????????????????????????????????????????????????? +>Th39471/110 +??????????????????????????????????????????????????? +>Pf20921/110 +??????????????LSSFQKRRCSHT????????????????????LAQRS +>Pu7631/1100 +??????????????????????????????????????????????????? +>Am31481/110 +SFSPCRPCLVLLVRLSSYQKLQSSHTGSGRQRSSYGSHHEHVVTTNLAQHS
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_aa/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Th44731/110 +??????????????????????????????????????????????????FANKQM????????????????????????????????????????????????????????????????????? +>Te26821/110 +???????????????????????????????????????????????????????????????????????????????????????????NEDEDSPHKLYTLVTY?????????????????? +>Ph73091/110 +?????RREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFA????????????????????????????????????????????????????????????????????????? +>Am30661/110 +???????????KKSVTQEVVTREYTINIHKRIHGIGFKRRAPRAVKAIREFAFKQMGTEDVRIETRLNKHVWSK??????????????????????????????????????????QTVNVESSE +>Ac28061/110 +PEAMTRREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFAYKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD +>Pg28571/110 +?EAMARREKGKKKSVTNEVVTREYTVNIHKRIHGIGFKKRAPRAIKAIRQFAFKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_nuc/locus1_sp7.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,14 @@ +>Ac72851/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------- +>Te17661/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------- +>Ap26441/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------- +>Th39471/110 +--------------------------------------------------------------------------------------------------------------------------------------------------------- +>Pf20921/110 +-----------------------------------------gctttcctcattccaaaagcgacgctgctctcataca-----------------------------------------------------------cctcgctcagcgcagt +>Pu7631/1100 +--------------------------------------------------------------------------------------------------------------------------------------------------------- +>Am31481/110 +tctttctctccatgtcggccttgccttgttctgcttgtaaggctttcctcataccaaaagctacagagctctcatacagggtctggtaggcagcgatcgtcatacggatctcatcacgaacacgtagtaacaacaaacctcgctcagcacagt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_nuc/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Th44731/110 +------------------------------------------------------------------------------------------------------------------------------------------------------tttgccaacaagcagatggg------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Te26821/110 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aatgaggatgaagattcaccgcacaaactttatacattagtgacctatgt---------------------------------------------------- +>Ph73091/110 +-------------cacgccgtgagaagggaaagaagaagtccgttaccaatgaagtggtaacaagggaatacaccataaacatccataaacgaattcacggaattgggttcaagaagagggcgccacgtgccatcaaagccatcagacagtttgcct-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Am30661/110 +---------------------------------aagaagtccgtgacccaggaggtcgtgacccgagaatacaccatcaacatccacaagcgtatacatggcattggcttcaagaggagggcgcctcgcgcggtgaaagccatccgagagtttgccttcaagcagatgggcacagaggatgtccgaatcgagacccgactcaacaagcacgtctggtccaaggg---------------------------------------------------------------------------------------------------------------------------ccagaccgtgaatgtggagagcagtgaa +>Ac28061/110 
+ccggaagcgatgacacgccgcgagaaaggcaagaagaagtctgtcaccaacgaggtggtgacacgggaatacaccatcaacatccacaagcgaatccatggaatagggttcaagaagagggcgccacgggctataaaggctatcagacaatttgcctacaagcagatgggaactgaagatgtacgcatagagacaagactgaacaaacacatttggtccaagggaattagaaatgttccattcagagtacgagttcgattggctcgtaagaggaatgaggatgaggactcgcctcacaagctttacaccttggtgtcctatgtacctgtcaccacattcaaaggtacccagacagtaaatgtagagagcagtgac +>Pg28571/110 +---gaagccatggcacgccgggagaagggcaagaagaagtctgttaccaatgaggtggtaactagagaatacacagtgaatatccacaaaagaattcatggaataggtttcaagaagagggcacctcgtgcgatcaaggctatcaggcagtttgcctttaaacagatgggcactgaggatgtgcgtatagagacgagactcaacaaacacatttggtcaaagggaattagaaatgtcccattccgtgtgcgtgtaagattggctcgtaaaaggaatgaagatgaagactcgccacacaaactttacaccttggtctcctatgtacctgttacaacattcaaaggtacacagacagtaaatgtggagagtagtgac
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_with_M_aa/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Th44731/110 +??????????????????????????????????????????????????FANKQM????????????????????????????????????????????????????????????????????? +>Te26821/110 +???????????????????????????????????????????????????????????????????????????????????????????NEDEDSPHKLYTLVTY?????????????????? +>Ph73091/110 +?????RREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFA????????????????????????????????????????????????????????????????????????? +>Am30661/110 +???????????KKSVTQEVVTREYTINIHKRIHGIGFKRRAPRAVKAIREFAFKQMGTEDVRIETRLNKHVWSK??????????????????????????????????????????QTVNVESSE +>Ac28061/110 +PEAMTRREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFAYKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD +>Pg28571/110 +?EAMARREKGKKKSVTNEVVTREYTVNIHKRIHGIGFKKRAPRAIKAIRQFAFKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_with_M_nuc/locus2_sp6.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,12 @@ +>Th44731/110 +------------------------------------------------------------------------------------------------------------------------------------------------------tttgccaacaagcagatggg------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Te26821/110 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aatgaggatgaagattcaccgcacaaactttatacattagtgacctatgt---------------------------------------------------- +>Ph73091/110 +-------------cacgccgtgagaagggaaagaagaagtccgttaccaatgaagtggtaacaagggaatacaccataaacatccataaacgaattcacggaattgggttcaagaagagggcgccacgtgccatcaaagccatcagacagtttgcct-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Am30661/110 +---------------------------------aagaagtccgtgacccaggaggtcgtgacccgagaatacaccatcaacatccacaagcgtatacatggcattggcttcaagaggagggcgcctcgcgcggtgaaagccatccgagagtttgccttcaagcagatgggcacagaggatgtccgaatcgagacccgactcaacaagcacgtctggtccaaggg---------------------------------------------------------------------------------------------------------------------------ccagaccgtgaatgtggagagcagtgaa +>Ac28061/110 
+ccggaagcgatgacacgccgcgagaaaggcaagaagaagtctgtcaccaacgaggtggtgacacgggaatacaccatcaacatccacaagcgaatccatggaatagggttcaagaagagggcgccacgggctataaaggctatcagacaatttgcctacaagcagatgggaactgaagatgtacgcatagagacaagactgaacaaacacatttggtccaagggaattagaaatgttccattcagagtacgagttcgattggctcgtaagaggaatgaggatgaggactcgcctcacaagctttacaccttggtgtcctatgtacctgtcaccacattcaaaggtacccagacagtaaatgtagagagcagtgac +>Pg28571/110 +---gaagccatggcacgccgggagaagggcaagaagaagtctgttaccaatgaggtggtaactagagaatacacagtgaatatccacaaaagaattcatggaataggtttcaagaagagggcacctcgtgcgatcaaggctatcaggcagtttgcctttaaacagatgggcactgaggatgtgcgtatagagacgagactcaacaaacacatttggtcaaagggaattagaaatgtcccattccgtgtgcgtgtaagattggctcgtaaaaggaatgaagatgaagactcgccacacaaactttacaccttggtctcctatgtacctgttacaacattcaaaggtacacagacagtaaatgtggagagtagtgac
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_without_indel_aa/locus2_sp4_sp4.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,8 @@ +>Ac28061/110 +EAMTRREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFAYKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD +>Pg28571/110 +EAMARREKGKKKSVTNEVVTREYTVNIHKRIHGIGFKKRAPRAIKAIRQFAFKQMGTEDVRIETRLNKHIWSKGIRNVPFRVRVRLARKRNEDEDSPHKLYTLVSYVPVTTFKGTQTVNVESSD +>Ph73091/110 +----RREKGKKKSVTNEVVTREYTINIHKRIHGIGFKKRAPRAIKAIRQFA------------------------------------------------------------------------- +>Am30661/110 +----------KKSVTQEVVTREYTINIHKRIHGIGFKRRAPRAVKAIREFAFKQMGTEDVRIETRLNKHVWSK---------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_CDS_without_indel_nuc/locus2_sp4_sp4.fasta Wed Sep 27 10:03:05 2017 -0400 @@ -0,0 +1,8 @@ +>Ac28061/110 +gaagcgatgacacgccgcgagaaaggcaagaagaagtctgtcaccaacgaggtggtgacacgggaatacaccatcaacatccacaagcgaatccatggaatagggttcaagaagagggcgccacgggctataaaggctatcagacaatttgcctacaagcagatgggaactgaagatgtacgcatagagacaagactgaacaaacacatttggtccaagggaattagaaatgttccattcagagtacgagttcgattggctcgtaagaggaatgaggatgaggactcgcctcacaagctttacaccttggtgtcctatgtacctgtcaccacattcaaaggtacccagacagtaaatgtagagagcagtgac +>Pg28571/110 +gaagccatggcacgccgggagaagggcaagaagaagtctgttaccaatgaggtggtaactagagaatacacagtgaatatccacaaaagaattcatggaataggtttcaagaagagggcacctcgtgcgatcaaggctatcaggcagtttgcctttaaacagatgggcactgaggatgtgcgtatagagacgagactcaacaaacacatttggtcaaagggaattagaaatgtcccattccgtgtgcgtgtaagattggctcgtaaaaggaatgaagatgaagactcgccacacaaactttacaccttggtctcctatgtacctgttacaacattcaaaggtacacagacagtaaatgtggagagtagtgac +>Ph73091/110 +------------cgccgtgagaagggaaagaagaagtccgttaccaatgaagtggtaacaagggaatacaccataaacatccataaacgaattcacggaattgggttcaagaagagggcgccacgtgccatcaaagccatcagacagtttgcc--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>Am30661/110 +------------------------------aagaagtccgtgacccaggaggtcgtgacccgagaatacaccatcaacatccacaagcgtatacatggcattggcttcaagaggagggcgcctcgcgcggtgaaagccatccgagagtttgccttcaagcagatgggcacagaggatgtccgaatcgagacccgactcaacaagcacgtctggtccaag---------------------------------------------------------------------------------------------------------------------------------------------------------
--- a/test-data/test_08.out Thu Apr 13 09:47:57 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -*************** CDS detection *************** - -Files processed: 1795 - Files with CDS: 561 - Files with CDS plus M (codon start): 204 - Files without CDS: 1234 - - - -In locus with CDS considering Methionine : - -*************** 1st filter : selection of the locus *************** - -Total number of locus recorded = 204 - Number of locus with 1 species : 16 - Number of locus with 2 species : 171 - Number of locus with 3 species : 16 - Number of locus with 4 species : 1 - Number of locus with 2 species : 0 -Number of locus excluded (exclude if not at least 2 species in the alignment)= 16 - -*************** 2nd Filter : removal of the indel *************** - -Total number of locus recorded = 186 - Total number of locus with no indels (SAVED) = 186 - Total number of locus, when removing indel, wich are empty (EXCLUDED) = 0 -