# HG changeset patch # User greg # Date 1481290942 18000 # Node ID cecea7fdccf88637ad56fdf33dc56d42ac236a27 Uploaded diff -r 000000000000 -r cecea7fdccf8 assembly_post_processor.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/assembly_post_processor.xml Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,143 @@ + + pipeline + + plant_tribes_assembly_post_processor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of +complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It post-processes de novo assembly +transcripts into putative coding sequences and their corresponding amino acid translations. + +----- + +**Options** + + * **Prediction method for coding regions** - The prediction method for coding regions; one of ESTScan or TransDecoder. + * **Scores matrices** - Scores matrices that reflect the codons preferences in the studied organisms. + * **Target gene family assembly?** - Select yes to target gene families to assemble. + * **List of orthogroup identifiers for target gene families to assemble** - History item with a list of orthogroup identifiers for target gene families to assemble. The next version of this tool will use a dynamic select list for this feature. + * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. + * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL. + * **Remove gappy sites in alignments** - Removes gappy sites in alignments (i.e. 0.1 removes sites with 90% gaps): [0.0 to 1.0] + * **De novo transcriptome assembly was performed with strand-specific library?** - Select yes if de novo transcriptome assembly was performed with strand-specific library. + * **Remove repeated sequences in predicted coding regions?** - Select yes to remove repeated sequences in predicted coding regions. + * **Minimum sequence length of predicted coding regions** - Set the minimum sequence length of predicted coding regions. + + + + + @unpublished{None, + author = {None}, + title = {None}, + year = {None}, + eprint = {None}, + url = {None} + } + + diff -r 000000000000 -r cecea7fdccf8 plant_tribes_scaffolds.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc.sample Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,4 @@ +## Plant Tribes scaffolds +#Value Name Path Description +#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) +#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) diff -r 000000000000 -r cecea7fdccf8 test-data/assembly.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/assembly.fasta Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,73 @@ +>contig_1 +CGATTAACCATGTGTACAACATGACCAATTATGGCCGATCTTGCTTGTTCAGGAGTCTCA +CTCATCATTCCAAAATCAAGAAAAGCAAGCTTCCCGTCAGGTGTAGCTAAGAGATTCCCT +GGGTGAGGATCTGCATGAAAATAGCCGTACTCAAGCAGCTGTCGAAGACTGCACTGTATG +CCGGTATTCACCAGATCCAGAACACTGAGCCCTTGACTCTCAACGGCATCTTGCTCATTT +AATTTAACACCTTCAACCCACTCCATTGTTAGCACCTTTCCACTCGTGTAATCCCAGAAA +ATATCTGGGACAAGGATATCTTCCTTGTCTCCATATAATTTTTTAA +>contig_2 +CCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCC +GGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCAT +ACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCC +CGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATG +GGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCC +GATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTAC +AACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTG +TACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACA +ACTGTTAAATTATGTGCGCTTTGATGATTAAAAACACGGGGTAATTGAACTCAAGGGATG +GTTGAGAATTGTAATATTTCTGTAAGTGTCGGGGGATGGTTGAAATGCTTTTTATTATGA +AGTGTTAAAATGTAAGATAAA +>contig_3 +CGGGGGCAAGGTTTACATGAAAGTACATCTACAGGACCTAGATAAGCATCATCACCTGAC +AAAAAGCCATACATAGTAACACTTGCTAGTTGATCTGTGAAAATGGTGCACGAGTCGTAA +CAGACGGCATTAGTTCCTTTCCTGTAACAGCAGATTCGTATGAATGGTTTTCTCGAAATT +CTTCTTCAAGTGCATCTATAGGCATGGCACGAAGTGACTCTATTGTGCCTTTGCTGGGAA +TATCCGGCTCGCTCCTAACTGGCGTGGACCCTGTAGGCTCGTAATCCATGTATTTTTGCC +TGAAAGTATCATTCGTGTGCTGTTCGATACAGGATACCTGCTGGGAATGATCTCTCTTAA +GGTTCTCAATTGTTTCTGAATGAGCTCTAGCAGTTGTCAGAATTTCAGAAACAGATGCCT +TCTCCTGCTCTGACAAGCCAGCAACAACAACTCCTTCATCCACA +>contig_4 +CTGACGATGTTCATATTCATGCCACTCTAAATGTATGCCATCCATGTTGAGGAGAAATGC +TTGTGTAAAGAAGAAACTGGAGTCAAGCGGTCCCGGTTTTACAGTTGAACGTTGCTCTAA +ATTAGTCAAAGTACCCGACTACCCGCTCATCAAACTGGCGAGCTCGTTTTCCTCGCCGCC +CTACTCGGTGTCTTGGCGGGGCACATGGGGGTGGCGTGTGGCGTGCGGGGATGCGACAAC +ATCATAAATTCATAATCGAAGG +>contig_5 +GTGAGAGTGTCGCCTTTTCCATGCTACCCCTTTCCAGATCTGACTTGTTCGAACCTTCCA +TCATCCGGTTCGACGTCAGCAGTCGGTCCTTCTTCTCCAAGCACGAGAGCAGACGGTGCG +AGAGCGAGCAAAGACTCGCCTGCCGCGCCTCTCGTGACCTGGCTCGCATCTCCAGGATTC +ATGGCTCCTTAGCTCGCCCTTCTTCGATACTTCGCTAGACGCCCGAGGACGTCATCGAAG +GCGAGGGTCGGCACAACGGAGTGCTCCTTGCATGATGTCGGGGCCTGGCATCACGCGGCA +TCGGCCAAATCGTCCCTCTCGACGTGCGTCACGGAGAGGGCCGAGCGCCGGGACGACCGC +GTGGAAGAGCCCGCGGGATGCGGGATCCCGCTTGACGTGTGTTTGTGGCGGACCACCG +>contig_6 +TATAACAATTCAAAATATTCCTATGAAAACCAAGACCAGAGGCAACACCGGTATAACCAA +AGCCAGAGGCAATATCCCTTTATTCTTCGGGAACAGAAAAATACACGATATGCTACAGAC +AAAAAAATAAATCCAAACCATTCTTCTGTGCATAAACATTGTAAAAGTTTATTAGACCGC +TGTAAAACTCGTAAATGAAATCCCTGGCAATTGAATCTGTTAAACCTGCTCC +>contig_7 +GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT +AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG +TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC +AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGCG +AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACG +>contig_8 +TGGCATGAAGTCGGCTCGACTCGTGCCCGCTCGTCTATAGACGAATTGAGCTAGATTCAC +TCATTCCATGAACTCGACTCGTTCATGAGTCGGCTCGTAGTTGTCCGGACTCGCTCCATG +GCCAGCTCTACAACAGACTGCCTGTATGTAGATGATTGTATTGATTTGTTCTTCTCTTGT +TTAAATCCAACCAAATATCAACATGATATTTGCAATTTC +>contig_9 +ACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTT +TGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCC +TGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAG +AATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAAC +AGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAA +ACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGC +ACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGA +>contig_10 +GGGGCATTGCCGCCGATTAATTCGAGCGCGAGGTTTCGCGTATCAGGCGGAATTGGGATT +TGGCGCCACAAAGAGCGCCACCTATGATTTCTGTGGCGGACGACTTCACGAACTCGGACG +GGACTATCTAACCAATCTTCCAAGTTCATTGCGACATCAGTATAAGGGCGTGATGAAGGT +TCGCTATCGTCGCTTGAGTTATTGATGGGGCCCAAATTGAGATCGAGGTTCATTGTAGTG +GTGTTCTCTTCTGCCATTTGAGAATTTCACAAGTTCTAACAAACGAAAACGCAAATCTTC +GGGACTAATATGCAGAATTTCCCTAAATAGAAGGGGTTTAAAA diff -r 000000000000 -r cecea7fdccf8 test-data/targetOrthos.ids --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/targetOrthos.ids Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,5 @@ +213 +9300 +752 +4632 +4732 diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cds Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,4 @@ +>contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 contig_2:2-511(+) +CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAATTATGTGCGCTTTGATGATTAA +>contig_9|m.2 contig_9|g.2 type:internal len:132 contig_9:2-394(+) +CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.cds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cleaned.cds Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,14 @@ +>contig_2 +CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA +CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA +TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG +GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA +GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG +GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA +CTGTTAAATTATGTGCGCTTTGATGAT +>contig_9 +CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT +GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA +AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA +GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA +ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.nr.cds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cleaned.nr.cds Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,14 @@ +>contig_2 +CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA +CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA +TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG +GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA +GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG +GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA +CTGTTAAATTATGTGCGCTTTGATGAT +>contig_9 +CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT +GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA +AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA +GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA +ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.nr.pep --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cleaned.nr.pep Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,7 @@ +>contig_2 +LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW +VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ +LLNYVRFDD +>contig_9 +LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT +AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.pep --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cleaned.pep Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,7 @@ +>contig_2 +LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW +VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ +LLNYVRFDD +>contig_9 +LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT +AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.pep --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.pep Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,4 @@ +>contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 gc:universal contig_2:2-511(+) +LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD* +>contig_9|m.2 contig_9|g.2 type:internal len:132 gc:universal contig_9:2-394(+) +LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV diff -r 000000000000 -r cecea7fdccf8 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Dec 09 08:42:22 2016 -0500 @@ -0,0 +1,6 @@ + + + value, name, path, description + +
+