# HG changeset patch
# User greg
# Date 1481290942 18000
# Node ID cecea7fdccf88637ad56fdf33dc56d42ac236a27
Uploaded
diff -r 000000000000 -r cecea7fdccf8 assembly_post_processor.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/assembly_post_processor.xml	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,143 @@
+
+    pipeline
+    
+        plant_tribes_assembly_post_processor
+    
+    
+        
+        
+        
+        
+        
+    
+    
+        
+    
+    
+        
+        
+            
+                
+                
+            
+            
+            
+                
+            
+        
+        
+            
+                
+                
+            
+            
+            
+                
+                    
+                        
+                        
+                    
+                    
+                    
+                        
+                        
+                            
+                            
+                        
+                        
+                            
+                            
+                            
+                        
+                    
+                
+                
+                
+                    
+                    
+                
+                
+                    
+                    
+                
+                
+            
+        
+    
+    
+        
+            
+        
+    
+    
+        
+            
+            
+            
+            
+            
+            
+                
+                
+                
+                
+                
+                
+            
+        
+    
+    
+This tool is part of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It post-processes de novo assembly
+transcripts into putative coding sequences and their corresponding amino acid translations.
+
+-----
+
+**Options**
+
+ * **Prediction method for coding regions** - The prediction method for coding regions; one of ESTScan or TransDecoder.
+ * **Scores matrices** - Score matrices that reflect the codon preferences of the studied organisms.
+ * **Target gene family assembly?** - Select yes to target gene families to assemble.
+ * **List of orthogroup identifiers for target gene families to assemble** - History item with a list of orthogroup identifiers for target gene families to assemble.  The next version of this tool will use a dynamic select list for this feature.
+ * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
+ * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL.
+ * **Remove gappy sites in alignments** - Removes gappy sites in alignments (e.g., 0.1 removes sites with 90% gaps); valid range is 0.0 to 1.0.
+ * **De novo transcriptome assembly was performed with strand-specific library?** - Select yes if the de novo transcriptome assembly was performed with a strand-specific library.
+ * **Remove repeated sequences in predicted coding regions?** - Select yes to remove repeated sequences in predicted coding regions.
+ * **Minimum sequence length of predicted coding regions** - Set the minimum sequence length of predicted coding regions.
+ 
+    
+    
+            
+                @unpublished{None,
+                author = {None},
+                title = {None},
+                year = {None},
+                eprint = {None},
+                url = {None}
+            }
+    
+
diff -r 000000000000 -r cecea7fdccf8 plant_tribes_scaffolds.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,4 @@
+## PlantTribes scaffolds
+#Value	Name	Path	Description
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
diff -r 000000000000 -r cecea7fdccf8 test-data/assembly.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assembly.fasta	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,73 @@
+>contig_1
+CGATTAACCATGTGTACAACATGACCAATTATGGCCGATCTTGCTTGTTCAGGAGTCTCA
+CTCATCATTCCAAAATCAAGAAAAGCAAGCTTCCCGTCAGGTGTAGCTAAGAGATTCCCT
+GGGTGAGGATCTGCATGAAAATAGCCGTACTCAAGCAGCTGTCGAAGACTGCACTGTATG
+CCGGTATTCACCAGATCCAGAACACTGAGCCCTTGACTCTCAACGGCATCTTGCTCATTT
+AATTTAACACCTTCAACCCACTCCATTGTTAGCACCTTTCCACTCGTGTAATCCCAGAAA
+ATATCTGGGACAAGGATATCTTCCTTGTCTCCATATAATTTTTTAA
+>contig_2
+CCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCC
+GGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCAT
+ACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCC
+CGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATG
+GGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCC
+GATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTAC
+AACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTG
+TACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACA
+ACTGTTAAATTATGTGCGCTTTGATGATTAAAAACACGGGGTAATTGAACTCAAGGGATG
+GTTGAGAATTGTAATATTTCTGTAAGTGTCGGGGGATGGTTGAAATGCTTTTTATTATGA
+AGTGTTAAAATGTAAGATAAA
+>contig_3
+CGGGGGCAAGGTTTACATGAAAGTACATCTACAGGACCTAGATAAGCATCATCACCTGAC
+AAAAAGCCATACATAGTAACACTTGCTAGTTGATCTGTGAAAATGGTGCACGAGTCGTAA
+CAGACGGCATTAGTTCCTTTCCTGTAACAGCAGATTCGTATGAATGGTTTTCTCGAAATT
+CTTCTTCAAGTGCATCTATAGGCATGGCACGAAGTGACTCTATTGTGCCTTTGCTGGGAA
+TATCCGGCTCGCTCCTAACTGGCGTGGACCCTGTAGGCTCGTAATCCATGTATTTTTGCC
+TGAAAGTATCATTCGTGTGCTGTTCGATACAGGATACCTGCTGGGAATGATCTCTCTTAA
+GGTTCTCAATTGTTTCTGAATGAGCTCTAGCAGTTGTCAGAATTTCAGAAACAGATGCCT
+TCTCCTGCTCTGACAAGCCAGCAACAACAACTCCTTCATCCACA
+>contig_4
+CTGACGATGTTCATATTCATGCCACTCTAAATGTATGCCATCCATGTTGAGGAGAAATGC
+TTGTGTAAAGAAGAAACTGGAGTCAAGCGGTCCCGGTTTTACAGTTGAACGTTGCTCTAA
+ATTAGTCAAAGTACCCGACTACCCGCTCATCAAACTGGCGAGCTCGTTTTCCTCGCCGCC
+CTACTCGGTGTCTTGGCGGGGCACATGGGGGTGGCGTGTGGCGTGCGGGGATGCGACAAC
+ATCATAAATTCATAATCGAAGG
+>contig_5
+GTGAGAGTGTCGCCTTTTCCATGCTACCCCTTTCCAGATCTGACTTGTTCGAACCTTCCA
+TCATCCGGTTCGACGTCAGCAGTCGGTCCTTCTTCTCCAAGCACGAGAGCAGACGGTGCG
+AGAGCGAGCAAAGACTCGCCTGCCGCGCCTCTCGTGACCTGGCTCGCATCTCCAGGATTC
+ATGGCTCCTTAGCTCGCCCTTCTTCGATACTTCGCTAGACGCCCGAGGACGTCATCGAAG
+GCGAGGGTCGGCACAACGGAGTGCTCCTTGCATGATGTCGGGGCCTGGCATCACGCGGCA
+TCGGCCAAATCGTCCCTCTCGACGTGCGTCACGGAGAGGGCCGAGCGCCGGGACGACCGC
+GTGGAAGAGCCCGCGGGATGCGGGATCCCGCTTGACGTGTGTTTGTGGCGGACCACCG
+>contig_6
+TATAACAATTCAAAATATTCCTATGAAAACCAAGACCAGAGGCAACACCGGTATAACCAA
+AGCCAGAGGCAATATCCCTTTATTCTTCGGGAACAGAAAAATACACGATATGCTACAGAC
+AAAAAAATAAATCCAAACCATTCTTCTGTGCATAAACATTGTAAAAGTTTATTAGACCGC
+TGTAAAACTCGTAAATGAAATCCCTGGCAATTGAATCTGTTAAACCTGCTCC
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT
+AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG
+TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC
+AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGCG
+AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACG
+>contig_8
+TGGCATGAAGTCGGCTCGACTCGTGCCCGCTCGTCTATAGACGAATTGAGCTAGATTCAC
+TCATTCCATGAACTCGACTCGTTCATGAGTCGGCTCGTAGTTGTCCGGACTCGCTCCATG
+GCCAGCTCTACAACAGACTGCCTGTATGTAGATGATTGTATTGATTTGTTCTTCTCTTGT
+TTAAATCCAACCAAATATCAACATGATATTTGCAATTTC
+>contig_9
+ACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTT
+TGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCC
+TGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAG
+AATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAAC
+AGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAA
+ACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGC
+ACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGA
+>contig_10
+GGGGCATTGCCGCCGATTAATTCGAGCGCGAGGTTTCGCGTATCAGGCGGAATTGGGATT
+TGGCGCCACAAAGAGCGCCACCTATGATTTCTGTGGCGGACGACTTCACGAACTCGGACG
+GGACTATCTAACCAATCTTCCAAGTTCATTGCGACATCAGTATAAGGGCGTGATGAAGGT
+TCGCTATCGTCGCTTGAGTTATTGATGGGGCCCAAATTGAGATCGAGGTTCATTGTAGTG
+GTGTTCTCTTCTGCCATTTGAGAATTTCACAAGTTCTAACAAACGAAAACGCAAATCTTC
+GGGACTAATATGCAGAATTTCCCTAAATAGAAGGGGTTTAAAA
diff -r 000000000000 -r cecea7fdccf8 test-data/targetOrthos.ids
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/targetOrthos.ids	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,5 @@
+213
+9300
+752
+4632
+4732
diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cds
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cds	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,4 @@
+>contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 contig_2:2-511(+)
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAATTATGTGCGCTTTGATGATTAA
+>contig_9|m.2 contig_9|g.2 type:internal len:132 contig_9:2-394(+)
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.cds
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.cds	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,14 @@
+>contig_2
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
+CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
+TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
+GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
+GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
+GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
+CTGTTAAATTATGTGCGCTTTGATGAT
+>contig_9
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
+GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
+AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
+GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
+ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.nr.cds
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.cds	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,14 @@
+>contig_2
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
+CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
+TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
+GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
+GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
+GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
+CTGTTAAATTATGTGCGCTTTGATGAT
+>contig_9
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
+GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
+AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
+GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
+ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.nr.pep
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.pep	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,7 @@
+>contig_2
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
+VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
+LLNYVRFDD
+>contig_9
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
+AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.cleaned.pep
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.pep	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,7 @@
+>contig_2
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
+VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
+LLNYVRFDD
+>contig_9
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
+AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
diff -r 000000000000 -r cecea7fdccf8 test-data/transcripts.pep
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.pep	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,4 @@
+>contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 gc:universal contig_2:2-511(+)
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD*
+>contig_9|m.2 contig_9|g.2 type:internal len:132 gc:universal contig_9:2-394(+)
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
diff -r 000000000000 -r cecea7fdccf8 tool_data_table_conf.xml.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Dec 09 08:42:22 2016 -0500
@@ -0,0 +1,6 @@
+
+    
+        value, name, path, description
+        
+    
+