changeset 7:4d82da0ca014 draft

Uploaded
author greg
date Mon, 30 Jan 2017 11:03:42 -0500
parents bf355f832557
children e5110df5314b
files .shed.yml assembly_post_processor.xml test-data/targetOrthos.ids test-data/transcripts.cds test-data/transcripts.cleaned.cds test-data/transcripts.cleaned.nr.cds test-data/transcripts.cleaned.nr.pep test-data/transcripts.cleaned.pep test-data/transcripts.cleaned.pep1 test-data/transcripts.cleaned.pep2 test-data/transcripts.pep
diffstat 11 files changed, 171 insertions(+), 64 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Mon Jan 30 11:03:42 2017 -0500
@@ -0,0 +1,16 @@
+name: assembly_post_processor
+owner: iuc
+description: |
+  Contains a tool that postprocesses de novo assembly transcripts into putative coding sequences and their
+  corresponding amino acid translations, locally assembling targeted gene families.
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that
+  utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
+  comparative evolutionary studies. It postprocesses de novo assembly transcripts into putative coding
+  sequences and their corresponding amino acid translations, locally assembling targeted gene families.
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/plant_tribes/assembly_post_processor
+type: unrestricted
+categories:
+- Assembly
+- Phylogenetics
--- a/assembly_post_processor.xml	Mon Jan 30 09:23:36 2017 -0500
+++ b/assembly_post_processor.xml	Mon Jan 30 11:03:42 2017 -0500
@@ -100,29 +100,28 @@
             <param name="prediction_method" value="transdecoder" />
             <param name="options_type_selector" value="basic" />
             <output_collection name="transcripts" type="list">
-                <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" />
-                <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" />
-                <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep" ftype="fasta" />
-                <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" />
+                <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" compare="contains"/>
+                <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" compare="contains"/>
+                <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep1" ftype="fasta" compare="contains"/>
+                <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" compare="contains"/>
             </output_collection>
         </test>
-        <!--
         <test>
             <param name="input" value="assembly.fasta" ftype="fasta" />
             <param name="prediction_method" value="transdecoder" />
             <param name="options_type_selector" value="advanced" />
-            <param name="strand_specific" value="yes" />
+            <param name="gap_trimming" value="0.1" />
             <param name="dereplicate" value="yes" />
+            <param name="min_length" value="200" />
             <output_collection name="transcripts" type="list">
-                <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" />
-                <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" />
-                <element name="transcripts.cleaned.nr.cds" file="transcripts.cleaned.nr.cds" ftype="fasta" />
-                <element name="transcripts.cleaned.nr.pep" file="transcripts.cleaned.nr.pep" ftype="fasta" />
-                <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep" ftype="fasta" />
-                <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" />
+                <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" compare="contains"/>
+                <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" compare="contains"/>
+                <element name="transcripts.cleaned.nr.cds" file="transcripts.cleaned.cds" ftype="fasta" compare="contains"/>
+                <element name="transcripts.cleaned.nr.pep" file="transcripts.cleaned.nr.pep" ftype="fasta" compare="contains"/>
+                <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep2" ftype="fasta" compare="contains"/>
+                <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" compare="contains"/>
             </output_collection>
         </test>
-        -->
     </tests>
     <help>
 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
--- a/test-data/targetOrthos.ids	Mon Jan 30 09:23:36 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-213
-9300
-752
-4632
-4732
--- a/test-data/transcripts.cds	Mon Jan 30 09:23:36 2017 -0500
+++ b/test-data/transcripts.cds	Mon Jan 30 11:03:42 2017 -0500
@@ -1,4 +1,24 @@
->contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 contig_2:2-511(+)
+>contig_1|m.1 contig_1|g.1 type:internal len:115 contig_1:344-3(-)
+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGGTCATGTTGTACACATGGTTAAT
+>contig_2|m.2 contig_2|g.2 type:5prime_partial len:170 contig_2:2-511(+)
 CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAATTATGTGCGCTTTGATGATTAA
->contig_9|m.2 contig_9|g.2 type:internal len:132 contig_9:2-394(+)
+>contig_3|m.3 contig_3|g.3 type:5prime_partial len:126 contig_3:463-86(-)
+GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAACTAG
+>contig_9|m.4 contig_9|g.4 type:internal len:132 contig_9:2-394(+)
 CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
+>contig_11|m.5 contig_11|g.5 type:5prime_partial len:108 contig_11:2-325(+)
+ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGGCGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCCAAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCATGATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCCAGAAACTCCCGCTGTGGCTATTGA
+>contig_11|m.6 contig_11|g.6 type:5prime_partial len:100 contig_11:443-144(-)
+GCTGCTAAAGTCACGGTTGCTCCCGGATCTCGCGTCGCTGCTGCTGGAAGCGGCGTTGCCTCAGAATCTCCGGCGACTGCTGAAGCCGGCGTTGCCCCAGGATCTCCCGCGGTCGACTTCAATAGCCACAGCGGGAGTTTCTGGAGCAACACCGACTGCAGCAGCAACAACGGGAGATTCGGAGCAACTCCGACGGGATATCATGGAGCAACCCCGACTTCAACAGTGGCGACGGGAGATTTTGGAACAACGCCGACTTCAGGAGCCGCGGTGGGAGAGCTTGGAGTAAATCCGACTTAA
+>contig_18|m.7 contig_18|g.7 type:internal len:143 contig_18:426-1(-)
+ACAGAGGTGAGAAGATGGAACAAGCAGGAAGATTGGGGTAGAAAGACATGGAAAGAAGCTAAGGAATCCACATTGCCGAAAATAGTAGGTGAAGGGATCTATGGAGTTGGTCCCATTTTAGCTGCACTCTCATCCGGGCGAAGAGAACTCTACGCGTTGTACGTTCAGGAAGGTTTGGATTTGAGTAGTAACAGTAAGAAGAAGGACAAGAAACGGTTCGAGAGAGTTTTGAAAATGGTGGAAAAGATTGGATTAAGCAAAAAAGAGGTATCCAAACACGACCTCAACATGGTCGTTGATAATAGGCCTCACCAGGGCTTGCTTCTTGATGCTTCGGCACTTGAAATGGTTAGTATAAAGGAATTAGACCCCGTTTCCATTGATGGAGAGAAGTGCCCGCTTTGGTTGGCATTGGATGAGGTTACC
+>contig_20|m.8 contig_20|g.8 type:internal len:104 contig_20:1-309(+)
+GGCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGACGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTCTTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCATTTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGATCA
+>contig_20|m.9 contig_20|g.9 type:internal len:103 contig_20:2-307(+)
+GCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGACGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTCTTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCATTTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGAT
+>contig_20|m.10 contig_20|g.10 type:internal len:103 contig_20:307-2(-)
+ATCCAAGTAGTTGATCCAACGAGACCAGCCCAACATGTAGCGGGTAGGAATGACGAAACCAGTGTAAATGACCAGCGCAAGGATAAGGATAGCTGCGGGAGCCATGGCCTGCGACAGTGTACGCGAAACGGATCCAATAGTGCGGAAGAGCATCGACATGACAAGCGTCAGCGTGAACGATATCAGCAGGAAGAAGAAGAATGCTCCTGGTTCGCGGCGAAGGTTCGTCATGAAGTACAAGATCAAGTTGAAACAGATCGCGTTGCCAATCTTGTAGGGCATATCCGTGAGCATCGATGCGTAAGC
+>contig_21|m.11 contig_21|g.11 type:5prime_partial len:168 contig_21:519-16(-)
+GTTTCCATAGTGAATCAGGAACCTGTTCTATTCTCGGTTTCCATTGGAGAAAATATTGCTTACGGACTCCCAGATGATTATGTTTCCAAGGTCGATGTGATAAAAGCTGCTAAAGCAGCTAATGCTCATGAATTTATCATTGCGTTGCCACAGGGTTATGACACATTAGTTGGTGAGCGTGGTGGGTTGTTAAGTGGAGGACAAAGACAGAGGGTAGCCATTGCAAGGGCTCTGCTCAAGAATGCTCCAATCTTGATTCTTGATGAGGCTACCAGTGCTTTGGACGCAGTCAGTGAACGTCTGGTTCAGGATGCTTTGAACCGTTTGATGAAAGGAAGAACGACTTTAGTGATTGCTCACAGGCTGAGCACCGTTCAAAATGCTGATCAAATTGCTTTATGCTCTGATGGGAAGATTTCAGAACTGGGGACACACTCTGAGTTGTTAGAGCAAAAGGGTCTTTATGCCTCACTGGTTGGCACCCAAAGACTTGCATTCGAGTGA
+>contig_24|m.12 contig_24|g.12 type:internal len:111 contig_24:331-2(-)
+AAAACCCGTGAATTAGAAATCGAACAAATCTTGTTAAAATCCAAGGATTTGGAAAGCGAATTGGAGAGCAAGGGTAGTATGTTTATAAAGGAGACTGAGGCACTTGTTGCAGAAAACTCGAAACTCAGTCAGGAGTTGGGTGCATTTAAATCCGAGCTAAACGATATACAGATGAAATTGAACGTTGTTTCATCTGAGAAAGACGGCACTGTTGAAGAACTAACTAGTGCAAGAAAAGAAATAGAAGAGCTGACTCAGAAGCTTGCTTCTGAAGGACAAAAGCTGCAGTCTCAGATATCTTCTATAATGGAAGAGAACAATTTACTTAAC
--- a/test-data/transcripts.cleaned.cds	Mon Jan 30 09:23:36 2017 -0500
+++ b/test-data/transcripts.cleaned.cds	Mon Jan 30 11:03:42 2017 -0500
@@ -1,14 +1,24 @@
->contig_2
-CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
-CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
-TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
-GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
-GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
-GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
-CTGTTAAATTATGTGCGCTTTGATGAT
->contig_9
-CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
-GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
-AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
-GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
-ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
+>contig_1
+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAAT
+GGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATA
+CCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCT
+ACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGG
+TCATGTTGTACACATGGTTAAT
+>contig_100
+TGCAAGCTTCAGGAAATATCCTCAACTCTCATAACAATCCTTGTCATTCTCTCCCCAGACAAACTCATATTCTTCGCATC
+AACAAACCGCGATTCCTTGAACAATGTCACGATCTCCTCCACCTCATCTACCCCACACTCTTCCACAGTCACAACACAAA
+CCCCACCACTCCTCACCGTCCTTTCAATCTGTCCTGCGTACCGCCTGGGGAACAGAGCCCGATCTAAGTACAGCCCGAGC
+CCGAGATCAAACATTCCATCAAAAAATGGCAAGTTATGGGGATCCGCCCGGCTCACCAGTGGAGGCGCTTCAACCAGTTC
+CACCCCCGTGACGTCCTCCACGCCCAGTTCCTGCAGCGCTTGGACGGCATGGCCTGGCCCGGCGGACGAAACTAGGACCC
+GAGAGTCGTTCGTAATGCAGTTTTGGGCC
+>contig_107
+TTTGGCCTTTACCTTGAACGAATTTCTGCTCGCCAAAGTCGTCTCTTTGTCGATCGCAGATCGCGGGTTGCAGCTCCAGC
+GCAGGCCGAAGTTCGGGGACGACGGAAGCTTCGATTTCCTACTGCACCAGGTCGCAGTCGGGCCCAGTCGCAGATCGCCC
+TGCATCGCCGCCACATGACCGATCTGGAAACCACTGCATGTTTGACTTATGATTTTGGTGCTCGATTTGCGGGTGTTTGT
+GCGAGCGTGAGTTATATGTTAAGAAAATCTTGTAGAAGTGGATCGAGAATTAGAGAGAGGGAGGGGCAAGAAGATGCAGA
+G
+>contig_11
+ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGG
+CGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCC
+AAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCAT
+GATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCC
--- a/test-data/transcripts.cleaned.nr.cds	Mon Jan 30 09:23:36 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
->contig_2
-CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
-CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
-TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
-GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
-GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
-GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
-CTGTTAAATTATGTGCGCTTTGATGAT
->contig_9
-CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
-GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
-AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
-GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
-ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
--- a/test-data/transcripts.cleaned.nr.pep	Mon Jan 30 09:23:36 2017 -0500
+++ b/test-data/transcripts.cleaned.nr.pep	Mon Jan 30 11:03:42 2017 -0500
@@ -1,7 +1,25 @@
->contig_2
-LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
-VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
-LLNYVRFDD
->contig_9
-LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
-AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_100
+CKLQEISSTLITILVILSPDKLIFFASTNRDSLNNVTISSTSSTPHSSTVTTQTPPLLTVLSICPAYRLGNRARSKYSPS
+PRSNIPSKNGKLWGSARLTSGGASTSSTPVTSSTPSSCSAWTAWPGPADETRTRESFVMQFWA
+>contig_107
+FGLYLERISARQSRLFVDRRSRVAAPAQAEVRGRRKLRFPTAPGRSRAQSQIALHRRHMTDLETTACLTYDFGARFAGVC
+ASVSYMLRKSCRSGSRIREREGQEDAE
+>contig_11
+AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGD
+FGTTPTSGAAVGELGVNPTX
+>contig_111
+MGNCCSDVSGGQSAVGGTTSSSAGNDAVDAFIRSRGYNGLFSQIELSLSAANLRDRDVLSKSDPMAVLYMKGSDGLLQEL
+GRTEVVLNSLSPKWIIKYTVTYQFEMVQNLVFRVYDVDTQFHNMDVKALKLEDQQFLGEASCTLSQIVTKPNRILTLDLG
+HEEGSFPAHTRKFGQLMVYAEESVASKTTAELIL
+>contig_115
+DTDVPAGDIGVGGREIGYLFGAYRAERNRWEGVLTGKGSSWGGSLIRPEATGYGLVYYVEHMINYASGGKESFAGKRVAL
+SGSGNVAQYAALKLIELGATVISLSDSKGSLIAEDDKGFTPEIINQ
+>contig_116
+HKTNEDSGAIEAVLVNTNQFYKWFTDLEAAMKSETEEKYQHYVRTLTERIQTCDTILYQVDETLELFNELQLQHQAVATK
+TKTLHDACDRLLLEKQRLFEFADSLRAKLNYFDELENVATSFHS
+>contig_117
+EMLAGTSTRPRTFKKEEADRTALAGMPQSPPHRKAYSYSGQLTNTVGHLKRQQNHPRNHSLDEINLLTEGTVNSSGEDDF
+YRYCNTTSGGVTNNLVDYHNMSQRMEGMEIGGGCCSEERR
--- a/test-data/transcripts.cleaned.pep	Mon Jan 30 09:23:36 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
->contig_2
-LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
-VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
-LLNYVRFDD
->contig_9
-LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
-AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.pep1	Mon Jan 30 11:03:42 2017 -0500
@@ -0,0 +1,25 @@
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_100
+CKLQEISSTLITILVILSPDKLIFFASTNRDSLNNVTISSTSSTPHSSTVTTQTPPLLTVLSICPAYRLGNRARSKYSPS
+PRSNIPSKNGKLWGSARLTSGGASTSSTPVTSSTPSSCSAWTAWPGPADETRTRESFVMQFWA
+>contig_107
+FGLYLERISARQSRLFVDRRSRVAAPAQAEVRGRRKLRFPTAPGRSRAQSQIALHRRHMTDLETTACLTYDFGARFAGVC
+ASVSYMLRKSCRSGSRIREREGQEDAE
+>contig_11
+AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGD
+FGTTPTSGAAVGELGVNPTX
+>contig_111
+MGNCCSDVSGGQSAVGGTTSSSAGNDAVDAFIRSRGYNGLFSQIELSLSAANLRDRDVLSKSDPMAVLYMKGSDGLLQEL
+GRTEVVLNSLSPKWIIKYTVTYQFEMVQNLVFRVYDVDTQFHNMDVKALKLEDQQFLGEASCTLSQIVTKPNRILTLDLG
+HEEGSFPAHTRKFGQLMVYAEESVASKTTAELIL
+>contig_115
+DTDVPAGDIGVGGREIGYLFGAYRAERNRWEGVLTGKGSSWGGSLIRPEATGYGLVYYVEHMINYASGGKESFAGKRVAL
+SGSGNVAQYAALKLIELGATVISLSDSKGSLIAEDDKGFTPEIINQ
+>contig_116
+HKTNEDSGAIEAVLVNTNQFYKWFTDLEAAMKSETEEKYQHYVRTLTERIQTCDTILYQVDETLELFNELQLQHQAVATK
+TKTLHDACDRLLLEKQRLFEFADSLRAKLNYFDELENVATSFHS
+>contig_117
+STLLRAAAAPDLHPLHPLAHIMIIHEVIGNAAAGRVTVTIKIIFAGGIDGAFGEEVDLVEAVVARVVLLPLEVSDGVGKL
+SAVGIGFPMRRRLGHPRQGCSVCFFLLKGARTSGGTSQHL
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.pep2	Mon Jan 30 11:03:42 2017 -0500
@@ -0,0 +1,25 @@
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_100
+CKLQEISSTLITILVILSPDKLIFFASTNRDSLNNVTISSTSSTPHSSTVTTQTPPLLTVLSICPAYRLGNRARSKYSPS
+PRSNIPSKNGKLWGSARLTSGGASTSSTPVTSSTPSSCSAWTAWPGPADETRTRESFVMQFWA
+>contig_107
+FGLYLERISARQSRLFVDRRSRVAAPAQAEVRGRRKLRFPTAPGRSRAQSQIALHRRHMTDLETTACLTYDFGARFAGVC
+ASVSYMLRKSCRSGSRIREREGQEDAE
+>contig_11
+AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGD
+FGTTPTSGAAVGELGVNPTX
+>contig_111
+MGNCCSDVSGGQSAVGGTTSSSAGNDAVDAFIRSRGYNGLFSQIELSLSAANLRDRDVLSKSDPMAVLYMKGSDGLLQEL
+GRTEVVLNSLSPKWIIKYTVTYQFEMVQNLVFRVYDVDTQFHNMDVKALKLEDQQFLGEASCTLSQIVTKPNRILTLDLG
+HEEGSFPAHTRKFGQLMVYAEESVASKTTAELIL
+>contig_115
+DTDVPAGDIGVGGREIGYLFGAYRAERNRWEGVLTGKGSSWGGSLIRPEATGYGLVYYVEHMINYASGGKESFAGKRVAL
+SGSGNVAQYAALKLIELGATVISLSDSKGSLIAEDDKGFTPEIINQ
+>contig_116
+HKTNEDSGAIEAVLVNTNQFYKWFTDLEAAMKSETEEKYQHYVRTLTERIQTCDTILYQVDETLELFNELQLQHQAVATK
+TKTLHDACDRLLLEKQRLFEFADSLRAKLNYFDELENVATSFHS
+>contig_117
+EMLAGTSTRPRTFKKEEADRTALAGMPQSPPHRKAYSYSGQLTNTVGHLKRQQNHPRNHSLDEINLLTEGTVNSSGEDDF
+YRYCNTTSGGVTNNLVDYHNMSQRMEGMEIGGGCCSEERR
--- a/test-data/transcripts.pep	Mon Jan 30 09:23:36 2017 -0500
+++ b/test-data/transcripts.pep	Mon Jan 30 11:03:42 2017 -0500
@@ -1,4 +1,24 @@
->contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 gc:universal contig_2:2-511(+)
+>contig_1|m.1 contig_1|g.1 type:internal len:115 gc:universal contig_1:344-3(-)
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_2|m.2 contig_2|g.2 type:5prime_partial len:170 gc:universal contig_2:2-511(+)
 LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD*
->contig_9|m.2 contig_9|g.2 type:internal len:132 gc:universal contig_9:2-394(+)
+>contig_3|m.3 contig_3|g.3 type:5prime_partial len:126 gc:universal contig_3:463-86(-)
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN*
+>contig_9|m.4 contig_9|g.4 type:internal len:132 gc:universal contig_9:2-394(+)
 LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
+>contig_11|m.5 contig_11|g.5 type:5prime_partial len:108 gc:universal contig_11:2-325(+)
+IYGKGIQNMRTLWGGVAPESPVVVVGGGVTPESPVVGGIVLGSPAVLLKSDLLQALPPRLLKSALFQNLPSPLLKSGLLHDIPSELLRISRCCCCSRCCSRNSRCGY*
+>contig_11|m.6 contig_11|g.6 type:5prime_partial len:100 gc:universal contig_11:443-144(-)
+AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGDFGTTPTSGAAVGELGVNPT*
+>contig_18|m.7 contig_18|g.7 type:internal len:143 gc:universal contig_18:426-1(-)
+TEVRRWNKQEDWGRKTWKEAKESTLPKIVGEGIYGVGPILAALSSGRRELYALYVQEGLDLSSNSKKKDKKRFERVLKMVEKIGLSKKEVSKHDLNMVVDNRPHQGLLLDASALEMVSIKELDPVSIDGEKCPLWLALDEVT
+>contig_20|m.8 contig_20|g.8 type:internal len:104 gc:universal contig_20:1-309(+)
+GLRIDAHGYALQDWQRDLFQLDLVLHDEPSPRTRSILLLPADIVHADACHVDALPHYWIRFAYTVAGHGSRSYPYPCAGHLHWFRHSYPLHVGLVSLDQLLGS
+>contig_20|m.9 contig_20|g.9 type:internal len:103 gc:universal contig_20:2-307(+)
+AYASMLTDMPYKIGNAICFNLILYFMTNLRREPGAFFFFLLISFTLTLVMSMLFRTIGSVSRTLSQAMAPAAILILALVIYTGFVIPTRYMLGWSRWINYLD
+>contig_20|m.10 contig_20|g.10 type:internal len:103 gc:universal contig_20:307-2(-)
+IQVVDPTRPAQHVAGRNDETSVNDQRKDKDSCGSHGLRQCTRNGSNSAEEHRHDKRQRERYQQEEEECSWFAAKVRHEVQDQVETDRVANLVGHIREHRCVS
+>contig_21|m.11 contig_21|g.11 type:5prime_partial len:168 gc:universal contig_21:519-16(-)
+VSIVNQEPVLFSVSIGENIAYGLPDDYVSKVDVIKAAKAANAHEFIIALPQGYDTLVGERGGLLSGGQRQRVAIARALLKNAPILILDEATSALDAVSERLVQDALNRLMKGRTTLVIAHRLSTVQNADQIALCSDGKISELGTHSELLEQKGLYASLVGTQRLAFE*
+>contig_24|m.12 contig_24|g.12 type:internal len:111 gc:universal contig_24:331-2(-)
+KTRELEIEQILLKSKDLESELESKGSMFIKETEALVAENSKLSQELGAFKSELNDIQMKLNVVSSEKDGTVEELTSARKEIEELTQKLASEGQKLQSQISSIMEENNLLN