Mercurial > repos > greg > assembly_post_processor
changeset 7:4d82da0ca014 draft
Uploaded
author | greg |
---|---|
date | Mon, 30 Jan 2017 11:03:42 -0500 |
parents | bf355f832557 |
children | e5110df5314b |
files | .shed.yml assembly_post_processor.xml test-data/targetOrthos.ids test-data/transcripts.cds test-data/transcripts.cleaned.cds test-data/transcripts.cleaned.nr.cds test-data/transcripts.cleaned.nr.pep test-data/transcripts.cleaned.pep test-data/transcripts.cleaned.pep1 test-data/transcripts.cleaned.pep2 test-data/transcripts.pep |
diffstat | 11 files changed, 171 insertions(+), 64 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Mon Jan 30 11:03:42 2017 -0500 @@ -0,0 +1,16 @@ +name: assembly_post_processor +owner: iuc +description: | + Contains a tool that postprocesses de novo assembly transcripts into putative coding sequences and their + corresponding amino acid translations, locally assembling targeted gene families. +homepage_url: https://github.com/dePamphilis/PlantTribes +long_description: | + Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that + utilize objective classifications of complete protein sequences from sequenced plant genomes to perform + comparative evolutionary studies. It postprocesses de novo assembly transcripts into putative coding + sequences and their corresponding amino acid translations, locally assembling targeted gene families. +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/plant_tribes/assembly_post_processor +type: unrestricted +categories: +- Assembly +- Phylogenetics
--- a/assembly_post_processor.xml Mon Jan 30 09:23:36 2017 -0500 +++ b/assembly_post_processor.xml Mon Jan 30 11:03:42 2017 -0500 @@ -100,29 +100,28 @@ <param name="prediction_method" value="transdecoder" /> <param name="options_type_selector" value="basic" /> <output_collection name="transcripts" type="list"> - <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" /> - <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" /> - <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep" ftype="fasta" /> - <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" /> + <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" compare="contains"/> + <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" compare="contains"/> + <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep1" ftype="fasta" compare="contains"/> + <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" compare="contains"/> </output_collection> </test> - <!-- <test> <param name="input" value="assembly.fasta" ftype="fasta" /> <param name="prediction_method" value="transdecoder" /> <param name="options_type_selector" value="advanced" /> - <param name="strand_specific" value="yes" /> + <param name="gap_trimming" value="0.1" /> <param name="dereplicate" value="yes" /> + <param name="min_length" value="200" /> <output_collection name="transcripts" type="list"> - <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" /> - <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" /> - <element name="transcripts.cleaned.nr.cds" file="transcripts.cleaned.nr.cds" ftype="fasta" /> - <element name="transcripts.cleaned.nr.pep" file="transcripts.cleaned.nr.pep" ftype="fasta" /> - <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep" ftype="fasta" /> - <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" /> + <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" compare="contains"/> + <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" compare="contains"/> + <element name="transcripts.cleaned.nr.cds" file="transcripts.cleaned.cds" ftype="fasta" compare="contains"/> + <element name="transcripts.cleaned.nr.pep" file="transcripts.cleaned.pep" ftype="fasta" compare="contains"/> + <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep2" ftype="fasta" compare="contains"/> + <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" compare="contains"/> </output_collection> </test> - --> </tests> <help> This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
--- a/test-data/targetOrthos.ids Mon Jan 30 09:23:36 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -213 -9300 -752 -4632 -4732
--- a/test-data/transcripts.cds Mon Jan 30 09:23:36 2017 -0500 +++ b/test-data/transcripts.cds Mon Jan 30 11:03:42 2017 -0500 @@ -1,4 +1,24 @@ ->contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 contig_2:2-511(+) +>contig_1|m.1 contig_1|g.1 type:internal len:115 contig_1:344-3(-) +AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGGTCATGTTGTACACATGGTTAAT +>contig_2|m.2 contig_2|g.2 type:5prime_partial len:170 contig_2:2-511(+) CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAATTATGTGCGCTTTGATGATTAA ->contig_9|m.2 contig_9|g.2 type:internal len:132 contig_9:2-394(+) +>contig_3|m.3 contig_3|g.3 type:5prime_partial len:126 contig_3:463-86(-) +GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAACTAG +>contig_9|m.4 contig_9|g.4 type:internal len:132 contig_9:2-394(+) CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT +>contig_11|m.5 contig_11|g.5 type:5prime_partial len:108 contig_11:2-325(+) +ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGGCGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCCAAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCATGATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCCAGAAACTCCCGCTGTGGCTATTGA +>contig_11|m.6 contig_11|g.6 type:5prime_partial len:100 contig_11:443-144(-) +GCTGCTAAAGTCACGGTTGCTCCCGGATCTCGCGTCGCTGCTGCTGGAAGCGGCGTTGCCTCAGAATCTCCGGCGACTGCTGAAGCCGGCGTTGCCCCAGGATCTCCCGCGGTCGACTTCAATAGCCACAGCGGGAGTTTCTGGAGCAACACCGACTGCAGCAGCAACAACGGGAGATTCGGAGCAACTCCGACGGGATATCATGGAGCAACCCCGACTTCAACAGTGGCGACGGGAGATTTTGGAACAACGCCGACTTCAGGAGCCGCGGTGGGAGAGCTTGGAGTAAATCCGACTTAA +>contig_18|m.7 contig_18|g.7 type:internal len:143 contig_18:426-1(-) +ACAGAGGTGAGAAGATGGAACAAGCAGGAAGATTGGGGTAGAAAGACATGGAAAGAAGCTAAGGAATCCACATTGCCGAAAATAGTAGGTGAAGGGATCTATGGAGTTGGTCCCATTTTAGCTGCACTCTCATCCGGGCGAAGAGAACTCTACGCGTTGTACGTTCAGGAAGGTTTGGATTTGAGTAGTAACAGTAAGAAGAAGGACAAGAAACGGTTCGAGAGAGTTTTGAAAATGGTGGAAAAGATTGGATTAAGCAAAAAAGAGGTATCCAAACACGACCTCAACATGGTCGTTGATAATAGGCCTCACCAGGGCTTGCTTCTTGATGCTTCGGCACTTGAAATGGTTAGTATAAAGGAATTAGACCCCGTTTCCATTGATGGAGAGAAGTGCCCGCTTTGGTTGGCATTGGATGAGGTTACC +>contig_20|m.8 contig_20|g.8 type:internal len:104 contig_20:1-309(+) +GGCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGACGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTCTTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCATTTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGATCA +>contig_20|m.9 contig_20|g.9 type:internal len:103 contig_20:2-307(+) +GCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGACGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTCTTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCATTTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGAT +>contig_20|m.10 contig_20|g.10 type:internal len:103 contig_20:307-2(-) +ATCCAAGTAGTTGATCCAACGAGACCAGCCCAACATGTAGCGGGTAGGAATGACGAAACCAGTGTAAATGACCAGCGCAAGGATAAGGATAGCTGCGGGAGCCATGGCCTGCGACAGTGTACGCGAAACGGATCCAATAGTGCGGAAGAGCATCGACATGACAAGCGTCAGCGTGAACGATATCAGCAGGAAGAAGAAGAATGCTCCTGGTTCGCGGCGAAGGTTCGTCATGAAGTACAAGATCAAGTTGAAACAGATCGCGTTGCCAATCTTGTAGGGCATATCCGTGAGCATCGATGCGTAAGC +>contig_21|m.11 contig_21|g.11 type:5prime_partial len:168 contig_21:519-16(-) +GTTTCCATAGTGAATCAGGAACCTGTTCTATTCTCGGTTTCCATTGGAGAAAATATTGCTTACGGACTCCCAGATGATTATGTTTCCAAGGTCGATGTGATAAAAGCTGCTAAAGCAGCTAATGCTCATGAATTTATCATTGCGTTGCCACAGGGTTATGACACATTAGTTGGTGAGCGTGGTGGGTTGTTAAGTGGAGGACAAAGACAGAGGGTAGCCATTGCAAGGGCTCTGCTCAAGAATGCTCCAATCTTGATTCTTGATGAGGCTACCAGTGCTTTGGACGCAGTCAGTGAACGTCTGGTTCAGGATGCTTTGAACCGTTTGATGAAAGGAAGAACGACTTTAGTGATTGCTCACAGGCTGAGCACCGTTCAAAATGCTGATCAAATTGCTTTATGCTCTGATGGGAAGATTTCAGAACTGGGGACACACTCTGAGTTGTTAGAGCAAAAGGGTCTTTATGCCTCACTGGTTGGCACCCAAAGACTTGCATTCGAGTGA +>contig_24|m.12 contig_24|g.12 type:internal len:111 contig_24:331-2(-) +AAAACCCGTGAATTAGAAATCGAACAAATCTTGTTAAAATCCAAGGATTTGGAAAGCGAATTGGAGAGCAAGGGTAGTATGTTTATAAAGGAGACTGAGGCACTTGTTGCAGAAAACTCGAAACTCAGTCAGGAGTTGGGTGCATTTAAATCCGAGCTAAACGATATACAGATGAAATTGAACGTTGTTTCATCTGAGAAAGACGGCACTGTTGAAGAACTAACTAGTGCAAGAAAAGAAATAGAAGAGCTGACTCAGAAGCTTGCTTCTGAAGGACAAAAGCTGCAGTCTCAGATATCTTCTATAATGGAAGAGAACAATTTACTTAAC
--- a/test-data/transcripts.cleaned.cds Mon Jan 30 09:23:36 2017 -0500 +++ b/test-data/transcripts.cleaned.cds Mon Jan 30 11:03:42 2017 -0500 @@ -1,14 +1,24 @@ ->contig_2 -CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA -CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA -TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG -GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA -GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG -GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA -CTGTTAAATTATGTGCGCTTTGATGAT ->contig_9 -CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT -GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA -AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA -GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA -ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT +>contig_1 +AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAAT +GGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATA +CCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCT +ACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGG +TCATGTTGTACACATGGTTAAT +>contig_100 +TGCAAGCTTCAGGAAATATCCTCAACTCTCATAACAATCCTTGTCATTCTCTCCCCAGACAAACTCATATTCTTCGCATC +AACAAACCGCGATTCCTTGAACAATGTCACGATCTCCTCCACCTCATCTACCCCACACTCTTCCACAGTCACAACACAAA +CCCCACCACTCCTCACCGTCCTTTCAATCTGTCCTGCGTACCGCCTGGGGAACAGAGCCCGATCTAAGTACAGCCCGAGC +CCGAGATCAAACATTCCATCAAAAAATGGCAAGTTATGGGGATCCGCCCGGCTCACCAGTGGAGGCGCTTCAACCAGTTC +CACCCCCGTGACGTCCTCCACGCCCAGTTCCTGCAGCGCTTGGACGGCATGGCCTGGCCCGGCGGACGAAACTAGGACCC +GAGAGTCGTTCGTAATGCAGTTTTGGGCC +>contig_107 +TTTGGCCTTTACCTTGAACGAATTTCTGCTCGCCAAAGTCGTCTCTTTGTCGATCGCAGATCGCGGGTTGCAGCTCCAGC +GCAGGCCGAAGTTCGGGGACGACGGAAGCTTCGATTTCCTACTGCACCAGGTCGCAGTCGGGCCCAGTCGCAGATCGCCC +TGCATCGCCGCCACATGACCGATCTGGAAACCACTGCATGTTTGACTTATGATTTTGGTGCTCGATTTGCGGGTGTTTGT +GCGAGCGTGAGTTATATGTTAAGAAAATCTTGTAGAAGTGGATCGAGAATTAGAGAGAGGGAGGGGCAAGAAGATGCAGA +G +>contig_11 +ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGG +CGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCC +AAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCAT +GATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCC
--- a/test-data/transcripts.cleaned.nr.cds Mon Jan 30 09:23:36 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ ->contig_2 -CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA -CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA -TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG -GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA -GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG -GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA -CTGTTAAATTATGTGCGCTTTGATGAT ->contig_9 -CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT -GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA -AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA -GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA -ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
--- a/test-data/transcripts.cleaned.nr.pep Mon Jan 30 09:23:36 2017 -0500 +++ b/test-data/transcripts.cleaned.nr.pep Mon Jan 30 11:03:42 2017 -0500 @@ -1,7 +1,25 @@ ->contig_2 -LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW -VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ -LLNYVRFDD ->contig_9 -LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT -AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV +>contig_1 +KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA +TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN +>contig_100 +CKLQEISSTLITILVILSPDKLIFFASTNRDSLNNVTISSTSSTPHSSTVTTQTPPLLTVLSICPAYRLGNRARSKYSPS +PRSNIPSKNGKLWGSARLTSGGASTSSTPVTSSTPSSCSAWTAWPGPADETRTRESFVMQFWA +>contig_107 +FGLYLERISARQSRLFVDRRSRVAAPAQAEVRGRRKLRFPTAPGRSRAQSQIALHRRHMTDLETTACLTYDFGARFAGVC +ASVSYMLRKSCRSGSRIREREGQEDAE +>contig_11 +AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGD +FGTTPTSGAAVGELGVNPTX +>contig_111 +MGNCCSDVSGGQSAVGGTTSSSAGNDAVDAFIRSRGYNGLFSQIELSLSAANLRDRDVLSKSDPMAVLYMKGSDGLLQEL +GRTEVVLNSLSPKWIIKYTVTYQFEMVQNLVFRVYDVDTQFHNMDVKALKLEDQQFLGEASCTLSQIVTKPNRILTLDLG +HEEGSFPAHTRKFGQLMVYAEESVASKTTAELIL +>contig_115 +DTDVPAGDIGVGGREIGYLFGAYRAERNRWEGVLTGKGSSWGGSLIRPEATGYGLVYYVEHMINYASGGKESFAGKRVAL +SGSGNVAQYAALKLIELGATVISLSDSKGSLIAEDDKGFTPEIINQ +>contig_116 +HKTNEDSGAIEAVLVNTNQFYKWFTDLEAAMKSETEEKYQHYVRTLTERIQTCDTILYQVDETLELFNELQLQHQAVATK +TKTLHDACDRLLLEKQRLFEFADSLRAKLNYFDELENVATSFHS +>contig_117 +EMLAGTSTRPRTFKKEEADRTALAGMPQSPPHRKAYSYSGQLTNTVGHLKRQQNHPRNHSLDEINLLTEGTVNSSGEDDF +YRYCNTTSGGVTNNLVDYHNMSQRMEGMEIGGGCCSEERR
--- a/test-data/transcripts.cleaned.pep Mon Jan 30 09:23:36 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ ->contig_2 -LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW -VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ -LLNYVRFDD ->contig_9 -LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT -AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cleaned.pep1 Mon Jan 30 11:03:42 2017 -0500 @@ -0,0 +1,25 @@ +>contig_1 +KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA +TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN +>contig_100 +CKLQEISSTLITILVILSPDKLIFFASTNRDSLNNVTISSTSSTPHSSTVTTQTPPLLTVLSICPAYRLGNRARSKYSPS +PRSNIPSKNGKLWGSARLTSGGASTSSTPVTSSTPSSCSAWTAWPGPADETRTRESFVMQFWA +>contig_107 +FGLYLERISARQSRLFVDRRSRVAAPAQAEVRGRRKLRFPTAPGRSRAQSQIALHRRHMTDLETTACLTYDFGARFAGVC +ASVSYMLRKSCRSGSRIREREGQEDAE +>contig_11 +AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGD +FGTTPTSGAAVGELGVNPTX +>contig_111 +MGNCCSDVSGGQSAVGGTTSSSAGNDAVDAFIRSRGYNGLFSQIELSLSAANLRDRDVLSKSDPMAVLYMKGSDGLLQEL +GRTEVVLNSLSPKWIIKYTVTYQFEMVQNLVFRVYDVDTQFHNMDVKALKLEDQQFLGEASCTLSQIVTKPNRILTLDLG +HEEGSFPAHTRKFGQLMVYAEESVASKTTAELIL +>contig_115 +DTDVPAGDIGVGGREIGYLFGAYRAERNRWEGVLTGKGSSWGGSLIRPEATGYGLVYYVEHMINYASGGKESFAGKRVAL +SGSGNVAQYAALKLIELGATVISLSDSKGSLIAEDDKGFTPEIINQ +>contig_116 +HKTNEDSGAIEAVLVNTNQFYKWFTDLEAAMKSETEEKYQHYVRTLTERIQTCDTILYQVDETLELFNELQLQHQAVATK +TKTLHDACDRLLLEKQRLFEFADSLRAKLNYFDELENVATSFHS +>contig_117 +STLLRAAAAPDLHPLHPLAHIMIIHEVIGNAAAGRVTVTIKIIFAGGIDGAFGEEVDLVEAVVARVVLLPLEVSDGVGKL +SAVGIGFPMRRRLGHPRQGCSVCFFLLKGARTSGGTSQHL
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcripts.cleaned.pep2 Mon Jan 30 11:03:42 2017 -0500 @@ -0,0 +1,25 @@ +>contig_1 +KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA +TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN +>contig_100 +CKLQEISSTLITILVILSPDKLIFFASTNRDSLNNVTISSTSSTPHSSTVTTQTPPLLTVLSICPAYRLGNRARSKYSPS +PRSNIPSKNGKLWGSARLTSGGASTSSTPVTSSTPSSCSAWTAWPGPADETRTRESFVMQFWA +>contig_107 +FGLYLERISARQSRLFVDRRSRVAAPAQAEVRGRRKLRFPTAPGRSRAQSQIALHRRHMTDLETTACLTYDFGARFAGVC +ASVSYMLRKSCRSGSRIREREGQEDAE +>contig_11 +AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGD +FGTTPTSGAAVGELGVNPTX +>contig_111 +MGNCCSDVSGGQSAVGGTTSSSAGNDAVDAFIRSRGYNGLFSQIELSLSAANLRDRDVLSKSDPMAVLYMKGSDGLLQEL +GRTEVVLNSLSPKWIIKYTVTYQFEMVQNLVFRVYDVDTQFHNMDVKALKLEDQQFLGEASCTLSQIVTKPNRILTLDLG +HEEGSFPAHTRKFGQLMVYAEESVASKTTAELIL +>contig_115 +DTDVPAGDIGVGGREIGYLFGAYRAERNRWEGVLTGKGSSWGGSLIRPEATGYGLVYYVEHMINYASGGKESFAGKRVAL +SGSGNVAQYAALKLIELGATVISLSDSKGSLIAEDDKGFTPEIINQ +>contig_116 +HKTNEDSGAIEAVLVNTNQFYKWFTDLEAAMKSETEEKYQHYVRTLTERIQTCDTILYQVDETLELFNELQLQHQAVATK +TKTLHDACDRLLLEKQRLFEFADSLRAKLNYFDELENVATSFHS +>contig_117 +EMLAGTSTRPRTFKKEEADRTALAGMPQSPPHRKAYSYSGQLTNTVGHLKRQQNHPRNHSLDEINLLTEGTVNSSGEDDF +YRYCNTTSGGVTNNLVDYHNMSQRMEGMEIGGGCCSEERR
--- a/test-data/transcripts.pep Mon Jan 30 09:23:36 2017 -0500 +++ b/test-data/transcripts.pep Mon Jan 30 11:03:42 2017 -0500 @@ -1,4 +1,24 @@ ->contig_2|m.1 contig_2|g.1 type:5prime_partial len:170 gc:universal contig_2:2-511(+) +>contig_1|m.1 contig_1|g.1 type:internal len:115 gc:universal contig_1:344-3(-) +KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN +>contig_2|m.2 contig_2|g.2 type:5prime_partial len:170 gc:universal contig_2:2-511(+) LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD* ->contig_9|m.2 contig_9|g.2 type:internal len:132 gc:universal contig_9:2-394(+) +>contig_3|m.3 contig_3|g.3 type:5prime_partial len:126 gc:universal contig_3:463-86(-) +VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN* +>contig_9|m.4 contig_9|g.4 type:internal len:132 gc:universal contig_9:2-394(+) LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV +>contig_11|m.5 contig_11|g.5 type:5prime_partial len:108 gc:universal contig_11:2-325(+) +IYGKGIQNMRTLWGGVAPESPVVVVGGGVTPESPVVGGIVLGSPAVLLKSDLLQALPPRLLKSALFQNLPSPLLKSGLLHDIPSELLRISRCCCCSRCCSRNSRCGY* +>contig_11|m.6 contig_11|g.6 type:5prime_partial len:100 gc:universal contig_11:443-144(-) +AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGDFGTTPTSGAAVGELGVNPT* +>contig_18|m.7 contig_18|g.7 type:internal len:143 gc:universal contig_18:426-1(-) +TEVRRWNKQEDWGRKTWKEAKESTLPKIVGEGIYGVGPILAALSSGRRELYALYVQEGLDLSSNSKKKDKKRFERVLKMVEKIGLSKKEVSKHDLNMVVDNRPHQGLLLDASALEMVSIKELDPVSIDGEKCPLWLALDEVT +>contig_20|m.8 contig_20|g.8 type:internal len:104 gc:universal contig_20:1-309(+) +GLRIDAHGYALQDWQRDLFQLDLVLHDEPSPRTRSILLLPADIVHADACHVDALPHYWIRFAYTVAGHGSRSYPYPCAGHLHWFRHSYPLHVGLVSLDQLLGS +>contig_20|m.9 contig_20|g.9 type:internal len:103 gc:universal contig_20:2-307(+) +AYASMLTDMPYKIGNAICFNLILYFMTNLRREPGAFFFFLLISFTLTLVMSMLFRTIGSVSRTLSQAMAPAAILILALVIYTGFVIPTRYMLGWSRWINYLD +>contig_20|m.10 contig_20|g.10 type:internal len:103 gc:universal contig_20:307-2(-) +IQVVDPTRPAQHVAGRNDETSVNDQRKDKDSCGSHGLRQCTRNGSNSAEEHRHDKRQRERYQQEEEECSWFAAKVRHEVQDQVETDRVANLVGHIREHRCVS +>contig_21|m.11 contig_21|g.11 type:5prime_partial len:168 gc:universal contig_21:519-16(-) +VSIVNQEPVLFSVSIGENIAYGLPDDYVSKVDVIKAAKAANAHEFIIALPQGYDTLVGERGGLLSGGQRQRVAIARALLKNAPILILDEATSALDAVSERLVQDALNRLMKGRTTLVIAHRLSTVQNADQIALCSDGKISELGTHSELLEQKGLYASLVGTQRLAFE* +>contig_24|m.12 contig_24|g.12 type:internal len:111 gc:universal contig_24:331-2(-) +KTRELEIEQILLKSKDLESELESKGSMFIKETEALVAENSKLSQELGAFKSELNDIQMKLNVVSSEKDGTVEELTSARKEIEELTQKLASEGQKLQSQISSIMEENNLLN