Mercurial > repos > jjohnson > scythe
changeset 1:b0276d1141fe default tip
Fix test case
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 30 Jan 2014 13:10:12 -0600 (2014-01-30) |
parents | 08439b004404 |
children | |
files | scythe.xml test-data/matches.txt test-data/trimmed_sequences.fastq |
diffstat | 3 files changed, 76 insertions(+), 100 deletions(-) [+] |
line wrap: on
line diff
--- a/scythe.xml Mon Jan 13 14:57:53 2014 -0500 +++ b/scythe.xml Thu Jan 30 13:10:12 2014 -0600 @@ -76,9 +76,9 @@ <tests> <test> - <param name="single_or_paired" value="pe"/> <param name="input_fastq" value="reads.fastq" ftype="fastqillumina"/> <param name="adapter_file" value="illumina_adapters.fa" ftype="fasta"/> + <param name="add_tag" value="False"/> <param name="matches_file" value="True"/> <param name="prior" value="0.3"/> <param name="min_match" value="5"/>
--- a/test-data/matches.txt Mon Jan 13 14:57:53 2014 -0500 +++ b/test-data/matches.txt Thu Jan 30 13:10:12 2014 -0600 @@ -1,64 +1,40 @@ -p(c|s): 0.999973; p(!c|s): 0.000027; adapter: solexa-reverse -SOLEXA2_0414:3:1:12658:1069#0/1 -AGATCGGAAGAGCGGT -|||||||||||| ||| -AGATCGGAAGAGNGGT -_______Q__BBBBBB -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] - -p(c|s): 1.000000; p(!c|s): 0.000000; adapter: solexa-reverse -SOLEXA2_0414:3:1:3923:1105#0/1 -AGATCGGAAGAGCGGTTCAG -|||||||||||||||||||| -AGATCGGAAGAGCGGTTCAG -b____YWWYYVVTVVYYYYY -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] +p(c|s): 0.581352; p(!c|s): 0.418648; adapter: Multiplexing_adapter_2 +SOLEXA2_0414:3:1:17191:1069#0/1 +CACTCTTTCC +||| || +CACAAGTTTT +BBBBBBBBBB +[0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61] -p(c|s): 1.000000; p(!c|s): 0.000000; adapter: solexa-reverse -SOLEXA2_0414:3:1:9865:1095#0/1 -AGATCGGAAGAGCGGTTCAGC -||||||||||||||||||||| -AGATCGGAAGAGCGGTTCAGC -______[[[[Y__________ -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] - -p(c|s): 1.000000; p(!c|s): 0.000000; adapter: solexa-forward -SOLEXA2_0414:3:1:11267:1102#0/1 -AGATCGGAAGAGCG -|||||||||||||| -AGATCGGAAGAGCG -BBBBBBBBBBBBBB -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] +p(c|s): 0.534091; p(!c|s): 0.465909; adapter: Multiplexing_adapter_1 +SOLEXA2_0414:3:1:19703:1080#0/1 +GATCGGAAGAGCACA +|| || | | | +GAAGGGTATGGTTGA +BBBBBBBBBBBBBBB +[0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61] -p(c|s): 0.999999; p(!c|s): 0.000001; adapter: solexa-forward -SOLEXA2_0414:3:1:13556:1104#0/1 -AGATCGGAAGAG -|||||||||||| -AGATCGGAAGAG -BBBBBBBBBBBB -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] - -p(c|s): 0.999995; p(!c|s): 0.000005; adapter: solexa-forward -SOLEXA2_0414:3:1:13625:1097#0/1 -AGATCGGAAGA -||||||||||| -AGATCGGAAGA -Y____BBBBBB -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] +p(c|s): 0.581352; p(!c|s): 0.418648; adapter: Multiplexing_adapter_1 +SOLEXA2_0414:3:1:18860:1082#0/1 +GATCGGAAGA +| || | | +GGCCGCATGT +BBBBBBBBBB +[0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61] -p(c|s): 1.000000; p(!c|s): 0.000000; adapter: solexa-reverse -SOLEXA2_0414:3:1:10939:1119#0/1 -AGATCGGAAGAGCGGTTCAGCAGGAATGC -||||||| |||||||||||||||||| || -AGATCGGGAGAGCGGTTCAGCAGGAAGGC -BBBBBBBBBBBBBBBBBBBBBBBBBBBBB -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] +p(c|s): 0.729153; p(!c|s): 0.270847; adapter: Multiplexing_adapter_2 +SOLEXA2_0414:3:1:13556:1104#0/1 +CACTCTTTC +|| | || +CAGACCTTA +BBBBBBBBB +[0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61] -p(c|s): 1.000000; p(!c|s): 0.000000; adapter: solexa-reverse -SOLEXA2_0414:3:1:12110:1126#0/1 -AGATCGGAAGAGCGGTTCAGCAGGAATGCCG -||||||||||||||||||||||||||||||| -AGATCGGAAGAGCGGTTCAGCAGGAATGCCG -____Y______YYYYYQWWWW___S_BBBBB -[1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00] +p(c|s): 0.745940; p(!c|s): 0.254060; adapter: Multiplexing_adapter_2 +SOLEXA2_0414:3:1:7605:1117#0/1 +CACTCTTTCCCTACACGACGCTC +|| || || | | || | +CAGTCACCACCCAAAAGAAAATA +BBBBBBBBBBBBBBBBBBBBBBB +[0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61, 0.61]
--- a/test-data/trimmed_sequences.fastq Mon Jan 13 14:57:53 2014 -0500 +++ b/test-data/trimmed_sequences.fastq Thu Jan 30 13:10:12 2014 -0600 @@ -1,15 +1,15 @@ @SOLEXA2_0414:3:1:11146:1065#0/1 -NTGCGATCAAGGGATAAGGGTGGGACACTGCGGGAGATTCAGAGTAACAGTTNTAAGGTCTGGTTTGTCCGGACACTGCGGNAGA +N + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +B @SOLEXA2_0414:3:1:12658:1069#0/1 -NCTACATACAGTTGTAAGGTCTGGTTTGTCCTAGGCAGAGGACACTGCGGGAGATTCAGAGTATGTAGC +NCTACATACAGTTGTAAGGTCTGGTTTGTCCTAGGCAGAGGACACTGCGGGAGATTCAGAGTATGTAGCA + -BOIOKPTTTT_________________________________________________TYYYYY____ +BOIOKPTTTT_________________________________________________TYYYYY_____ @SOLEXA2_0414:3:1:17191:1069#0/1 -NAGCAATCTATAGTTTGTGTATTGCCAAGTCTGTTGTGAGCCCTTCCACAAGTTTTGTTTAGAGGAGAACGAGCGCCCTCTNGAT +NAGCAATCTATAGTTTGTGTATTGCCAAGTCTGTTGTGAGCCCTTC + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:6993:1073#0/1 NTGGATCTCCCGCAGTGTCCTAAATGGACAAACCAGACCTTACAACTGTTACTCTGAATCTCCCGCAGTGTCCTCTGCCTAGGAC + @@ -23,9 +23,9 @@ + BEKEEJOHHHTTVVV_____[[Y[[WWVXXXVXXWRRPTTWXXXWWQQWXWWWTWKKLLLLLLHRVTVVNTTVVT_____YWYYY @SOLEXA2_0414:3:1:12624:1071#0/1 -NTGCGATAGTGTCCTGTGTGCCCCTGCACATGCCTGTGGCTTCCCGGGCTTCTGACCACATCCCCACCATCTTTGTGCCTCCGGA +NTGCGATAGTGTCCTGTGTGCCCCTGCACATGCCTGTGGCTTCCCGGGCTTCTGACCACATCCCCACCATCTTTGTGC + -BQLLMTTTTT__________YYYYY________b___________BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BQLLMTTTTT__________YYYYY________b___________BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:13769:1080#0/1 NAGCAATCTAAAACCCAAAGTATATAAGAAGGGTATGGTTGATTATGTTTTTCTACATATTATTTGACATACTTCTATCTTCCCA + @@ -39,21 +39,21 @@ + BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:16010:1079#0/1 -NAGCAATTACTCTGAATCTCCCGCAGTGTCCTCTGCCTAGGACAAACCAGACCTTACAACTGTTACTCTGAATCTCCCGCAGTGT +NAGCAATTACTCTGAATCTCCCGCAGTGTCCTCTGCCTAGGACAAACCAGACCTTACAACTGTTACTCTGAATCTCCCG + -BKJQQVWVVV_______________WYYYY_______________V____Y[[[[_________________________BBBBB +BKJQQVWVVV_______________WYYYY_______________V____Y[[[[________________________ @SOLEXA2_0414:3:1:17165:1075#0/1 -NTGCGATCTAGGACACTGCGGGAGATTCAGAGTACAGTTGTAAGGTCTGGTTTGTCCTGGACACTGCGGGAGATTCAGAGTAACA +N + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +B @SOLEXA2_0414:3:1:19504:1081#0/1 NAGCAATACTTCTATCTTCCCATGTTCTTACTATAGTTTGTGTATTGCCAAGTCTGTTGTGAGCCACATAATAAGGTGTCAAAAT + BJJOJRRQQRYYYYY__b_______[[[[PWWTWWYYYYY[Y[[[YYYYUTTWVVY[YY[_______________VTTTVTVVTV @SOLEXA2_0414:3:1:19703:1080#0/1 -NAGCAATATTGAATTAAATATATGCCAGTGGACTACTAAAACCCAAAGTATATAAGAAGGGTATGGTTGATTATGTTTTTCTACA +NAGCAATATTGAATTAAATATATGCCAGTGGACTACTAAAACCCAAAGTATATAA + -BIKOIVWVVVY[Y[YVVTVTYYYYY_W___BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BIKOIVWVVVY[Y[YVVTVTYYYYY_W___BBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:6962:1084#0/1 NTGCGATCTGAATCTCCCGCAGTGTCCGGGTGGGGTCAGAAAAATGGGCAGCTCAACTGCAGGTACACGTAGTGCGACCAGTGCC + @@ -79,45 +79,45 @@ + BHMOGRTTTTY[[[Y__________TVVVTYVYYY____WYYYYRPRWRR[[Y[[_____WWWTWWTWWWYYYYY[R[[YYYYYY @SOLEXA2_0414:3:1:18860:1082#0/1 -NAGCAATACAGTTGTAAGGTCTGGTTTGTCCTCACTCAAATGTCATCTCGAATTGTAGTTCTCATAATGGCCGCATGTTGTGGGA +NAGCAATACAGTTGTAAGGTCTGGTTTGTCCTCACTCAAATGTCATCTCGAATTGTAGTTCTCATAAT + -BHOKHSWTTT____________________BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BHOKHSWTTT____________________BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:18912:1089#0/1 -NTGCGATCATCTGAGGAGGACACTGCGGGAGATTCAGAGTAACAGTTGTAAGGTCTGGTTTGTCCTGGACACTGCGGGAGATTCA +N + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +B @SOLEXA2_0414:3:1:3923:1105#0/1 -NAGCAATAGTCCAGGGATACAGAACCCACTGATACAGAGGCCCAGCTGTAGTTCTATTATTGCTC +NAGCAATAGTCCAGGGATACAGAACCCACTGATACAGAGGCCCAGCTGTAGTTCTATTATTGCTCAGATCGGAAGAGCGGTTCAG + -BNNNQVWVVV__bbb__b_bb____b________b__Z_______[[[[YYYYYY_____b___b +BNNNQVWVVV__bbb__b_bb____b________b__Z_______[[[[YYYYYY_____b___bb____YWWYYVVTVVYYYYY @SOLEXA2_0414:3:1:6494:1099#0/1 -NTGCGATTGGTTTGTCCTAGGCAGAGGACACTGCGGGAGATTCAGAGTACAGTTGTAAGGTCTGGTTTGTCCTAGGCAGAATCGC +N + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +B @SOLEXA2_0414:3:1:8413:1097#0/1 NTGCGATCCATACCCTTCTTATATACTTTGGGTTTTAGTAGTCCACTGGCATATATTTAATTCAATACATACTCATTAAGCAATG + BGIGEMQLNQ_______________VXXXX_____WWXXX[YY[[_____WYRYY___________V___YLYYYYYYPY_____ @SOLEXA2_0414:3:1:9865:1095#0/1 -NCTACATTAATTGTTTTAATTTTTATTTATTTATTTTTTTGAGACGGAGTCTCGCTGATGTAGC +NCTACATTAATTGTTTTAATTTTTATTTATTTATTTTTTTGAGACGGAGTCTCGCTGATGTAGCAGATCGGAAGAGCGGTTCAGC + -BKKJKRTTST_____QQQ_b[[[[[QQQQ_QQQbQQQQQQQ_________bb____________ +BKKJKRTTST_____QQQ_b[[[[[QQQQ_QQQbQQQQQQQ_________bb__________________[[[[Y__________ @SOLEXA2_0414:3:1:10606:1102#0/1 NTGCGATATTTGGAACTTCATGTGTCTTTTGTATTGCTTGATAGCAGCATAGAATATGTGAGGTATTTTTGGCTACTTTGTGAAT + BMMMMTUTTRTTVWV_____b___b__b_QQQQ__[Y[[YYYYYY_______________[[[[[YYYQQQ____BBBBBBBBBB @SOLEXA2_0414:3:1:11267:1102#0/1 -NCTACATTACTCTGAATCTCCCGCAGTGTCCGAGATCGCACCACTGCACCCCAGCCTGGGCGACATGTAGC +NCTACATTACTCTGAATCTCCCGCAGTGTCCGAGATCGCACCACTGCACCCCAGCCTGGGCGACATGTAGCA + -BEGGEJLHIK______________RTWWWWY[Y[[________________QM__YYYYYXWJVX_____B +BEGGEJLHIK______________RTWWWWY[Y[[________________QM__YYYYYXWJVX_____BB @SOLEXA2_0414:3:1:13556:1104#0/1 -NTGCGATTTACAACCGTTACTCTGAATCTCCCGCAGTGTCCAGGACAAACCAGACCTTACAACTGTATCGCAA +NTGCGATTTACAACCGTTACTCTGAATCTCCCGCAGTGTCCAGGACAAAC + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:13625:1097#0/1 -NTGCGATCTTACAACTGTTACTCTGAATCTCCCGCAGTGTCCAGGACAAACCAGACCTTACAACTGTATCGCAA +NTGCGATCTTACAACTGTTACTCTGAATCTCCCGCAGTGTCCAGGACAAACCAGACCTTACAACTGTATCGCAAA + -BJKJJTUUST___b_b_____b______________________________bW_bb__bb____[[Y[YYYYY +BJKJJTUUST___b_b_____b______________________________bW_bb__bb____[[Y[YYYYYY @SOLEXA2_0414:3:1:14055:1098#0/1 NAGCAATCTCTGAATCTCCCGCAGTGTCCAGGACAAACCAGACCTTACAACTGTTACTCTGAATCTCCCGCAGTGTCCAGGACAA + @@ -155,9 +155,9 @@ + BIKOOPTTTT_____QQQQ_YYYYYYYYYY_____YYYYY___________________________________YQYYY___Y_ @SOLEXA2_0414:3:1:7605:1117#0/1 -NAGCAATCCTTTAGTTCTAGTGTCTAGGGATATGCAAAGTCAGAAAAAACCAGTCACCACCCAAAAGAAAATAGTGCAGACAAGG +NAGCAATCCTTTAGTTCTAGTGTCTAGGGATATGCAAAGTCAGAAAAAAC + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:13482:1108#0/1 NCTACATCTGAATCTCCCGCAGTGTCCGAGATCGCACCACTGCACCCCAGCCTGGGCGACAGCGAGACTCCGTCTCAAAAAAATA + @@ -171,9 +171,9 @@ + BJJOHRTRTT[QQQ[___b____b______[V[[[bbbbb_b___b_b_____________________________________ @SOLEXA2_0414:3:1:17840:1114#0/1 -NAGCAATCTCTAGATTACTTATCATACCTAATACAATGTAAATAGTTGTTATACTGTATTGTTTAGGAAATAATGACAAGAAAAA +NAGCAATCTCTAGATTACTTATCATACCTAATACAATGTAAATAGTTGTTATACTGTATTGTTTAGGAAATAAT + -BOIIIVWWVV_____Y[[[[[[[W[TWTWRYYYYYY[YYY____R_____BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +BOIIIVWWVV_____Y[[[[[[[W[TWTWRYYYYYY[YYY____R_____BBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:18083:1109#0/1 NAGCAATTACTCTGAATCTCCCGCAGTGTCCAATACTGTACTTTTTTACATAGTCATTGCTTAATGAATATGTATTGAATTAAAT + @@ -183,18 +183,18 @@ + BIIKHQTTSS[[[Y[_____YYYYYTWVVV_____b___^_________________________YYYYY_____BBBBBBBBBB @SOLEXA2_0414:3:1:4029:1119#0/1 -NAGCAATGGGAGAGTGTTGTTTTTGTCACTTGCCCCAGCAGAGCAGGGGTTTTGGAAGGAGAGGTCTTTAATAGCTTTGAAAATT +N + -BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +B @SOLEXA2_0414:3:1:10939:1119#0/1 -NCTACATACAGTTGTAAGGTCTGGTTTGTCCTGGACACTGCGGGAGATTATGTAGC +NCTACATACAGTTGTAAGGTCTGGTTTGTCCTGGACACTGCGGGAGATTATGTAGCA + -BQKQLVWVWV_____[Y[[[__________BBBBBBBBBBBBBBBBBBBBBBBBBB +BQKQLVWVWV_____[Y[[[__________BBBBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:11098:1122#0/1 NAGCAATGTTGAGTCCAGGGATACAGAACCCACTGATACAGAGGCCCAGCTGTAGTTCTATTACCAAGTTTTGTACCTTTGTAGC + BEMGGQQORMVVVVTWWWWWOVTVVWRWWW_______________[[T[YTVTTV_____BBBBBBBBBBBBBBBBBBBBBBBBB @SOLEXA2_0414:3:1:12110:1126#0/1 -NTGCGATTGTAAGGTCTGGTTTGTCCTGGACACTGCGGGAGATTCAGATCGCAA +NTGCGATTGTAAGGTCTGGTTTGTCCTGGACACTGCGGGAGATTCAGATCGCAAAGATCGGAAGAGCGGTTCAGCAGGAATGCCG + -BIIIHQRRRQ[[[[[b__bb_b___YYYYY_____UWWWT[T[[Y__bb_____ +BIIIHQRRRQ[[[[[b__bb_b___YYYYY_____UWWWT[T[[Y__bb_________Y______YYYYYQWWWW___S_BBBBB