Mercurial > repos > nick > duplex
annotate consensus.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
rev | line source |
---|---|
18
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
1 #include <stdio.h> |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
2 #include <stdlib.h> |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
3 #include <string.h> |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
4 #include <ctype.h> |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
5 #include <limits.h> |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
6 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
7 // N.B. This defines the valid bases, but it's also effectively defined in the switches in |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
8 // get_votes_simple(), get_votes_qual(), and get_base_prime(), and in the constant IUPAC_BASES. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
9 #define N_BASES 6 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
10 const char *BASES = "ACGTN-"; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
11 /* A C G T N - A: 2 Compute IUPAC ambiguous base character by representing each base |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
12 A 4 6 10 14 22 26 C: 3 with a prime and multiplying. Then use a lookup table (an array |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
13 C 9 15 21 33 39 G: 5 where the index is the product of the two primes). |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
14 G 25 35 55 65 T: 7 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
15 T 49 77 91 N: 11 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
16 N 121 143 -: 13 1 2 3 4 5 6 7 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
17 - 169 01234567890123456789012345678901234567890123456789012345678901234567890*/ |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
18 const char *IUPAC_BASES = "N...A.M..CR...WS.....YN..GN......N.K...N.........T.....N.........N....." |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
19 // 8 9 10 11 12 13 14 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
20 "......N.............N.............................N..................." |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
21 // 15 16 17 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
22 "..N.........................-"; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
23 #define THRES_DEFAULT 0.5 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
24 #define WIN_LEN 4 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
25 #define GAP_CHAR ' ' |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
26 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
27 int **get_votes_simple(char *align[], int n_seqs, int seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
28 int **get_votes_qual(char *align[], char *quals[], int n_seqs, int seq_len, char thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
29 int init_gap_qual_window(int *window, char *quals, int seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
30 char get_gap_qual(int *window); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
31 int push_qual(int *window, int win_edge, char *quals, int seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
32 void print_window(int *window, int win_edge); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
33 int **init_votes(int seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
34 void free_votes(int *votes[], int seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
35 void print_votes(char *consensus, int *votes[], int seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
36 char *rm_gaps(char *consensus, int cons_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
37 char *build_consensus(int *votes[], int seq_len, double thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
38 char *build_consensus_duplex(int *votes1[], int *votes2[], int seq_len, double thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
39 char *build_consensus_duplex_simple(char *cons1, char *cons2, int gapped); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
40 int get_base_prime(char base); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
41 char *get_consensus(char *align[], char *quals[], int n_seqs, int seq_len, double thres, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
42 char qual_thres, int gapped); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
43 char *get_consensus_duplex(char *align1[], char *align2[], char *quals1[], char *quals2[], |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
44 int n_seqs1, int n_seqs2, int seq_len, double cons_thres, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
45 char qual_thres, int gapped, char *method); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
46 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
47 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
48 // Tally the different bases at each position in an alignment. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
49 // Returns an array of arrays: for each position in the alignment, an array of the number of times |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
50 // each base occurs at that position. The order of bases is as in the "BASES" constant. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
51 int **get_votes_simple(char *align[], int n_seqs, int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
52 int **votes = init_votes(seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
53 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
54 // Tally votes for each base. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
55 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
56 for (i = 0; i < n_seqs; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
57 for (j = 0; j < seq_len; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
58 // N.B.: Could write this without hardcoded literals, but it's about 40% slower. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
59 switch (toupper(align[i][j])) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
60 case 'A': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
61 votes[j][0]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
62 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
63 case 'C': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
64 votes[j][1]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
65 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
66 case 'G': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
67 votes[j][2]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
68 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
69 case 'T': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
70 votes[j][3]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
71 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
72 case 'N': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
73 votes[j][4]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
74 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
75 case '-': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
76 votes[j][5]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
77 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
78 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
79 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
80 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
81 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
82 return votes; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
83 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
84 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
85 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
86 // Tally votes for each base, ignoring bases with a quality score below "thres". |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
87 int **get_votes_qual(char *align[], char *quals[], int n_seqs, int seq_len, char thres) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
88 int **votes = init_votes(seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
89 int *window = malloc(sizeof(int) * WIN_LEN * 2); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
90 int win_edge; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
91 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
92 // Tally votes for each base. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
93 char qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
94 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
95 for (i = 0; i < n_seqs; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
96 win_edge = init_gap_qual_window(window, quals[i], seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
97 for (j = 0; j < seq_len; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
98 // Figure out the quality score of the base (or gap). |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
99 if (align[i][j] == '-') { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
100 qual = get_gap_qual(window); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
101 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
102 win_edge = push_qual(window, win_edge, quals[i], seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
103 qual = quals[i][j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
104 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
105 // Don't count bases whose quality is less than the threshold. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
106 if (qual < thres) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
107 continue; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
108 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
109 // N.B.: Could write this without hardcoded literals, but it's about 40% slower. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
110 switch (toupper(align[i][j])) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
111 case 'A': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
112 votes[j][0]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
113 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
114 case 'C': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
115 votes[j][1]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
116 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
117 case 'G': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
118 votes[j][2]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
119 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
120 case 'T': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
121 votes[j][3]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
122 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
123 case 'N': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
124 votes[j][4]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
125 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
126 case '-': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
127 votes[j][5]++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
128 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
129 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
130 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
131 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
132 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
133 free(window); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
134 return votes; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
135 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
136 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
137 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
138 /* Tally votes for each base, weighting by the PHRED score of the base. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
139 * This is based on the theory of PHRED scores representing the literal probability of the base call |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
140 * being erroneous. Thus, if two reads show a C at a position, both with PHRED 20 (1/100 chance of |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
141 * error), then the chances of them both being wrong are 1/100 * 1/100 = 1/10000 (PHRED 40). |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
142 * So, theoretically and intuitively, it makes sense to trust two PHRED 20 C's over one PHRED 30 A. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
143 * This seems better than arbitrarily deciding not to consider bases below a PHRED score threshold. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
144 * How to decide when not to call the base? We could just say that if no base's score total is above |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
145 * a certain threshold, we call it an N. Theoretically, this threshold is the confidence we want in |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
146 * our final base calls. This could even replace the arbitrary 3 reads for a consensus threshold. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
147 */ |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
148 int **get_votes_weighted(char *align[], char *quals[], int n_seqs, int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
149 int **votes = init_votes(seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
150 int *window = malloc(sizeof(int) * WIN_LEN * 2); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
151 int win_edge; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
152 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
153 // Tally votes for each base. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
154 char qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
155 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
156 for (i = 0; i < n_seqs; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
157 win_edge = init_gap_qual_window(window, quals[i], seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
158 for (j = 0; j < seq_len; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
159 // Figure out the quality score of the base (or gap). |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
160 if (align[i][j] == '-') { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
161 qual = get_gap_qual(window); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
162 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
163 win_edge = push_qual(window, win_edge, quals[i], seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
164 qual = quals[i][j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
165 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
166 // N.B.: Could write this without hardcoded literals, but it's about 40% slower. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
167 switch (toupper(align[i][j])) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
168 case 'A': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
169 votes[j][0] += qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
170 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
171 case 'C': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
172 votes[j][1] += qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
173 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
174 case 'G': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
175 votes[j][2] += qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
176 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
177 case 'T': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
178 votes[j][3] += qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
179 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
180 case 'N': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
181 votes[j][4] += qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
182 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
183 case '-': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
184 votes[j][5] += qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
185 break; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
186 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
187 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
188 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
189 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
190 free(window); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
191 return votes; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
192 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
193 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
194 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
195 /* Calculation of gap quality scores: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
196 * "window" is an array of length 2*WIN_LEN, holding the quality scores of the non-gap bases WIN_LEN |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
197 * from the current one in both directions. When we're at the start or end, fill the slots beyond |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
198 * the edge of the sequence with -1 as a sentinel for "N/A". For example, after the 2nd base in the |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
199 * sequence, if WIN_LEN is 4 and there are no gaps yet, "window" should look something like this: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
200 * base coordinates 0 1 2 3 4 5 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
201 * quality scores = 9 A > E B |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
202 * array values [-1|-1|28|24||32|29|36|33] |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
203 * Usage: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
204 * Allocate "window" and call init_gap_qual_window() to initialize it by filling its right side with |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
205 * the first WIN_LEN non-gap quality scores in the sequence. It will also return "win_edge", which |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
206 * tracks the coordinate of the rightmost quality score in "window". |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
207 * Then, start traversing the array of quality scores. For each score, if it's a gap character, call |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
208 * get_gap_qual() to get the computed quality score at the gap. Otherwise, call push_qual() to add |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
209 * another quality score to the end of the window. This will keep the window filled with all non-gap |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
210 * quality scores, and each time you call get_gap_qual(), you should be at the gap between the two |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
211 * quality scores at the center of the window. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
212 */ |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
213 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
214 // This does the initial fill of the window array, adding the first WIN_LEN bases to the right side |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
215 // and filling the left side with -1's. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
216 int init_gap_qual_window(int *window, char *quals, int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
217 // Fill left side with -1's (no quality information). |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
218 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
219 for (i = 0; i < WIN_LEN; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
220 window[i] = -1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
221 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
222 // Fill right side with first WIN_LEN quality scores. Skip gaps, and if you run out of quality |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
223 // scores (if seq_len < WIN_LEN), fill the rest with -1. Leave win_edge at the last base we added. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
224 i = WIN_LEN; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
225 int win_edge = -1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
226 int quals_added = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
227 while (quals_added < WIN_LEN) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
228 win_edge++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
229 if (win_edge >= seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
230 win_edge = seq_len; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
231 window[i] = -1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
232 i++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
233 quals_added++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
234 } else if (quals[win_edge] != GAP_CHAR) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
235 window[i] = quals[win_edge]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
236 i++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
237 quals_added++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
238 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
239 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
240 return win_edge; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
241 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
242 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
243 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
244 // Push the next non-gap quality score onto the right side of the window. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
245 int push_qual(int *window, int win_edge, char *quals, int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
246 // Find the next quality score that's not a gap. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
247 char next_qual = GAP_CHAR; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
248 while (next_qual == GAP_CHAR) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
249 win_edge++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
250 if (win_edge < seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
251 next_qual = quals[win_edge]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
252 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
253 win_edge = seq_len; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
254 next_qual = -1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
255 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
256 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
257 // Shift all the quality scores left add the new one. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
258 int last_qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
259 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
260 for (i = WIN_LEN*2 - 1; i >= 0; i--) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
261 last_qual = window[i]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
262 window[i] = next_qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
263 next_qual = last_qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
264 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
265 return win_edge; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
266 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
267 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
268 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
269 // Compute the quality of the gap based on a weighted average of the quality scores in the window. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
270 // The scores near the center of the window are weighted higher than the ones further away. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
271 char get_gap_qual(int *window) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
272 int score_sum = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
273 int weight_sum = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
274 int weight = 1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
275 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
276 for (i = 0; i < WIN_LEN*2; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
277 if (window[i] != -1) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
278 score_sum += window[i] * weight; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
279 weight_sum += weight; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
280 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
281 // Increase the weight until we get to the middle of the window (at WIN_LEN), then decrease it. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
282 if (i < WIN_LEN - 1) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
283 weight++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
284 } else if (i > WIN_LEN - 1) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
285 weight--; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
286 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
287 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
288 if (weight_sum > 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
289 // Divide by the sum of the weights to get the final quality score. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
290 return (char) (score_sum/weight_sum); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
291 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
292 return '\0'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
293 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
294 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
295 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
296 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
297 void print_window(int *window, int win_edge) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
298 printf("["); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
299 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
300 for (i = 0; i < WIN_LEN*2; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
301 printf("%c", window[i]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
302 if (i == WIN_LEN - 1) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
303 printf("||"); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
304 } else if (i == WIN_LEN*2 - 1) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
305 printf("] %-2d\n", win_edge); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
306 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
307 printf("|"); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
308 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
309 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
310 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
311 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
312 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
313 int **init_votes(int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
314 int **votes = malloc(sizeof(int *) * seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
315 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
316 for (i = 0; i < seq_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
317 votes[i] = malloc(sizeof(int) * N_BASES); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
318 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
319 votes[i][j] = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
320 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
321 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
322 return votes; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
323 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
324 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
325 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
326 void free_votes(int *votes[], int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
327 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
328 for (i = 0; i < seq_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
329 free(votes[i]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
330 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
331 free(votes); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
332 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
333 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
334 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
335 void print_votes(char *consensus, int *votes[], int seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
336 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
337 printf(" "); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
338 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
339 printf(" %c ", BASES[j]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
340 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
341 printf("\n"); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
342 for (i = 0; i < seq_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
343 printf("%c: ", consensus[i]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
344 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
345 if (votes[i][j]) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
346 printf("%2d ", votes[i][j]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
347 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
348 printf(" "); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
349 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
350 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
351 printf("\n"); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
352 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
353 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
354 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
355 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
356 // Take a consensus sequence which may have gaps ('-' characters) and remove them to produce the |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
357 // actual final sequence. "cons_len" should be the length of the original, gapped, sequence. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
358 char *rm_gaps(char *consensus, int cons_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
359 char *output = malloc(sizeof(char) * cons_len + 1); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
360 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
361 int j = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
362 for (i = 0; i < cons_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
363 if (consensus[i] != '-') { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
364 output[j] = consensus[i]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
365 j++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
366 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
367 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
368 output[j] = '\0'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
369 return output; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
370 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
371 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
372 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
373 char *build_consensus(int *votes[], int seq_len, double thres) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
374 char *consensus = malloc(sizeof(char) * seq_len + 1); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
375 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
376 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
377 for (i = 0; i < seq_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
378 int total = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
379 int max_vote = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
380 char max_base = 'N'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
381 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
382 total += votes[i][j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
383 if (votes[i][j] > max_vote) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
384 max_vote = votes[i][j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
385 max_base = BASES[j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
386 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
387 if (total == 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
388 consensus[i] = 'N'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
389 } else if ((double)max_vote/total > thres) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
390 consensus[i] = max_base; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
391 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
392 consensus[i] = 'N'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
393 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
394 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
395 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
396 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
397 consensus[seq_len] = '\0'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
398 return consensus; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
399 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
400 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
401 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
402 // Build a consensus sequence from two alignments by weighting each equally and considering only |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
403 // the frequency of each base in each alignment. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
404 char *build_consensus_duplex(int *votes1[], int *votes2[], int seq_len, double thres) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
405 char *consensus = malloc(sizeof(char) * seq_len + 1); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
406 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
407 int i, j; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
408 for (i = 0; i < seq_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
409 // Sum the total votes at this position. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
410 /*TODO: This does an extra loop through the votes to get the total so it can calculate actual |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
411 * frequencies in the second pass. Technically, this information could be gathered when |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
412 * originally tallying the votes in the get_votes functions. Or, the total could be assumed |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
413 * to be n_seqs if every base always contributes a vote (even when it's not in "ACGTN-"). |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
414 */ |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
415 int total1 = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
416 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
417 total1 += votes1[i][j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
418 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
419 int total2 = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
420 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
421 total2 += votes2[i][j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
422 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
423 double max_freq = 0.0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
424 char max_base = 'N'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
425 for (j = 0; j < N_BASES; j++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
426 // Get the frequency of each base. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
427 double freq1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
428 if (total1 > 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
429 freq1 = (double)votes1[i][j]/total1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
430 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
431 double freq2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
432 if (total2 > 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
433 freq2 = (double)votes2[i][j]/total2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
434 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
435 // frequency of the base = average of frequencies in the two sequences |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
436 double avg_freq; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
437 if (total1 == 0 && total2 == 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
438 avg_freq = -1.0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
439 } else if (total1 == 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
440 avg_freq = freq2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
441 } else if (total2 == 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
442 avg_freq = freq1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
443 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
444 avg_freq = (freq1 + freq2) / 2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
445 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
446 // Track the highest frequency seen. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
447 if (avg_freq > max_freq) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
448 max_freq = avg_freq; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
449 max_base = BASES[j]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
450 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
451 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
452 if (max_freq > thres) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
453 consensus[i] = max_base; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
454 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
455 consensus[i] = 'N'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
456 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
457 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
458 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
459 consensus[seq_len] = '\0'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
460 return consensus; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
461 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
462 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
463 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
464 // "cons1" and "cons2" must be null-terminated strings of equal lengths. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
465 char *build_consensus_duplex_simple(char *cons1, char *cons2, int gapped) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
466 int seq_len = strlen(cons1); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
467 char *cons = malloc(sizeof(char) * seq_len + 1); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
468 int i = 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
469 int base_prime1, base_prime2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
470 while (cons1[i] != '\0' && cons2[i] != '\0') { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
471 base_prime1 = get_base_prime(cons1[i]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
472 base_prime2 = get_base_prime(cons2[i]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
473 cons[i] = IUPAC_BASES[base_prime1*base_prime2]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
474 i++; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
475 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
476 cons[seq_len] = '\0'; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
477 if (gapped) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
478 return cons; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
479 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
480 return rm_gaps(cons, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
481 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
482 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
483 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
484 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
485 int get_base_prime(char base) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
486 switch (base) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
487 case 'A': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
488 return 2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
489 case 'C': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
490 return 3; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
491 case 'G': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
492 return 5; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
493 case 'T': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
494 return 7; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
495 case 'N': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
496 return 11; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
497 case '-': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
498 return 13; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
499 default: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
500 return 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
501 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
502 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
503 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
504 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
505 // Convenience function to create a consensus in one step. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
506 // Give 0 as "quals" to not use quality scores, and -1.0 as "cons_thres" to use the default |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
507 // consensus threshold when evaluating base votes. |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
508 char *get_consensus(char *align[], char *quals[], int n_seqs, int seq_len, double cons_thres, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
509 char qual_thres, int gapped) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
510 if (cons_thres == -1.0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
511 cons_thres = THRES_DEFAULT; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
512 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
513 int **votes; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
514 if (quals == 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
515 votes = get_votes_simple(align, n_seqs, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
516 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
517 votes = get_votes_qual(align, quals, n_seqs, seq_len, qual_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
518 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
519 char *consensus_gapped = build_consensus(votes, seq_len, cons_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
520 char *consensus; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
521 if (gapped) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
522 consensus = consensus_gapped; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
523 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
524 consensus = rm_gaps(consensus_gapped, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
525 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
526 free_votes(votes, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
527 return consensus; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
528 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
529 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
530 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
531 char *get_consensus_duplex(char *align1[], char *align2[], char *quals1[], char *quals2[], |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
532 int n_seqs1, int n_seqs2, int seq_len, double cons_thres, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
533 char qual_thres, int gapped, char *method) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
534 if (cons_thres == -1.0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
535 cons_thres = THRES_DEFAULT; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
536 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
537 int **votes1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
538 int **votes2; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
539 if (quals1 == 0 || quals2 == 0) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
540 votes1 = get_votes_simple(align1, n_seqs1, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
541 votes2 = get_votes_simple(align2, n_seqs2, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
542 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
543 votes1 = get_votes_qual(align1, quals1, n_seqs1, seq_len, qual_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
544 votes2 = get_votes_qual(align2, quals2, n_seqs2, seq_len, qual_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
545 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
546 char *consensus_gapped; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
547 if (!strncmp(method, "freq", 4)) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
548 consensus_gapped = build_consensus_duplex(votes1, votes2, seq_len, cons_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
549 } else if (!strncmp(method, "iupac", 5)) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
550 char *cons1 = build_consensus(votes1, seq_len, cons_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
551 char *cons2 = build_consensus(votes2, seq_len, cons_thres); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
552 consensus_gapped = build_consensus_duplex_simple(cons1, cons2, 1); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
553 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
554 return ""; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
555 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
556 char *consensus; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
557 if (gapped) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
558 consensus = consensus_gapped; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
559 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
560 consensus = rm_gaps(consensus_gapped, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
561 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
562 free_votes(votes1, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
563 free_votes(votes2, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
564 return consensus; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
565 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
566 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
567 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
568 void get_gap_quals(char *quals) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
569 int seq_len = strlen(quals); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
570 int *window = malloc(sizeof(int) * WIN_LEN * 2); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
571 int win_edge = init_gap_qual_window(window, quals, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
572 print_window(window, win_edge); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
573 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
574 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
575 char gap_qual; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
576 for (i = 0; i < seq_len; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
577 if (quals[i] == GAP_CHAR) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
578 gap_qual = get_gap_qual(window); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
579 printf("gap %2d: %2d\n", i, gap_qual); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
580 } else { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
581 win_edge = push_qual(window, win_edge, quals, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
582 print_window(window, win_edge); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
583 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
584 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
585 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
586 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
587 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
588 int main(int argc, char *argv[]) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
589 char **align = malloc(sizeof(char *) * (argc-1)); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
590 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
591 int seq_len = INT_MAX; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
592 int i; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
593 for (i = 1; i < argc; i++) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
594 if (strlen(argv[i]) < seq_len) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
595 seq_len = strlen(argv[i]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
596 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
597 align[i-1] = argv[i]; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
598 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
599 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
600 if (argc <= 1) { |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
601 return 1; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
602 } |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
603 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
604 get_gap_quals(align[0]); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
605 return 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
606 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
607 int **votes = get_votes_simple(align, argc-1, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
608 char *consensus = build_consensus(votes, seq_len, THRES_DEFAULT); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
609 print_votes(consensus, votes, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
610 printf("%s\n", consensus); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
611 free_votes(votes, seq_len); |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
612 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
613 return 0; |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
614 } |