annotate align.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
1 #include <stdio.h>
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
2 #include <stdlib.h>
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
3 #include <string.h>
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
4
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
5 #define NAIVE_TEST_WINDOW 6
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
6 #define NAIVE_TEST_THRES 0.80
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
7 #define NAIVE_TEST_MIN 2
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
8 #define NAIVE_WINDOW 10
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
9 #define NAIVE_THRES 0.80
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
10
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
// One gap in an alignment, kept as a node of a singly linked list.
typedef struct Gap Gap;
struct Gap {
  // Which sequence the gap belongs to (naive2 records 1 or 2).
  int seq;
  // Index the gap is anchored at; insert_gaps copies the sequence through this
  // index before emitting the gap characters.
  int coord;
  // Number of gap characters.
  int length;
  // Following node in the list, or a null pointer for the last node.
  Gap *next;
};
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
17
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
// A singly linked list of Gap records, with a pointer to the last node so that
// add_gap can append without walking the list.
typedef struct Gaps Gaps;
struct Gaps {
  // Number of nodes appended so far.
  int length;
  // First node, or a null pointer while the list is empty.
  struct Gap *root;
  // Last node, or a null pointer while the list is empty.
  struct Gap *tip;
};
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
23
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
24 int _test_match(char *seq1, int start1, char *seq2, int start2);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
25 void add_gap(Gaps *gaps, int seq, int coord, int length);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
26 Gaps *make_gaps();
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
27 char *insert_gaps(Gaps *gaps, char *seq, int seq_num);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
28
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
29
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
30 // A naive algorithm for aligning two sequences which are expected to be very similar to each other
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
31 // and already nearly aligned.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
32 void naive2(char *seq1, char *seq2) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
33 Gaps *gaps = make_gaps();
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
34 int i = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
35 int j = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
36 int matches = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
37 while (seq1[i] != 0 && seq2[j] != 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
38 // Match?
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
39 printf("%c %c | i %d j %d\n", seq1[i], seq2[j], i, j);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
40 if (seq1[i] == seq2[j]) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
41 matches++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
42 i++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
43 j++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
44 continue;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
45 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
46 printf("mismatch!\n");
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
47 // Mismatch. Start adding gaps until the mismatches go away.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
48 int new_i = i;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
49 int new_j = j;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
50 int gap_seq = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
51 int success;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
52 while (1) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
53 if (seq1[new_i] == 0 && seq2[new_j] == 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
54 break;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
55 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
56 success = _test_match(seq1, new_i, seq2, j);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
57 if (success) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
58 gap_seq = 2;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
59 break;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
60 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
61 if (seq1[new_i] != 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
62 new_i++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
63 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
64 success = _test_match(seq1, i, seq2, new_j);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
65 if (success) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
66 gap_seq = 1;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
67 break;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
68 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
69 if (seq2[new_j] != 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
70 new_j++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
71 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
72 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
73 // Which sequence are we putting the gap in?
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
74 if (gap_seq == 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
75 printf("No good gap found. new_i: %d, new_j: %d\n", new_i, new_j);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
76 // No good gap found.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
77 } else if (i == new_i && j == new_j) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
78 printf("No gap required.\n");
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
79 } else if (gap_seq == 1) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
80 printf("%dbp gap in seq1 at base %d.\n", new_j-j, j);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
81 add_gap(gaps, 1, j, new_j-j);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
82 j = new_j;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
83 } else if (gap_seq == 2) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
84 printf("%dbp gap in seq2 at base %d.\n", new_i-i, i);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
85 add_gap(gaps, 2, i, new_i-i);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
86 i = new_i;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
87 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
88 i++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
89 j++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
90 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
91
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
92 char *new_seq1 = insert_gaps(gaps, seq1, 1);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
93 char *new_seq2 = insert_gaps(gaps, seq2, 2);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
94 printf("alignment:\n%s\n%s\n", new_seq1, new_seq2);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
95 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
96
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
97 // Check if the few bases starting at start1 and start2 in seq1 and seq2, respectively, align with
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
98 // few mismatches. The number of bases checked is NAIVE_TEST_WINDOW, and they must have a match
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
99 // percentage greater than NAIVE_TEST_THRES. Also, the amount of sequence left to compare must be
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
100 // more than NAIVE_TEST_MIN.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
101 int _test_match(char *seq1, int start1, char *seq2, int start2) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
102 int matches = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
103 int total = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
104 char base1, base2;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
105 int i;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
106 for (i = 0; i < NAIVE_TEST_WINDOW-1; i++) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
107 base1 = seq1[start1+i];
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
108 base2 = seq2[start2+i];
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
109 if (base1 == 0 || base2 == 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
110 break;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
111 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
112 if (base1 == base2) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
113 matches++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
114 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
115 total++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
116 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
117 return total > NAIVE_TEST_MIN && (double)matches/total > NAIVE_TEST_THRES;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
118 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
119
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
120 Gaps *make_gaps() {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
121 Gaps *gaps = malloc(sizeof(Gaps));
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
122 gaps->root = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
123 gaps->tip = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
124 gaps->length = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
125 return gaps;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
126 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
127
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
128 void add_gap(Gaps *gaps, int seq, int coord, int length) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
129 Gap *gap = malloc(sizeof(Gap));
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
130 gap->next = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
131 gap->seq = seq;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
132 gap->coord = coord;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
133 gap->length = length;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
134 if (gaps->root == 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
135 gaps->root = gap;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
136 } else {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
137 gaps->tip->next = gap;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
138 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
139 gaps->tip = gap;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
140 gaps->length++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
141 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
142
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
143 // Take gap information from the aligner and put them into the sequence string as "-" characters.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
144 char *insert_gaps(Gaps *gaps, char *seq, int seq_num) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
145 if (gaps->root == 0) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
146 return seq;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
147 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
148
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
149 // How long should the new sequence be?
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
150 int extra_len = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
151 Gap *gap = gaps->root;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
152 while (gap) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
153 if (gap->seq == seq_num) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
154 extra_len += gap->length;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
155 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
156 gap = gap->next;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
157 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
158
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
159 //TODO: Handle a situation with no gaps.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
160 int new_len = extra_len + strlen(seq) + 1;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
161 char *new_seq = malloc(sizeof(char) * new_len);
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
162 int i = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
163 int j = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
164 gap = gaps->root;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
165 while (gap) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
166 // Check that it's a gap in our sequence.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
167 if (gap->seq != seq_num) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
168 gap = gap->next;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
169 continue;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
170 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
171 // Copy verbatim all the sequence until the gap.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
172 while (i <= gap->coord) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
173 new_seq[j] = seq[i];
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
174 i++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
175 j++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
176 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
177 // Add -'s the whole length of the gap.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
178 while (j < gap->coord + gap->length + 1) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
179 new_seq[j] = '-';
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
180 j++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
181 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
182 gap = gap->next;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
183 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
184 // Fill in the end sequence.
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
185 while (seq[i]) {
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
186 new_seq[j] = seq[i];
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
187 i++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
188 j++;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
189 }
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
190 new_seq[new_len-1] = 0;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
191 return new_seq;
e4d75f9efb90 planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff changeset
192 }