comparison mafft/core/MSalign11.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2 #include "dp.h"
3
4 #define DEBUG 0
5 #define XXXXXXX 0
6 #define USE_PENALTY_EX 0
7
8 static void extendmseq( char **mseq1, char **mseq2, char **seq1, char **seq2, int i, int j, int prevhiti, int prevhitj )
9 {
10 // char gap[] = "-";
11 char *gap;
12 gap = newgapstr;
13 int l;
14
15 fprintf( stderr, "i=%d, prevhiti=%d\n", i, prevhiti );
16 fprintf( stderr, "j=%d, prevhitj=%d\n", j, prevhitj );
17 l = prevhiti - i - 1;
18 fprintf( stderr, "l=%d\n", l );
19 while( l>0 )
20 {
21 *--mseq1[0] = seq1[0][i+l--];
22 *--mseq2[0] = *gap;
23 }
24 l= prevhitj - j - 1;
25 fprintf( stderr, "l=%d\n", l );
26 while( l>0 )
27 {
28 *--mseq1[0] = *gap;
29 *--mseq2[0] = seq2[0][j+l--];
30 }
31 if( i < 0 || j < 0 ) return;
32 *--mseq1[0] = seq1[0][i];
33 *--mseq2[0] = seq2[0][j];
34 fprintf( stderr, "added %c to mseq1, mseq1 = %s \n", seq1[0][i], mseq1[0] );
35 fprintf( stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] );
36 }
37
38 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
39 {
40 char tmpc = s1[0][i1];
41 char *seq2 = s2[0];
42
43 while( lgth2-- )
44 *match++ = amino_dis[(int)tmpc][(int)*seq2++];
45 }
46
47 static float Atracking( float *lasthorizontalw, float *lastverticalw,
48 char **seq1, char **seq2,
49 char **mseq1, char **mseq2,
50 float **cpmx1, float **cpmx2,
51 int **ijp )
52 {
53 int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
54 // char gap[] = "-";
55 char *gap;
56 gap = newgapstr;
57 lgth1 = strlen( seq1[0] );
58 lgth2 = strlen( seq2[0] );
59
60
61 #if 0
62 for( i=0; i<lgth1; i++ )
63 {
64 fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
65 }
66 #endif
67
68 for( i=0; i<lgth1+1; i++ )
69 {
70 ijp[i][0] = i + 1;
71 }
72 for( j=0; j<lgth2+1; j++ )
73 {
74 ijp[0][j] = -( j + 1 );
75 }
76
77
78 mseq1[0] += lgth1+lgth2;
79 *mseq1[0] = 0;
80 mseq2[0] += lgth1+lgth2;
81 *mseq2[0] = 0;
82 iin = lgth1; jin = lgth2;
83 limk = lgth1+lgth2 + 1;
84 for( k=0; k<limk; k++ )
85 {
86 if( ijp[iin][jin] < 0 )
87 {
88 ifi = iin-1; jfi = jin+ijp[iin][jin];
89 }
90 else if( ijp[iin][jin] > 0 )
91 {
92 ifi = iin-ijp[iin][jin]; jfi = jin-1;
93 }
94 else
95 {
96 ifi = iin-1; jfi = jin-1;
97 }
98 l = iin - ifi;
99 while( --l )
100 {
101 *--mseq1[0] = seq1[0][ifi+l];
102 *--mseq2[0] = *gap;
103 k++;
104 }
105 l= jin - jfi;
106 while( --l )
107 {
108 *--mseq1[0] = *gap;
109 *--mseq2[0] = seq2[0][jfi+l];
110 k++;
111 }
112 if( iin <= 0 || jin <= 0 ) break;
113 *--mseq1[0] = seq1[0][ifi];
114 *--mseq2[0] = seq2[0][jfi];
115 k++;
116 iin = ifi; jin = jfi;
117 }
118 return( 0.0 );
119 }
120
121 void backdp( float **WMMTX, float wmmax, float *maxinw, float *maxinh, int lgth1, int lgth2, int alloclen, float *w1, float *w2, float *initverticalw, float *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )
122 {
123 register int i, j;
124 int prevhiti, prevhitj;
125 // int lasti, lastj;
126 float g;
127 float fpenalty = (float)penalty;
128 #if USE_PENALTY_EX
129 float fpenalty_ex = (float)penalty_ex;
130 #endif
131 float *currentw, *previousw, *wtmp;
132 float mi;
133 int mpi;
134 int *mpjpt;
135 float *mjpt, *prept, *curpt;
136 float wm = 0.0;
137 float forwwm;
138
139 currentw = w1;
140 previousw = w2;
141
142 match_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 );
143 match_calc( currentw, seq1, seq2, lgth1-1, lgth2 );
144
145
146 prevhiti = iin;
147 prevhitj = jin;
148 fprintf( stderr, "prevhiti = %d, lgth1 = %d\n", prevhiti, lgth1 );
149 fprintf( stderr, "prevhitj = %d, lgth2 = %d\n", prevhitj, lgth2 );
150 extendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 );
151
152 for( i=0; i<lgth1-1; i++ )
153 {
154 initverticalw[i] += fpenalty;
155 WMMTX[i][lgth2-1] += fpenalty;
156 }
157 for( j=0; j<lgth2-1; j++ )
158 {
159 currentw[j] += fpenalty;
160 WMMTX[lgth1-1][j] += fpenalty;
161 }
162
163
164 #if 0
165 fprintf( stderr, "initverticalw = \n" );
166 for( i=0; i<lgth1; i++ )
167 fprintf( stderr, "% 8.2f", initverticalw[i] );
168 fprintf( stderr, "\n" );
169 fprintf( stderr, "currentw = \n" );
170 for( i=0; i<lgth2; i++ )
171 fprintf( stderr, "% 8.2f", currentw[i] );
172 fprintf( stderr, "\n" );
173 #endif
174
175 for( j=lgth2-1; j>0; --j )
176 {
177 m[j-1] = currentw[j];
178 mp[j] = 0; // iranai
179 }
180
181 for( j=0; j<lgth2; j++ ) m[j] = 0.0;
182 //m[lgth2-1] ga, irunoka iranainoka irahai.
183
184 for( i=lgth1-2; i>-1; i-- )
185 {
186 wtmp = previousw;
187 previousw = currentw;
188 currentw = wtmp;
189
190 previousw[lgth2-1] = initverticalw[i+1];
191
192 match_calc( currentw, seq1, seq2, i, lgth2 );
193
194 #if 0
195 fprintf( stderr, "i=%d, currentw = \n", i );
196 for( j=0; j<lgth2; j++ ) fprintf( stderr, "% 8.2f", currentw[j] );
197 fprintf( stderr, "\n" );
198 #endif
199
200 currentw[lgth2-1] = initverticalw[i];
201
202 mi = previousw[lgth2-1];
203 mpi = lgth2-1; //iranai
204
205
206 mjpt = m + lgth2 - 2;
207 prept = previousw + lgth2 - 1;
208 curpt = currentw + lgth2 - 2;
209 mpjpt = mp + lgth2 - 2;
210
211 for( j=lgth2-2; j>-1; j-- )
212 {
213
214 // fprintf( stderr, "i,j=%d,%d %c-%c ", i, j, seq1[0][i], seq2[0][j] );
215 wm = *prept;
216 g = mi + fpenalty;
217 #if 0
218 fprintf( stderr, "%5.0f?", g );
219 #endif
220 if( g > wm )
221 {
222 wm = g;
223 }
224 g = *prept;
225 if( g >= mi )
226 {
227 mi = g;
228 mpi = j+1; //iranai
229 }
230 #if USE_PENALTY_EX
231 mi += fpenalty_ex;
232 #endif
233
234 g = *mjpt + fpenalty;
235 #if 0
236 fprintf( stderr, "%5.0f?", g );
237 #endif
238 if( g > wm )
239 {
240 wm = g;
241 }
242 g = *prept;
243 if( g >= *mjpt )
244 {
245 *mjpt = g;
246 *mpjpt = i-1; //iranai
247 }
248 #if USE_PENALTY_EX
249 m[j] += fpenalty_ex;
250 #endif
251
252 #if 0
253 fprintf( stderr, "*curpt = %5.0f \n", *curpt );
254 #endif
255
256 // forwwm = wm + MIN( maxinw[i], maxinh[j] );
257 forwwm = wm + MIN( maxinw[i], maxinh[j] );
258 WMMTX[i][j] = forwwm;
259 if( forwwm == wmmax && i<prevhiti && j<prevhitj )
260 {
261 fprintf( stderr, "hit!\n" );
262 extendmseq( mseq1, mseq2, seq1, seq2, i, j, prevhiti, prevhitj );
263 if( forwwm == wmmax )
264 {
265 *--mseq1[0] = 'u';
266 *--mseq2[0] = 'u';
267 }
268 prevhiti = i;
269 prevhitj = j;
270 }
271 *curpt += wm;
272
273 mjpt--;
274 prept--;
275 mpjpt--;
276 curpt--;
277 }
278 }
279 extendmseq( mseq1, mseq2, seq1, seq2, -1, -1, prevhiti, prevhitj );
280 }
281
282
283 float MSalign11( char **seq1, char **seq2, int alloclen )
284 /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
285 {
286 // int k;
287 register int i, j;
288 int lasti, lastj;
289 int iin = 0, jin = 0; // by Mathog, a guess
290 int lgth1, lgth2;
291 int resultlen;
292 float wm = 0.0; /* int ?????? */
293 float g;
294 float *currentw, *previousw;
295 float fpenalty = (float)penalty;
296 #if USE_PENALTY_EX
297 float fpenalty_ex = (float)penalty_ex;
298 #endif
299 float *maxinw = NULL, *maxinwpt = NULL; // by D.Mathog, guess
300 float *maxinh = NULL; // by D.Mathog, guess
301 #if 1
302 float wmmax;
303 float *wtmp;
304 int *ijppt;
305 float *mjpt, *prept, *curpt;
306 int *mpjpt;
307 #endif
308 static float mi, *m;
309 static int **ijp;
310 static int mpi, *mp;
311 static float *w1, *w2;
312 static float *match;
313 static float *initverticalw; /* kufuu sureba iranai */
314 static float *lastverticalw; /* kufuu sureba iranai */
315 static char **mseq1;
316 static char **mseq2;
317 static char **mseq;
318 static float **cpmx1;
319 static float **cpmx2;
320 static int **intwork;
321 static float **WMMTX;
322 static float **floatwork;
323 static int orlgth1 = 0, orlgth2 = 0;
324
325 if( orlgth1 == 0 )
326 {
327 mseq1 = AllocateCharMtx( njob, 0 );
328 mseq2 = AllocateCharMtx( njob, 0 );
329 }
330
331
332 lgth1 = strlen( seq1[0] );
333 lgth2 = strlen( seq2[0] );
334
335 if( lgth1 > orlgth1 || lgth2 > orlgth2 )
336 {
337 int ll1, ll2;
338
339 if( orlgth1 > 0 && orlgth2 > 0 )
340 {
341 FreeFloatVec( w1 );
342 FreeFloatVec( w2 );
343 FreeFloatVec( match );
344 FreeFloatVec( initverticalw );
345 FreeFloatVec( lastverticalw );
346 FreeFloatVec( maxinw );
347 FreeFloatVec( maxinh );
348
349 FreeFloatVec( m );
350 FreeIntVec( mp );
351
352 FreeCharMtx( mseq );
353
354
355 FreeFloatMtx( cpmx1 );
356 FreeFloatMtx( cpmx2 );
357
358 FreeFloatMtx( floatwork );
359 FreeIntMtx( intwork );
360 }
361
362 ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
363 ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
364
365 #if DEBUG
366 fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
367 #endif
368
369 w1 = AllocateFloatVec( ll2+2 );
370 w2 = AllocateFloatVec( ll2+2 );
371 match = AllocateFloatVec( ll2+2 );
372
373 initverticalw = AllocateFloatVec( ll1+2 );
374 lastverticalw = AllocateFloatVec( ll1+2 );
375 maxinw = AllocateFloatVec( ll1+2 );
376
377
378 m = AllocateFloatVec( ll2+2 );
379 mp = AllocateIntVec( ll2+2 );
380 maxinh = AllocateFloatVec( ll2+2 );
381
382 mseq = AllocateCharMtx( njob, ll1+ll2 );
383
384 cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
385 cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
386
387 floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
388 intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
389
390 #if DEBUG
391 fprintf( stderr, "succeeded\n" );
392 #endif
393
394 orlgth1 = ll1 - 100;
395 orlgth2 = ll2 - 100;
396 }
397
398
399 mseq1[0] = mseq[0];
400 mseq2[0] = mseq[1];
401
402
403 if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
404 {
405 int ll1, ll2;
406
407 if( commonAlloc1 && commonAlloc2 )
408 {
409 FreeIntMtx( commonIP );
410 FreeFloatMtx( WMMTX );
411 }
412
413 ll1 = MAX( orlgth1, commonAlloc1 );
414 ll2 = MAX( orlgth2, commonAlloc2 );
415
416 #if DEBUG
417 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
418 #endif
419
420 commonIP = AllocateIntMtx( ll1+10, ll2+10 );
421 WMMTX = AllocateFloatMtx( ll1+10, ll2+10 );
422
423 #if DEBUG
424 fprintf( stderr, "succeeded\n\n" );
425 #endif
426
427 commonAlloc1 = ll1;
428 commonAlloc2 = ll2;
429 }
430 ijp = commonIP;
431
432
433 #if 0
434 for( i=0; i<lgth1; i++ )
435 fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
436 #endif
437
438 currentw = w1;
439 previousw = w2;
440
441 match_calc( initverticalw, seq2, seq1, 0, lgth1 );
442
443
444 match_calc( currentw, seq1, seq2, 0, lgth2 );
445
446 WMMTX[0][0] = initverticalw[0];
447
448 maxinh[0] = initverticalw[0];
449 for( i=1; i<lgth1+1; i++ )
450 {
451 initverticalw[i] += fpenalty;
452 WMMTX[i][0] = initverticalw[i];
453 if( maxinh[0] < initverticalw[i] ) maxinh[0] = initverticalw[i];
454 }
455 maxinw[0] = currentw[0];
456 for( j=1; j<lgth2+1; j++ )
457 {
458 currentw[j] += fpenalty;
459 WMMTX[0][j] = currentw[j];
460 if( maxinw[0] < currentw[j] ) maxinw[0] = currentw[j];
461 }
462
463 for( j=1; j<lgth2+1; ++j )
464 {
465 m[j] = currentw[j-1]; mp[j] = 0;
466 }
467
468 lastverticalw[0] = currentw[lgth2-1];
469
470 lasti = lgth1+1;
471
472 for( i=1; i<lasti; i++ )
473 {
474 wtmp = previousw;
475 previousw = currentw;
476 currentw = wtmp;
477
478 previousw[0] = initverticalw[i-1];
479
480 match_calc( currentw, seq1, seq2, i, lgth2 );
481
482 currentw[0] = initverticalw[i];
483
484 mi = previousw[0]; mpi = 0;
485
486 maxinwpt = maxinw + i;
487 *maxinwpt = currentw[0];
488
489 fprintf( stderr, "currentw[0] = %f, *maxinwpt = %f\n", currentw[0], maxinw[i] );
490
491 ijppt = ijp[i] + 1;
492 mjpt = m + 1;
493 prept = previousw;
494 curpt = currentw + 1;
495 mpjpt = mp + 1;
496 lastj = lgth2+1;
497
498 for( j=1; j<lastj; j++ )
499 {
500 wm = *prept;
501 *ijppt = 0;
502
503 #if 0
504 fprintf( stderr, "%5.0f->", wm );
505 #endif
506 g = mi + fpenalty;
507 #if 0
508 fprintf( stderr, "%5.0f?", g );
509 #endif
510 if( g > wm )
511 {
512 wm = g;
513 *ijppt = -( j - mpi );
514 }
515 g = *prept;
516 if( g >= mi )
517 {
518 mi = g;
519 mpi = j-1;
520 }
521 #if USE_PENALTY_EX
522 mi += fpenalty_ex;
523 #endif
524
525 g = *mjpt + fpenalty;
526 #if 0
527 fprintf( stderr, "%5.0f?", g );
528 #endif
529 if( g > wm )
530 {
531 wm = g;
532 *ijppt = +( i - *mpjpt );
533 }
534 g = *prept;
535 if( g >= *mjpt )
536 {
537 *mjpt = g;
538 *mpjpt = i-1;
539 }
540 #if USE_PENALTY_EX
541 m[j] += fpenalty_ex;
542 #endif
543
544 #if 0
545 fprintf( stderr, "%5.0f ", wm );
546 #endif
547 *curpt += wm;
548
549 WMMTX[i][j] = *curpt;
550
551
552 if( j<lgth2 && *maxinwpt < *curpt ) *maxinwpt = *curpt;
553 if( j<lgth2 && maxinh[j] < *curpt ) maxinh[j] = *curpt;
554 // fprintf( stderr, "maxintwpt = %f\n", *maxinwpt );
555
556 ijppt++;
557 mjpt++;
558 prept++;
559 mpjpt++;
560 curpt++;
561 }
562 lastverticalw[i] = currentw[lgth2-1];
563 }
564
565 wmmax = -999.9;
566 for( i=0; i<lgth1; i++ )
567 {
568 g = lastverticalw[i];
569 if( g > wmmax )
570 {
571 wmmax = g;
572 iin = i;
573 jin = lgth2-1;
574 }
575 }
576 for( j=0; j<lgth2; j++ )
577 {
578 g = currentw[j];
579 if( g > wmmax )
580 {
581 wmmax = g;
582 iin = lgth1-1;
583 jin = j;
584 }
585 }
586
587 for( i=0; i<lgth1; i++ )
588 fprintf( stderr, "maxinw[%d] = %f\n", i, maxinw[i] );
589 for( j=0; j<lgth2; j++ )
590 fprintf( stderr, "maxinh[%d] = %f\n", j, maxinh[j] );
591
592 fprintf( stderr, "wmmax = %f (%d,%d)\n", wmmax, iin, jin );
593 if( iin == lgth1 - 1 && jin == lgth2 - 1 )
594 ;
595 else
596 wmmax += fpenalty;
597
598 fprintf( stderr, "wmmax = %f\n", wmmax );
599
600 #if 0
601 for( i=0; i<lgth1; i++ )
602 {
603 for( j=0; j<lgth2; j++ )
604 {
605 fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
606 }
607 fprintf( stderr, "\n" );
608 }
609 #endif
610
611 mseq1[0] += lgth1+lgth2;
612 *mseq1[0] = 0;
613 mseq2[0] += lgth1+lgth2;
614 *mseq2[0] = 0;
615
616 backdp( WMMTX, wmmax, maxinw, maxinh, lgth1, lgth2, alloclen, w1, w2, initverticalw, m, mp, iin, jin, seq1, seq2, mseq1, mseq2 );
617
618 fprintf( stderr, "\n" );
619 #if 1
620 fprintf( stderr, "\n" );
621 fprintf( stderr, ">MSres\n%s\n", mseq1[0] );
622 fprintf( stderr, ">MSres\n%s\n", mseq2[0] );
623 #endif
624
625 #if 0
626 for( i=0; i<lgth1; i++ )
627 {
628 for( j=0; j<lgth2; j++ )
629 {
630 fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
631 }
632 fprintf( stderr, "\n" );
633 }
634 #endif
635
636 mseq1[0] = mseq[0];
637 mseq2[0] = mseq[1];
638 mseq1[0] += lgth1+lgth2;
639 *mseq1[0] = 0;
640 mseq2[0] += lgth1+lgth2;
641 *mseq2[0] = 0;
642
643 Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp );
644
645
646 resultlen = strlen( mseq1[0] );
647 if( alloclen < resultlen || resultlen > N )
648 {
649 fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
650 ErrorExit( "LENGTH OVER!\n" );
651 }
652
653
654 strcpy( seq1[0], mseq1[0] );
655 strcpy( seq2[0], mseq2[0] );
656 #if 1
657 fprintf( stderr, "\n" );
658 fprintf( stderr, ">\n%s\n", mseq1[0] );
659 fprintf( stderr, ">\n%s\n", mseq2[0] );
660 #endif
661
662
663 return( wm );
664 }
665