comparison mafft/core/addfunctions.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2
3
4 void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap
5 {
6 int i, newlen;
7 double *effarr0, *effarr2;
8 float dumfl;
9 double eff;
10 effarr0 = AllocateDoubleVec( n0 );
11 effarr2 = AllocateDoubleVec( n2 );
12
13 // reporterr( "profilealignment!\n" );
14
15 commongappick( n0, aln0 );
16 commongappick( n2, aln2 );
17
18 eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
19 eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
20
21 newgapstr = "-";
22 if( alg == 'M' )
23 MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
24 else
25 A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
26
27 newlen = strlen( aln0[0] );
28
29 #if 0 // tabun hitsuyou
30 for( j=0; j<newlen; j++ )
31 {
32 // fprintf( stderr, "j=%d\n", j );
33 for( i=0; i<n0; i++ )
34 {
35 if( aln0[i][j] != '-' ) break;
36 }
37 if( i == n0 )
38 {
39 for( i=0; i<n1; i++ )
40 {
41 if( aln1[i][j] != '-' ) break;
42 }
43 }
44 else i = -1;
45
46 if( i == n1 )
47 {
48 for( i=0; i<n1; i++ ) aln1[i][j] = '=';
49 }
50 }
51 fprintf( stderr, "in profilealignment,\n" );
52 for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] );
53 for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] );
54 for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] );
55 #endif
56
57 free( effarr0 );
58 free( effarr2 );
59 }
60
61 void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap
62 {
63 int i, j, newlen;
64 double *effarr0, *effarr2;
65 float dumfl;
66 double eff;
67 effarr0 = AllocateDoubleVec( n0 );
68 effarr2 = AllocateDoubleVec( n2 );
69
70
71 // reporterr( "profilealignment!\n" );
72
73 commongappick( n0, aln0 );
74 commongappick( n2, aln2 );
75
76 eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
77 eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
78
79 newgapstr = "-";
80 if( alg == 'M' )
81 MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
82 else
83 A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
84
85 newlen = strlen( aln0[0] );
86
87 for( i=0; i<newlen; i++ ) aln1[0][i] = '-';
88 aln1[0][i] = 0;
89 for( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] );
90
91 for( j=0; j<newlen; j++ )
92 {
93 // fprintf( stderr, "j=%d\n", j );
94 for( i=0; i<n0; i++ )
95 {
96 if( aln0[i][j] != '-' ) break;
97 }
98 if( i == n0 )
99 {
100 for( i=0; i<n1; i++ )
101 {
102 if( aln1[i][j] != '-' ) break;
103 }
104 }
105 else i = -1;
106
107 if( i == n1 )
108 {
109 for( i=0; i<n1; i++ ) aln1[i][j] = '=';
110 }
111 }
112 #if 0
113 fprintf( stderr, "in profilealignment,\n" );
114 for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] );
115 for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] );
116 for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] );
117 #endif
118
119 free( effarr0 );
120 free( effarr2 );
121 }
122
123 void eq2dashmatomete( char **s, int n )
124 {
125 int i, j;
126 char sj;
127
128 for( j=0; (sj=s[0][j]); j++ )
129 {
130 if( sj == '=' )
131 {
132 for( i=0; i<n; i++ )
133 {
134 s[i][j] = '-';
135 }
136 }
137 }
138 }
139
140 void eq2dashmatometehayaku( char **s, int n )
141 {
142 int i, j, c;
143 int *tobechanged;
144 int len = strlen( s[0] );
145
146 tobechanged = calloc( len, sizeof( int ) );
147 c = 0;
148 for( j=0; j<len; j++ )
149 {
150 if( s[0][j] == '=' ) tobechanged[c++] = j;
151 }
152 tobechanged[c] = -1;
153
154 for( i=0; i<n; i++ )
155 {
156 for( c=0; (j=tobechanged[c])!=-1; c++ )
157 s[i][j] = '-';
158 }
159 free( tobechanged );
160 }
161
162 void eq2dash( char *s )
163 {
164 while( *s )
165 {
166 if( *s == '=' )
167 {
168 *s = '-';
169 }
170 s++;
171 }
172 }
173
174 void findnewgaps( int n, int rep, char **seq, int *gaplen )
175 {
176 int i, pos, len, len1;
177
178 len = strlen( seq[0] );
179 // for( i=0; i<len; i++ ) gaplen[i] = 0; // calloc de shokika sareteirukara hontou ha iranai
180 len1 = len + 1;
181 for( i=0; i<len1; i++ ) gaplen[i] = 0; // reallo de shokika sareteirukara iru!
182
183 pos = 0;
184 for( i=0; i<len; i++ )
185 {
186 if( seq[rep][i] == '=' )
187 {
188 // fprintf( stderr, "Newgap! pos = %d\n", pos );
189 gaplen[pos]++;
190 }
191 else
192 pos++;
193 }
194 }
195
196 void findcommongaps( int n, char **seq, int *gaplen )
197 {
198 int i, j, pos, len, len1;
199 len = strlen( seq[0] );
200 len1 = len+1;
201
202 // fprintf( stderr, "seq[0] = %s\n", seq[0] );
203 for( i=0; i<len1; i++ ) gaplen[i] = 0;
204
205 pos = 0;
206 for( i=0; i<len; i++ )
207 {
208 for( j=0; j<n; j++ )
209 if( seq[j][i] != '-' ) break;
210
211 if( j == n ) gaplen[pos]++;
212 else
213 pos++;
214 }
215 #if 0
216 for( i=0; i<pos; i++ )
217 {
218 fprintf( stderr, "vec[%d] = %d\n", i, gaplen[i] );
219 }
220 #endif
221 }
222
223 void adjustgapmap( int newlen, int *gapmap, char *seq )
224 {
225 int j;
226 int pos;
227 int newlen1 = newlen+1;
228 int *tmpmap;
229
230
231 tmpmap = AllocateIntVec( newlen+2 );
232 j = 0;
233 pos = 0;
234 while( *seq )
235 {
236 // fprintf( stderr, "j=%d *seq = %c\n", j, *seq );
237 if( *seq++ == '=' )
238 tmpmap[j++] = 0;
239 else
240 {
241 tmpmap[j++] = gapmap[pos++];
242 }
243 }
244 tmpmap[j++] = gapmap[pos];
245
246 for(j=0; j<newlen1; j++)
247 gapmap[j] = tmpmap[j];
248
249 free( tmpmap );
250 }
251
252 static void strncpy0( char *s1, char *s2, int n )
253 {
254 while( n-- ) *s1++ = *s2++;
255 *s1 = 0;
256 }
257
258 static int countnogaplen( int *gaplen, int *term )
259 {
260 int v = 0;
261 while( gaplen < term )
262 {
263 if( *gaplen++ == 0 ) v++;
264 else break;
265 }
266 return( v );
267 }
268
269 void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar )
270 {
271 int *mar;
272 char *gaps;
273 char *cptr;
274 int i, j, k, len, rep, len0, lp, blocklen;
275 char **mseq2, **mseq0, **mseq1;
276 char **aseq, *newchar;
277 int ngroup2, ngroup0, ngroup1;
278 int *list0, *list1, *list2;
279 int posin12, gapshift, newpos;
280 int mlen1, mlen0, mlen2;
281 int tmptmptmpmark = 0;
282
283 mar = calloc( njob, sizeof( int ) );
284 list0 = calloc( njob, sizeof( int ) );
285 list1 = calloc( njob, sizeof( int ) );
286 list2 = calloc( njob, sizeof( int ) );
287
288 for( i=0; i<njob; i++ ) mar[i] = 0;
289 for( i=0; i<njob; i++ )
290 {
291 if( alreadyaligned[i]==0 ) mar[i] = 3;
292 }
293 for( i=0; (k=ex1[i])>-1; i++ )
294 {
295 mar[k] = 1;
296 // fprintf( stderr, "excluding %d\n", ex1[i] );
297 }
298 for( i=0; (k=ex2[i])>-1; i++ )
299 {
300 mar[k] = 2;
301 // fprintf( stderr, "excluding %d\n", ex2[i] );
302 }
303
304 ngroup2 = ngroup1 = ngroup0 = 0;
305 for( i=0; i<njob; i++ )
306 {
307 if( mar[i] == 2 )
308 {
309 list2[ngroup2] = i;
310 ngroup2++;
311 }
312 if( mar[i] == 1 )
313 {
314 list1[ngroup1] = i;
315 ngroup1++;
316 }
317 if( mar[i] == 0 )
318 {
319 list0[ngroup0] = i;
320 // fprintf( stderr, "inserting new gaps to %d\n", i );
321 ngroup0++;
322 }
323 }
324 list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1;
325 if( ngroup0 == 0 )
326 {
327 // fprintf( stderr, "Nothing to do\n" );
328 free( mar );
329 free( list0 );
330 free( list1 );
331 free( list2 );
332 return;
333 }
334
335 for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break;
336 rep = i;
337 len = strlen( seq[rep] );
338 len0 = len+1;
339
340 //
341 // if( i == njob )
342 // {
343 //// fprintf( stderr, "Nothing to do\n" );
344 // free( mar );
345 // return;
346 // }
347
348 mseq2 = AllocateCharMtx( ngroup2, alloclen );
349 mseq1 = AllocateCharMtx( ngroup1, alloclen );
350 mseq0 = AllocateCharMtx( ngroup0, alloclen );
351 aseq = AllocateCharMtx( njob, alloclen );
352 gaps = calloc( alloclen, sizeof( char ) );
353
354
355 for( i=0; i<njob; i++ ) aseq[i][0] = 0;
356 newpos = 0;
357 posin12 = 0;
358 // fprintf( stderr, "\ngaplen[] = \n" );
359 // for(i=0; i<len0; i++ ) fprintf( stderr, "%d ", gaplen[i] );
360 // fprintf( stderr, "\n" );
361
362 for( j=0; j<len0; j++ )
363 {
364 // fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] );
365 if( gaplen[j] )
366 {
367 // fprintf( stderr, "j=%d GAP!\n", j );
368 tmptmptmpmark = 1;
369 for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0;
370 for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0;
371 for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0;
372 mlen0 = mlen1 = mlen2 = 0;
373
374 gapshift = gaplen[j];
375 cptr = gaps;
376 while( gapshift-- ) *cptr++ = gapchar;
377 *cptr = 0;
378 gapshift = gaplen[j];
379
380 for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshift );
381 for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
382 for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
383 posin12 += gapshift;
384 mlen0 += gapshift;
385 mlen1 += gapshift;
386 mlen2 += gapshift;
387
388 gapshift = gapmap[posin12];
389 // fprintf( stderr, "gapmap[%d] kouho = %d\n", posin12, gapmap[posin12] );
390
391
392 for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshift );
393 for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
394 for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
395 mlen0 += gapshift;
396 mlen1 += gapshift;
397 mlen2 += gapshift;
398 #if 0
399 for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] );
400 for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] );
401 for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] );
402 #endif
403
404 if( gapshift )
405 profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
406
407 j += gapshift;
408 posin12 += gapshift;
409
410 newpos = strlen( aseq[rep] ); // kufuu?
411 for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] );
412 for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] );
413 for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] );
414
415 // fprintf( stderr, "gapshift = %d\n", gapshift );
416 }
417 blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );
418 // fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 );
419 // blocklen = 1;
420 // if( tmptmptmpmark ) exit( 1 );
421
422 newpos = strlen( aseq[rep] );
423
424 #if 0
425 for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];
426 for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];
427 for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];
428 for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;
429 for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;
430 for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;
431 #else
432
433 for( i=0; i<ngroup0; i++ )
434 {
435 lp = list0[i];
436 newchar = aseq[lp] + newpos;
437 strncpy0( newchar, seq[lp]+j, blocklen );
438 }
439 for( i=0; i<ngroup1; i++ )
440 {
441 lp = list1[i];
442 newchar = aseq[lp] + newpos;
443 strncpy0( newchar, seq[lp]+posin12, blocklen );
444 }
445 for( i=0; i<ngroup2; i++ )
446 {
447 lp = list2[i];
448 newchar = aseq[lp] + newpos;
449 strncpy0( newchar, seq[lp]+posin12, blocklen );
450 }
451 // fprintf( stderr, "### aseq[l0] = %s\n", aseq[list0[0]] );
452 // fprintf( stderr, "### aseq[l1] = %s\n", aseq[list1[0]] );
453 // fprintf( stderr, "### aseq[l2] = %s\n", aseq[list2[0]] );
454 // exit( 1 );
455 #endif
456
457 // fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 );
458 j += (blocklen-1);
459
460
461 posin12 += (blocklen-1);
462
463
464 posin12++;
465 }
466 #if 0
467 fprintf( stderr, "\n" );
468 fprintf( stderr, " seq[l0] = %s\n", seq[list0[0]] );
469 fprintf( stderr, " seq[l1] = %s\n", seq[list1[0]] );
470 fprintf( stderr, " seq[l2] = %s\n", seq[list2[0]] );
471 fprintf( stderr, "=====>\n" );
472 fprintf( stderr, "aseq[l0] = %s\n", aseq[list0[0]] );
473 fprintf( stderr, "aseq[l1] = %s\n", aseq[list1[0]] );
474 fprintf( stderr, "aseq[l2] = %s\n", aseq[list2[0]] );
475 //if( tmptmptmpmark ) exit( 1 );
476 #endif
477
478 // for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );
479 for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );
480 for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );
481 for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );
482
483
484 free( mar );
485 free( gaps );
486 free( list0 );
487 free( list1 );
488 free( list2 );
489 FreeCharMtx( mseq2 );
490 FreeCharMtx( mseq1 ); // ? added 2012/02/12
491 FreeCharMtx( mseq0 );
492 FreeCharMtx( aseq ); // ? added 2012/02/12
493 }
494
495
496 void restorecommongaps( int njob, char **seq, int *ex1, int *ex2, int *gaplen, int alloclen, char gapchar )
497 {
498 int *mar;
499 char *tmpseq;
500 char *cptr;
501 int *iptr;
502 int *tmpgaplen;
503 int i, j, k, len, rep, len1, klim;
504
505 mar = calloc( njob, sizeof( int ) );
506 tmpseq = calloc( alloclen, sizeof( char ) );
507 tmpgaplen = calloc( alloclen, sizeof( int ) );
508 // tmpseq = calloc( alloclen+2, sizeof( char ) );
509 // tmpgaplen = calloc( alloclen+2, sizeof( int ) );
510
511
512 for( i=0; i<njob; i++ ) mar[i] = 0;
513 for( i=0; (k=ex1[i])>-1; i++ )
514 {
515 mar[k] = 1;
516 // fprintf( stderr, "excluding %d\n", ex1[i] );
517 }
518 for( i=0; (k=ex2[i])>-1; i++ )
519 {
520 mar[k] = 1;
521 // fprintf( stderr, "excluding %d\n", ex2[i] );
522 }
523
524 for( i=0; i<njob; i++ )
525 if( mar[i] ) break;
526
527 if( i == njob )
528 {
529 // fprintf( stderr, "Nothing to do\n" );
530 free( mar );
531 free( tmpseq );
532 free( tmpgaplen );
533 return;
534 }
535 rep = i;
536 len = strlen( seq[rep] );
537 len1 = len+1;
538
539
540 for( i=0; i<njob; i++ )
541 {
542 if( mar[i] == 0 ) continue;
543 cptr = tmpseq;
544 for( j=0; j<len1; j++ )
545 {
546 klim = gaplen[j];
547 // for( k=0; k<gaplen[j]; k++ )
548 while( klim-- )
549 *(cptr++) = gapchar; // ???
550 *(cptr++) = seq[i][j];
551 }
552 *cptr = 0;
553 strcpy( seq[i], tmpseq );
554 }
555
556 iptr = tmpgaplen;
557 for( j=0; j<len1; j++ )
558 {
559 *(iptr++) = gaplen[j];
560 for( k=0; k<gaplen[j]; k++ )
561 *(iptr++) = 0;
562 }
563 *iptr = -1;
564
565 iptr = tmpgaplen;
566 while( *iptr != -1 ) *gaplen++ = *iptr++;
567
568 free( mar );
569 free( tmpseq );
570 free( tmpgaplen );
571 }