comparison mafft/core/tbfast.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 0
6
7 static int nadd;
8 static int treein;
9 static int topin;
10 static int treeout;
11 static int distout;
12 static int noalign;
13 static int multidist;
14 static int subalignment;
15 static int subalignmentoffset;
16
17 #ifdef enablemultithread
18 typedef struct _jobtable
19 {
20 int i;
21 int j;
22 } Jobtable;
23
24 typedef struct _distancematrixthread_arg
25 {
26 int njob;
27 int thread_no;
28 float *selfscore;
29 float **iscore;
30 char **seq;
31 int **skiptable;
32 Jobtable *jobpospt;
33 pthread_mutex_t *mutex;
34 } distancematrixthread_arg_t;
35
36 typedef struct _treebasethread_arg
37 {
38 int thread_no;
39 int *nrunpt;
40 int njob;
41 int *nlen;
42 int *jobpospt;
43 int ***topol;
44 Treedep *dep;
45 char **aseq;
46 double *effarr;
47 int *alloclenpt;
48 LocalHom **localhomtable;
49 RNApair ***singlerna;
50 double *effarr_kozo;
51 int *fftlog;
52 char *mergeoralign;
53 pthread_mutex_t *mutex;
54 pthread_cond_t *treecond;
55 } treebasethread_arg_t;
56 #endif
57
58 void arguments( int argc, char *argv[] )
59 {
60 int c;
61
62 nthread = 1;
63 outnumber = 0;
64 scoreout = 0;
65 spscoreout = 0;
66 treein = 0;
67 topin = 0;
68 rnaprediction = 'm';
69 rnakozo = 0;
70 nevermemsave = 0;
71 inputfile = NULL;
72 addfile = NULL;
73 addprofile = 1;
74 fftkeika = 0;
75 constraint = 0;
76 nblosum = 62;
77 fmodel = 0;
78 calledByXced = 0;
79 devide = 0;
80 use_fft = 0; // chuui
81 force_fft = 0;
82 fftscore = 1;
83 fftRepeatStop = 0;
84 fftNoAnchStop = 0;
85 weight = 3;
86 utree = 1;
87 tbutree = 1;
88 refine = 0;
89 check = 1;
90 cut = 0.0;
91 disp = 0;
92 outgap = 1;
93 alg = 'A';
94 mix = 0;
95 tbitr = 0;
96 scmtd = 5;
97 tbweight = 0;
98 tbrweight = 3;
99 checkC = 0;
100 treemethod = 'X';
101 sueff_global = 0.1;
102 contin = 0;
103 scoremtx = 1;
104 kobetsubunkatsu = 0;
105 dorp = NOTSPECIFIED;
106 ppenalty_dist = NOTSPECIFIED;
107 ppenalty = NOTSPECIFIED;
108 penalty_shift_factor = 1000.0;
109 ppenalty_ex = NOTSPECIFIED;
110 poffset = NOTSPECIFIED;
111 kimuraR = NOTSPECIFIED;
112 pamN = NOTSPECIFIED;
113 geta2 = GETA2;
114 fftWinSize = NOTSPECIFIED;
115 fftThreshold = NOTSPECIFIED;
116 RNAppenalty = NOTSPECIFIED;
117 RNAppenalty_ex = NOTSPECIFIED;
118 RNApthr = NOTSPECIFIED;
119 TMorJTT = JTT;
120 consweight_multi = 1.0;
121 consweight_rna = 0.0;
122 multidist = 0;
123 subalignment = 0;
124 subalignmentoffset = 0;
125 legacygapcost = 0;
126 specificityconsideration = 0.0;
127
128 while( --argc > 0 && (*++argv)[0] == '-' )
129 {
130 while ( ( c = *++argv[0] ) )
131 {
132 switch( c )
133 {
134 case 'i':
135 inputfile = *++argv;
136 fprintf( stderr, "inputfile = %s\n", inputfile );
137 --argc;
138 goto nextoption;
139 case 'I':
140 nadd = myatoi( *++argv );
141 fprintf( stderr, "nadd = %d\n", nadd );
142 --argc;
143 goto nextoption;
144 case 'e':
145 RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
146 --argc;
147 goto nextoption;
148 case 'o':
149 RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
150 --argc;
151 goto nextoption;
152 case 'V':
153 ppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 );
154 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
155 --argc;
156 goto nextoption;
157 case 'f':
158 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
159 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
160 --argc;
161 goto nextoption;
162 case 'Q':
163 penalty_shift_factor = atof( *++argv );
164 --argc;
165 goto nextoption;
166 case 'g':
167 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
168 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
169 --argc;
170 goto nextoption;
171 case 'h':
172 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
173 // fprintf( stderr, "poffset = %d\n", poffset );
174 --argc;
175 goto nextoption;
176 case 'k':
177 kimuraR = myatoi( *++argv );
178 fprintf( stderr, "kappa = %d\n", kimuraR );
179 --argc;
180 goto nextoption;
181 case 'b':
182 nblosum = myatoi( *++argv );
183 scoremtx = 1;
184 fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
185 --argc;
186 goto nextoption;
187 case 'j':
188 pamN = myatoi( *++argv );
189 scoremtx = 0;
190 TMorJTT = JTT;
191 fprintf( stderr, "jtt/kimura %d\n", pamN );
192 --argc;
193 goto nextoption;
194 case 'm':
195 pamN = myatoi( *++argv );
196 scoremtx = 0;
197 TMorJTT = TM;
198 fprintf( stderr, "tm %d\n", pamN );
199 --argc;
200 goto nextoption;
201 case 'l':
202 fastathreshold = atof( *++argv );
203 constraint = 2;
204 --argc;
205 goto nextoption;
206 case 'r':
207 consweight_rna = atof( *++argv );
208 rnakozo = 1;
209 --argc;
210 goto nextoption;
211 case 'c':
212 consweight_multi = atof( *++argv );
213 --argc;
214 goto nextoption;
215 case 'C':
216 nthread = myatoi( *++argv );
217 fprintf( stderr, "nthread = %d\n", nthread );
218 --argc;
219 goto nextoption;
220 case 's':
221 specificityconsideration = (double)myatof( *++argv );
222 // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration );
223 --argc;
224 goto nextoption;
225 case 'R':
226 rnaprediction = 'r';
227 #if 1
228 case 'a':
229 fmodel = 1;
230 break;
231 #endif
232 case 'K':
233 addprofile = 0;
234 break;
235 case 'y':
236 distout = 1;
237 break;
238 case 't':
239 treeout = 1;
240 break;
241 case 'T':
242 noalign = 1;
243 break;
244 case 'D':
245 dorp = 'd';
246 break;
247 case 'P':
248 dorp = 'p';
249 break;
250 case 'L':
251 legacygapcost = 1;
252 break;
253 #if 1
254 case 'O':
255 outgap = 0;
256 break;
257 #else
258 case 'O':
259 fftNoAnchStop = 1;
260 break;
261 #endif
262 #if 0
263 case 'S' :
264 scoreout = 1; // for checking parallel calculation
265 break;
266 #else
267 case 'S' :
268 spscoreout = 1; // 2014/Dec/30, sp score
269 break;
270 #endif
271 case 'H':
272 subalignment = 1;
273 subalignmentoffset = myatoi( *++argv );
274 --argc;
275 goto nextoption;
276 #if 0
277 case 'e':
278 fftscore = 0;
279 break;
280 case 'r':
281 fmodel = -1;
282 break;
283 case 'R':
284 fftRepeatStop = 1;
285 break;
286 case 's':
287 treemethod = 's';
288 break;
289 #endif
290 case 'X':
291 treemethod = 'X';
292 sueff_global = atof( *++argv );
293 fprintf( stderr, "sueff_global = %f\n", sueff_global );
294 --argc;
295 goto nextoption;
296 case 'E':
297 treemethod = 'E';
298 break;
299 case 'q':
300 treemethod = 'q';
301 break;
302 case 'n' :
303 outnumber = 1;
304 break;
305 #if 0
306 case 'a':
307 alg = 'a';
308 break;
309 case 'H':
310 alg = 'H';
311 break;
312 case 'Q':
313 alg = 'Q';
314 break;
315 #endif
316 case 'A':
317 alg = 'A';
318 break;
319 case 'M':
320 alg = 'M';
321 break;
322 case 'N':
323 nevermemsave = 1;
324 break;
325 case 'B': // hitsuyou! memopt -M -B no tame
326 break;
327 case 'F':
328 use_fft = 1;
329 break;
330 case 'G':
331 force_fft = 1;
332 use_fft = 1;
333 break;
334 case 'U':
335 treein = 1;
336 break;
337 #if 0
338 case 'V':
339 topin = 1;
340 break;
341 #endif
342 case 'u':
343 tbrweight = 0;
344 weight = 0;
345 break;
346 case 'v':
347 tbrweight = 3;
348 break;
349 case 'd':
350 multidist = 1;
351 break;
352 #if 0
353 case 'd':
354 disp = 1;
355 break;
356 #endif
357 /* Modified 01/08/27, default: user tree */
358 case 'J':
359 tbutree = 0;
360 break;
361 /* modification end. */
362 case 'z':
363 fftThreshold = myatoi( *++argv );
364 --argc;
365 goto nextoption;
366 case 'w':
367 fftWinSize = myatoi( *++argv );
368 --argc;
369 goto nextoption;
370 case 'Z':
371 checkC = 1;
372 break;
373 default:
374 fprintf( stderr, "illegal option %c\n", c );
375 argc = 0;
376 break;
377 }
378 }
379 nextoption:
380 ;
381 }
382 if( argc == 1 )
383 {
384 cut = atof( (*argv) );
385 argc--;
386 }
387 if( argc != 0 )
388 {
389 fprintf( stderr, "options: Check source file !\n" );
390 exit( 1 );
391 }
392 if( tbitr == 1 && outgap == 0 )
393 {
394 fprintf( stderr, "conflicting options : o, m or u\n" );
395 exit( 1 );
396 }
397 if( alg == 'C' && outgap == 0 )
398 {
399 fprintf( stderr, "conflicting options : C, o\n" );
400 exit( 1 );
401 }
402 }
403
404 #if 0
405 static void *distancematrixthread2( void *arg )
406 {
407 distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
408 int njob = targ->njob;
409 int thread_no = targ->thread_no;
410 float *selfscore = targ->selfscore;
411 float **iscore = targ->iscore;
412 char **seq = targ->seq;
413 Jobtable *jobpospt = targ->jobpospt;
414
415 float ssi, ssj, bunbo;
416 int i, j;
417
418 while( 1 )
419 {
420 pthread_mutex_lock( targ->mutex );
421 i = jobpospt->i;
422 i++;
423 if( i == njob-1 )
424 {
425 pthread_mutex_unlock( targ->mutex );
426 return( NULL );
427 }
428 jobpospt->i = i;
429 pthread_mutex_unlock( targ->mutex );
430
431 ssi = selfscore[i];
432 if( i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
433 for( j=i+1; j<njob; j++ )
434 {
435 ssj = selfscore[j];
436 bunbo = MIN( ssi, ssj );
437 if( bunbo == 0.0 )
438 iscore[i][j-i] = 1.0;
439 else
440 iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo;
441 }
442 }
443 }
444 #endif
445
446 #ifdef enablemultithread
447 static void *distancematrixthread( void *arg ) // v7.2 ijou deha tsukawanaihazu
448 {
449 distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
450 int njob = targ->njob;
451 int thread_no = targ->thread_no;
452 float *selfscore = targ->selfscore;
453 float **iscore = targ->iscore;
454 char **seq = targ->seq;
455 int **skiptable = targ->skiptable;
456 Jobtable *jobpospt = targ->jobpospt;
457
458 float ssi, ssj, bunbo;
459 int i, j;
460
461 while( 1 )
462 {
463 pthread_mutex_lock( targ->mutex );
464 j = jobpospt->j;
465 i = jobpospt->i;
466 j++;
467 if( j == njob )
468 {
469 i++;
470 j = i + 1;
471 if( i == njob-1 )
472 {
473 pthread_mutex_unlock( targ->mutex );
474 return( NULL );
475 }
476 }
477 jobpospt->j = j;
478 jobpospt->i = i;
479 pthread_mutex_unlock( targ->mutex );
480
481
482 if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
483 ssi = selfscore[i];
484 ssj = selfscore[j];
485 bunbo = MIN( ssi, ssj );
486 if( bunbo == 0.0 )
487 iscore[i][j-i] = 2.0; // 2013/Oct/17
488 else
489 // iscore[i][j-i] = ( 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo ) * 2.0; // 2013/Oct/17
490 iscore[i][j-i] = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
491 if( iscore[i][j-i] > 10 ) iscore[i][j-i] = 10.0; // 2015/Mar/17
492 }
493 }
494
495 static void *treebasethread( void *arg )
496 {
497 treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
498 int *nrunpt = targ->nrunpt;
499 int thread_no = targ->thread_no;
500 int njob = targ->njob;
501 int *nlen = targ->nlen;
502 int *jobpospt = targ->jobpospt;
503 int ***topol = targ->topol;
504 Treedep *dep = targ->dep;
505 char **aseq = targ->aseq;
506 double *effarr = targ->effarr;
507 int *alloclen = targ->alloclenpt;
508 LocalHom **localhomtable = targ->localhomtable;
509 RNApair ***singlerna = targ->singlerna;
510 double *effarr_kozo = targ->effarr_kozo;
511 int *fftlog = targ->fftlog;
512 char *mergeoralign = targ->mergeoralign;
513
514 char **mseq1, **mseq2;
515 char **localcopy;
516 int i, j, l;
517 int len1, len2;
518 int clus1, clus2;
519 float pscore;
520 char *indication1, *indication2;
521 double *effarr1 = NULL;
522 double *effarr2 = NULL;
523 double *effarr1_kozo = NULL;
524 double *effarr2_kozo = NULL;
525 LocalHom ***localhomshrink = NULL;
526 int m1, m2;
527 float dumfl = 0.0;
528 int ffttry;
529 RNApair ***grouprna1 = NULL, ***grouprna2 = NULL;
530 double **dynamicmtx;
531
532
533 mseq1 = AllocateCharMtx( njob, 0 );
534 mseq2 = AllocateCharMtx( njob, 0 );
535 localcopy = calloc( njob, sizeof( char * ) );
536 dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
537
538 if( rnakozo && rnaprediction == 'm' )
539 {
540 grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
541 grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
542 }
543 else
544 {
545 grouprna1 = grouprna2 = NULL;
546 }
547
548 effarr1 = AllocateDoubleVec( njob );
549 effarr2 = AllocateDoubleVec( njob );
550 indication1 = AllocateCharVec( 150 );
551 indication2 = AllocateCharVec( 150 );
552 #if 0
553 #else
554 if( constraint )
555 {
556 localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
557 for( i=0; i<njob; i++ )
558 localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
559 }
560 #endif
561 effarr1_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
562 effarr2_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
563 for( i=0; i<njob; i++ ) effarr1_kozo[i] = 0.0;
564 for( i=0; i<njob; i++ ) effarr2_kozo[i] = 0.0;
565
566
567 #if 0
568 #endif
569
570 #if 0
571 for( i=0; i<njob; i++ )
572 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
573 #endif
574
575 #if 0 //-> main thread
576 if( constraint )
577 calcimportance( njob, effarr, aseq, localhomtable );
578 #endif
579
580
581 // writePre( njob, name, nlen, aseq, 0 );
582
583
584 // for( l=0; l<njob-1; l++ )
585 while( 1 )
586 {
587
588 pthread_mutex_lock( targ->mutex );
589 l = *jobpospt;
590 if( l == njob-1 )
591 {
592 pthread_mutex_unlock( targ->mutex );
593 if( commonIP ) FreeIntMtx( commonIP );
594 commonIP = NULL;
595 Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
596 Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
597 A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
598 free( mseq1 );
599 free( mseq2 );
600 free( localcopy );
601 free( effarr1 );
602 free( effarr2 );
603 free( effarr1_kozo );
604 free( effarr2_kozo );
605 free( indication1 );
606 free( indication2 );
607 FreeDoubleMtx( dynamicmtx );
608 if( rnakozo && rnaprediction == 'm' )
609 {
610 if( grouprna1 ) free( grouprna1 ); // nakami ha?
611 if( grouprna2 ) free( grouprna2 ); // nakami ha?
612 grouprna1 = grouprna2 = NULL;
613 }
614 if( constraint )
615 {
616 if( localhomshrink ) // nen no tame
617 {
618 for( i=0; i<njob; i++ )
619 {
620 free( localhomshrink[i] );
621 localhomshrink[i] = NULL;
622 }
623 free( localhomshrink );
624 localhomshrink = NULL;
625 }
626 }
627 return( NULL );
628 }
629 *jobpospt = l+1;
630
631 if( dep[l].child0 != -1 )
632 {
633 while( dep[dep[l].child0].done == 0 )
634 pthread_cond_wait( targ->treecond, targ->mutex );
635 }
636 if( dep[l].child1 != -1 )
637 {
638 while( dep[dep[l].child1].done == 0 )
639 pthread_cond_wait( targ->treecond, targ->mutex );
640 }
641 // while( *nrunpt >= nthread )
642 // pthread_cond_wait( targ->treecond, targ->mutex );
643 (*nrunpt)++;
644
645 // pthread_mutex_unlock( targ->mutex );
646
647 if( mergeoralign[l] == 'n' )
648 {
649 // fprintf( stderr, "SKIP!\n" );
650 dep[l].done = 1;
651 (*nrunpt)--;
652 pthread_cond_broadcast( targ->treecond );
653 free( topol[l][0] );
654 free( topol[l][1] );
655 free( topol[l] );
656 pthread_mutex_unlock( targ->mutex );
657 continue;
658 }
659
660
661
662 m1 = topol[l][0][0];
663 m2 = topol[l][1][0];
664
665 // fprintf( stderr, "\ndistfromtip = %f\n", dep[l].distfromtip );
666 // makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip - 0.5 );
667 makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
668
669 // pthread_mutex_lock( targ->mutex );
670
671
672
673 len1 = strlen( aseq[m1] );
674 len2 = strlen( aseq[m2] );
675 if( *alloclen <= len1 + len2 )
676 {
677 fprintf( stderr, "\nReallocating (by thread %d) ..", thread_no );
678 *alloclen = ( len1 + len2 ) + 1000;
679 ReallocateCharMtx( aseq, njob, *alloclen + 10 );
680 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
681 }
682
683 for( i=0; (j=topol[l][0][i])!=-1; i++ )
684 {
685 localcopy[j] = calloc( *alloclen, sizeof( char ) );
686 strcpy( localcopy[j], aseq[j] );
687 }
688 for( i=0; (j=topol[l][1][i])!=-1; i++ )
689 {
690 localcopy[j] = calloc( *alloclen, sizeof( char ) );
691 strcpy( localcopy[j], aseq[j] );
692 }
693
694 pthread_mutex_unlock( targ->mutex );
695
696
697
698 if( effarr_kozo )
699 {
700 clus1 = fastconjuction_noname_kozo( topol[l][0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
701 clus2 = fastconjuction_noname_kozo( topol[l][1], localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
702 }
703 else
704 {
705 clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1, 0.0 );
706 clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2, 0.0 );
707 }
708
709
710
711 #if 1
712 fprintf( stderr, "\rSTEP % 5d /%d (thread %4d) ", l+1, njob-1, thread_no );
713 #else
714 fprintf( stderr, "STEP %d /%d (thread %d) \n", l+1, njob-1, thread_no );
715 fprintf( stderr, "group1 = %.66s", indication1 );
716 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
717 fprintf( stderr, ", child1 = %d\n", dep[l].child0 );
718 fprintf( stderr, "group2 = %.66s", indication2 );
719 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
720 fprintf( stderr, ", child2 = %d\n", dep[l].child1 );
721
722 fprintf( stderr, "Group1's lengths = " );
723 for( i=0; i<clus1; i++ ) fprintf( stderr, "%d ", strlen( mseq1[i] ) );
724 fprintf( stderr, "\n" );
725 fprintf( stderr, "Group2's lengths = " );
726 for( i=0; i<clus2; i++ ) fprintf( stderr, "%d ", strlen( mseq2[i] ) );
727 fprintf( stderr, "\n" );
728
729 #endif
730
731
732
733 // for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
734
735 if( constraint )
736 {
737 fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
738 // msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
739 // fprintf( stderr, "localhomshrink =\n" );
740 // outlocalhompt( localhomshrink, clus1, clus2 );
741 // weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
742 // fprintf( stderr, "after weight =\n" );
743 // outlocalhompt( localhomshrink, clus1, clus2 );
744 }
745 if( rnakozo && rnaprediction == 'm' )
746 {
747 makegrouprna( grouprna1, singlerna, topol[l][0] );
748 makegrouprna( grouprna2, singlerna, topol[l][1] );
749 }
750
751
752 /*
753 fprintf( stderr, "before align all\n" );
754 display( localcopy, njob );
755 fprintf( stderr, "\n" );
756 fprintf( stderr, "before align 1 %s \n", indication1 );
757 display( mseq1, clus1 );
758 fprintf( stderr, "\n" );
759 fprintf( stderr, "before align 2 %s \n", indication2 );
760 display( mseq2, clus2 );
761 fprintf( stderr, "\n" );
762 */
763
764 if( !nevermemsave && ( constraint != 2 && alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
765 {
766 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
767 alg = 'M';
768 if( commonIP ) FreeIntMtx( commonIP );
769 commonIP = NULL;
770 commonAlloc1 = 0;
771 commonAlloc2 = 0;
772 }
773
774
775 // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
776 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
777 else ffttry = 0;
778 // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
779 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
780 // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
781 if( constraint == 2 )
782 {
783 if( alg == 'M' )
784 {
785 fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" );
786 exit( 1 );
787 }
788 fprintf( stderr, "c" );
789 if( alg == 'A' )
790 {
791 imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
792 if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
793 pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
794 }
795 else if( alg == 'Q' )
796 {
797 fprintf( stderr, "Not supported\n" );
798 exit( 1 );
799 }
800 }
801 else if( force_fft || ( use_fft && ffttry ) )
802 {
803 fprintf( stderr, "f" );
804 if( alg == 'M' )
805 {
806 fprintf( stderr, "m" );
807 pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
808 }
809 else
810 pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
811 }
812 else
813 {
814 fprintf( stderr, "d" );
815 fftlog[m1] = 0;
816 switch( alg )
817 {
818 case( 'a' ):
819 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
820 break;
821 case( 'M' ):
822 fprintf( stderr, "m" );
823 pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
824 break;
825 case( 'A' ):
826 pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
827 break;
828 default:
829 ErrorExit( "ERROR IN SOURCE FILE" );
830 }
831 }
832
833 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
834
835 #if SCOREOUT
836 fprintf( stderr, "score = %10.2f\n", pscore );
837 #endif
838
839 /*
840 fprintf( stderr, "after align 1 %s \n", indication1 );
841 display( mseq1, clus1 );
842 fprintf( stderr, "\n" );
843 fprintf( stderr, "after align 2 %s \n", indication2 );
844 display( mseq2, clus2 );
845 fprintf( stderr, "\n" );
846 */
847
848 // writePre( njob, name, nlen, localcopy, 0 );
849
850 if( disp ) display( localcopy, njob );
851
852 pthread_mutex_lock( targ->mutex );
853 dep[l].done = 1;
854 (*nrunpt)--;
855 pthread_cond_broadcast( targ->treecond );
856
857 // pthread_mutex_unlock( targ->mutex );
858 // pthread_mutex_lock( targ->mutex );
859
860 for( i=0; (j=topol[l][0][i])!=-1; i++ )
861 strcpy( aseq[j], localcopy[j] );
862 for( i=0; (j=topol[l][1][i])!=-1; i++ )
863 strcpy( aseq[j], localcopy[j] );
864 pthread_mutex_unlock( targ->mutex );
865
866 for( i=0; (j=topol[l][0][i])!=-1; i++ )
867 free( localcopy[j] );
868 for( i=0; (j=topol[l][1][i])!=-1; i++ )
869 free( localcopy[j] );
870 free( topol[l][0] );
871 free( topol[l][1] );
872 free( topol[l] );
873
874 }
875 }
876 #endif
877
878 void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo )
879 {
880 int i, l, m;
881 int len1nocommongap, len2nocommongap;
882 int len1, len2;
883 int clus1, clus2;
884 float pscore, tscore;
885 static char *indication1, *indication2;
886 static double *effarr1 = NULL;
887 static double *effarr2 = NULL;
888 static double *effarr1_kozo = NULL;
889 static double *effarr2_kozo = NULL;
890 static LocalHom ***localhomshrink = NULL;
891 static int *fftlog;
892 int m1, m2;
893 static int *gaplen;
894 static int *gapmap;
895 static int *alreadyaligned;
896 float dumfl = 0.0;
897 int ffttry;
898 RNApair ***grouprna1 = NULL, ***grouprna2 = NULL;
899 static double **dynamicmtx;
900
901 if( rnakozo && rnaprediction == 'm' )
902 {
903 grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
904 grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
905 }
906 else
907 {
908 grouprna1 = grouprna2 = NULL;
909 }
910
911 if( effarr1 == NULL )
912 {
913 fftlog = AllocateIntVec( njob );
914 effarr1 = AllocateDoubleVec( njob );
915 effarr2 = AllocateDoubleVec( njob );
916 indication1 = AllocateCharVec( 150 );
917 indication2 = AllocateCharVec( 150 );
918 gaplen = AllocateIntVec( *alloclen+10 );
919 gapmap = AllocateIntVec( *alloclen+10 );
920 alreadyaligned = AllocateIntVec( njob );
921 dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
922 #if 0
923 #else
924 if( constraint )
925 {
926 localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
927 for( i=0; i<njob; i++ )
928 localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
929 }
930 #endif
931 effarr1_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
932 effarr2_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
933 for( i=0; i<njob; i++ ) effarr1_kozo[i] = 0.0;
934 for( i=0; i<njob; i++ ) effarr2_kozo[i] = 0.0;
935 }
936
937 for( i=0; i<njob-nadd; i++ ) alreadyaligned[i] = 1;
938 for( i=njob-nadd; i<njob; i++ ) alreadyaligned[i] = 0;
939
940 for( l=0; l<njob; l++ ) fftlog[l] = 1;
941
942 #if 0
943 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
944 #endif
945
946 #if 0
947 for( i=0; i<njob; i++ )
948 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
949 #endif
950
951
952 if( constraint )
953 calcimportance( njob, effarr, aseq, localhomtable );
954 // dontcalcimportance( njob, effarr, aseq, localhomtable ); // CHUUIII!!!!!
955
956
957 // writePre( njob, name, nlen, aseq, 0 );
958
959 tscore = 0.0;
960 for( l=0; l<njob-1; l++ )
961 {
962 // fprintf( stderr, "\ndistfromtip = %f\n", dep[l].distfromtip );
963 makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
964 // makedynamicmtx( dynamicmtx, n_dis_consweight_multi, ( dep[l].distfromtip - 0.2 ) * 3 );
965 if( mergeoralign[l] == 'n' )
966 {
967 // fprintf( stderr, "SKIP!\n" );
968 free( topol[l][0] );
969 free( topol[l][1] );
970 free( topol[l] );
971 continue;
972 }
973
974 m1 = topol[l][0][0];
975 m2 = topol[l][1][0];
976 len1 = strlen( aseq[m1] );
977 len2 = strlen( aseq[m2] );
978 if( *alloclen < len1 + len2 )
979 {
980 fprintf( stderr, "\nReallocating.." );
981 *alloclen = ( len1 + len2 ) + 1000;
982 ReallocateCharMtx( aseq, njob, *alloclen + 10 );
983 gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
984 if( gaplen == NULL )
985 {
986 fprintf( stderr, "Cannot realloc gaplen\n" );
987 exit( 1 );
988 }
989 gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
990 if( gapmap == NULL )
991 {
992 fprintf( stderr, "Cannot realloc gapmap\n" );
993 exit( 1 );
994 }
995 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
996 }
997
998 if( effarr_kozo )
999 {
1000 clus1 = fastconjuction_noname_kozo( topol[l][0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
1001 clus2 = fastconjuction_noname_kozo( topol[l][1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
1002 }
1003 else
1004 {
1005 clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
1006 clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
1007 }
1008
1009 if( mergeoralign[l] == '1' || mergeoralign[l] == '2' ) // only in serial version
1010 {
1011 newgapstr = "=";
1012 }
1013 else
1014 newgapstr = "-";
1015
1016
1017 len1nocommongap = len1;
1018 len2nocommongap = len2;
1019 if( mergeoralign[l] == '1' ) // nai
1020 {
1021 findcommongaps( clus2, mseq2, gapmap );
1022 commongappick( clus2, mseq2 );
1023 len2nocommongap = strlen( mseq2[0] );
1024 }
1025 else if( mergeoralign[l] == '2' )
1026 {
1027 findcommongaps( clus1, mseq1, gapmap );
1028 commongappick( clus1, mseq1 );
1029 len1nocommongap = strlen( mseq1[0] );
1030 }
1031
1032
1033 fprintf( trap_g, "\nSTEP-%d\n", l );
1034 fprintf( trap_g, "group1 = %s\n", indication1 );
1035 fprintf( trap_g, "group2 = %s\n", indication2 );
1036
1037 #if 1
1038 fprintf( stderr, "\rSTEP % 5d /%d ", l+1, njob-1 );
1039 fflush( stderr );
1040 #else
1041 fprintf( stdout, "STEP %d /%d\n", l+1, njob-1 );
1042 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
1043 fprintf( stderr, "group1 = %.66s", indication1 );
1044 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
1045 fprintf( stderr, "\n" );
1046 fprintf( stderr, "group2 = %.66s", indication2 );
1047 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
1048 fprintf( stderr, "\n" );
1049 #endif
1050
1051
1052
1053 // for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
1054
1055 if( constraint )
1056 {
1057 fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
1058 // msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
1059 // fprintf( stderr, "localhomshrink =\n" );
1060 // outlocalhompt( localhomshrink, clus1, clus2 );
1061 // weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
1062 // fprintf( stderr, "after weight =\n" );
1063 // outlocalhompt( localhomshrink, clus1, clus2 );
1064 }
1065 if( rnakozo && rnaprediction == 'm' )
1066 {
1067 makegrouprna( grouprna1, singlerna, topol[l][0] );
1068 makegrouprna( grouprna2, singlerna, topol[l][1] );
1069 }
1070
1071
1072 /*
1073 fprintf( stderr, "before align all\n" );
1074 display( aseq, njob );
1075 fprintf( stderr, "\n" );
1076 fprintf( stderr, "before align 1 %s \n", indication1 );
1077 display( mseq1, clus1 );
1078 fprintf( stderr, "\n" );
1079 fprintf( stderr, "before align 2 %s \n", indication2 );
1080 display( mseq2, clus2 );
1081 fprintf( stderr, "\n" );
1082 */
1083
1084 if( !nevermemsave && ( constraint != 2 && alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
1085 {
1086 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
1087 alg = 'M';
1088 if( commonIP ) FreeIntMtx( commonIP );
1089 commonIP = NULL;
1090 commonAlloc1 = 0;
1091 commonAlloc2 = 0;
1092 }
1093
1094
1095 // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1096 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
1097 else ffttry = 0;
1098 // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
1099 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
1100 // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
1101 if( constraint == 2 )
1102 {
1103 if( alg == 'M' )
1104 {
1105 fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" );
1106 exit( 1 );
1107 }
1108 fprintf( stderr, "c" );
1109 if( alg == 'A' )
1110 {
1111 imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
1112 if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
1113 pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1114 }
1115 else if( alg == 'Q' )
1116 {
1117 fprintf( stderr, "Not supported\n" );
1118 exit( 1 );
1119 }
1120 }
1121 else if( force_fft || ( use_fft && ffttry ) )
1122 {
1123 fprintf( stderr, "f" );
1124 if( alg == 'M' )
1125 {
1126 fprintf( stderr, "m" );
1127 pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
1128 }
1129 else
1130 pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
1131 }
1132 else
1133 {
1134 fprintf( stderr, "d" );
1135 fftlog[m1] = 0;
1136 switch( alg )
1137 {
1138 case( 'a' ):
1139 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1140 break;
1141 case( 'M' ):
1142 fprintf( stderr, "m" );
1143 pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1144 break;
1145 case( 'A' ):
1146 pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1147 break;
1148 default:
1149 ErrorExit( "ERROR IN SOURCE FILE" );
1150 }
1151 }
1152
1153 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1154
1155 #if SCOREOUT
1156 fprintf( stderr, "score = %10.2f\n", pscore );
1157 #endif
1158 tscore += pscore;
1159 /*
1160 fprintf( stderr, "after align 1 %s \n", indication1 );
1161 display( mseq1, clus1 );
1162 fprintf( stderr, "\n" );
1163 fprintf( stderr, "after align 2 %s \n", indication2 );
1164 display( mseq2, clus2 );
1165 fprintf( stderr, "\n" );
1166 */
1167
1168 // writePre( njob, name, nlen, aseq, 0 );
1169
1170 if( disp ) display( aseq, njob );
1171
1172 if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
1173 {
1174 adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
1175 restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
1176 findnewgaps( clus2, 0, mseq2, gaplen );
1177 insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' );
1178 for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
1179 for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
1180 }
1181 if( mergeoralign[l] == '2' )
1182 {
1183 // fprintf( stderr, ">mseq1[0] = \n%s\n", mseq1[0] );
1184 // fprintf( stderr, ">mseq2[0] = \n%s\n", mseq2[0] );
1185 adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
1186 restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
1187 findnewgaps( clus1, 0, mseq1, gaplen );
1188 insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' );
1189 #if 0
1190 for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
1191 for( i=0; i<clus1; i++ ) eq2dash( mseq1[i] );
1192 for( i=0; i<clus2; i++ ) eq2dash( mseq2[i] );
1193 #else
1194 eq2dashmatometehayaku( mseq1, clus1 );
1195 eq2dashmatometehayaku( mseq2, clus2 );
1196 #endif
1197 for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
1198 }
1199
1200 free( topol[l][0] );
1201 free( topol[l][1] );
1202 free( topol[l] );
1203 }
1204 #if SCOREOUT
1205 fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
1206 #endif
1207 if( rnakozo && rnaprediction == 'm' )
1208 {
1209 if( grouprna1 ) free( grouprna1 ); // nakami ha?
1210 if( grouprna2 ) free( grouprna2 ); // nakami ha?
1211 grouprna1 = grouprna2 = NULL;
1212 }
1213 if( constraint )
1214 {
1215 if( localhomshrink ) // nen no tame
1216 {
1217 for( i=0; i<njob; i++ )
1218 {
1219 free( localhomshrink[i] );
1220 localhomshrink[i] = NULL;
1221 }
1222 free( localhomshrink );
1223 localhomshrink = NULL;
1224 }
1225 }
1226 }
1227
1228 static void WriteOptions( FILE *fp )
1229 {
1230
1231 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1232 else
1233 {
1234 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1235 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1236 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
1237 }
1238 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1239 if( use_fft ) fprintf( fp, "FFT on\n" );
1240
1241 fprintf( fp, "tree-base method\n" );
1242 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1243 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1244 if( tbitr || tbweight )
1245 {
1246 fprintf( fp, "iterate at each step\n" );
1247 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
1248 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
1249 if( tbweight ) fprintf( fp, " weighted\n" );
1250 fprintf( fp, "\n" );
1251 }
1252
1253 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1254
1255 if( alg == 'a' )
1256 fprintf( fp, "Algorithm A\n" );
1257 else if( alg == 'A' )
1258 fprintf( fp, "Algorithm A+\n" );
1259 else if( alg == 'C' )
1260 fprintf( fp, "Apgorithm A+/C\n" );
1261 else
1262 fprintf( fp, "Unknown algorithm\n" );
1263
1264 if( treemethod == 'X' )
1265 fprintf( fp, "Tree = UPGMA (mix).\n" );
1266 else if( treemethod == 'E' )
1267 fprintf( fp, "Tree = UPGMA (average).\n" );
1268 else if( treemethod == 'q' )
1269 fprintf( fp, "Tree = Minimum linkage.\n" );
1270 else
1271 fprintf( fp, "Unknown tree.\n" );
1272
1273 if( use_fft )
1274 {
1275 fprintf( fp, "FFT on\n" );
1276 if( dorp == 'd' )
1277 fprintf( fp, "Basis : 4 nucleotides\n" );
1278 else
1279 {
1280 if( fftscore )
1281 fprintf( fp, "Basis : Polarity and Volume\n" );
1282 else
1283 fprintf( fp, "Basis : 20 amino acids\n" );
1284 }
1285 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
1286 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1287 }
1288 else
1289 fprintf( fp, "FFT off\n" );
1290 fflush( fp );
1291 }
1292
1293
1294 int main( int argc, char *argv[] )
1295 {
1296 static int *nlen;
1297 static float *selfscore;
1298 int nogaplen;
1299 static char **name, **seq;
1300 static char **mseq1, **mseq2;
1301 static char **bseq;
1302 static float **iscore, **iscore_kozo;
1303 int **skiptable;
1304 static double *eff, *eff_kozo, *eff_kozo_mapped = NULL;
1305 int i, j, ien, ik, jk;
1306 static int ***topol, ***topol_kozo;
1307 static int *addmem;
1308 static Treedep *dep;
1309 static float **len, **len_kozo;
1310 FILE *prep;
1311 FILE *infp;
1312 FILE *orderfp;
1313 FILE *hat2p;
1314 double unweightedspscore;
1315 int alignmentlength;
1316 char *mergeoralign;
1317 int foundthebranch;
1318 int nsubalignments, maxmem;
1319 int **subtable;
1320 int *insubtable;
1321 int *preservegaps;
1322 char ***subalnpt;
1323
1324 char c;
1325 int alloclen;
1326 LocalHom **localhomtable = NULL;
1327 RNApair ***singlerna = NULL;
1328 float ssi, ssj, bunbo;
1329 static char *kozoarivec;
1330 int nkozo;
1331
1332 arguments( argc, argv );
1333 #ifndef enablemultithread
1334 nthread = 0;
1335 #endif
1336
1337 if( fastathreshold < 0.0001 ) constraint = 0;
1338
1339 if( inputfile )
1340 {
1341 infp = fopen( inputfile, "r" );
1342 if( !infp )
1343 {
1344 fprintf( stderr, "Cannot open %s\n", inputfile );
1345 exit( 1 );
1346 }
1347 }
1348 else
1349 infp = stdin;
1350
1351 getnumlen( infp );
1352 rewind( infp );
1353
1354
1355 nkozo = 0;
1356
1357 if( njob < 2 )
1358 {
1359 fprintf( stderr, "At least 2 sequences should be input!\n"
1360 "Only %d sequence found.\n", njob );
1361 exit( 1 );
1362 }
1363
1364 if( subalignment )
1365 {
1366 readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
1367 fprintf( stderr, "nsubalignments = %d\n", nsubalignments );
1368 fprintf( stderr, "maxmem = %d\n", maxmem );
1369 subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
1370 insubtable = AllocateIntVec( njob );
1371 for( i=0; i<njob; i++ ) insubtable[i] = 0;
1372 preservegaps = AllocateIntVec( njob );
1373 for( i=0; i<njob; i++ ) preservegaps[i] = 0;
1374 subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
1375 readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
1376 }
1377
1378 seq = AllocateCharMtx( njob, nlenmax+1 );
1379 mseq1 = AllocateCharMtx( njob, 0 );
1380 mseq2 = AllocateCharMtx( njob, 0 );
1381
1382 name = AllocateCharMtx( njob, B+1 );
1383 nlen = AllocateIntVec( njob );
1384 selfscore = AllocateFloatVec( njob );
1385
1386 topol = AllocateIntCub( njob, 2, 0 );
1387 len = AllocateFloatMtx( njob, 2 );
1388 iscore = AllocateFloatHalfMtx( njob );
1389 eff = AllocateDoubleVec( njob );
1390 kozoarivec = AllocateCharVec( njob );
1391
1392 mergeoralign = AllocateCharVec( njob );
1393
1394 dep = (Treedep *)calloc( njob, sizeof( Treedep ) );
1395 if( nadd ) addmem = AllocateIntVec( nadd+1 );
1396
1397 if( constraint )
1398 {
1399 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
1400 for( i=0; i<njob; i++ )
1401 {
1402 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
1403 for( j=0; j<njob; j++ )
1404 {
1405 localhomtable[i][j].start1 = -1;
1406 localhomtable[i][j].end1 = -1;
1407 localhomtable[i][j].start2 = -1;
1408 localhomtable[i][j].end2 = -1;
1409 localhomtable[i][j].overlapaa = -1.0;
1410 localhomtable[i][j].opt = -1.0;
1411 localhomtable[i][j].importance = -1.0;
1412 localhomtable[i][j].next = NULL;
1413 localhomtable[i][j].korh = 'h';
1414 }
1415 }
1416
1417 fprintf( stderr, "Loading 'hat3' ... " );
1418 prep = fopen( "hat3", "r" );
1419 if( prep == NULL ) ErrorExit( "Make hat3." );
1420 readlocalhomtable( prep, njob, localhomtable, kozoarivec );
1421 fclose( prep );
1422 fprintf( stderr, "\ndone.\n" );
1423
1424
1425 nkozo = 0;
1426 for( i=0; i<njob; i++ )
1427 {
1428 // fprintf( stderr, "kozoarivec[%d] = %d\n", i, kozoarivec[i] );
1429 if( kozoarivec[i] ) nkozo++;
1430 }
1431 if( nkozo )
1432 {
1433 topol_kozo = AllocateIntCub( nkozo, 2, 0 );
1434 len_kozo = AllocateFloatMtx( nkozo, 2 );
1435 iscore_kozo = AllocateFloatHalfMtx( nkozo );
1436 eff_kozo = AllocateDoubleVec( nkozo );
1437 eff_kozo_mapped = AllocateDoubleVec( njob );
1438 }
1439
1440
1441 // outlocalhom( localhomtable, njob );
1442
1443 #if 0
1444 fprintf( stderr, "Extending localhom ... " );
1445 extendlocalhom2( njob, localhomtable );
1446 fprintf( stderr, "done.\n" );
1447 #endif
1448 }
1449
1450 #if 0
1451 readData( infp, name, nlen, seq );
1452 #else
1453 readData_pointer( infp, name, nlen, seq );
1454 fclose( infp );
1455 #endif
1456
1457 constants( njob, seq );
1458
1459 #if 0
1460 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
1461 #endif
1462
1463 initSignalSM();
1464
1465 initFiles();
1466
1467 WriteOptions( trap_g );
1468
1469 c = seqcheck( seq );
1470 if( c )
1471 {
1472 fprintf( stderr, "Illegal character %c\n", c );
1473 exit( 1 );
1474 }
1475
1476 // writePre( njob, name, nlen, seq, 0 );
1477
1478 if( treein )
1479 {
1480 #if 0
1481 if( nkozo )
1482 {
1483 fprintf( stderr, "Both structure and user tree have been given. Not yet supported!\n" );
1484 exit( 1 );
1485 }
1486 #endif
1487 fprintf( stderr, "Loading a tree ... " );
1488 loadtree( njob, topol, len, name, nlen, dep );
1489 // loadtop( njob, topol, len, name, NULL, dep ); // 2015/Jan/13, not yet checked
1490 fprintf( stderr, "\ndone.\n\n" );
1491 }
1492 else
1493 {
1494 if( tbutree == 0 )
1495 {
1496 for( i=1; i<njob; i++ )
1497 {
1498 if( nlen[i] != nlen[0] )
1499 {
1500 fprintf( stderr, "Input pre-aligned seqences or make hat2.\n" );
1501 exit( 1 );
1502 }
1503 }
1504
1505 fprintf( stderr, "Making a distance matrix .. (Should not be used in versions >7.2) \n" );
1506 fflush( stderr );
1507 skiptable = AllocateIntMtx( njob, 0 );
1508 makeskiptable( njob, skiptable, seq ); // allocate suru.
1509 ien = njob-1;
1510 for( i=0; i<njob; i++ )
1511 {
1512 // selfscore[i] = naivepairscore11( seq[i], seq[i], penalty_dist );
1513 selfscore[i] = naivepairscorefast( seq[i], seq[i], skiptable[i], skiptable[i], penalty_dist );
1514 // fprintf( stderr, "penalty = %d\n", penalty );
1515 // fprintf( stderr, "penalty_dist = %d\n", penalty_dist );
1516 }
1517 #ifdef enablemultithread
1518 if( nthread > 0 )
1519 {
1520 distancematrixthread_arg_t *targ;
1521 Jobtable jobpos;
1522 pthread_t *handle;
1523 pthread_mutex_t mutex;
1524
1525 jobpos.i = 0;
1526 jobpos.j = 0;
1527
1528 targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) );
1529 handle = calloc( nthread, sizeof( pthread_t ) );
1530 pthread_mutex_init( &mutex, NULL );
1531
1532 for( i=0; i<nthread; i++ )
1533 {
1534 targ[i].thread_no = i;
1535 targ[i].njob = njob;
1536 targ[i].selfscore = selfscore;
1537 targ[i].iscore = iscore;
1538 targ[i].seq = seq;
1539 targ[i].skiptable = skiptable;
1540 targ[i].jobpospt = &jobpos;
1541 targ[i].mutex = &mutex;
1542
1543 pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
1544 }
1545
1546 for( i=0; i<nthread; i++ )
1547 {
1548 pthread_join( handle[i], NULL );
1549 }
1550 pthread_mutex_destroy( &mutex );
1551 free( handle );
1552 free( targ );
1553 }
1554 else
1555 #endif
1556 {
1557 for( i=0; i<ien; i++ )
1558 {
1559 if( i % 10 == 0 )
1560 {
1561 fprintf( stderr, "\r% 5d / %d", i, ien );
1562 fflush( stderr );
1563 }
1564 ssi = selfscore[i];
1565 for( j=i+1; j<njob; j++ )
1566 {
1567 ssj = selfscore[j];
1568 bunbo = MIN( ssi, ssj );
1569 if( bunbo == 0.0 )
1570 iscore[i][j-i] = 2.0; // 2013/Oct/17 2bai
1571 else
1572 // iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / MIN( selfscore[i], selfscore[j] );
1573 // iscore[i][j-i] = ( 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo ) * 2.0; // 2013/Oct/17 2bai
1574 iscore[i][j-i] = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
1575 if( iscore[i][j-i] > 10 ) iscore[i][j-i] = 10.0; // 2015/Mar/17
1576 //exit( 1 );
1577
1578 #if 0
1579 fprintf( stderr, "### ssj = %f\n", ssj );
1580 fprintf( stderr, "### selfscore[i] = %f\n", selfscore[i] );
1581 fprintf( stderr, "### selfscore[j] = %f\n", selfscore[j] );
1582 fprintf( stderr, "### rawscore = %f\n", naivepairscore11( seq[i], seq[j], penalty_dist ) );
1583 #endif
1584 }
1585 }
1586 }
1587 fprintf( stderr, "\ndone.\n\n" );
1588 FreeIntMtx( skiptable );
1589 fflush( stderr );
1590 }
1591 else
1592 {
1593 if( multidist )
1594 {
1595 fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " );
1596 prep = fopen( "hat2n", "r" );
1597 if( prep == NULL ) ErrorExit( "Make hat2." );
1598 readhat2_floathalf_pointer( prep, njob, name, iscore );
1599 fclose( prep );
1600 fprintf( stderr, "done.\n" );
1601
1602 fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " );
1603 prep = fopen( "hat2i", "r" );
1604 if( prep == NULL ) ErrorExit( "Make hat2i." );
1605 readhat2_floathalf_pointer( prep, njob-nadd, name, iscore );
1606 fclose( prep );
1607 fprintf( stderr, "done.\n" );
1608 }
1609 else
1610 {
1611 fprintf( stderr, "Loading 'hat2' ... " );
1612 prep = fopen( "hat2", "r" );
1613 if( prep == NULL ) ErrorExit( "Make hat2." );
1614 readhat2_floathalf_pointer( prep, njob, name, iscore );
1615 fclose( prep );
1616 fprintf( stderr, "done.\n" );
1617 }
1618 }
1619 #if 1
1620 if( distout )
1621 {
1622 hat2p = fopen( "hat2", "w" );
1623 WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore );
1624 fclose( hat2p );
1625 }
1626 #endif
1627 if( nkozo )
1628 {
1629 ien = njob-1;
1630 ik = 0;
1631 for( i=0; i<ien; i++ )
1632 {
1633 jk = ik+1;
1634 for( j=i+1; j<njob; j++ )
1635 {
1636 if( kozoarivec[i] && kozoarivec[j] )
1637 {
1638 iscore_kozo[ik][jk-ik] = iscore[i][j-i];
1639 }
1640 if( kozoarivec[j] ) jk++;
1641 }
1642 if( kozoarivec[i] ) ik++;
1643 }
1644 }
1645
1646 fprintf( stderr, "Constructing a UPGMA tree ... " );
1647 fflush( stderr );
1648 if( topin )
1649 {
1650 fprintf( stderr, "--topin has been disabled\n" );
1651 exit( 1 );
1652 // fprintf( stderr, "Loading a topology ... " );
1653 // loadtop( njob, iscore, topol, len );
1654 // fprintf( stderr, "\ndone.\n\n" );
1655 }
1656 else if( subalignment ) // merge error no tame
1657 {
1658 fixed_supg_float_realloc_nobk_halfmtx_treeout_constrained( njob, iscore, topol, len, name, nlen, dep, nsubalignments, subtable, 1 );
1659 }
1660 else if( treeout ) // merge error no tame
1661 {
1662 fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, iscore, topol, len, name, nlen, dep, 1 );
1663 }
1664 else
1665 {
1666 fixed_musclesupg_float_realloc_nobk_halfmtx( njob, iscore, topol, len, dep, 1, 1 );
1667 }
1668 // else
1669 // ErrorExit( "Incorrect tree\n" );
1670
1671 if( nkozo )
1672 {
1673 // for( i=0; i<nkozo-1; i++ )
1674 // for( j=i+1; j<nkozo; j++ )
1675 // fprintf( stderr, "iscore_kozo[%d][%d] =~ %f\n", i, j, iscore_kozo[i][j-i] );
1676 fixed_musclesupg_float_realloc_nobk_halfmtx( nkozo, iscore_kozo, topol_kozo, len_kozo, NULL, 1, 1 );
1677 }
1678 fprintf( stderr, "\ndone.\n\n" );
1679 fflush( stderr );
1680 }
1681
1682
1683 orderfp = fopen( "order", "w" );
1684 if( !orderfp )
1685 {
1686 fprintf( stderr, "Cannot open 'order'\n" );
1687 exit( 1 );
1688 }
1689 for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
1690 {
1691 fprintf( orderfp, "%d\n", j );
1692 }
1693 for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
1694 {
1695 fprintf( orderfp, "%d\n", j );
1696 }
1697 fclose( orderfp );
1698
1699 if( treeout && noalign )
1700 {
1701 writeData_pointer( prep_g, njob, name, nlen, seq );
1702 fprintf( stderr, "\n" );
1703 SHOWVERSION;
1704 return( 0 );
1705 }
1706
1707 // countnode( njob, topol, node0 );
1708 if( tbrweight )
1709 {
1710 weight = 3;
1711 #if 0
1712 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
1713 #else
1714 counteff_simple_float_nostatic( njob, topol, len, eff );
1715 for( i=njob-nadd; i<njob; i++ ) eff[i] /= (double)100;
1716 #if 0
1717 fprintf( stderr, "###### WEIGHT = \n" );
1718 for( i=0; i<njob; i++ )
1719 {
1720 fprintf( stderr, "w[%d] = %f\n", i, eff[i] );
1721 }
1722 exit( 1 );
1723 #endif
1724 if( nkozo )
1725 {
1726 // counteff_simple_float( nkozo, topol_kozo, len_kozo, eff_kozo ); // single weight nanode iranai
1727 for( i=0,j=0; i<njob; i++ )
1728 {
1729 if( kozoarivec[i] )
1730 {
1731 // eff_kozo_mapped[i] = eff_kozo[j]; //
1732 eff_kozo_mapped[i] = eff[i]; // single weight
1733 j++;
1734 }
1735 else
1736 eff_kozo_mapped[i] = 0.0;
1737 // fprintf( stderr, "eff_kozo_mapped[%d] = %f\n", i, eff_kozo_mapped[i] );
1738 // fprintf( stderr, " eff[%d] = %f\n", i, eff[i] );
1739 }
1740 }
1741
1742
1743 #endif
1744 }
1745 else
1746 {
1747 for( i=0; i<njob; i++ ) eff[i] = 1.0;
1748 if( nkozo )
1749 {
1750 for( i=0; i<njob; i++ )
1751 {
1752 if( kozoarivec[i] )
1753 eff_kozo_mapped[i] = 1.0;
1754 else
1755 eff_kozo_mapped[i] = 0.0;
1756 }
1757 }
1758 }
1759
1760 FreeFloatHalfMtx( iscore, njob );
1761 FreeFloatMtx( len );
1762
1763 alloclen = nlenmax*2+1; //chuui!
1764 bseq = AllocateCharMtx( njob, alloclen );
1765
1766
1767 if( nadd )
1768 {
1769 alignmentlength = strlen( seq[0] );
1770 for( i=0; i<njob-nadd; i++ )
1771 {
1772 if( alignmentlength != strlen( seq[i] ) )
1773 {
1774 fprintf( stderr, "#################################################################################\n" );
1775 fprintf( stderr, "# ERROR! #\n" );
1776 fprintf( stderr, "# The original%4d sequences must be aligned #\n", njob-nadd );
1777 fprintf( stderr, "#################################################################################\n" );
1778 exit( 1 );
1779 }
1780 }
1781 if( addprofile )
1782 {
1783 alignmentlength = strlen( seq[njob-nadd] );
1784 for( i=njob-nadd; i<njob; i++ )
1785 {
1786 if( alignmentlength != strlen( seq[i] ) )
1787 {
1788 fprintf( stderr, "###############################################################################\n" );
1789 fprintf( stderr, "# ERROR! #\n" );
1790 fprintf( stderr, "# The%4d additional sequences must be aligned #\n", nadd );
1791 fprintf( stderr, "# Otherwise, try the '--add' option, instead of '--addprofile' option. #\n" );
1792 fprintf( stderr, "###############################################################################\n" );
1793 exit( 1 );
1794 }
1795 }
1796 for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
1797 addmem[nadd] = -1;
1798 foundthebranch = 0;
1799 for( i=0; i<njob-1; i++ )
1800 {
1801 if( samemember( topol[i][0], addmem ) ) // jissainiha nai
1802 {
1803 mergeoralign[i] = '1';
1804 foundthebranch = 1;
1805 }
1806 else if( samemember( topol[i][1], addmem ) ) // samemembern ni henkou kanou
1807 {
1808 mergeoralign[i] = '2';
1809 foundthebranch = 1;
1810 }
1811 else
1812 {
1813 mergeoralign[i] = 'n';
1814 }
1815 }
1816 if( !foundthebranch )
1817 {
1818 fprintf( stderr, "###############################################################################\n" );
1819 fprintf( stderr, "# ERROR! #\n" );
1820 fprintf( stderr, "# There is no appropriate position to add the%4d sequences in the guide tree.#\n", nadd );
1821 fprintf( stderr, "# Check whether the%4d sequences form a monophyletic cluster. #\n", nadd );
1822 fprintf( stderr, "# If not, try the '--add' option, instead of the '--addprofile' option. #\n" );
1823 fprintf( stderr, "############################################################################### \n" );
1824 exit( 1 );
1825 }
1826 commongappick( nadd, seq+njob-nadd );
1827 for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
1828 }
1829 else
1830 {
1831 for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
1832 for( j=njob-nadd; j<njob; j++ )
1833 {
1834 addmem[0] = j;
1835 addmem[1] = -1;
1836 for( i=0; i<njob-1; i++ )
1837 {
1838 if( samemembern( topol[i][0], addmem, 1 ) ) // arieru
1839 {
1840 // fprintf( stderr, "HIT!\n" );
1841 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1842 else mergeoralign[i] = '1';
1843 }
1844 else if( samemembern( topol[i][1], addmem, 1 ) )
1845 {
1846 // fprintf( stderr, "HIT!\n" );
1847 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1848 else mergeoralign[i] = '2';
1849 }
1850 }
1851 }
1852
1853 for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
1854 addmem[nadd] = -1;
1855 for( i=0; i<njob-1; i++ )
1856 {
1857 if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
1858 {
1859 mergeoralign[i] = 'w';
1860 }
1861 else if( includemember( topol[i][0], addmem ) )
1862 {
1863 mergeoralign[i] = '1';
1864 }
1865 else if( includemember( topol[i][1], addmem ) )
1866 {
1867 mergeoralign[i] = '2';
1868 }
1869 }
1870 #if 0
1871 for( i=0; i<njob-1; i++ )
1872 {
1873 fprintf( stderr, "mem0 = " );
1874 for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][0][j] );
1875 fprintf( stderr, "\n" );
1876 fprintf( stderr, "mem1 = " );
1877 for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][1][j] );
1878 fprintf( stderr, "\n" );
1879 fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
1880 }
1881 #endif
1882 for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1883 }
1884
1885 commongappick( njob-nadd, seq );
1886 for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
1887 }
1888 //--------------- kokokara ----
1889 else if( subalignment )
1890 {
1891 for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
1892 for( i=0; i<nsubalignments; i++ )
1893 {
1894 fprintf( stderr, "Checking subalignment %d:\n", i+1 );
1895 alignmentlength = strlen( seq[subtable[i][0]] );
1896 // for( j=0; subtable[i][j]!=-1; j++ )
1897 // fprintf( stderr, " %d. %-30.30s\n", subtable[i][j]+1, name[subtable[i][j]]+1 );
1898 for( j=0; subtable[i][j]!=-1; j++ )
1899 {
1900 if( subtable[i][j] >= njob )
1901 {
1902 fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 );
1903 exit( 1 );
1904 }
1905 if( alignmentlength != strlen( seq[subtable[i][j]] ) )
1906 {
1907 fprintf( stderr, "\n" );
1908 fprintf( stderr, "###############################################################################\n" );
1909 fprintf( stderr, "# ERROR!\n" );
1910 fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 );
1911 fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" );
1912 fprintf( stderr, "#\n" );
1913 fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength );
1914 fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) );
1915 fprintf( stderr, "#\n" );
1916 fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" );
1917 if( subalignmentoffset )
1918 {
1919 fprintf( stderr, "#\n" );
1920 fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
1921 fprintf( stderr, "# In this case, the rule of numbering is:\n" );
1922 fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
1923 fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
1924 }
1925 fprintf( stderr, "###############################################################################\n" );
1926 fprintf( stderr, "\n" );
1927 exit( 1 );
1928 }
1929 insubtable[subtable[i][j]] = 1;
1930 }
1931 for( j=0; j<njob-1; j++ )
1932 {
1933 if( includemember( topol[j][0], subtable[i] ) && includemember( topol[j][1], subtable[i] ) )
1934 {
1935 mergeoralign[j] = 'n';
1936 }
1937 }
1938 foundthebranch = 0;
1939 for( j=0; j<njob-1; j++ )
1940 {
1941 if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) )
1942 {
1943 foundthebranch = 1;
1944 fprintf( stderr, " -> OK\n" );
1945 break;
1946 }
1947 }
1948 if( !foundthebranch )
1949 {
1950 system( "cp infile.tree GuideTree" ); // tekitou
1951 fprintf( stderr, "\n" );
1952 fprintf( stderr, "###############################################################################\n" );
1953 fprintf( stderr, "# ERROR!\n" );
1954 fprintf( stderr, "# Subalignment %d does not form a monophyletic cluster\n", i+1 );
1955 fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" );
1956 fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" );
1957 fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" );
1958 fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" );
1959 if( subalignmentoffset )
1960 {
1961 fprintf( stderr, "#\n" );
1962 fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
1963 fprintf( stderr, "# In this case, the rule of numbering is:\n" );
1964 fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
1965 fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
1966 }
1967 fprintf( stderr, "############################################################################### \n" );
1968 fprintf( stderr, "\n" );
1969 exit( 1 );
1970 }
1971 // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo
1972 }
1973 #if 0
1974 for( i=0; i<njob-1; i++ )
1975 {
1976 fprintf( stderr, "STEP %d\n", i+1 );
1977 fprintf( stderr, "group1 = " );
1978 for( j=0; topol[i][0][j] != -1; j++ )
1979 fprintf( stderr, "%d ", topol[i][0][j]+1 );
1980 fprintf( stderr, "\n" );
1981 fprintf( stderr, "group2 = " );
1982 for( j=0; topol[i][1][j] != -1; j++ )
1983 fprintf( stderr, "%d ", topol[i][1][j]+1 );
1984 fprintf( stderr, "\n" );
1985 fprintf( stderr, "%d -> %c\n\n", i, mergeoralign[i] );
1986 }
1987 #endif
1988
1989 for( i=0; i<njob; i++ )
1990 {
1991 if( insubtable[i] ) strcpy( bseq[i], seq[i] );
1992 else gappick0( bseq[i], seq[i] );
1993 }
1994
1995 for( i=0; i<nsubalignments; i++ )
1996 {
1997 for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];
1998 if( !preservegaps[i] ) commongappick( j, subalnpt[i] );
1999 }
2000
2001 FreeIntMtx( subtable );
2002 free( insubtable );
2003 for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
2004 free( subalnpt );
2005 free( preservegaps );
2006 }
2007 //--------------- kokomade ----
2008 else
2009 {
2010 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
2011 for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
2012 }
2013
2014 if( rnakozo && rnaprediction == 'm' )
2015 {
2016 singlerna = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
2017 prep = fopen( "hat4", "r" );
2018 if( prep == NULL ) ErrorExit( "Make hat4 using mccaskill." );
2019 fprintf( stderr, "Loading 'hat4' ... " );
2020 for( i=0; i<njob; i++ )
2021 {
2022 nogaplen = strlen( bseq[i] );
2023 singlerna[i] = (RNApair **)calloc( nogaplen+1, sizeof( RNApair * ) );
2024 for( j=0; j<nogaplen; j++ )
2025 {
2026 singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
2027 singlerna[i][j][0].bestpos = -1;
2028 singlerna[i][j][0].bestscore = -1.0;
2029 }
2030 singlerna[i][nogaplen] = NULL;
2031 // fprintf( stderr, "### reading bpp %d ...\n", i );
2032 readmccaskill( prep, singlerna[i], nogaplen );
2033 }
2034 fclose( prep );
2035 fprintf( stderr, "\ndone.\n" );
2036 }
2037 else
2038 singlerna = NULL;
2039
2040
2041 fprintf( stderr, "Progressive alignment ... \n" );
2042
2043 #ifdef enablemultithread
2044 if( nthread > 0 && nadd == 0 )
2045 {
2046 treebasethread_arg_t *targ;
2047 int jobpos;
2048 pthread_t *handle;
2049 pthread_mutex_t mutex;
2050 pthread_cond_t treecond;
2051 int *fftlog;
2052 int nrun;
2053 int nthread_yoyu;
2054
2055 nthread_yoyu = nthread * 1;
2056 nrun = 0;
2057 jobpos = 0;
2058 targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) );
2059 fftlog = AllocateIntVec( njob );
2060 handle = calloc( nthread_yoyu, sizeof( pthread_t ) );
2061 pthread_mutex_init( &mutex, NULL );
2062 pthread_cond_init( &treecond, NULL );
2063
2064 for( i=0; i<njob; i++ ) dep[i].done = 0;
2065 for( i=0; i<njob; i++ ) fftlog[i] = 1;
2066
2067 if( constraint )
2068 calcimportance( njob, eff, bseq, localhomtable );
2069 // dontcalcimportance( njob, eff, bseq, localhomtable ); // CHUUUUIIII!!!
2070
2071 for( i=0; i<nthread_yoyu; i++ )
2072 {
2073 targ[i].thread_no = i;
2074 targ[i].nrunpt = &nrun;
2075 targ[i].njob = njob;
2076 targ[i].nlen = nlen;
2077 targ[i].jobpospt = &jobpos;
2078 targ[i].topol = topol;
2079 targ[i].dep = dep;
2080 targ[i].aseq = bseq;
2081 targ[i].effarr = eff;
2082 targ[i].alloclenpt = &alloclen;
2083 targ[i].localhomtable = localhomtable;
2084 targ[i].singlerna = singlerna;
2085 targ[i].effarr_kozo = eff_kozo_mapped;
2086 targ[i].fftlog = fftlog;
2087 targ[i].mergeoralign = mergeoralign;
2088 targ[i].mutex = &mutex;
2089 targ[i].treecond = &treecond;
2090
2091 pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
2092 }
2093
2094 for( i=0; i<nthread_yoyu; i++ )
2095 {
2096 pthread_join( handle[i], NULL );
2097 }
2098 pthread_mutex_destroy( &mutex );
2099 pthread_cond_destroy( &treecond );
2100 free( handle );
2101 free( targ );
2102 free( fftlog );
2103 }
2104 else
2105 #endif
2106
2107 treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, &alloclen, localhomtable, singlerna, eff_kozo_mapped );
2108 fprintf( stderr, "\ndone.\n" );
2109 if( scoreout )
2110 {
2111 unweightedspscore = plainscore( njob, bseq );
2112 fprintf( stderr, "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
2113 fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
2114 fprintf( stderr, "\n\n" );
2115 }
2116
2117 #if 0
2118 if( constraint )
2119 {
2120 LocalHom *tmppt1, *tmppt2;
2121 for( i=0; i<njob; i++ )
2122 {
2123 for( j=0; j<njob; j++ )
2124 {
2125 tmppt1 = localhomtable[i]+j;
2126 while( tmppt2 = tmppt1->next )
2127 {
2128 free( (void *)tmppt1 );
2129 tmppt1 = tmppt2;
2130 }
2131 free( (void *)tmppt1 );
2132 }
2133 free( (void *)(localhomtable[i]+j) );
2134 }
2135 free( (void *)localhomtable );
2136 }
2137 #endif
2138
2139 fprintf( trap_g, "done.\n" );
2140 fclose( trap_g );
2141 free( mergeoralign );
2142 if( rnakozo && rnaprediction == 'm' )
2143 {
2144 if( singlerna ) // nen no tame
2145 {
2146 for( i=0; i<njob; i++ )
2147 {
2148 for( j=0; singlerna[i][j]!=NULL; j++ )
2149 {
2150 if( singlerna[i][j] ) free( singlerna[i][j] );
2151 }
2152 if( singlerna[i] ) free( singlerna[i] );
2153 }
2154 free( singlerna );
2155 singlerna = NULL;
2156 }
2157 }
2158
2159 writeData_pointer( prep_g, njob, name, nlen, bseq );
2160 #if 0
2161 writeData( stdout, njob, name, nlen, bseq );
2162 writePre( njob, name, nlen, bseq, !contin );
2163 writeData_pointer( prep_g, njob, name, nlen, aseq );
2164 #endif
2165 #if IODEBUG
2166 fprintf( stderr, "OSHIMAI\n" );
2167 #endif
2168
2169 if( constraint ) FreeLocalHomTable( localhomtable, njob );
2170
2171 if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, bseq ) );
2172
2173 SHOWVERSION;
2174 return( 0 );
2175 }