comparison mafft/core/dvtditr.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 /* Tree-dependent-iteration */
2 /* Devide to segments */
3
4 #include "mltaln.h"
5
6 extern char **seq_g;
7 extern char **res_g;
8 static int subalignment;
9 static int subalignmentoffset;
10
11 static int intop;
12 static int intree;
13 static double autosubalignment;
14
15
16 static void calcmaxdistclass( void )
17 {
18 int c;
19 float rep;
20 for( c=0; c<ndistclass; c++ )
21 {
22 rep = (double) 2 * c / ndistclass; // dist:0-2 for dist2offset
23 // fprintf( stderr, "c=%d, rep=%f, offset=%f\n", c, rep, dist2offset( rep ) );
24 if( dist2offset( rep ) == 0.0 )
25 break;
26 }
27 fprintf( stderr, "ndistclass = %d, maxdistclass = %d\n", ndistclass, c+1 );
28 maxdistclass = c + 1;
29 // maxdistclass = ndistclass; // CHUUI!!!!
30 return;
31 }
32
33 void arguments( int argc, char *argv[] )
34 {
35 int c;
36 char *argkey;
37
38 outnumber = 0;
39 nthread = 1;
40 randomseed = 0;
41 scoreout = 0;
42 spscoreout = 0;
43 parallelizationstrategy = BAATARI1;
44 intop = 0;
45 intree = 0;
46 inputfile = NULL;
47 rnakozo = 0;
48 rnaprediction = 'm';
49 nevermemsave = 0;
50 score_check = 1;
51 fftkeika = 1;
52 constraint = 0;
53 fmodel = 0;
54 kobetsubunkatsu = 1;
55 bunkatsu = 1;
56 nblosum = 62;
57 niter = 100;
58 calledByXced = 0;
59 devide = 1;
60 divWinSize = 20; /* 70 */
61 divThreshold = 65;
62 fftscore = 1;
63 fftRepeatStop = 0;
64 fftNoAnchStop = 0;
65 scmtd = 5;
66 cooling = 1;
67 weight = 4;
68 utree = 1;
69 refine = 1;
70 check = 1;
71 cut = 0.0;
72 disp = 0;
73 outgap = 1;
74 use_fft = 0; // CHUUI dochira demo mafft.tmpl deha F
75 force_fft = 0;
76 alg = 'A'; /* chuui */
77 mix = 0;
78 checkC = 0;
79 tbitr = 0;
80 treemethod = 'X';
81 sueff_global = 0.1;
82 scoremtx = 1;
83 dorp = NOTSPECIFIED;
84 ppenalty = NOTSPECIFIED;
85 penalty_shift_factor = 1000.0;
86 ppenalty_ex = NOTSPECIFIED;
87 poffset = NOTSPECIFIED;
88 kimuraR = NOTSPECIFIED;
89 pamN = NOTSPECIFIED;
90 geta2 = GETA2;
91 fftWinSize = NOTSPECIFIED;
92 fftThreshold = NOTSPECIFIED;
93 RNAppenalty = NOTSPECIFIED;
94 RNAppenalty_ex = NOTSPECIFIED;
95 RNApthr = NOTSPECIFIED;
96 TMorJTT = JTT;
97 consweight_multi = 1.0;
98 consweight_rna = 0.0;
99 subalignment = 0;
100 subalignmentoffset = 0;
101 legacygapcost = 0;
102 specificityconsideration = 0.0;
103 autosubalignment = 0.0;
104
105 while( --argc > 0 && (*++argv)[0] == '-' )
106 {
107 while ( (c = *++argv[0]) )
108 {
109 switch( c )
110 {
111 case 'i':
112 inputfile = *++argv;
113 fprintf( stderr, "inputfile = %s\n", inputfile );
114 --argc;
115 goto nextoption;
116 case 'I':
117 niter = myatoi( *++argv );
118 fprintf( stderr, "niter = %d\n", niter );
119 --argc;
120 goto nextoption;
121 case 'e':
122 RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
123 --argc;
124 goto nextoption;
125 case 'o':
126 RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
127 --argc;
128 goto nextoption;
129 case 'f':
130 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
131 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
132 --argc;
133 goto nextoption;
134 case 'Q':
135 penalty_shift_factor = atof( *++argv );
136 if( penalty_shift_factor < 100.0 && penalty_shift_factor != 2.0 )
137 {
138 fprintf( stderr, "%f, penalty_shift is fixed to penalty x 2 in the iterative refinement phase.\n", penalty_shift_factor );
139 penalty_shift_factor = 2.0;
140 }
141 --argc;
142 goto nextoption;
143 case 'g':
144 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
145 // fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
146 --argc;
147 goto nextoption;
148 case 'h':
149 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
150 fprintf( stderr, "poffset = %d\n", poffset );
151 --argc;
152 goto nextoption;
153 case 'k':
154 kimuraR = myatoi( *++argv );
155 fprintf( stderr, "kappa = %d\n", kimuraR );
156 --argc;
157 goto nextoption;
158 case 'b':
159 nblosum = myatoi( *++argv );
160 scoremtx = 1;
161 fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
162 --argc;
163 goto nextoption;
164 case 'j':
165 pamN = myatoi( *++argv );
166 scoremtx = 0;
167 TMorJTT = JTT;
168 fprintf( stderr, "jtt/kimura %d\n", pamN );
169 --argc;
170 goto nextoption;
171 case 'm':
172 pamN = myatoi( *++argv );
173 scoremtx = 0;
174 TMorJTT = TM;
175 fprintf( stderr, "tm %d\n", pamN );
176 --argc;
177 goto nextoption;
178 case 'l':
179 fastathreshold = atof( *++argv );
180 constraint = 2;
181 --argc;
182 goto nextoption;
183 case 'r':
184 consweight_rna = atof( *++argv );
185 rnakozo = 1;
186 --argc;
187 goto nextoption;
188 case 'c':
189 consweight_multi = atof( *++argv );
190 --argc;
191 goto nextoption;
192 case 'C':
193 nthread = myatoi( *++argv );
194 fprintf( stderr, "nthread = %d\n", nthread );
195 --argc;
196 goto nextoption;
197 case 'H':
198 subalignment = 1;
199 subalignmentoffset = myatoi( *++argv );
200 --argc;
201 goto nextoption;
202 case 't':
203 randomseed = myatoi( *++argv );
204 fprintf( stderr, "randomseed = %d\n", randomseed );
205 --argc;
206 goto nextoption;
207 case 'p':
208 argkey = *++argv;
209 if( !strcmp( argkey, "BESTFIRST" ) ) parallelizationstrategy = BESTFIRST;
210 else if( !strcmp( argkey, "BAATARI0" ) ) parallelizationstrategy = BAATARI0;
211 else if( !strcmp( argkey, "BAATARI1" ) ) parallelizationstrategy = BAATARI1;
212 else if( !strcmp( argkey, "BAATARI2" ) ) parallelizationstrategy = BAATARI2;
213 else
214 {
215 fprintf( stderr, "Unknown parallelization strategy, %s\n", argkey );
216 exit( 1 );
217 }
218 // exit( 1 );
219 --argc;
220 goto nextoption;
221 case 's':
222 specificityconsideration = (double)myatof( *++argv );
223 // fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration );
224 --argc;
225 goto nextoption;
226 #if 0
227 case 'S' :
228 scoreout = 1; // for checking parallel calculation
229 break;
230 #else
231 case 'S' :
232 spscoreout = 1; // 2014/Dec/30, sp score
233 break;
234 #endif
235 #if 0
236 case 's' :
237 RNAscoremtx = 'r';
238 break;
239 #endif
240 #if 1
241 case 'a':
242 fmodel = 1;
243 break;
244 #endif
245 case 'N':
246 nevermemsave = 1;
247 break;
248 case 'D':
249 dorp = 'd';
250 break;
251 case 'P':
252 dorp = 'p';
253 break;
254 #if 0
255 case 'Q':
256 alg = 'Q';
257 break;
258 #endif
259 case 'R':
260 rnaprediction = 'r';
261 break;
262 case 'O':
263 fftNoAnchStop = 1;
264 break;
265 #if 0
266 case 'e':
267 fftscore = 0;
268 break;
269 case 'r':
270 fmodel = -1;
271 break;
272 case 'R':
273 fftRepeatStop = 1;
274 break;
275 #endif
276 case 'T':
277 kobetsubunkatsu = 0;
278 break;
279 case 'B':
280 bunkatsu = 0;
281 break;
282 #if 0
283 case 'c':
284 cooling = 1;
285 break;
286 case 'a':
287 alg = 'a';
288 break;
289 case 's' :
290 treemethod = 's';
291 break;
292 case 'H':
293 alg = 'H';
294 break;
295 #endif
296 case 'A':
297 alg = 'A';
298 break;
299 case 'M':
300 alg = 'M';
301 break;
302 case 'F':
303 use_fft = 1;
304 break;
305 #if 0
306 case 't':
307 weight = 4;
308 break;
309 #endif
310 case 'u':
311 weight = 0;
312 break;
313 case 'U':
314 intree = 1;
315 break;
316 case 'V':
317 intop = 1;
318 break;
319 case 'J':
320 utree = 0;
321 break;
322 case 'd':
323 disp = 1;
324 break;
325 case 'Z':
326 score_check = 0;
327 break;
328 case 'Y':
329 score_check = 2;
330 break;
331 case 'L':
332 legacygapcost = 1;
333 break;
334 #if 0
335 case 'n' :
336 treemethod = 'n';
337 break;
338 #endif
339 case 'n' :
340 outnumber = 1;
341 break;
342 case 'X':
343 treemethod = 'X';
344 sueff_global = atof( *++argv );
345 fprintf( stderr, "sueff_global = %f\n", sueff_global );
346 --argc;
347 goto nextoption;
348 #if 0
349 case 'E' :
350 treemethod = 'E';
351 break;
352 case 'q' :
353 treemethod = 'q';
354 break;
355 #endif
356 case 'E':
357 autosubalignment = atof( *++argv );
358 fprintf( stderr, "autosubalignment = %f\n", autosubalignment );
359 --argc;
360 goto nextoption;
361 case 'z':
362 fftThreshold = myatoi( *++argv );
363 --argc;
364 goto nextoption;
365 case 'w':
366 fftWinSize = myatoi( *++argv );
367 --argc;
368 goto nextoption;
369 default:
370 fprintf( stderr, "illegal option %c\n", c );
371 argc = 0;
372 break;
373 }
374 }
375 nextoption:
376 ;
377 }
378 if( argc == 1 )
379 {
380 cut = atof( (*argv) );
381 argc--;
382 }
383 if( argc != 0 )
384 {
385 fprintf( stderr, "options : Check source file!\n" );
386 exit( 1 );
387 }
388 #if 0
389 if( alg == 'A' && weight == 0 )
390 ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" );
391 #endif
392 }
393
394
395 int main( int argc, char *argv[] )
396 {
397 int identity;
398 static int nlen[M];
399 static char **name, **seq, **aseq, **bseq;
400 static Segment *segment = NULL;
401 static int anchors[MAXSEG];
402 int i, j;
403 int iseg, nseg;
404 int ***topol;
405 double **len;
406 double **eff;
407 FILE *prep;
408 FILE *infp;
409 FILE *orderfp;
410 int alloclen;
411 int returnvalue;
412 char c;
413 int ocut;
414 char **seq_g_bk;
415 LocalHom **localhomtable = NULL; // by D.Mathog
416 RNApair ***singlerna;
417 int nogaplen;
418 static char **nogap1seq;
419 static char *kozoarivec;
420 int nkozo;
421 int alignmentlength;
422 int **skipthisbranch;
423 int foundthebranch;
424 int nsubalignments, maxmem;
425 int **subtable;
426 int *insubtable;
427 int *preservegaps;
428 char ***subalnpt;
429
430 arguments( argc, argv );
431 #ifndef enablemultithread
432 nthread = 0;
433 #endif
434 if( fastathreshold < 0.0001 ) constraint = 0;
435
436 if( inputfile )
437 {
438 infp = fopen( inputfile, "r" );
439 if( !infp )
440 {
441 fprintf( stderr, "Cannot open %s\n", inputfile );
442 exit( 1 );
443 }
444 }
445 else
446 infp = stdin;
447
448 #if 0
449 PreRead( stdin, &njob, &nlenmax );
450 #else
451 getnumlen( infp );
452 #endif
453 rewind( infp );
454
455 nkozo = 0;
456
457 if( njob < 2 )
458 {
459 fprintf( stderr, "At least 2 sequences should be input!\n"
460 "Only %d sequence found.\n", njob );
461 exit( 1 );
462 }
463
464
465 if( subalignment )
466 {
467 readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
468 fprintf( stderr, "nsubalignments = %d\n", nsubalignments );
469 fprintf( stderr, "maxmem = %d\n", maxmem );
470 subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
471 insubtable = AllocateIntVec( njob );
472 preservegaps = AllocateIntVec( njob );
473 for( i=0; i<njob; i++ ) insubtable[i] = 0;
474 for( i=0; i<njob; i++ ) preservegaps[i] = 0;
475 subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
476 readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
477 for( i=0; i<nsubalignments; i++ ) for( j=0; j<insubtable[i]; j++ )
478 {
479 if( subtable[i][j] < 0 )
480 {
481 fprintf( stderr, "Not supported in the iterative refinmenment mode.\n" );
482 fprintf( stderr, "Please use a positive number to represent a sequence.\n" );
483 }
484 }
485 }
486
487 ocut = cut;
488
489 segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
490 // if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' )
491 topol = AllocateIntCub( njob, 2, njob );
492 len = AllocateDoubleMtx( njob, 2 );
493 eff = AllocateDoubleMtx( njob, njob );
494 seq = AllocateCharMtx( njob, nlenmax*9+1 );
495 name = AllocateCharMtx( njob, B+1 );
496 seq_g = AllocateCharMtx( njob, nlenmax*9+1 );
497 res_g = AllocateCharMtx( njob, nlenmax*9+1 );
498 aseq = AllocateCharMtx( njob, nlenmax*9+1 );
499 bseq = AllocateCharMtx( njob, nlenmax*9+1 );
500 nogap1seq = AllocateCharMtx( 1, nlenmax*1+1 );
501 alloclen = nlenmax * 9;
502 seq_g_bk = AllocateCharMtx( njob, 0 );
503 for( i=0; i<njob; i++ ) seq_g_bk[i] = seq_g[i];
504 kozoarivec = AllocateCharVec( njob );
505 skipthisbranch = AllocateIntMtx( njob, 2 );
506 for( i=0; i<njob; i++ ) skipthisbranch[i][0] = skipthisbranch[i][1] = 0;
507
508 if( constraint )
509 {
510 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
511 for( i=0; i<njob; i++)
512 {
513 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
514 for( j=0; j<njob; j++)
515 {
516 localhomtable[i][j].start1 = -1;
517 localhomtable[i][j].end1 = -1;
518 localhomtable[i][j].start2 = -1;
519 localhomtable[i][j].end2 = -1;
520 localhomtable[i][j].overlapaa = -1.0;
521 localhomtable[i][j].opt = -1.0;
522 localhomtable[i][j].importance = -1.0;
523 localhomtable[i][j].next = NULL;
524 localhomtable[i][j].nokori = 0;
525 localhomtable[i][j].extended = -1;
526 localhomtable[i][j].last = localhomtable[i]+j;
527 localhomtable[i][j].korh = 'h';
528 }
529 }
530 fprintf( stderr, "Loading 'hat3' ... " );
531 fflush( stderr );
532 prep = fopen( "hat3", "r" );
533 if( prep == NULL ) ErrorExit( "Make hat3." );
534 readlocalhomtable2( prep, njob, localhomtable, kozoarivec );
535 fclose( prep );
536 // for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
537 // fprintf( stdout, "%d %d %d %d %d %d %d\n", i, j, localhomtable[i][j].opt, localhomtable[i][j].start1, localhomtable[i][j].end1, localhomtable[i][j].start2, localhomtable[i][j].end2 );
538 fprintf( stderr, "done.\n" );
539 fflush( stderr );
540 #if 0
541 fprintf( stderr, "Extending localhom ... " );
542 extendlocalhom( njob, localhomtable );
543 fprintf( stderr, "done.\n" );
544 #endif
545 nkozo = 0;
546 for( i=0; i<njob; i++ ) if( kozoarivec[i] ) nkozo++;
547 }
548
549
550 // if( nlenmax > 30000 )
551 if( nlenmax > 50000 ) // version >= 6.823
552 {
553 #if 0
554 if( constraint )
555 {
556 fprintf( stderr, "\nnlenmax=%d, nagasugi!\n", nlenmax );
557 exit( 1 );
558 }
559 if( nevermemsave )
560 {
561 fprintf( stderr, "\nnevermemsave=1, nlenmax=%d, nagasugi!\n", nlenmax );
562 exit( 1 );
563 }
564 #endif
565 if( !constraint && !nevermemsave && alg != 'M' )
566 {
567 fprintf( stderr, "\nnlenmax=%d, Switching to the memsave mode\n", nlenmax );
568 alg = 'M';
569 }
570 }
571
572 #if 0
573 Read( name, nlen, seq_g );
574 #else
575 readData_pointer( infp, name, nlen, seq_g );
576 #endif
577 fclose( infp );
578
579 if( specificityconsideration ) calcmaxdistclass();
580
581 for( i=0; i<njob; i++ )
582 {
583 res_g[i][0] = 0;
584 }
585
586 identity = 1;
587 for( i=0; i<njob; i++ )
588 {
589 identity *= ( nlen[i] == nlen[0] );
590 }
591 if( !identity )
592 {
593 fprintf( stderr, "Input pre-aligned data\n" );
594 exit( 1 );
595 }
596 constants( njob, seq_g );
597
598 #if 0
599 fprintf( stderr, "penalty = %d\n", penalty );
600 fprintf( stderr, "penalty_ex = %d\n", penalty_ex );
601 fprintf( stderr, "offset = %d\n", offset );
602 #endif
603
604 initSignalSM();
605
606 initFiles();
607
608 #if 0
609 if( njob == 2 )
610 {
611 writePre( njob, name, nlen, seq_g, 1 );
612 SHOWVERSION;
613 return( 0 );
614 }
615 #else
616 if( njob == 2 )
617 {
618 weight = 0;
619 niter = 1;
620 }
621 #endif
622
623 c = seqcheck( seq_g );
624 if( c )
625 {
626 fprintf( stderr, "Illegal character %c\n", c );
627 exit( 1 );
628 }
629 commongappick( njob, seq_g );
630
631 if( rnakozo && rnaprediction == 'm' )
632 {
633 singlerna = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
634 prep = fopen( "hat4", "r" );
635 if( prep == NULL ) ErrorExit( "Make hat4 using mccaskill." );
636 fprintf( stderr, "Loading 'hat4' ... " );
637 fflush( stderr );
638 for( i=0; i<njob; i++ )
639 {
640 gappick0( nogap1seq[0], seq_g[i] );
641 nogaplen = strlen( nogap1seq[0] );
642 singlerna[i] = (RNApair **)calloc( nogaplen+1, sizeof( RNApair * ) );
643 for( j=0; j<nogaplen; j++ )
644 {
645 singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
646 singlerna[i][j][0].bestpos = -1;
647 singlerna[i][j][0].bestscore = -1.0;
648 }
649 singlerna[i][nogaplen] = NULL;
650 readmccaskill( prep, singlerna[i], nogaplen );
651 }
652 fclose( prep );
653 fprintf( stderr, "\ndone.\n" );
654 fflush( stderr );
655 }
656 else
657 singlerna = NULL;
658
659
660
661 if( utree )
662 {
663 prep = fopen( "hat2", "r" );
664 if( !prep ) ErrorExit( "Make hat2." );
665 readhat2_pointer( prep, njob, name, eff );
666 fclose( prep );
667 #if 0
668 fprintf( stderr, "eff = \n" );
669 for( i=0; i<njob-1; i++ )
670 {
671 for( j=i+1; j<njob; j++ )
672 {
673 fprintf( stderr, "%d-%d, %f\n", i, j, eff[i][j] );
674 }
675 fprintf( stderr, "\n" );
676 }
677 #endif
678 if( intree )
679 {
680 veryfastsupg_double_loadtree( njob, eff, topol, len, name );
681 #if 0
682 fprintf( stderr, "eff = \n" );
683 for( i=0; i<njob-1; i++ )
684 {
685 for( j=i+1; j<njob; j++ )
686 {
687 fprintf( stderr, "%d-%d, %f\n", i, j, eff[i][j] );
688 }
689 fprintf( stderr, "\n" );
690 }
691 exit( 1 );
692 #endif
693 }
694 else if( intop ) // v6.528 deha if( intop ) dattanode intree ga mukou datta.
695 {
696 fprintf( stderr, "--topin has been disabled\n" );
697 exit( 1 );
698 // veryfastsupg_double_loadtop( njob, eff, topol, len );
699 }
700 else if( subalignment )
701 fixed_supg_double_treeout_constrained( njob, eff, topol, len, name, nsubalignments, subtable );
702 else if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' )
703 // veryfastsupg_double_outtree( njob, eff, topol, len, name );
704 fixed_musclesupg_double_treeout( njob, eff, topol, len, name );
705 else if( treemethod == 'n' )
706 nj( njob, eff, topol, len );
707 else if( treemethod == 's' )
708 spg( njob, eff, topol, len );
709 else if( treemethod == 'p' )
710 upg2( njob, eff, topol, len );
711 else ErrorExit( "Incorrect treemethod.\n" );
712 }
713 #if DEBUG
714 printf( "utree = %d\n", utree );
715 #endif
716
717 if( autosubalignment > 0.0 && subalignment == 0 )
718 {
719 // reporterr( "Computing skipthisbranch..\n" );
720 insubtable = AllocateIntVec( njob );
721 preservegaps = AllocateIntVec( njob );
722 subtable = calloc( 1, sizeof( char * ) );
723 subtable[0] = NULL; // for FreeIntMtx
724 for( i=0; i<njob; i++ ) insubtable[i] = 0;
725 for( i=0; i<njob; i++ ) preservegaps[i] = 0; // tsukawanaikamo
726 if( generatesubalignmentstable( njob, &subtable, &nsubalignments, &maxmem, topol, len, autosubalignment ) ) // subtable ha allocate sareru.
727 {
728 reporterr( "################################################################################################ \n" );
729 reporterr( "#\n" );
730 reporterr( "# WARNING: Iterative refinment was not done because you gave a large --fix value (%6.3f).\n", autosubalignment );
731 reporterr( "#\n" );
732 reporterr( "################################################################################################ \n" );
733 writePre( njob, name, nlen, seq_g, 1 );
734
735 FreeCharMtx( seq_g_bk );
736 FreeIntCub( topol );
737 FreeDoubleMtx( len );
738 FreeDoubleMtx( eff );
739 FreeIntMtx( skipthisbranch );
740 FreeIntMtx( subtable );
741 free( preservegaps );
742 free( insubtable );
743 SHOWVERSION;
744 return( 0 );
745 }
746 // subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
747 fprintf( stderr, "nsubalignments = %d, maxmem = %d\n", nsubalignments, maxmem );
748 subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
749 #if 0
750 for( i=0; i<nsubalignments; i++ )
751 {
752 reporterr( "subalignment %d\n", i );
753 for( j=0; subtable[i][j]!=-1; j++ )
754 {
755 reporterr( "%5d", subtable[i][j] );
756 }
757 reporterr( "\n" );
758 }
759 #endif
760 #if 0 // wakaran
761 for( i=0; i<nsubalignments; i++ ) for( j=0; j<insubtable[i]; j++ )
762 {
763 if( subtable[i][j] < 0 )
764 {
765 fprintf( stderr, "Not supported in the iterative refinmenment mode.\n" );
766 fprintf( stderr, "Please use a positive number to represent a sequence.\n" );
767 }
768 }
769 #endif
770 // reporterr( "done.\n" );
771 }
772
773
774 orderfp = fopen( "order", "w" );
775 if( !orderfp )
776 {
777 fprintf( stderr, "Cannot open 'order'\n" );
778 exit( 1 );
779 }
780 for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
781 {
782 fprintf( orderfp, "%d\n", j );
783 }
784 for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
785 {
786 fprintf( orderfp, "%d\n", j );
787 }
788 fclose( orderfp );
789
790
791
792 fprintf( stderr, "\n" );
793 if( ( !utree && kobetsubunkatsu ) || constraint || !bunkatsu )
794 {
795 nseg = 0;
796 anchors[0] =0;
797 anchors[1] =strlen( seq_g[0] );
798 nseg += 2;
799 }
800 else
801 {
802 nseg = searchAnchors( njob, seq_g, segment );
803 #if 0
804 fprintf( stderr, "### nseg = %d\n", nseg );
805 fprintf( stderr, "### seq_g[0] = %s\n", seq_g[0] );
806 fprintf( stderr, "### nlenmax = %d\n", nlenmax );
807 fprintf( stderr, "### len = %d\n", strlen( seq_g[0] ) );
808
809 #endif
810
811 anchors[0] = 0;
812 for( i=0; i<nseg; i++ ) anchors[i+1] = segment[i].center;
813 anchors[nseg+1] = strlen( seq_g[0] );
814 nseg +=2;
815
816 #if 0
817 for( i=0; i<nseg; i++ )
818 fprintf( stderr, "anchor[%d] = %d\n", i, anchors[i] );
819 #endif
820 }
821
822
823 //--------------- kokokara ----
824 if( subalignment || autosubalignment )
825 {
826 for( i=0; i<nsubalignments; i++ )
827 {
828 fprintf( stderr, "\nChecking subalignment %d:\n", i+1 );
829 alignmentlength = strlen( seq[subtable[i][0]] );
830 for( j=0; subtable[i][j]!=-1; j++ )
831 fprintf( stderr, " %d ", subtable[i][j]+1 );
832 // fprintf( stderr, " ##### %d-%d. %-30.30s\n", i, subtable[i][j]+1, name[subtable[i][j]]+1 );
833 fprintf( stderr, "\n" );
834 for( j=0; subtable[i][j]!=-1; j++ )
835 {
836 if( subtable[i][j] >= njob )
837 {
838 fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 );
839 exit( 1 );
840 }
841 if( alignmentlength != strlen( seq[subtable[i][j]] ) )
842 {
843 fprintf( stderr, "\n" );
844 fprintf( stderr, "###############################################################################\n" );
845 fprintf( stderr, "# ERROR!\n" );
846 fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 );
847 fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" );
848 fprintf( stderr, "#\n" );
849 fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength );
850 fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) );
851 fprintf( stderr, "#\n" );
852 fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" );
853 if( subalignmentoffset )
854 {
855 fprintf( stderr, "#\n" );
856 fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
857 fprintf( stderr, "# In this case, the rule of numbering is:\n" );
858 fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
859 fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
860 }
861 fprintf( stderr, "###############################################################################\n" );
862 fprintf( stderr, "\n" );
863 exit( 1 );
864 }
865 insubtable[subtable[i][j]] = 1;
866 }
867 for( j=0; j<njob-1; j++ )
868 {
869 #if 0
870 int k;
871 reporterr( "#### STEP%d\n", j );
872 for( k=0; topol[j][0][k]!=-1; k++ ) reporterr( "%3d ", topol[j][0][k] );
873 reporterr( "len=%f\n", len[j][0] );
874 for( k=0; topol[j][1][k]!=-1; k++ ) reporterr( "%3d ", topol[j][1][k] );
875 reporterr( "len=%f\n", len[j][1] );
876 reporterr( "\n" );
877 #endif
878 if( includemember( topol[j][0], subtable[i] ) && !samemember( topol[j][0], subtable[i] ) )
879 {
880 skipthisbranch[j][0] = 1;
881 // reporterr( "SKIP 0 !!!!!!\n" );
882 }
883 if( includemember( topol[j][1], subtable[i] ) && !samemember( topol[j][1], subtable[i] ) )
884 {
885 skipthisbranch[j][1] = 1;
886 // reporterr( "SKIP 1 !!!!!!\n" );
887 }
888 }
889 foundthebranch = 0;
890 for( j=0; j<njob-1; j++ )
891 {
892 if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) )
893 {
894 foundthebranch = 1;
895 fprintf( stderr, " -> OK\n" );
896 break;
897 }
898 }
899 if( !foundthebranch )
900 {
901 system( "cp infile.tree GuideTree" ); // tekitou
902 fprintf( stderr, "\n" );
903 fprintf( stderr, "###############################################################################\n" );
904 fprintf( stderr, "# ERROR!\n" );
905 fprintf( stderr, "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 );
906 fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" );
907 fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" );
908 fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" );
909 fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" );
910 if( subalignmentoffset )
911 {
912 fprintf( stderr, "#\n" );
913 fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
914 fprintf( stderr, "# In this case, the rule of numbering is:\n" );
915 fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
916 fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
917 }
918 fprintf( stderr, "############################################################################### \n" );
919 fprintf( stderr, "\n" );
920 exit( 1 );
921 }
922 // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo
923 }
924 #if 0
925 for( i=0; i<njob-1; i++ )
926 {
927 fprintf( stderr, "STEP %d\n", i+1 );
928 fprintf( stderr, "group1 = " );
929 for( j=0; topol[i][0][j] != -1; j++ )
930 fprintf( stderr, "%d ", topol[i][0][j]+1 );
931 fprintf( stderr, "\n" );
932 fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][0] );
933 fprintf( stderr, "group2 = " );
934 for( j=0; topol[i][1][j] != -1; j++ )
935 fprintf( stderr, "%d ", topol[i][1][j]+1 );
936 fprintf( stderr, "\n" );
937 fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][1] );
938 }
939 #endif
940
941 for( i=0; i<njob; i++ )
942 {
943 if( insubtable[i] ) strcpy( bseq[i], seq[i] );
944 else gappick0( bseq[i], seq[i] );
945 }
946
947 for( i=0; i<nsubalignments; i++ )
948 {
949 for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];
950 commongappick( j, subalnpt[i] );
951 }
952
953 FreeIntMtx( subtable );
954 free( insubtable );
955 for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
956 free( subalnpt );
957 free( preservegaps );
958 }
959 //--------------- kokomade ----
960
961
962
963
964 for( i=0; i<njob; i++ ) res_g[i][0] = 0;
965
966 for( iseg=0; iseg<nseg-1; iseg++ )
967 {
968 int tmplen = anchors[iseg+1]-anchors[iseg];
969 int pos = strlen( res_g[0] );
970 for( j=0; j<njob; j++ )
971 {
972 strncpy( seq[j], seq_g[j], tmplen );
973 seq[j][tmplen]= 0;
974 seq_g[j] += tmplen;
975
976 }
977 fprintf( stderr, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
978 fflush( stderr );
979 fprintf( trap_g, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
980
981 cut = ocut;
982 returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, eff, skipthisbranch, alloclen, localhomtable, singlerna, nkozo, kozoarivec );
983
984 for( i=0; i<njob; i++ )
985 strcat( res_g[i], bseq[i] );
986 }
987 FreeCharMtx( seq_g_bk );
988 FreeIntCub( topol );
989 FreeDoubleMtx( len );
990 FreeDoubleMtx( eff );
991 FreeIntMtx( skipthisbranch );
992 free( kozoarivec );
993 if( constraint ) FreeLocalHomTable( localhomtable, njob );
994 if( rnakozo && rnaprediction == 'm' )
995 {
996 if( singlerna ) // nen no tame
997 {
998 for( i=0; i<njob; i++ )
999 {
1000 for( j=0; singlerna[i][j]!=NULL; j++ )
1001 {
1002 if( singlerna[i][j] ) free( singlerna[i][j] );
1003 }
1004 if( singlerna[i] ) free( singlerna[i] );
1005 }
1006 free( singlerna );
1007 singlerna = NULL;
1008 }
1009 }
1010
1011 #if 0
1012 Write( stdout, njob, name, nlen, bseq );
1013 #endif
1014
1015 fprintf( stderr, "done\n" );
1016 fprintf( trap_g, "done\n" );
1017 fclose( trap_g );
1018
1019
1020 devide = 0;
1021 writePre( njob, name, nlen, res_g, 1 );
1022 #if 0
1023 writeData( stdout, njob, name, nlen, res_g, 1 );
1024 #endif
1025
1026
1027 if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, res_g ) );
1028
1029 SHOWVERSION;
1030 return( 0 );
1031 }
1032
1033 #if 0
1034 signed int main( int argc, char *argv[] )
1035 {
1036 int i, nlen[M];
1037 char b[B];
1038 char a[] = "=";
1039 int value;
1040
1041 gets( b ); njob = atoi( b );
1042
1043 /*
1044 scoremtx = 0;
1045 if( strstr( b, "ayhoff" ) ) scoremtx = 1;
1046 else if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1;
1047 else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2;
1048 else scoremtx = 0;
1049 */
1050 if( strstr( b, "constraint" ) ) cnst = 1;
1051
1052 nlenmax = 0;
1053 i = 0;
1054 while( i<njob )
1055 {
1056 gets( b );
1057 if( !strncmp( b, a, 1 ) )
1058 {
1059 gets( b ); nlen[i] = atoi( b );
1060 if( nlen[i] > nlenmax ) nlenmax = nlen[i];
1061 i++;
1062 }
1063 }
1064 if( nlenmax > N || njob > M )
1065 {
1066 fprintf( stderr, "ERROR in main\n" );
1067 exit( 1 );
1068 }
1069 /*
1070 nlenmax = Na;
1071 */
1072 rewind( stdin );
1073 value = main1( nlen, argc, argv );
1074 exit( 0 );
1075 }
1076 #endif