Mercurial > repos > nick > duplex
comparison mafft/core/addsingle.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #include "mltaln.h" | |
2 | |
3 #define SMALLMEMORY 1 | |
4 | |
5 #define DEBUG 0 | |
6 #define IODEBUG 0 | |
7 #define SCOREOUT 0 | |
8 | |
9 static int nadd; | |
10 static int treein; | |
11 static int topin; | |
12 static int treeout; | |
13 static int distout; | |
14 static int noalign; | |
15 static int multidist; | |
16 static int maxdist = 2; // scale -> 2bai | |
17 static int allowlongadds; | |
18 | |
19 static float lenfaca, lenfacb, lenfacc, lenfacd; | |
20 static int tuplesize; | |
21 | |
22 #define PLENFACA 0.01 | |
23 #define PLENFACB 10000 | |
24 #define PLENFACC 10000 | |
25 #define PLENFACD 0.1 | |
26 #define D6LENFACA 0.01 | |
27 #define D6LENFACB 2500 | |
28 #define D6LENFACC 2500 | |
29 #define D6LENFACD 0.1 | |
30 #define D10LENFACA 0.01 | |
31 #define D10LENFACB 1000000 | |
32 #define D10LENFACC 1000000 | |
33 #define D10LENFACD 0.0 | |
34 | |
35 typedef struct _thread_arg | |
36 { | |
37 int njob; | |
38 int nadd; | |
39 int *nlen; | |
40 int *follows; | |
41 char **name; | |
42 char **seq; | |
43 LocalHom **localhomtable; | |
44 float **iscore; | |
45 float **nscore; | |
46 int *istherenewgap; | |
47 int **newgaplist; | |
48 RNApair ***singlerna; | |
49 double *eff_kozo_mapped; | |
50 int alloclen; | |
51 Treedep *dep; | |
52 int ***topol; | |
53 float **len; | |
54 Addtree *addtree; | |
55 #ifdef enablemultithread | |
56 int *iaddshare; | |
57 int thread_no; | |
58 pthread_mutex_t *mutex_counter; | |
59 #endif | |
60 } thread_arg_t; | |
61 | |
62 | |
63 #ifdef enablemultithread | |
64 typedef struct _gaplist2alnxthread_arg | |
65 { | |
66 // int thread_no; | |
67 int ncycle; | |
68 int *jobpospt; | |
69 int tmpseqlen; | |
70 int lenfull; | |
71 char **seq; | |
72 int *newgaplist; | |
73 int *posmap; | |
74 pthread_mutex_t *mutex; | |
75 } gaplist2alnxthread_arg_t; | |
76 | |
77 typedef struct _distancematrixthread_arg | |
78 { | |
79 int thread_no; | |
80 int njob; | |
81 int norg; | |
82 int *jobpospt; | |
83 int **pointt; | |
84 int *nogaplen; | |
85 float **imtx; | |
86 float **nmtx; | |
87 float *selfscore; | |
88 pthread_mutex_t *mutex; | |
89 } distancematrixthread_arg_t; | |
90 | |
91 typedef struct _jobtable2d | |
92 { | |
93 int i; | |
94 int j; | |
95 } Jobtable2d; | |
96 | |
97 typedef struct _dndprethread_arg | |
98 { | |
99 int njob; | |
100 int thread_no; | |
101 float *selfscore; | |
102 float **mtx; | |
103 char **seq; | |
104 Jobtable2d *jobpospt; | |
105 pthread_mutex_t *mutex; | |
106 } dndprethread_arg_t; | |
107 | |
108 #endif | |
109 | |
110 typedef struct _blocktorealign | |
111 { | |
112 int start; | |
113 int end; | |
114 int nnewres; | |
115 } Blocktorealign; | |
116 | |
117 static void cnctintvec( int *res, int *o1, int *o2 ) | |
118 { | |
119 while( *o1 != -1 ) *res++ = *o1++; | |
120 while( *o2 != -1 ) *res++ = *o2++; | |
121 *res = -1; | |
122 } | |
123 | |
124 static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist ) | |
125 { | |
126 int i, regstart, regend, len1; | |
127 regstart = 0; | |
128 len1 = len+1; | |
129 for( i=0; i<len1; i++ ) | |
130 { | |
131 if( realign[i].nnewres || gaplist[i] ) | |
132 { | |
133 regend = posmap[i]-1; | |
134 realign[i].start = regstart; | |
135 realign[i].end = regend; | |
136 } | |
137 if( gaplist[i] ) | |
138 { | |
139 realign[i].nnewres++; | |
140 // fprintf( stderr, "hit? reg = %d-%d\n", regstart, regend ); | |
141 } | |
142 regstart = posmap[i]+1; | |
143 } | |
144 } | |
145 static void fillgap( char *s, int len ) | |
146 { | |
147 int orilen = strlen( s ); | |
148 s += orilen; | |
149 len -= orilen; | |
150 while( len-- ) | |
151 *s++ = '-'; | |
152 *s = 0; | |
153 } | |
154 | |
155 static int lencomp( const void *a, const void *b ) // osoikamo | |
156 { | |
157 char **ast = (char **)a; | |
158 char **bst = (char **)b; | |
159 int lena = strlen( *ast ); | |
160 int lenb = strlen( *bst ); | |
161 // fprintf( stderr, "a=%s, b=%s\n", *ast, *bst ); | |
162 // fprintf( stderr, "lena=%d, lenb=%d\n", lena, lenb ); | |
163 if( lena > lenb ) return -1; | |
164 else if( lena < lenb ) return 1; | |
165 else return( 0 ); | |
166 } | |
167 | |
168 static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows ) | |
169 { | |
170 int i, j, k, posinold, newlen, *nmem; | |
171 int n0, n1, localloclen, nhit, hit1, hit2; | |
172 int *pickhistory; | |
173 int nprof1, nprof2, pos, zure; | |
174 char **prof1, **prof2; | |
175 int *iinf0, *iinf1; | |
176 int *group, *nearest, *g2n, ngroup; | |
177 char ***mem; | |
178 static char **tmpaln0 = NULL; | |
179 static char **tmpaln1 = NULL; | |
180 static char **tmpseq; | |
181 int ***topolpick; | |
182 int *tmpint; | |
183 int *intptr, *intptrx; | |
184 char *tmpseq0, *cptr, **cptrptr; | |
185 | |
186 | |
187 localloclen = 4 * ( block->end - block->start + 1 ); // ookisugi? | |
188 tmpaln0 = AllocateCharMtx( njob, localloclen ); | |
189 tmpaln1 = AllocateCharMtx( njob, localloclen ); | |
190 tmpseq = AllocateCharMtx( 1, *fullseqlenpt * 4 ); | |
191 iinf0 = AllocateIntVec( njob ); | |
192 iinf1 = AllocateIntVec( njob ); | |
193 nearest = AllocateIntVec( njob ); // oosugi | |
194 | |
195 posinold = block->start; | |
196 | |
197 n0 = 0; | |
198 n1 = 0; | |
199 for( i=0; i<njob; i++ ) | |
200 { | |
201 strncpy( tmpseq[0], fullseq[i] + block->start, block->end - block->start + 1 ); | |
202 tmpseq[0][block->end - block->start + 1] = 0; | |
203 commongappick( 1, tmpseq ); | |
204 if( tmpseq[0][0] != 0 ) | |
205 { | |
206 if( i < norg ) | |
207 { | |
208 fprintf( stderr, "BUG!!!!\n" ); | |
209 exit( 1 ); | |
210 } | |
211 strcpy( tmpaln0[n0], tmpseq[0] ); | |
212 iinf0[n0] = i; | |
213 nearest[n0] = follows[i-norg]; | |
214 n0++; | |
215 } | |
216 else | |
217 { | |
218 strcpy( tmpaln1[n0], "" ); | |
219 iinf1[n1] = i; | |
220 n1++; | |
221 } | |
222 } | |
223 mem = AllocateCharCub( n0, n0+1, 0 ); // oosugi | |
224 nmem = AllocateIntVec( n0 ); // oosugi | |
225 g2n = AllocateIntVec( n0 ); // oosugi | |
226 group = AllocateIntVec( n0 ); // oosugi | |
227 for( i=0; i<n0; i++ ) mem[i][0] = NULL; | |
228 for( i=0; i<n0; i++ ) nmem[i] = 0; | |
229 ngroup = 0; | |
230 for( i=0; i<n0; i++ ) | |
231 { | |
232 for( j=0; j<i; j++ ) if( nearest[j] == nearest[i] ) break; | |
233 if( j == i ) group[i] = ngroup++; | |
234 else group[i] = group[j]; | |
235 | |
236 for( j=0; mem[group[i]][j]; j++ ) | |
237 ; | |
238 mem[group[i]][j] = tmpaln0[i]; | |
239 mem[group[i]][j+1] = NULL; | |
240 nmem[group[i]]++; | |
241 g2n[group[i]] = nearest[i]; | |
242 // fprintf( stderr, "%d -> %d -> group%d\n", i, nearest[i], group[i] ); | |
243 // fprintf( stderr, "mem[%d][%d] = %s\n", group[i], j, mem[group[i]][j] ); | |
244 } | |
245 | |
246 for( i=0; i<ngroup; i++ ) | |
247 { | |
248 // fprintf( stderr, "before sort:\n" ); | |
249 // for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "%s\n", mem[i][j] ); | |
250 // fprintf( stderr, "\n" ); | |
251 qsort( mem[i], nmem[i], sizeof( char * ), lencomp ); | |
252 // fprintf( stderr, "after sort:\n" ); | |
253 // for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "%s\n", mem[i][j] ); | |
254 // fprintf( stderr, "\n" ); | |
255 } | |
256 | |
257 #if 0 | |
258 for( i=1; i<n0; i++ ) | |
259 { | |
260 profilealignment( 1, n1, i, tmpaln0+i, tmpaln1, tmpaln0, localloclen, alg ); | |
261 } | |
262 newlen = strlen( tmpaln0[0] ); | |
263 for( i=0; i<n1; i++ ) eq2dash( tmpaln1[i] ); | |
264 #else | |
265 // newlen = 0; | |
266 for( i=0; i<ngroup; i++ ) | |
267 { | |
268 // for( k=0; mem[i][k]; k++ ) fprintf( stderr, "mem[%d][%d] = %s\n", i, j, mem[i][k] ); | |
269 | |
270 for( j=1; j<nmem[i]; j++ ) | |
271 { | |
272 profilealignment2( 1, j, mem[i]+j, mem[i], localloclen, alg ); | |
273 } | |
274 // for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "j=%d, %s\n", j, mem[i][j] ); | |
275 | |
276 #if 0 // iru | |
277 if( ( j = strlen( mem[i][0] ) ) > newlen ) newlen = j; | |
278 for( j=0; j<=i; j++ ) | |
279 { | |
280 for( k=0; mem[j][k]; k++ ) | |
281 fillgap( mem[j][k], newlen ); | |
282 } | |
283 #endif | |
284 | |
285 } | |
286 #if 0 | |
287 fprintf( stderr, "After ingroupalignment (original order):\n" ); | |
288 for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] ); | |
289 #endif | |
290 #endif | |
291 | |
292 topolpick = AllocateIntCub( ngroup, 2, ngroup ); | |
293 pickhistory = AllocateIntVec( ngroup ); | |
294 tmpint = AllocateIntVec( 2 ); | |
295 prof1 = AllocateCharMtx( n0, 0 ); | |
296 prof2 = AllocateCharMtx( n0, 0 ); | |
297 for( i=0; i<ngroup; i++ ) | |
298 { | |
299 topolpick[i][0][0] = -1; | |
300 topolpick[i][1][0] = -1; | |
301 pickhistory[i] = -1; | |
302 } | |
303 | |
304 nhit = 0; | |
305 for( i=0; i<norg-1; i++ ) | |
306 { | |
307 for( intptr=topol[i][0]; *intptr>-1; intptr++ ) | |
308 { | |
309 for( intptrx=g2n,k=0; k<ngroup; intptrx++,k++ ) | |
310 { | |
311 if( *intptr == *intptrx ) | |
312 { | |
313 hit1 = k; | |
314 goto exitloop1; | |
315 } | |
316 } | |
317 } | |
318 continue; | |
319 exitloop1: | |
320 // fprintf( stderr, "hit1! group%d -> %d\n", k, topol[i][0][j] ); | |
321 | |
322 for( intptr=topol[i][1]; *intptr>-1; intptr++ ) | |
323 { | |
324 for( intptrx=g2n,k=0; k<ngroup; intptrx++,k++ ) | |
325 { | |
326 if( *intptr == *intptrx ) | |
327 { | |
328 hit2 = k; | |
329 goto exitloop2; | |
330 } | |
331 } | |
332 } | |
333 continue; | |
334 exitloop2: | |
335 | |
336 if( pickhistory[hit1] == -1 ) | |
337 { | |
338 topolpick[nhit][0][0] = hit1; | |
339 topolpick[nhit][0][1] = -1; | |
340 } | |
341 else | |
342 { | |
343 intcpy( topolpick[nhit][0], topolpick[pickhistory[hit1]][0] ); | |
344 intcat( topolpick[nhit][0], topolpick[pickhistory[hit1]][1] ); | |
345 } | |
346 if( pickhistory[hit2] == -1 ) | |
347 { | |
348 topolpick[nhit][1][0] = hit2; | |
349 topolpick[nhit][1][1] = -1; | |
350 } | |
351 else | |
352 { | |
353 intcpy( topolpick[nhit][1], topolpick[pickhistory[hit2]][0] ); | |
354 intcat( topolpick[nhit][1], topolpick[pickhistory[hit2]][1] ); | |
355 } | |
356 | |
357 pickhistory[hit1] = nhit; | |
358 pickhistory[hit2] = nhit; | |
359 nhit++; | |
360 // g2n[hit1] = -1; | |
361 // g2n[hit2] = -1; | |
362 | |
363 // fprintf( stderr, "hit2! group%d -> %d\n", k, topol[i][1][j] ); | |
364 | |
365 #if 0 | |
366 fprintf( stderr, "\nHIT!!! \n" ); | |
367 fprintf( stderr, "\nSTEP %d\n", i ); | |
368 for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][0][j] ); | |
369 fprintf( stderr, "\n" ); | |
370 for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][1][j] ); | |
371 fprintf( stderr, "\n" ); | |
372 #endif | |
373 } | |
374 | |
375 for( i=0; i<ngroup-1; i++ ) | |
376 { | |
377 #if 0 | |
378 fprintf( stderr, "\npickSTEP %d\n", i ); | |
379 for( j=0; topolpick[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][0][j] ); | |
380 fprintf( stderr, "\n" ); | |
381 for( j=0; topolpick[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][1][j] ); | |
382 fprintf( stderr, "\n" ); | |
383 #endif | |
384 | |
385 pos = 0; | |
386 // for( j=0; topolpick[i][0][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][0][j]][k]); k++ ) prof1[pos++] = cptr; | |
387 for( intptr=topolpick[i][0]; *intptr>-1; intptr++ ) | |
388 for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) | |
389 prof1[pos++] = cptr; | |
390 nprof1 = pos; | |
391 pos = 0; | |
392 // for( j=0; topolpick[i][1][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][1][j]][k]); k++ ) prof2[pos++] = cptr; | |
393 for( intptr=topolpick[i][1]; *intptr>-1; intptr++ ) | |
394 for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) | |
395 prof2[pos++] = cptr; | |
396 nprof2 = pos; | |
397 | |
398 | |
399 profilealignment2( nprof1, nprof2, prof1, prof2, localloclen, alg ); | |
400 #if 0 | |
401 for( j=0; j<nprof1; j++ ) fprintf( stderr, "prof1[%d] = %s\n", j, prof1[j] ); | |
402 for( j=0; j<nprof2; j++ ) fprintf( stderr, "prof2[%d] = %s\n", j, prof2[j] ); | |
403 fprintf( stderr, "done.\n" ); | |
404 #endif | |
405 } | |
406 newlen = strlen( tmpaln0[0] ); | |
407 for( j=0; j<n1; j++ ) fillgap( tmpaln1[j], newlen ); | |
408 | |
409 #if 0 | |
410 fprintf( stderr, "After rerealignment (original order):\n" ); | |
411 for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] ); | |
412 #endif | |
413 | |
414 // newlen = strlen( tmpaln0[0] ); | |
415 zure = ( block->end - block->start + 1 - newlen ); | |
416 // fprintf( stderr, "zure = %d, localloclen=%d, newlen=%d\n", zure, localloclen, newlen ); | |
417 | |
418 | |
419 if( *fullseqlenpt < strlen( fullseq[0] ) - (block->end-block->start+1) + newlen + 1 ) | |
420 { | |
421 *fullseqlenpt = strlen( fullseq[0] ) * 2; | |
422 fprintf( stderr, "reallocating..." ); | |
423 for( i=0; i<njob; i++ ) | |
424 { | |
425 fullseq[i] = realloc( fullseq[i], *fullseqlenpt * sizeof( char ) ); | |
426 if( !fullseq[i] ) | |
427 { | |
428 fprintf( stderr, "Cannot reallocate seq[][]\n" ); | |
429 exit( 1 ); | |
430 } | |
431 } | |
432 fprintf( stderr, "done.\n" ); | |
433 } | |
434 | |
435 | |
436 tmpseq0 = tmpseq[0]; | |
437 posinold = block->end+1; | |
438 for( i=0; i<n0; i++ ) | |
439 { | |
440 strncpy( tmpseq0, tmpaln0[i], newlen ); | |
441 strcpy( tmpseq0+newlen, fullseq[iinf0[i]] + posinold ); | |
442 strcpy( fullseq[iinf0[i]]+block->start, tmpseq0 ); | |
443 } | |
444 for( i=0; i<n1; i++ ) | |
445 { | |
446 // eq2dash( tmpaln1[i] ); | |
447 strncpy( tmpseq0, tmpaln1[i], newlen ); | |
448 strcpy( tmpseq0+newlen, fullseq[iinf1[i]] + posinold ); | |
449 strcpy( fullseq[iinf1[i]]+block->start, tmpseq0 ); | |
450 } | |
451 FreeCharMtx( tmpaln0 ); | |
452 FreeCharMtx( tmpaln1 ); | |
453 FreeCharMtx( tmpseq ); | |
454 for( i=0; i<n0; i++ ) | |
455 { | |
456 // for( j=0; j<njob; j++ ) free( mem[i][j] ); | |
457 free( mem[i] ); | |
458 } | |
459 free( mem ); | |
460 free( nmem ); | |
461 free( iinf0 ); | |
462 free( iinf1 ); | |
463 free( group ); | |
464 free( g2n ); | |
465 free( prof1 ); | |
466 free( prof2 ); | |
467 free( nearest ); | |
468 FreeIntCub( topolpick ); | |
469 free( pickhistory ); | |
470 free( tmpint ); | |
471 | |
472 return( zure ); | |
473 } | |
474 | |
475 | |
476 #if 0 | |
477 static int dorealignment( Blocktorealign *block, char **fullseq, int alloclen, int fullseqlen, int norg ) | |
478 { | |
479 int i, posinnew, posinold, newlen; | |
480 int n0, n1; | |
481 int zure; | |
482 static int *iinf0, *iinf1; | |
483 static char **tmpaln0 = NULL; | |
484 static char **tmpaln1 = NULL; | |
485 static char **tmpseq; | |
486 char *opt, *npt; | |
487 | |
488 if( tmpaln0 == NULL ) | |
489 { | |
490 tmpaln0 = AllocateCharMtx( njob, alloclen ); | |
491 tmpaln1 = AllocateCharMtx( njob, alloclen ); | |
492 tmpseq = AllocateCharMtx( 1, fullseqlen ); | |
493 iinf0 = AllocateIntVec( njob ); | |
494 iinf1 = AllocateIntVec( njob ); | |
495 } | |
496 posinold = block->start; | |
497 | |
498 | |
499 n0 = 0; | |
500 n1 = 0; | |
501 for( i=0; i<njob; i++ ) | |
502 { | |
503 strncpy( tmpseq[0], fullseq[i] + block->start, block->end - block->start + 1 ); | |
504 tmpseq[0][block->end - block->start + 1] = 0; | |
505 commongappick( 1, tmpseq ); | |
506 // if( strlen( tmpseq[0] ) > 0 ) | |
507 if( tmpseq[0][0] != 0 ) | |
508 { | |
509 if( i < norg ) | |
510 { | |
511 fprintf( stderr, "BUG!!!!\n" ); | |
512 exit( 1 ); | |
513 } | |
514 strcpy( tmpaln0[n0], tmpseq[0] ); | |
515 iinf0[n0] = i; | |
516 n0++; | |
517 } | |
518 else | |
519 { | |
520 strcpy( tmpaln1[n0], "" ); | |
521 iinf1[n1] = i; | |
522 n1++; | |
523 } | |
524 } | |
525 | |
526 | |
527 for( i=1; i<n0; i++ ) | |
528 { | |
529 profilealignment( 1, n1, i, tmpaln0+i, tmpaln1, tmpaln0, alloclen, alg ); // n1 ha allgap | |
530 } | |
531 | |
532 #if 1 | |
533 fprintf( stderr, "After realignment:\n" ); | |
534 for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] ); | |
535 // for( i=0; i<n1; i++ ) fprintf( stderr, "%s\n", tmpaln1[i] ); | |
536 #endif | |
537 | |
538 newlen = strlen( tmpaln0[0] ); | |
539 for( i=0; i<n0; i++ ) strncpy( fullseq[iinf0[i]]+block->start, tmpaln0[i], newlen ); | |
540 for( i=0; i<n1; i++ ) | |
541 { | |
542 eq2dash( tmpaln1[i] ); | |
543 strncpy( fullseq[iinf1[i]] + block->start, tmpaln1[i], newlen ); | |
544 } | |
545 | |
546 posinold = block->end+1; | |
547 posinnew = block->start + newlen; | |
548 | |
549 | |
550 zure = ( block->end - block->start + 1 - strlen( tmpaln0[0] ) ); | |
551 | |
552 for( i=0; i<njob; i++ ) | |
553 { | |
554 #if 0 | |
555 strcpy( fullseq[i]+posinnew, fullseq[i]+posinold ); // ?? | |
556 #else | |
557 opt = fullseq[i] + posinold; | |
558 npt = fullseq[i] + posinnew; | |
559 while( ( *npt++ = *opt++ ) ); | |
560 *npt = 0; | |
561 #endif | |
562 } | |
563 | |
564 return( zure ); | |
565 } | |
566 #endif | |
567 | |
568 static void adjustposmap( int len, int *posmap, int *gaplist ) | |
569 { | |
570 int *newposmap; | |
571 int *mpt1, *mpt2; | |
572 int lenbk, zure; | |
573 newposmap = calloc( len+2, sizeof( int ) ); | |
574 lenbk = len; | |
575 zure = 0; | |
576 mpt1 = newposmap; | |
577 mpt2 = posmap; | |
578 | |
579 #if 0 | |
580 int i; | |
581 fprintf( stderr, "posmapa = " ); | |
582 for( i=0; i<len+2; i++ ) | |
583 { | |
584 fprintf( stderr, "%3d ", posmap[i] ); | |
585 } | |
586 fprintf( stderr, "\n" ); | |
587 #endif | |
588 | |
589 while( len-- ) | |
590 { | |
591 zure += *gaplist++; | |
592 *mpt1++ = *mpt2++ + zure; | |
593 } | |
594 zure += *gaplist++; | |
595 *mpt1 = *mpt2 + zure; | |
596 | |
597 mpt1 = newposmap; | |
598 mpt2 = posmap; | |
599 len = lenbk; | |
600 while( len-- ) *mpt2++ = *mpt1++; | |
601 *mpt2 = *mpt1; | |
602 free( newposmap ); | |
603 #if 0 | |
604 fprintf( stderr, "posmapm = " ); | |
605 for( i=0; i<lenbk+2; i++ ) | |
606 { | |
607 fprintf( stderr, "%3d ", posmap[i] ); | |
608 } | |
609 fprintf( stderr, "\n" ); | |
610 #endif | |
611 } | |
612 | |
613 static int insertgapsbyotherfragments_compact( int len, char *a, char *s, int *l, int *p ) | |
614 { | |
615 int gaplen; | |
616 int i, pos, pi; | |
617 int prevp = -1; | |
618 int realignment = 0; | |
619 // fprintf( stderr, "### insertgapsbyotherfragments\n" ); | |
620 for( i=0; i<len; i++ ) | |
621 { | |
622 gaplen = l[i]; | |
623 pi = p[i]; | |
624 pos = prevp + 1; | |
625 // fprintf( stderr, "gaplen = %d\n", gaplen ); | |
626 while( gaplen-- ) | |
627 { | |
628 pos++; | |
629 *a++ = *s++; | |
630 } | |
631 // fprintf( stderr, "pos = %d, pi = %d\n", pos, pi ); | |
632 while( pos++ < pi ) | |
633 { | |
634 *a++ = '='; | |
635 realignment = 1; | |
636 } | |
637 *a++ = *s++; | |
638 prevp = pi; | |
639 } | |
640 gaplen = l[i]; | |
641 pi = p[i]; | |
642 pos = prevp + 1; | |
643 while( gaplen-- ) | |
644 { | |
645 pos++; | |
646 *a++ = *s++; | |
647 } | |
648 while( pos++ < pi ) | |
649 { | |
650 *a++ = '='; | |
651 realignment = 1; | |
652 } | |
653 *a = 0; | |
654 return( realignment ); | |
655 } | |
656 | |
657 void makegaplistcompact( int len, int *p, int *c, int *l ) | |
658 { | |
659 int i; | |
660 int pg; | |
661 int prep = -1; | |
662 for( i=0; i<len+2; i++ ) | |
663 { | |
664 if( ( pg = p[i]-prep-1) > 0 && l[i] > 0 ) | |
665 { | |
666 if( pg < l[i] ) | |
667 { | |
668 c[i] = l[i] - pg; | |
669 } | |
670 else | |
671 { | |
672 c[i] = 0; | |
673 } | |
674 } | |
675 else | |
676 { | |
677 c[i] = l[i]; | |
678 } | |
679 prep = p[i]; | |
680 } | |
681 } | |
682 | |
683 | |
684 void gaplist2alnx( int len, char *a, char *s, int *l, int *p, int lenlimit ) | |
685 { | |
686 int gaplen; | |
687 int pos, pi, posl; | |
688 int prevp = -1; | |
689 int reslen = 0; | |
690 char *sp; | |
691 // char *abk = a; | |
692 | |
693 #if 0 | |
694 int i; | |
695 char *abk = a; | |
696 fprintf( stderr, "s = %s\n", s ); | |
697 fprintf( stderr, "posmap = " ); | |
698 for( i=0; i<len+2; i++ ) | |
699 { | |
700 fprintf( stderr, "%3d ", p[i] ); | |
701 } | |
702 fprintf( stderr, "\n" ); | |
703 fprintf( stderr, "gaplist = " ); | |
704 for( i=0; i<len+2; i++ ) | |
705 { | |
706 fprintf( stderr, "%3d ", l[i] ); | |
707 } | |
708 fprintf( stderr, "\n" ); | |
709 #endif | |
710 while( len-- ) | |
711 { | |
712 gaplen = *l++; | |
713 pi = *p++; | |
714 | |
715 if( (reslen+=gaplen) > lenlimit ) | |
716 { | |
717 fprintf( stderr, "Length over. Please recompile!\n" ); | |
718 exit( 1 ); | |
719 } | |
720 while( gaplen-- ) *a++ = '-'; | |
721 | |
722 pos = prevp + 1; | |
723 sp = s + pos; | |
724 if( ( posl = pi - pos ) ) | |
725 { | |
726 if( ( reslen += posl ) > lenlimit ) | |
727 { | |
728 fprintf( stderr, "Length over. Please recompile\n" ); | |
729 exit( 1 ); | |
730 } | |
731 while( posl-- ) *a++ = *sp++; | |
732 } | |
733 | |
734 if( reslen++ > lenlimit ) | |
735 { | |
736 fprintf( stderr, "Length over. Please recompile\n" ); | |
737 exit( 1 ); | |
738 } | |
739 *a++ = *sp; | |
740 prevp = pi; | |
741 } | |
742 | |
743 gaplen = *l; | |
744 pi = *p; | |
745 if( (reslen+=gaplen) > lenlimit ) | |
746 { | |
747 fprintf( stderr, "Length over. Please recompile\n" ); | |
748 exit( 1 ); | |
749 } | |
750 while( gaplen-- ) *a++ = '-'; | |
751 | |
752 pos = prevp + 1; | |
753 sp = s + pos; | |
754 if( ( posl = pi - pos ) ) | |
755 { | |
756 if( ( reslen += posl ) > lenlimit ) | |
757 { | |
758 fprintf( stderr, "Length over. Please recompile\n" ); | |
759 exit( 1 ); | |
760 } | |
761 while( posl-- ) *a++ = *sp++; | |
762 } | |
763 *a = 0; | |
764 // fprintf( stderr, "reslen = %d, strlen(a) = %d\n", reslen, strlen( abk ) ); | |
765 // fprintf( stderr, "a = %s\n", abk ); | |
766 } | |
767 | |
768 static void makenewgaplist( int *l, char *a ) | |
769 { | |
770 while( 1 ) | |
771 { | |
772 while( *a == '=' ) | |
773 { | |
774 a++; | |
775 (*l)++; | |
776 // fprintf( stderr, "a[] (i) = %s, *l=%d\n", a, *(l) ); | |
777 } | |
778 *++l = 0; | |
779 if( *a == 0 ) break; | |
780 a++; | |
781 } | |
782 *l = -1; | |
783 } | |
784 | |
785 | |
786 void arguments( int argc, char *argv[] ) | |
787 { | |
788 int c; | |
789 | |
790 nthread = 1; | |
791 outnumber = 0; | |
792 scoreout = 0; | |
793 treein = 0; | |
794 topin = 0; | |
795 rnaprediction = 'm'; | |
796 rnakozo = 0; | |
797 nevermemsave = 0; | |
798 inputfile = NULL; | |
799 addfile = NULL; | |
800 addprofile = 1; | |
801 fftkeika = 0; | |
802 constraint = 0; | |
803 nblosum = 62; | |
804 fmodel = 0; | |
805 calledByXced = 0; | |
806 devide = 0; | |
807 use_fft = 0; // chuui | |
808 force_fft = 0; | |
809 fftscore = 1; | |
810 fftRepeatStop = 0; | |
811 fftNoAnchStop = 0; | |
812 weight = 3; | |
813 utree = 1; | |
814 tbutree = 1; | |
815 refine = 0; | |
816 check = 1; | |
817 cut = 0.0; | |
818 disp = 0; | |
819 outgap = 1; | |
820 alg = 'A'; | |
821 mix = 0; | |
822 tbitr = 0; | |
823 scmtd = 5; | |
824 tbweight = 0; | |
825 tbrweight = 3; | |
826 checkC = 0; | |
827 treemethod = 'X'; | |
828 sueff_global = 0.1; | |
829 contin = 0; | |
830 scoremtx = 1; | |
831 kobetsubunkatsu = 0; | |
832 dorp = NOTSPECIFIED; | |
833 ppenalty = NOTSPECIFIED; | |
834 penalty_shift_factor = 1000.0; | |
835 ppenalty_ex = NOTSPECIFIED; | |
836 poffset = NOTSPECIFIED; | |
837 kimuraR = NOTSPECIFIED; | |
838 pamN = NOTSPECIFIED; | |
839 geta2 = GETA2; | |
840 fftWinSize = NOTSPECIFIED; | |
841 fftThreshold = NOTSPECIFIED; | |
842 RNAppenalty = NOTSPECIFIED; | |
843 RNAppenalty_ex = NOTSPECIFIED; | |
844 RNApthr = NOTSPECIFIED; | |
845 TMorJTT = JTT; | |
846 consweight_multi = 1.0; | |
847 consweight_rna = 0.0; | |
848 nadd = 0; | |
849 multidist = 0; | |
850 tuplesize = -1; | |
851 legacygapcost = 0; | |
852 allowlongadds = 0; | |
853 | |
854 while( --argc > 0 && (*++argv)[0] == '-' ) | |
855 { | |
856 while ( ( c = *++argv[0] ) ) | |
857 { | |
858 switch( c ) | |
859 { | |
860 case 'i': | |
861 inputfile = *++argv; | |
862 fprintf( stderr, "inputfile = %s\n", inputfile ); | |
863 --argc; | |
864 goto nextoption; | |
865 case 'I': | |
866 nadd = myatoi( *++argv ); | |
867 fprintf( stderr, "nadd = %d\n", nadd ); | |
868 --argc; | |
869 goto nextoption; | |
870 case 'e': | |
871 RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
872 --argc; | |
873 goto nextoption; | |
874 case 'o': | |
875 RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
876 --argc; | |
877 goto nextoption; | |
878 case 'f': | |
879 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
880 // fprintf( stderr, "ppenalty = %d\n", ppenalty ); | |
881 --argc; | |
882 goto nextoption; | |
883 case 'Q': | |
884 penalty_shift_factor = atof( *++argv ); | |
885 --argc; | |
886 goto nextoption; | |
887 case 'g': | |
888 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
889 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); | |
890 --argc; | |
891 goto nextoption; | |
892 case 'h': | |
893 poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
894 // fprintf( stderr, "poffset = %d\n", poffset ); | |
895 --argc; | |
896 goto nextoption; | |
897 case 'k': | |
898 kimuraR = myatoi( *++argv ); | |
899 fprintf( stderr, "kappa = %d\n", kimuraR ); | |
900 --argc; | |
901 goto nextoption; | |
902 case 'b': | |
903 nblosum = myatoi( *++argv ); | |
904 scoremtx = 1; | |
905 fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); | |
906 --argc; | |
907 goto nextoption; | |
908 case 'j': | |
909 pamN = myatoi( *++argv ); | |
910 scoremtx = 0; | |
911 TMorJTT = JTT; | |
912 fprintf( stderr, "jtt/kimura %d\n", pamN ); | |
913 --argc; | |
914 goto nextoption; | |
915 case 'm': | |
916 pamN = myatoi( *++argv ); | |
917 scoremtx = 0; | |
918 TMorJTT = TM; | |
919 fprintf( stderr, "tm %d\n", pamN ); | |
920 --argc; | |
921 goto nextoption; | |
922 case 'l': | |
923 fastathreshold = atof( *++argv ); | |
924 constraint = 2; | |
925 --argc; | |
926 goto nextoption; | |
927 case 'r': | |
928 consweight_rna = atof( *++argv ); | |
929 rnakozo = 1; | |
930 --argc; | |
931 goto nextoption; | |
932 case 'c': | |
933 consweight_multi = atof( *++argv ); | |
934 --argc; | |
935 goto nextoption; | |
936 case 'C': | |
937 nthread = myatoi( *++argv ); | |
938 fprintf( stderr, "nthread = %d\n", nthread ); | |
939 --argc; | |
940 goto nextoption; | |
941 case 'R': | |
942 rnaprediction = 'r'; | |
943 break; | |
944 case 's': | |
945 RNAscoremtx = 'r'; | |
946 break; | |
947 #if 1 | |
948 case 'a': | |
949 fmodel = 1; | |
950 break; | |
951 #endif | |
952 case 'K': | |
953 addprofile = 0; | |
954 break; | |
955 case 'y': | |
956 distout = 1; | |
957 break; | |
958 case 't': | |
959 treeout = 1; | |
960 break; | |
961 case 'T': | |
962 noalign = 1; | |
963 break; | |
964 case 'D': | |
965 dorp = 'd'; | |
966 break; | |
967 case 'P': | |
968 dorp = 'p'; | |
969 break; | |
970 #if 1 | |
971 case 'O': | |
972 outgap = 0; | |
973 break; | |
974 #else | |
975 case 'O': | |
976 fftNoAnchStop = 1; | |
977 break; | |
978 #endif | |
979 case 'S': | |
980 scoreout = 1; | |
981 break; | |
982 #if 0 | |
983 case 'e': | |
984 fftscore = 0; | |
985 break; | |
986 case 'r': | |
987 fmodel = -1; | |
988 break; | |
989 case 'R': | |
990 fftRepeatStop = 1; | |
991 break; | |
992 case 's': | |
993 treemethod = 's'; | |
994 break; | |
995 #endif | |
996 case 'X': | |
997 treemethod = 'X'; | |
998 sueff_global = atof( *++argv ); | |
999 fprintf( stderr, "sueff_global = %f\n", sueff_global ); | |
1000 --argc; | |
1001 goto nextoption; | |
1002 case 'E': | |
1003 treemethod = 'E'; | |
1004 break; | |
1005 case 'q': | |
1006 treemethod = 'q'; | |
1007 break; | |
1008 case 'n' : | |
1009 outnumber = 1; | |
1010 break; | |
1011 #if 0 | |
1012 case 'a': | |
1013 alg = 'a'; | |
1014 break; | |
1015 case 'Q': | |
1016 alg = 'Q'; | |
1017 break; | |
1018 #endif | |
1019 case 'H': | |
1020 alg = 'H'; | |
1021 break; | |
1022 case 'A': | |
1023 alg = 'A'; | |
1024 break; | |
1025 case 'M': | |
1026 alg = 'M'; | |
1027 break; | |
1028 case 'N': | |
1029 nevermemsave = 1; | |
1030 break; | |
1031 case 'B': | |
1032 break; | |
1033 case 'F': | |
1034 use_fft = 1; | |
1035 break; | |
1036 case 'G': | |
1037 force_fft = 1; | |
1038 use_fft = 1; | |
1039 break; | |
1040 case 'U': | |
1041 treein = 1; | |
1042 break; | |
1043 case 'V': | |
1044 allowlongadds = 1; | |
1045 break; | |
1046 #if 0 | |
1047 case 'V': | |
1048 topin = 1; | |
1049 break; | |
1050 #endif | |
1051 case 'u': | |
1052 tbrweight = 0; | |
1053 weight = 0; | |
1054 break; | |
1055 case 'v': | |
1056 tbrweight = 3; | |
1057 break; | |
1058 case 'd': | |
1059 multidist = 1; | |
1060 break; | |
1061 case 'W': | |
1062 tuplesize = myatoi( *++argv ); | |
1063 --argc; | |
1064 goto nextoption; | |
1065 #if 0 | |
1066 case 'd': | |
1067 disp = 1; | |
1068 break; | |
1069 #endif | |
1070 /* Modified 01/08/27, default: user tree */ | |
1071 case 'J': | |
1072 tbutree = 0; | |
1073 break; | |
1074 /* modification end. */ | |
1075 case 'z': | |
1076 fftThreshold = myatoi( *++argv ); | |
1077 --argc; | |
1078 goto nextoption; | |
1079 case 'w': | |
1080 fftWinSize = myatoi( *++argv ); | |
1081 --argc; | |
1082 goto nextoption; | |
1083 case 'Z': | |
1084 checkC = 1; | |
1085 break; | |
1086 case 'L': | |
1087 legacygapcost = 1; | |
1088 break; | |
1089 default: | |
1090 fprintf( stderr, "illegal option %c\n", c ); | |
1091 argc = 0; | |
1092 break; | |
1093 } | |
1094 } | |
1095 nextoption: | |
1096 ; | |
1097 } | |
1098 if( argc == 1 ) | |
1099 { | |
1100 cut = atof( (*argv) ); | |
1101 argc--; | |
1102 } | |
1103 if( argc != 0 ) | |
1104 { | |
1105 fprintf( stderr, "options: Check source file !\n" ); | |
1106 exit( 1 ); | |
1107 } | |
1108 if( tbitr == 1 && outgap == 0 ) | |
1109 { | |
1110 fprintf( stderr, "conflicting options : o, m or u\n" ); | |
1111 exit( 1 ); | |
1112 } | |
1113 if( alg == 'C' && outgap == 0 ) | |
1114 { | |
1115 fprintf( stderr, "conflicting options : C, o\n" ); | |
1116 exit( 1 ); | |
1117 } | |
1118 } | |
1119 | |
1120 | |
1121 static float treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo ) | |
1122 { | |
1123 int i, l, m; | |
1124 int len1nocommongap, len2nocommongap; | |
1125 int len1, len2; | |
1126 int clus1, clus2; | |
1127 float pscore, tscore; | |
1128 char *indication1, *indication2; | |
1129 double *effarr1 = NULL; | |
1130 double *effarr2 = NULL; | |
1131 double *effarr1_kozo = NULL; | |
1132 double *effarr2_kozo = NULL; | |
1133 LocalHom ***localhomshrink = NULL; | |
1134 int *fftlog; | |
1135 int m1, m2; | |
1136 int *gaplen; | |
1137 int *gapmap; | |
1138 int *alreadyaligned; | |
1139 float dumfl = 0.0; | |
1140 int ffttry; | |
1141 RNApair ***grouprna1, ***grouprna2; | |
1142 | |
1143 if( rnakozo && rnaprediction == 'm' ) | |
1144 { | |
1145 grouprna1 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); | |
1146 grouprna2 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); | |
1147 } | |
1148 else | |
1149 { | |
1150 grouprna1 = grouprna2 = NULL; | |
1151 } | |
1152 | |
1153 fftlog = AllocateIntVec( nseq ); | |
1154 effarr1 = AllocateDoubleVec( nseq ); | |
1155 effarr2 = AllocateDoubleVec( nseq ); | |
1156 indication1 = AllocateCharVec( 150 ); | |
1157 indication2 = AllocateCharVec( 150 ); | |
1158 alreadyaligned = AllocateIntVec( nseq ); | |
1159 if( constraint ) | |
1160 { | |
1161 localhomshrink = (LocalHom ***)calloc( nseq, sizeof( LocalHom ** ) ); | |
1162 #if SMALLMEMORY | |
1163 if( multidist ) | |
1164 { | |
1165 for( i=0; i<nseq; i++) localhomshrink[i] = (LocalHom **)calloc( 1, sizeof( LocalHom *) ); | |
1166 } | |
1167 else | |
1168 #endif | |
1169 { | |
1170 for( i=0; i<nseq; i++) localhomshrink[i] = (LocalHom **)calloc( nseq, sizeof( LocalHom *) ); | |
1171 } | |
1172 } | |
1173 effarr1_kozo = AllocateDoubleVec( nseq ); //tsuneni allocate sareru. | |
1174 effarr2_kozo = AllocateDoubleVec( nseq ); //tsuneni allocate sareru. | |
1175 for( i=0; i<nseq; i++ ) effarr1_kozo[i] = 0.0; | |
1176 for( i=0; i<nseq; i++ ) effarr2_kozo[i] = 0.0; | |
1177 | |
1178 gaplen = AllocateIntVec( *alloclen+10 ); // maikai shokika | |
1179 gapmap = AllocateIntVec( *alloclen+10 ); // maikai shokika | |
1180 for( i=0; i<nseq-1; i++ ) alreadyaligned[i] = 1; | |
1181 alreadyaligned[nseq-1] = 0; | |
1182 | |
1183 for( l=0; l<nseq; l++ ) fftlog[l] = 1; | |
1184 | |
1185 | |
1186 if( constraint ) | |
1187 { | |
1188 #if SMALLMEMORY | |
1189 if( multidist ) | |
1190 dontcalcimportance_firstone( nseq, effarr, aseq, localhomtable ); | |
1191 else | |
1192 calcimportance( nseq, effarr, aseq, localhomtable ); | |
1193 #else | |
1194 calcimportance( nseq, effarr, aseq, localhomtable ); | |
1195 #endif | |
1196 } | |
1197 | |
1198 tscore = 0.0; | |
1199 for( l=0; l<nseq-1; l++ ) | |
1200 { | |
1201 if( mergeoralign[l] == 'n' ) | |
1202 { | |
1203 // fprintf( stderr, "SKIP!\n" ); | |
1204 #if 0 | |
1205 free( topol[l][0] ); | |
1206 free( topol[l][1] ); | |
1207 free( topol[l] ); | |
1208 #endif | |
1209 continue; | |
1210 } | |
1211 | |
1212 m1 = topol[l][0][0]; | |
1213 m2 = topol[l][1][0]; | |
1214 len1 = strlen( aseq[m1] ); | |
1215 len2 = strlen( aseq[m2] ); | |
1216 if( *alloclen < len1 + len2 ) | |
1217 { | |
1218 #if 0 | |
1219 fprintf( stderr, "\nReallocating.." ); | |
1220 *alloclen = ( len1 + len2 ) + 1000; | |
1221 ReallocateCharMtx( aseq, nseq, *alloclen + 10 ); | |
1222 gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) ); | |
1223 if( gaplen == NULL ) | |
1224 { | |
1225 fprintf( stderr, "Cannot realloc gaplen\n" ); | |
1226 exit( 1 ); | |
1227 } | |
1228 gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) ); | |
1229 if( gapmap == NULL ) | |
1230 { | |
1231 fprintf( stderr, "Cannot realloc gapmap\n" ); | |
1232 exit( 1 ); | |
1233 } | |
1234 fprintf( stderr, "done. *alloclen = %d\n", *alloclen ); | |
1235 #else | |
1236 fprintf( stderr, "Length over!\n" ); | |
1237 exit( 1 ); | |
1238 #endif | |
1239 } | |
1240 | |
1241 if( effarr_kozo ) | |
1242 { | |
1243 clus1 = fastconjuction_noname_kozo( topol[l][0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 ); | |
1244 clus2 = fastconjuction_noname_kozo( topol[l][1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 ); | |
1245 } | |
1246 else | |
1247 { | |
1248 clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 ); | |
1249 clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 ); | |
1250 } | |
1251 | |
1252 if( mergeoralign[l] == '1' || mergeoralign[l] == '2' ) | |
1253 { | |
1254 newgapstr = "="; | |
1255 } | |
1256 else | |
1257 newgapstr = "-"; | |
1258 | |
1259 | |
1260 len1nocommongap = len1; | |
1261 len2nocommongap = len2; | |
1262 if( mergeoralign[l] == '1' ) // nai | |
1263 { | |
1264 findcommongaps( clus2, mseq2, gapmap ); | |
1265 commongappick( clus2, mseq2 ); | |
1266 len2nocommongap = strlen( mseq2[0] ); | |
1267 } | |
1268 else if( mergeoralign[l] == '2' ) | |
1269 { | |
1270 findcommongaps( clus1, mseq1, gapmap ); | |
1271 commongappick( clus1, mseq1 ); | |
1272 len1nocommongap = strlen( mseq1[0] ); | |
1273 } | |
1274 | |
1275 | |
1276 // fprintf( trap_g, "\nSTEP-%d\n", l ); | |
1277 // fprintf( trap_g, "group1 = %s\n", indication1 ); | |
1278 // fprintf( trap_g, "group2 = %s\n", indication2 ); | |
1279 // | |
1280 #if 1 | |
1281 // fprintf( stderr, "\rSTEP % 5d /%d ", l+1, nseq-1 ); | |
1282 // fflush( stderr ); | |
1283 #else | |
1284 fprintf( stdout, "STEP %d /%d\n", l+1, nseq-1 ); | |
1285 fprintf( stderr, "STEP %d /%d\n", l+1, nseq-1 ); | |
1286 fprintf( stderr, "group1 = %.66s", indication1 ); | |
1287 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." ); | |
1288 fprintf( stderr, "\n" ); | |
1289 fprintf( stderr, "group2 = %.66s", indication2 ); | |
1290 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); | |
1291 fprintf( stderr, "\n" ); | |
1292 #endif | |
1293 | |
1294 | |
1295 | |
1296 // for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] ); | |
1297 | |
1298 if( constraint ) | |
1299 { | |
1300 #if SMALLMEMORY | |
1301 if( multidist ) | |
1302 { | |
1303 fastshrinklocalhom_one( topol[l][0], topol[l][1], nseq-1, localhomtable, localhomshrink ); | |
1304 } | |
1305 else | |
1306 #endif | |
1307 { | |
1308 fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink ); | |
1309 } | |
1310 | |
1311 // msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink ); | |
1312 // fprintf( stdout, "localhomshrink =\n" ); | |
1313 // outlocalhompt( localhomshrink, clus1, clus2 ); | |
1314 // weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink ); | |
1315 // fprintf( stderr, "after weight =\n" ); | |
1316 // outlocalhompt( localhomshrink, clus1, clus2 ); | |
1317 } | |
1318 if( rnakozo && rnaprediction == 'm' ) | |
1319 { | |
1320 makegrouprna( grouprna1, singlerna, topol[l][0] ); | |
1321 makegrouprna( grouprna2, singlerna, topol[l][1] ); | |
1322 } | |
1323 | |
1324 | |
1325 /* | |
1326 fprintf( stderr, "before align all\n" ); | |
1327 display( aseq, nseq ); | |
1328 fprintf( stderr, "\n" ); | |
1329 fprintf( stderr, "before align 1 %s \n", indication1 ); | |
1330 display( mseq1, clus1 ); | |
1331 fprintf( stderr, "\n" ); | |
1332 fprintf( stderr, "before align 2 %s \n", indication2 ); | |
1333 display( mseq2, clus2 ); | |
1334 fprintf( stderr, "\n" ); | |
1335 */ | |
1336 | |
1337 | |
1338 if( !nevermemsave && ( constraint != 2 && alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) | |
1339 { | |
1340 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); | |
1341 alg = 'M'; | |
1342 if( commonIP ) FreeIntMtx( commonIP ); | |
1343 commonIP = NULL; // 2013/Jul17 | |
1344 commonAlloc1 = 0; | |
1345 commonAlloc2 = 0; | |
1346 } | |
1347 | |
1348 | |
1349 // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); | |
1350 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); | |
1351 else ffttry = 0; | |
1352 // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 | |
1353 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 ); | |
1354 // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); | |
1355 if( constraint == 2 ) | |
1356 { | |
1357 if( alg == 'M' ) | |
1358 { | |
1359 fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); | |
1360 exit( 1 ); | |
1361 } | |
1362 fprintf( stderr, "c" ); | |
1363 if( alg == 'A' ) | |
1364 { | |
1365 imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 ); | |
1366 if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); | |
1367 pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); | |
1368 } | |
1369 else if( alg == 'Q' ) | |
1370 { | |
1371 fprintf( stderr, "Q has been disabled.\n" ); | |
1372 exit( 1 ); | |
1373 } | |
1374 } | |
1375 else if( force_fft || ( use_fft && ffttry ) ) | |
1376 { | |
1377 fprintf( stderr, "f" ); | |
1378 if( alg == 'M' ) | |
1379 { | |
1380 fprintf( stderr, "m" ); | |
1381 pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); | |
1382 } | |
1383 else | |
1384 pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); | |
1385 } | |
1386 else | |
1387 { | |
1388 fprintf( stderr, "d" ); | |
1389 fftlog[m1] = 0; | |
1390 switch( alg ) | |
1391 { | |
1392 case( 'a' ): | |
1393 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); | |
1394 break; | |
1395 case( 'M' ): | |
1396 fprintf( stderr, "m" ); | |
1397 pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); | |
1398 break; | |
1399 case( 'A' ): | |
1400 pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); | |
1401 break; | |
1402 default: | |
1403 ErrorExit( "ERROR IN SOURCE FILE" ); | |
1404 } | |
1405 } | |
1406 | |
1407 | |
1408 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); | |
1409 | |
1410 // fprintf( stderr, "aseq[last] = %s\n", aseq[nseq-1] ); | |
1411 | |
1412 #if SCOREOUT | |
1413 fprintf( stderr, "score = %10.2f\n", pscore ); | |
1414 #endif | |
1415 tscore += pscore; | |
1416 #if 0 // New gaps = '=' | |
1417 fprintf( stderr, "Original msa\n" ); | |
1418 for( i=0; i<clus1; i++ ) | |
1419 fprintf( stderr, "%s\n", mseq1[i] ); | |
1420 fprintf( stderr, "Query\n" ); | |
1421 for( i=0; i<clus2; i++ ) | |
1422 fprintf( stderr, "%s\n", mseq2[i] ); | |
1423 #endif | |
1424 | |
1425 // writePre( nseq, name, nlen, aseq, 0 ); | |
1426 | |
1427 if( disp ) display( aseq, nseq ); | |
1428 | |
1429 if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara. | |
1430 { | |
1431 adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] ); | |
1432 restorecommongaps( nseq, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); | |
1433 findnewgaps( clus2, 0, mseq2, gaplen ); | |
1434 insertnewgaps( nseq, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' ); | |
1435 // for( i=0; i<nseq; i++ ) eq2dash( aseq[i] ); | |
1436 for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1; | |
1437 } | |
1438 if( mergeoralign[l] == '2' ) | |
1439 { | |
1440 adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] ); | |
1441 restorecommongaps( nseq, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); | |
1442 findnewgaps( clus1, 0, mseq1, gaplen ); | |
1443 insertnewgaps( nseq, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' ); | |
1444 // for( i=0; i<nseq; i++ ) eq2dash( aseq[i] ); | |
1445 for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1; | |
1446 } | |
1447 | |
1448 #if 0 | |
1449 free( topol[l][0] ); | |
1450 free( topol[l][1] ); | |
1451 free( topol[l] ); | |
1452 #endif | |
1453 } | |
1454 | |
1455 #if SCOREOUT | |
1456 fprintf( stderr, "totalscore = %10.2f\n\n", tscore ); | |
1457 #endif | |
1458 free( gaplen ); | |
1459 free( gapmap ); | |
1460 if( rnakozo && rnaprediction == 'm' ) | |
1461 { | |
1462 free( grouprna1 ); | |
1463 free( grouprna2 ); | |
1464 } | |
1465 free( fftlog ); // iranai | |
1466 free( effarr1 ); | |
1467 free( effarr2 ); | |
1468 free( indication1 ); | |
1469 free( indication2 ); | |
1470 free( alreadyaligned ); | |
1471 if( constraint ) | |
1472 { | |
1473 for( i=0; i<nseq; i++ ) free( localhomshrink[i] ); // ?? | |
1474 free( localhomshrink ); | |
1475 } | |
1476 free( effarr1_kozo ); | |
1477 free( effarr2_kozo ); | |
1478 | |
1479 return( pscore ); | |
1480 } | |
1481 | |
1482 | |
1483 | |
1484 | |
1485 static void mtxcpy( int norg, int njobc, float ***iscorec, float **iscore ) | |
1486 { | |
1487 int i, nlim, n; | |
1488 float *fpt, *fptc; | |
1489 | |
1490 *iscorec = AllocateFloatHalfMtx( njobc ); | |
1491 nlim = norg-1; | |
1492 for( i=0; i<nlim; i++ ) | |
1493 { | |
1494 fptc = (*iscorec)[i]+1; | |
1495 fpt = iscore[i]+1; | |
1496 n = norg-i-1; | |
1497 while( n-- ) | |
1498 *fptc++ = *fpt++; | |
1499 // for( j=i+1; j<norg; j++ ) | |
1500 // (*iscorec)[i][j-i] = iscore[i][j-i]; | |
1501 } | |
1502 } | |
1503 | |
1504 | |
1505 static void *addsinglethread( void *arg ) | |
1506 { | |
1507 thread_arg_t *targ = (thread_arg_t *)arg; | |
1508 int *nlenc = NULL; | |
1509 char **namec = NULL; | |
1510 Treedep *depc = NULL; | |
1511 char **mseq1 = NULL, **mseq2 = NULL; | |
1512 float **iscorec; | |
1513 // float **iscorecbk; // to speedup | |
1514 double *effc = NULL; | |
1515 int ***topolc = NULL; | |
1516 float **lenc = NULL; | |
1517 LocalHom **localhomtablec = NULL; | |
1518 int *memlist0 = NULL; | |
1519 int *memlist1 = NULL; | |
1520 int *addmem = NULL; | |
1521 int njobc, norg; | |
1522 char **bseq = NULL; | |
1523 int i, j, k, m, iadd, rep, neighbor; | |
1524 char *mergeoralign = NULL; | |
1525 int *nogaplenjusttodecideaddhereornot = NULL; | |
1526 char *tmpseq = NULL; | |
1527 | |
1528 #ifdef enablemultithread | |
1529 int thread_no = targ->thread_no; | |
1530 int *iaddshare = targ->iaddshare; | |
1531 #endif | |
1532 int njob = targ->njob; | |
1533 int *follows = targ->follows; | |
1534 int nadd = targ->nadd; | |
1535 int *nlen = targ->nlen; | |
1536 char **name = targ->name; | |
1537 char **seq = targ->seq; | |
1538 LocalHom **localhomtable = targ->localhomtable; | |
1539 float **iscore = targ->iscore; | |
1540 float **nscore = targ->nscore; | |
1541 int *istherenewgap = targ->istherenewgap; | |
1542 int **newgaplist = targ->newgaplist; | |
1543 RNApair ***singlerna = targ->singlerna; | |
1544 double *eff_kozo_mapped = targ->eff_kozo_mapped; | |
1545 int alloclen = targ->alloclen; | |
1546 Treedep *dep = targ->dep; | |
1547 int ***topol = targ->topol; | |
1548 float **len = targ->len; | |
1549 Addtree *addtree = targ->addtree; | |
1550 float pscore; | |
1551 int *alnleninnode = NULL; | |
1552 char *targetseq; | |
1553 | |
1554 | |
1555 // fprintf( stderr, "\nPreparing thread %d\n", thread_no ); | |
1556 norg = njob - nadd; | |
1557 njobc = norg+1; | |
1558 | |
1559 alnleninnode = AllocateIntVec( norg ); | |
1560 addmem = AllocateIntVec( nadd+1 ); | |
1561 depc = (Treedep *)calloc( njobc, sizeof( Treedep ) ); | |
1562 mseq1 = AllocateCharMtx( njob, 0 ); | |
1563 mseq2 = AllocateCharMtx( njob, 0 ); | |
1564 bseq = AllocateCharMtx( njobc, alloclen ); | |
1565 namec = AllocateCharMtx( njob, 0 ); | |
1566 nlenc = AllocateIntVec( njob ); | |
1567 mergeoralign = AllocateCharVec( njob ); | |
1568 nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc ); | |
1569 tmpseq = calloc( alloclen, sizeof( char ) ); | |
1570 | |
1571 if( allowlongadds ) // hontou ha iranai. | |
1572 { | |
1573 for( i=0; i<njobc; i++ ) nogaplenjusttodecideaddhereornot[i] = 0; | |
1574 } | |
1575 else | |
1576 { | |
1577 for( i=0; i<norg; i++ ) | |
1578 { | |
1579 gappick0( tmpseq, seq[i] ); | |
1580 nogaplenjusttodecideaddhereornot[i] = strlen( tmpseq ); | |
1581 } | |
1582 } | |
1583 | |
1584 for( i=0; i<norg; i++ ) strcpy( bseq[i], seq[i] ); | |
1585 if( norg == 1 ) | |
1586 { | |
1587 alnleninnode[0] = strlen( bseq[0] ); | |
1588 } | |
1589 else | |
1590 { | |
1591 for( i=norg-2; i>=0; i-- ) | |
1592 // for( i=norg-2; i; i-- ) // BUG!!!! | |
1593 { | |
1594 // reporterr( "\nstep %d\n", i ); | |
1595 k = 0; | |
1596 for( j=0; (m=topol[i][0][j])!=-1; j++ ) | |
1597 { | |
1598 mseq1[k++] = bseq[m]; | |
1599 // reporterr( "%d ", m ); | |
1600 } | |
1601 for( j=0; (m=topol[i][1][j])!=-1; j++ ) | |
1602 { | |
1603 mseq1[k++] = bseq[m]; | |
1604 // reporterr( "%d ", m ); | |
1605 } | |
1606 // reporterr( "\n" ); | |
1607 commongappick( k, mseq1 ); | |
1608 alnleninnode[i] = strlen( mseq1[0] ); | |
1609 // fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] ); | |
1610 } | |
1611 } | |
1612 // for( i=0; i<norg-1; i++ ) | |
1613 // fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] ); | |
1614 | |
1615 | |
1616 if( constraint ) | |
1617 { | |
1618 localhomtablec = (LocalHom **)calloc( njobc, sizeof( LocalHom *) ); // motto chiisaku dekiru. | |
1619 #if SMALLMEMORY | |
1620 if( multidist ) | |
1621 { | |
1622 for( i=0; i<njobc; i++) localhomtablec[i] = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // motto chiisaku dekiru. | |
1623 } | |
1624 else | |
1625 #endif | |
1626 { | |
1627 for( i=0; i<njobc; i++) localhomtablec[i] = (LocalHom *)calloc( njobc, sizeof( LocalHom ) ); // motto chiisaku dekiru. | |
1628 for( i=0; i<norg; i++ ) for( j=0; j<norg; j++ ) localhomtablec[i][j] = localhomtable[i][j]; // iru! | |
1629 } | |
1630 } | |
1631 | |
1632 | |
1633 topolc = AllocateIntCub( njobc, 2, 0 ); | |
1634 lenc = AllocateFloatMtx( njobc, 2 ); | |
1635 effc = AllocateDoubleVec( njobc ); | |
1636 // for( i=0; i<norg; i++ ) nlenc[i] = strlen( seq[i] ); | |
1637 for( i=0; i<norg; i++ ) nlenc[i] = nlen[i]; | |
1638 for( i=0; i<norg; i++ ) namec[i] = name[i]; | |
1639 memlist0 = AllocateIntVec( norg+1 ); | |
1640 memlist1 = AllocateIntVec( 2 ); | |
1641 for( i=0; i<norg; i++ ) memlist0[i] = i; | |
1642 memlist0[norg] = -1; | |
1643 | |
1644 // fprintf( stderr, "\ndone. %d\n", thread_no ); | |
1645 | |
1646 // mtxcpy( norg, norg, &iscorecbk, iscore ); // to speedup? | |
1647 | |
1648 | |
1649 iadd = -1; | |
1650 while( 1 ) | |
1651 { | |
1652 #ifdef enablemultithread | |
1653 if( nthread ) | |
1654 { | |
1655 pthread_mutex_lock( targ->mutex_counter ); | |
1656 iadd = *iaddshare; | |
1657 if( iadd == nadd ) | |
1658 { | |
1659 pthread_mutex_unlock( targ->mutex_counter ); | |
1660 break; | |
1661 } | |
1662 fprintf( stderr, "\r%d / %d (thread %d) \r", iadd, nadd, thread_no ); | |
1663 ++(*iaddshare); | |
1664 targetseq = seq[norg+iadd]; | |
1665 pthread_mutex_unlock( targ->mutex_counter ); | |
1666 } | |
1667 else | |
1668 #endif | |
1669 { | |
1670 iadd++; | |
1671 targetseq = seq[norg+iadd]; | |
1672 if( iadd == nadd ) break; | |
1673 fprintf( stderr, "\r%d / %d \r", iadd, nadd ); | |
1674 } | |
1675 | |
1676 for( i=0; i<norg; i++ ) strcpy( bseq[i], seq[i] ); | |
1677 // gappick0( bseq[norg], seq[norg+iadd] ); | |
1678 gappick0( bseq[norg], targetseq ); | |
1679 | |
1680 if( allowlongadds ) // missed in v7.220 | |
1681 nogaplenjusttodecideaddhereornot[norg] = 0; | |
1682 else | |
1683 nogaplenjusttodecideaddhereornot[norg] = strlen( bseq[norg] ); | |
1684 | |
1685 mtxcpy( norg, njobc, &iscorec, iscore ); | |
1686 | |
1687 if( multidist || tuplesize > 0 ) | |
1688 { | |
1689 for( i=0; i<norg; i++ ) iscorec[i][norg-i] = nscore[i][iadd]; | |
1690 } | |
1691 else | |
1692 { | |
1693 for( i=0; i<norg; i++ ) iscorec[i][norg-i] = iscore[i][norg+iadd-i]; | |
1694 } | |
1695 | |
1696 | |
1697 #if 0 | |
1698 for( i=0; i<njobc-1; i++ ) | |
1699 { | |
1700 fprintf( stderr, "i=%d\n", i ); | |
1701 for( j=i+1; j<njobc; j++ ) | |
1702 { | |
1703 fprintf( stderr, "%d-%d, %f\n", i, j, iscorec[i][j-i] ); | |
1704 } | |
1705 } | |
1706 #endif | |
1707 nlenc[norg] = nlen[norg+iadd]; | |
1708 namec[norg] = name[norg+iadd]; | |
1709 if( constraint) | |
1710 { | |
1711 for( i=0; i<norg; i++ ) | |
1712 { | |
1713 #if SMALLMEMORY | |
1714 if( multidist ) | |
1715 { | |
1716 localhomtablec[i][0] = localhomtable[i][iadd]; | |
1717 // localhomtablec[norg][i] = localhomtable[norg+iadd][i]; | |
1718 } | |
1719 else | |
1720 #endif | |
1721 { | |
1722 localhomtablec[i][norg] = localhomtable[i][norg+iadd]; | |
1723 localhomtablec[norg][i] = localhomtable[norg+iadd][i]; | |
1724 } | |
1725 } | |
1726 // localhomtablec[norg][norg] = localhomtable[norg+iadd][norg+iadd]; // iranai!! | |
1727 } | |
1728 | |
1729 // fprintf( stderr, "Constructing a UPGMA tree %d ... ", iadd ); | |
1730 // fflush( stderr ); | |
1731 | |
1732 | |
1733 // if( iadd == 0 ) | |
1734 // { | |
1735 // } | |
1736 // fixed_musclesupg_float_realloc_nobk_halfmtx( njobc, iscorec, topolc, lenc, depc, 0, 1 ); | |
1737 neighbor = addonetip( njobc, topolc, lenc, iscorec, topol, len, dep, treeout, addtree, iadd, name, alnleninnode, nogaplenjusttodecideaddhereornot, noalign ); | |
1738 | |
1739 | |
1740 FreeFloatHalfMtx( iscorec, njobc ); | |
1741 | |
1742 | |
1743 if( tbrweight ) | |
1744 { | |
1745 weight = 3; | |
1746 counteff_simple_float_nostatic( njobc, topolc, lenc, effc ); | |
1747 } | |
1748 else | |
1749 { | |
1750 for( i=0; i<njobc; i++ ) effc[i] = 1.0; | |
1751 } | |
1752 | |
1753 // FreeFloatMtx( lenc ); | |
1754 | |
1755 if( noalign ) // nen no tame weight wo keisan. | |
1756 { | |
1757 // FreeFloatHalfMtx( iscorec, njobc ); // saki ni continue suru baai ha fukkatsu. | |
1758 continue; | |
1759 } | |
1760 | |
1761 // reporterr( "iadd = %d\n", iadd ); | |
1762 | |
1763 #if 0 | |
1764 for( i=0; i<njobc-1; i++ ) | |
1765 { | |
1766 fprintf( stderr, "\n step %d\n", i ); | |
1767 fprintf( stderr, "topol[%d] = \n", i ); | |
1768 for( j=0; topolc[i][0][j]!=-1; j++ ) fprintf( stderr, "%d ", topolc[i][0][j] ); | |
1769 fprintf( stderr, "\n" ); | |
1770 fprintf( stderr, "len=%f\n", lenc [i][0] ); | |
1771 for( j=0; topolc[i][1][j]!=-1; j++ ) fprintf( stderr, "%d ", topolc[i][1][j] ); | |
1772 fprintf( stderr, "\n" ); | |
1773 fprintf( stderr, "len=%f\n", lenc [i][1] ); | |
1774 } | |
1775 | |
1776 fprintf( stderr, "\nneighbor = %d, iadd = %d\n", neighbor, iadd ); | |
1777 #endif | |
1778 follows[iadd] = neighbor; | |
1779 | |
1780 for( i=0; i<njobc-1; i++ ) mergeoralign[i] = 'n'; | |
1781 for( j=njobc-1; j<njobc; j++ ) | |
1782 { | |
1783 addmem[0] = j; | |
1784 addmem[1] = -1; | |
1785 for( i=0; i<njobc-1; i++ ) | |
1786 { | |
1787 if( samemembern( topolc[i][0], addmem, 1 ) ) // arieru | |
1788 { | |
1789 // fprintf( stderr, "HIT!\n" ); | |
1790 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w'; | |
1791 else mergeoralign[i] = '1'; | |
1792 } | |
1793 else if( samemembern( topolc[i][1], addmem, 1 ) ) | |
1794 { | |
1795 // fprintf( stderr, "HIT!\n" ); | |
1796 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w'; | |
1797 else mergeoralign[i] = '2'; | |
1798 } | |
1799 } | |
1800 } | |
1801 | |
1802 // for( i=0; i<1; i++ ) addmem[i] = njobc-1+i; | |
1803 addmem[0] = njobc-1; | |
1804 addmem[1] = -1; | |
1805 for( i=0; i<njobc-1; i++ ) | |
1806 { | |
1807 if( includemember( topolc[i][0], addmem ) && includemember( topolc[i][1], addmem ) ) | |
1808 { | |
1809 mergeoralign[i] = 'w'; | |
1810 } | |
1811 else if( includemember( topolc[i][0], addmem ) ) | |
1812 { | |
1813 mergeoralign[i] = '1'; | |
1814 // fprintf( stderr, "HIT 1! iadd=%d", iadd ); | |
1815 } | |
1816 else if( includemember( topolc[i][1], addmem ) ) | |
1817 { | |
1818 mergeoralign[i] = '2'; | |
1819 // fprintf( stderr, "HIT 2! iadd=%d", iadd ); | |
1820 } | |
1821 } | |
1822 #if 0 | |
1823 for( i=0; i<njob-1; i++ ) | |
1824 { | |
1825 fprintf( stderr, "mem0 = " ); | |
1826 for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][0][j] ); | |
1827 fprintf( stderr, "\n" ); | |
1828 fprintf( stderr, "mem1 = " ); | |
1829 for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][1][j] ); | |
1830 fprintf( stderr, "\n" ); | |
1831 fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] ); | |
1832 } | |
1833 #endif | |
1834 | |
1835 | |
1836 #if 0 | |
1837 for( i=0; i<norg; i++ ) fprintf( stderr, "seq[%d, iadd=%d] = \n%s\n", i, iadd, seq[i] ); | |
1838 fprintf( stderr, "gapmapS (iadd=%d) = \n", iadd ); | |
1839 for( i=0; i<lennocommongap; i++ ) fprintf( stderr, "%d\n", gapmapS[i] ); | |
1840 #endif | |
1841 | |
1842 | |
1843 // fprintf( stderr, "Progressive alignment ... \r" ); | |
1844 | |
1845 #if 0 | |
1846 pthread_mutex_lock( targ->mutex_counter ); | |
1847 fprintf( stdout, "\nmergeoralign (iadd=%d) = ", iadd ); | |
1848 for( i=0; i<njobc-1; i++ ) fprintf( stdout, "%c", mergeoralign[i] ); | |
1849 fprintf( stdout, "\n" ); | |
1850 pthread_mutex_unlock( targ->mutex_counter ); | |
1851 #endif | |
1852 singlerna = NULL; | |
1853 pscore = treebase( njobc, nlenc, bseq, 1, mergeoralign, mseq1, mseq2, topolc, effc, &alloclen, localhomtablec, singlerna, eff_kozo_mapped ); | |
1854 #if 0 | |
1855 pthread_mutex_lock( targ->mutex_counter ); | |
1856 // fprintf( stdout, "res (iadd=%d) = %s, pscore=%f\n", iadd, bseq[norg], pscore ); | |
1857 // fprintf( stdout, "effc (iadd=%d) = ", iadd ); | |
1858 // for( i=0; i<njobc; i++ ) fprintf( stdout, "%f ", effc[i] ); | |
1859 // fprintf( stdout, "\n" ); | |
1860 pthread_mutex_unlock( targ->mutex_counter ); | |
1861 #endif | |
1862 | |
1863 | |
1864 #if 0 | |
1865 fprintf( trap_g, "done.\n" ); | |
1866 fclose( trap_g ); | |
1867 #endif | |
1868 // fprintf( stdout, "\n>seq[%d, iadd=%d] = \n%s\n", norg+iadd, iadd, seq[norg+iadd] ); | |
1869 // fprintf( stdout, "\n>bseq[%d, iadd=%d] = \n%s\n", norg, iadd, bseq[norg] ); | |
1870 | |
1871 // strcpy( seq[norg+iadd], bseq[norg] ); | |
1872 strcpy( targetseq, bseq[norg] ); | |
1873 | |
1874 rep = -1; | |
1875 for( i=0; i<norg; i++ ) | |
1876 { | |
1877 // fprintf( stderr, "Checking %d/%d\n", i, norg ); | |
1878 if( strchr( bseq[i], '=' ) ) break; | |
1879 } | |
1880 if( i == norg ) | |
1881 istherenewgap[iadd] = 0; | |
1882 else | |
1883 { | |
1884 rep = i; | |
1885 istherenewgap[iadd] = 1; | |
1886 makenewgaplist( newgaplist[iadd], bseq[rep] ); | |
1887 // for( i=0; newgaplist[iadd][i]!=-1; i++ ) fprintf( stderr, "%d: %d\n", i, newgaplist[iadd][i] ); | |
1888 } | |
1889 eq2dash( targetseq ); | |
1890 | |
1891 } | |
1892 | |
1893 | |
1894 #if 1 | |
1895 if( constraint && localhomtablec ) | |
1896 { | |
1897 for( i=0; i<njobc; i++ ) free( localhomtablec[i] ); | |
1898 free( localhomtablec ); | |
1899 localhomtablec = NULL; | |
1900 } | |
1901 if( mergeoralign ) free( mergeoralign ); mergeoralign = NULL; | |
1902 if( nogaplenjusttodecideaddhereornot ) free( nogaplenjusttodecideaddhereornot ); nogaplenjusttodecideaddhereornot = NULL; | |
1903 if( alnleninnode ) free( alnleninnode ); alnleninnode = NULL; | |
1904 if( tmpseq ) free( tmpseq ); tmpseq = NULL; | |
1905 if( bseq ) FreeCharMtx( bseq ); bseq = NULL; | |
1906 if( namec ) free( namec ); namec = NULL; | |
1907 if( nlenc ) free( nlenc ); nlenc = NULL; | |
1908 if( depc ) free( depc ); depc = NULL; | |
1909 if( topolc ) FreeIntCub( topolc ); topolc = NULL; | |
1910 if( lenc ) FreeFloatMtx( lenc ); lenc = NULL; | |
1911 if( effc ) FreeDoubleVec( effc ); effc = NULL; | |
1912 if( memlist0 ) free( memlist0 ); memlist0 = NULL; | |
1913 if( memlist1 ) free( memlist1 ); memlist1 = NULL; | |
1914 if( addmem ) free( addmem ); addmem = NULL; | |
1915 if( mseq1 ) free( mseq1 ); mseq1 = NULL; | |
1916 if( mseq2 ) free( mseq2 ); mseq2 = NULL; | |
1917 Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); | |
1918 A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); | |
1919 if( commonIP ) FreeIntMtx( commonIP ); | |
1920 commonIP = NULL; | |
1921 commonAlloc1 = commonAlloc2 = 0; | |
1922 #endif | |
1923 // FreeFloatHalfMtx( iscorecbk, norg ); | |
1924 | |
1925 return( NULL ); | |
1926 } | |
1927 | |
1928 static int maxl; | |
1929 static int tsize; | |
1930 static int nunknown = 0; | |
1931 | |
1932 void seq_grp_nuc( int *grp, char *seq ) | |
1933 { | |
1934 int tmp; | |
1935 int *grpbk = grp; | |
1936 while( *seq ) | |
1937 { | |
1938 tmp = amino_grp[(int)*seq++]; | |
1939 if( tmp < 4 ) | |
1940 *grp++ = tmp; | |
1941 else | |
1942 nunknown++; | |
1943 } | |
1944 *grp = END_OF_VEC; | |
1945 if( grp - grpbk < tuplesize ) | |
1946 { | |
1947 // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); | |
1948 // exit( 1 ); | |
1949 *grpbk = -1; | |
1950 } | |
1951 } | |
1952 | |
1953 void seq_grp( int *grp, char *seq ) | |
1954 { | |
1955 int tmp; | |
1956 int *grpbk = grp; | |
1957 while( *seq ) | |
1958 { | |
1959 tmp = amino_grp[(int)*seq++]; | |
1960 if( tmp < 6 ) | |
1961 *grp++ = tmp; | |
1962 else | |
1963 nunknown++; | |
1964 } | |
1965 *grp = END_OF_VEC; | |
1966 if( grp - grpbk < 6 ) | |
1967 { | |
1968 // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" ); | |
1969 // exit( 1 ); | |
1970 *grpbk = -1; | |
1971 } | |
1972 } | |
1973 | |
1974 void makecompositiontable_p( int *table, int *pointt ) | |
1975 { | |
1976 int point; | |
1977 | |
1978 while( ( point = *pointt++ ) != END_OF_VEC ) | |
1979 table[point]++; | |
1980 } | |
1981 | |
1982 int commonsextet_p( int *table, int *pointt ) | |
1983 { | |
1984 int value = 0; | |
1985 int tmp; | |
1986 int point; | |
1987 static TLS int *memo = NULL; | |
1988 static TLS int *ct = NULL; | |
1989 int *cp; | |
1990 | |
1991 if( table == NULL ) | |
1992 { | |
1993 if( memo ) free( memo ); | |
1994 if( ct ) free( ct ); | |
1995 return( 0 ); | |
1996 } | |
1997 | |
1998 if( *pointt == -1 ) | |
1999 return( 0 ); | |
2000 | |
2001 if( !memo ) | |
2002 { | |
2003 memo = (int *)calloc( tsize, sizeof( int ) ); | |
2004 if( !memo ) ErrorExit( "Cannot allocate memo\n" ); | |
2005 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // chuui!! | |
2006 if( !ct ) ErrorExit( "Cannot allocate ct\n" ); | |
2007 } | |
2008 | |
2009 cp = ct; | |
2010 while( ( point = *pointt++ ) != END_OF_VEC ) | |
2011 { | |
2012 tmp = memo[point]++; | |
2013 if( tmp < table[point] ) | |
2014 value++; | |
2015 if( tmp == 0 ) *cp++ = point; | |
2016 } | |
2017 *cp = END_OF_VEC; | |
2018 | |
2019 cp = ct; | |
2020 while( *cp != END_OF_VEC ) | |
2021 memo[*cp++] = 0; | |
2022 | |
2023 return( value ); | |
2024 } | |
2025 | |
2026 void makepointtable_nuc_dectet( int *pointt, int *n ) | |
2027 { | |
2028 int point; | |
2029 register int *p; | |
2030 | |
2031 if( *n == -1 ) | |
2032 { | |
2033 *pointt = -1; | |
2034 return; | |
2035 } | |
2036 | |
2037 p = n; | |
2038 point = *n++ *262144; | |
2039 point += *n++ * 65536; | |
2040 point += *n++ * 16384; | |
2041 point += *n++ * 4096; | |
2042 point += *n++ * 1024; | |
2043 point += *n++ * 256; | |
2044 point += *n++ * 64; | |
2045 point += *n++ * 16; | |
2046 point += *n++ * 4; | |
2047 point += *n++; | |
2048 *pointt++ = point; | |
2049 | |
2050 while( *n != END_OF_VEC ) | |
2051 { | |
2052 point -= *p++ *262144; | |
2053 point *= 4; | |
2054 point += *n++; | |
2055 *pointt++ = point; | |
2056 | |
2057 } | |
2058 *pointt = END_OF_VEC; | |
2059 } | |
2060 | |
2061 void makepointtable_nuc_octet( int *pointt, int *n ) | |
2062 { | |
2063 int point; | |
2064 register int *p; | |
2065 | |
2066 if( *n == -1 ) | |
2067 { | |
2068 *pointt = -1; | |
2069 return; | |
2070 } | |
2071 | |
2072 p = n; | |
2073 point = *n++ * 16384; | |
2074 point += *n++ * 4096; | |
2075 point += *n++ * 1024; | |
2076 point += *n++ * 256; | |
2077 point += *n++ * 64; | |
2078 point += *n++ * 16; | |
2079 point += *n++ * 4; | |
2080 point += *n++; | |
2081 *pointt++ = point; | |
2082 | |
2083 while( *n != END_OF_VEC ) | |
2084 { | |
2085 point -= *p++ * 16384; | |
2086 point *= 4; | |
2087 point += *n++; | |
2088 *pointt++ = point; | |
2089 } | |
2090 *pointt = END_OF_VEC; | |
2091 } | |
2092 | |
2093 void makepointtable_nuc( int *pointt, int *n ) | |
2094 { | |
2095 int point; | |
2096 register int *p; | |
2097 | |
2098 if( *n == -1 ) | |
2099 { | |
2100 *pointt = -1; | |
2101 return; | |
2102 } | |
2103 | |
2104 p = n; | |
2105 point = *n++ * 1024; | |
2106 point += *n++ * 256; | |
2107 point += *n++ * 64; | |
2108 point += *n++ * 16; | |
2109 point += *n++ * 4; | |
2110 point += *n++; | |
2111 *pointt++ = point; | |
2112 | |
2113 while( *n != END_OF_VEC ) | |
2114 { | |
2115 point -= *p++ * 1024; | |
2116 point *= 4; | |
2117 point += *n++; | |
2118 *pointt++ = point; | |
2119 } | |
2120 *pointt = END_OF_VEC; | |
2121 } | |
2122 | |
2123 void makepointtable( int *pointt, int *n ) | |
2124 { | |
2125 int point; | |
2126 register int *p; | |
2127 | |
2128 if( *n == -1 ) | |
2129 { | |
2130 *pointt = -1; | |
2131 return; | |
2132 } | |
2133 | |
2134 p = n; | |
2135 point = *n++ * 7776; | |
2136 point += *n++ * 1296; | |
2137 point += *n++ * 216; | |
2138 point += *n++ * 36; | |
2139 point += *n++ * 6; | |
2140 point += *n++; | |
2141 *pointt++ = point; | |
2142 | |
2143 while( *n != END_OF_VEC ) | |
2144 { | |
2145 point -= *p++ * 7776; | |
2146 point *= 6; | |
2147 point += *n++; | |
2148 *pointt++ = point; | |
2149 } | |
2150 *pointt = END_OF_VEC; | |
2151 } | |
2152 | |
2153 #ifdef enablemultithread | |
2154 | |
2155 void *dndprethread( void *arg ) | |
2156 { | |
2157 dndprethread_arg_t *targ = (dndprethread_arg_t *)arg; | |
2158 int njob = targ->njob; | |
2159 int thread_no = targ->thread_no; | |
2160 float *selfscore = targ->selfscore; | |
2161 float **mtx = targ->mtx; | |
2162 char **seq = targ->seq; | |
2163 Jobtable2d *jobpospt = targ->jobpospt; | |
2164 | |
2165 int i, j; | |
2166 float ssi, ssj, bunbo; | |
2167 float mtxv; | |
2168 | |
2169 if( njob == 1 ) return( NULL ); | |
2170 | |
2171 while( 1 ) | |
2172 { | |
2173 pthread_mutex_lock( targ->mutex ); | |
2174 j = jobpospt->j; | |
2175 i = jobpospt->i; | |
2176 j++; | |
2177 // fprintf( stderr, "\n i=%d, j=%d before check\n", i, j ); | |
2178 if( j == njob ) | |
2179 { | |
2180 // fprintf( stderr, "\n j = %d, i = %d, njob = %d\n", j, i, njob ); | |
2181 fprintf( stderr, "%4d/%4d (thread %4d), dndpre\r", i+1, njob, thread_no ); | |
2182 i++; | |
2183 j = i + 1; | |
2184 if( i == njob-1 ) | |
2185 { | |
2186 // fprintf( stderr, "\n i=%d, njob-1=%d\n", i, njob-1 ); | |
2187 pthread_mutex_unlock( targ->mutex ); | |
2188 return( NULL ); | |
2189 } | |
2190 } | |
2191 // fprintf( stderr, "\n i=%d, j=%d after check\n", i, j ); | |
2192 jobpospt->j = j; | |
2193 jobpospt->i = i; | |
2194 pthread_mutex_unlock( targ->mutex ); | |
2195 | |
2196 ssi = selfscore[i]; | |
2197 ssj = selfscore[j]; | |
2198 | |
2199 bunbo = MIN( ssi, ssj ); | |
2200 if( bunbo == 0.0 ) | |
2201 mtxv = maxdist; | |
2202 else | |
2203 mtxv = maxdist * ( 1.0 - (float)naivepairscore11( seq[i], seq[j], penalty * 10 ) / bunbo ); | |
2204 #if 1 | |
2205 if( mtxv > 9.0 || mtxv < 0.0 ) | |
2206 { | |
2207 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); | |
2208 exit( 1 ); | |
2209 } | |
2210 #else // CHUUI!!! 2012/05/16 | |
2211 if( mtxv > 2.0 ) | |
2212 { | |
2213 mtxv = 2.0; | |
2214 } | |
2215 if( mtxv < 0.0 ) | |
2216 { | |
2217 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); | |
2218 exit( 1 ); | |
2219 } | |
2220 #endif | |
2221 mtx[i][j-i] = mtxv; | |
2222 } | |
2223 } | |
2224 | |
2225 static void *gaplist2alnxthread( void *arg ) | |
2226 { | |
2227 gaplist2alnxthread_arg_t *targ = (gaplist2alnxthread_arg_t *)arg; | |
2228 // int thread_no = targ->thread_no; | |
2229 int ncycle = targ->ncycle; | |
2230 char **seq = targ->seq; | |
2231 int *newgaplist = targ->newgaplist; | |
2232 int *posmap = targ->posmap; | |
2233 int *jobpospt = targ->jobpospt; | |
2234 int tmpseqlen = targ->tmpseqlen; | |
2235 int lenfull = targ->lenfull; | |
2236 char *tmpseq1; | |
2237 int i; | |
2238 | |
2239 tmpseq1 = AllocateCharVec( tmpseqlen ); | |
2240 | |
2241 while( 1 ) | |
2242 { | |
2243 pthread_mutex_lock( targ->mutex ); | |
2244 i = *jobpospt; | |
2245 if( i == ncycle ) | |
2246 { | |
2247 pthread_mutex_unlock( targ->mutex ); | |
2248 free( tmpseq1 ); | |
2249 return( NULL ); | |
2250 } | |
2251 *jobpospt = i+1; | |
2252 pthread_mutex_unlock( targ->mutex ); | |
2253 | |
2254 gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist, posmap, tmpseqlen ); | |
2255 // fprintf( stderr, ">%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 ); | |
2256 strcpy( seq[i], tmpseq1 ); | |
2257 } | |
2258 } | |
2259 | |
2260 static void *distancematrixthread( void *arg ) | |
2261 { | |
2262 distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; | |
2263 int thread_no = targ->thread_no; | |
2264 int njob = targ->njob; | |
2265 int norg = targ->norg; | |
2266 int *jobpospt = targ->jobpospt; | |
2267 int **pointt = targ->pointt; | |
2268 float **imtx = targ->imtx; | |
2269 float **nmtx = targ->nmtx; | |
2270 float *selfscore = targ->selfscore; | |
2271 int *nogaplen = targ->nogaplen; | |
2272 | |
2273 float lenfac, bunbo, longer, shorter, mtxv; | |
2274 int *table1; | |
2275 int i, j; | |
2276 | |
2277 while( 1 ) | |
2278 { | |
2279 pthread_mutex_lock( targ->mutex ); | |
2280 i = *jobpospt; | |
2281 if( i == norg ) | |
2282 { | |
2283 pthread_mutex_unlock( targ->mutex ); | |
2284 commonsextet_p( NULL, NULL ); | |
2285 return( NULL ); | |
2286 } | |
2287 *jobpospt = i+1; | |
2288 pthread_mutex_unlock( targ->mutex ); | |
2289 | |
2290 table1 = (int *)calloc( tsize, sizeof( int ) ); | |
2291 if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); | |
2292 if( i % 100 == 0 ) | |
2293 { | |
2294 fprintf( stderr, "\r% 5d / %d (thread %4d)", i+1, norg, thread_no ); | |
2295 } | |
2296 makecompositiontable_p( table1, pointt[i] ); | |
2297 | |
2298 for( j=i+1; j<njob; j++ ) | |
2299 { | |
2300 mtxv = (float)commonsextet_p( table1, pointt[j] ); | |
2301 if( nogaplen[i] > nogaplen[j] ) | |
2302 { | |
2303 longer=(float)nogaplen[i]; | |
2304 shorter=(float)nogaplen[j]; | |
2305 } | |
2306 else | |
2307 { | |
2308 longer=(float)nogaplen[j]; | |
2309 shorter=(float)nogaplen[i]; | |
2310 } | |
2311 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); | |
2312 bunbo = MIN( selfscore[i], selfscore[j] ); | |
2313 | |
2314 if( j < norg ) | |
2315 { | |
2316 if( bunbo == 0.0 ) | |
2317 imtx[i][j-i] = maxdist; | |
2318 else | |
2319 imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; | |
2320 | |
2321 } | |
2322 else | |
2323 { | |
2324 if( bunbo == 0.0 ) | |
2325 nmtx[i][j-norg] = maxdist; | |
2326 else | |
2327 nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; | |
2328 } | |
2329 } | |
2330 free( table1 ); | |
2331 | |
2332 // for( j=i+1; j<norg; j++ ) | |
2333 // imtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] ); | |
2334 // for( j=norg; j<njob; j++ ) | |
2335 // nmtx[i][j-norg] = (float)commonsextet_p( table1, pointt[j] ); | |
2336 // free( table1 ); | |
2337 } | |
2338 } | |
2339 #endif | |
2340 | |
2341 | |
2342 void ktupledistancematrix( int nseq, int norg, int nlenmax, char **seq, char **name, float **imtx, float **nmtx ) | |
2343 { | |
2344 char *tmpseq; | |
2345 int *grpseq; | |
2346 int **pointt; | |
2347 int i, j; | |
2348 int *nogaplen; | |
2349 int *table1; | |
2350 float lenfac, bunbo, longer, shorter, mtxv; | |
2351 float *selfscore; | |
2352 selfscore = AllocateFloatVec( nseq ); | |
2353 | |
2354 fprintf( stderr, "\n\nMaking a distance matrix ..\n" ); | |
2355 fflush( stderr ); | |
2356 | |
2357 tmpseq = AllocateCharVec( nlenmax+1 ); | |
2358 grpseq = AllocateIntVec( nlenmax+1 ); | |
2359 pointt = AllocateIntMtx( nseq, nlenmax+1 ); | |
2360 nogaplen = AllocateIntVec( nseq ); | |
2361 | |
2362 if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize ); | |
2363 else tsize = (int)pow( 6, 6 ); | |
2364 | |
2365 if( dorp == 'd' && tuplesize == 6 ) | |
2366 { | |
2367 lenfaca = D6LENFACA; | |
2368 lenfacb = D6LENFACB; | |
2369 lenfacc = D6LENFACC; | |
2370 lenfacd = D6LENFACD; | |
2371 } | |
2372 else if( dorp == 'd' && tuplesize == 10 ) | |
2373 { | |
2374 lenfaca = D10LENFACA; | |
2375 lenfacb = D10LENFACB; | |
2376 lenfacc = D10LENFACC; | |
2377 lenfacd = D10LENFACD; | |
2378 } | |
2379 else | |
2380 { | |
2381 lenfaca = PLENFACA; | |
2382 lenfacb = PLENFACB; | |
2383 lenfacc = PLENFACC; | |
2384 lenfacd = PLENFACD; | |
2385 } | |
2386 | |
2387 maxl = 0; | |
2388 for( i=0; i<nseq; i++ ) | |
2389 { | |
2390 gappick0( tmpseq, seq[i] ); | |
2391 nogaplen[i] = strlen( tmpseq ); | |
2392 if( nogaplen[i] < 6 ) | |
2393 { | |
2394 // fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] ); | |
2395 // fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" ); | |
2396 // exit( 1 ); | |
2397 } | |
2398 if( nogaplen[i] > maxl ) maxl = nogaplen[i]; | |
2399 if( dorp == 'd' ) /* nuc */ | |
2400 { | |
2401 seq_grp_nuc( grpseq, tmpseq ); | |
2402 // makepointtable_nuc( pointt[i], grpseq ); | |
2403 // makepointtable_nuc_octet( pointt[i], grpseq ); | |
2404 if( tuplesize == 10 ) | |
2405 makepointtable_nuc_dectet( pointt[i], grpseq ); | |
2406 else if( tuplesize == 6 ) | |
2407 makepointtable_nuc( pointt[i], grpseq ); | |
2408 else | |
2409 { | |
2410 fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize ); | |
2411 exit( 1 ); | |
2412 } | |
2413 } | |
2414 else /* amino */ | |
2415 { | |
2416 seq_grp( grpseq, tmpseq ); | |
2417 makepointtable( pointt[i], grpseq ); | |
2418 } | |
2419 | |
2420 } | |
2421 if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown ); | |
2422 | |
2423 for( i=0; i<nseq; i++ ) // serial de jubun | |
2424 { | |
2425 table1 = (int *)calloc( tsize, sizeof( int ) ); | |
2426 if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); | |
2427 makecompositiontable_p( table1, pointt[i] ); | |
2428 | |
2429 selfscore[i] = (float)commonsextet_p( table1, pointt[i] ); | |
2430 free( table1 ); | |
2431 } | |
2432 | |
2433 #ifdef enablemultithread | |
2434 if( nthread > 0 ) | |
2435 { | |
2436 distancematrixthread_arg_t *targ; | |
2437 int jobpos; | |
2438 pthread_t *handle; | |
2439 pthread_mutex_t mutex; | |
2440 | |
2441 jobpos = 0; | |
2442 targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); | |
2443 handle = calloc( nthread, sizeof( pthread_t ) ); | |
2444 pthread_mutex_init( &mutex, NULL ); | |
2445 | |
2446 for( i=0; i<nthread; i++ ) | |
2447 { | |
2448 targ[i].thread_no = i; | |
2449 targ[i].njob = nseq; | |
2450 targ[i].norg = norg; | |
2451 targ[i].jobpospt = &jobpos; | |
2452 targ[i].pointt = pointt; | |
2453 targ[i].imtx = imtx; | |
2454 targ[i].nmtx = nmtx; | |
2455 targ[i].selfscore = selfscore; | |
2456 targ[i].nogaplen = nogaplen; | |
2457 targ[i].mutex = &mutex; | |
2458 | |
2459 pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) ); | |
2460 } | |
2461 | |
2462 for( i=0; i<nthread; i++ ) | |
2463 { | |
2464 pthread_join( handle[i], NULL ); | |
2465 } | |
2466 pthread_mutex_destroy( &mutex ); | |
2467 free( handle ); | |
2468 free( targ ); | |
2469 | |
2470 } | |
2471 else | |
2472 #endif | |
2473 { | |
2474 for( i=0; i<norg; i++ ) | |
2475 { | |
2476 table1 = (int *)calloc( tsize, sizeof( int ) ); | |
2477 if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); | |
2478 if( i % 100 == 0 ) | |
2479 { | |
2480 fprintf( stderr, "\r% 5d / %d", i+1, norg ); | |
2481 fflush( stderr ); | |
2482 } | |
2483 makecompositiontable_p( table1, pointt[i] ); | |
2484 | |
2485 for( j=i+1; j<nseq; j++ ) | |
2486 { | |
2487 mtxv = (float)commonsextet_p( table1, pointt[j] ); | |
2488 if( nogaplen[i] > nogaplen[j] ) | |
2489 { | |
2490 longer=(float)nogaplen[i]; | |
2491 shorter=(float)nogaplen[j]; | |
2492 } | |
2493 else | |
2494 { | |
2495 longer=(float)nogaplen[j]; | |
2496 shorter=(float)nogaplen[i]; | |
2497 } | |
2498 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); | |
2499 bunbo = MIN( selfscore[i], selfscore[j] ); | |
2500 | |
2501 if( j < norg ) | |
2502 { | |
2503 if( bunbo == 0.0 ) | |
2504 imtx[i][j-i] = maxdist; | |
2505 else | |
2506 imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; | |
2507 } | |
2508 else | |
2509 { | |
2510 if( bunbo == 0.0 ) | |
2511 nmtx[i][j-norg] = maxdist; | |
2512 else | |
2513 nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; | |
2514 } | |
2515 } | |
2516 free( table1 ); | |
2517 } | |
2518 } | |
2519 | |
2520 fprintf( stderr, "\ndone.\n\n" ); | |
2521 fflush( stderr ); | |
2522 | |
2523 for( i=0; i<norg; i++ ) | |
2524 { | |
2525 for( j=i+1; j<norg; j++ ) | |
2526 { | |
2527 | |
2528 } | |
2529 for( j=norg; j<nseq; j++ ) | |
2530 { | |
2531 } | |
2532 } | |
2533 free( grpseq ); | |
2534 free( tmpseq ); | |
2535 FreeIntMtx( pointt ); | |
2536 free( nogaplen ); | |
2537 free( selfscore ); | |
2538 | |
2539 #if 0 // writehat2 wo kakinaosu | |
2540 if( distout ) | |
2541 { | |
2542 hat2p = fopen( "hat2", "w" ); | |
2543 WriteFloatHat2_pointer_halfmtx( hat2p, nseq, name, mtx ); | |
2544 fclose( hat2p ); | |
2545 } | |
2546 #endif | |
2547 } | |
2548 | |
2549 void dndpre( int nseq, char **seq, float **mtx ) // not used yet | |
2550 { | |
2551 int i, j, ilim; | |
2552 float *selfscore; | |
2553 float mtxv; | |
2554 float ssi, ssj, bunbo; | |
2555 | |
2556 selfscore = AllocateFloatVec( nseq ); | |
2557 | |
2558 for( i=0; i<nseq; i++ ) | |
2559 { | |
2560 selfscore[i] = (float)naivepairscore11( seq[i], seq[i], 0 ); | |
2561 } | |
2562 #ifdef enablemultithread | |
2563 if( nthread > 0 ) | |
2564 { | |
2565 dndprethread_arg_t *targ; | |
2566 Jobtable2d jobpos; | |
2567 pthread_t *handle; | |
2568 pthread_mutex_t mutex; | |
2569 | |
2570 jobpos.i = 0; | |
2571 jobpos.j = 0; | |
2572 | |
2573 targ = calloc( nthread, sizeof( dndprethread_arg_t ) ); | |
2574 handle = calloc( nthread, sizeof( pthread_t ) ); | |
2575 pthread_mutex_init( &mutex, NULL ); | |
2576 | |
2577 for( i=0; i<nthread; i++ ) | |
2578 { | |
2579 targ[i].thread_no = i; | |
2580 targ[i].njob = nseq; | |
2581 targ[i].selfscore = selfscore; | |
2582 targ[i].mtx = mtx; | |
2583 targ[i].seq = seq; | |
2584 targ[i].jobpospt = &jobpos; | |
2585 targ[i].mutex = &mutex; | |
2586 | |
2587 pthread_create( handle+i, NULL, dndprethread, (void *)(targ+i) ); | |
2588 } | |
2589 | |
2590 for( i=0; i<nthread; i++ ) | |
2591 { | |
2592 pthread_join( handle[i], NULL ); | |
2593 } | |
2594 pthread_mutex_destroy( &mutex ); | |
2595 | |
2596 } | |
2597 else | |
2598 #endif | |
2599 { | |
2600 ilim = nseq-1; | |
2601 for( i=0; i<ilim; i++ ) | |
2602 { | |
2603 ssi = selfscore[i]; | |
2604 fprintf( stderr, "%4d/%4d\r", i+1, nseq ); | |
2605 | |
2606 for( j=i+1; j<nseq; j++ ) | |
2607 { | |
2608 ssj = selfscore[j]; | |
2609 bunbo = MIN( ssi, ssj ); | |
2610 if( bunbo == 0.0 ) | |
2611 mtxv = maxdist; | |
2612 else | |
2613 mtxv = maxdist * ( 1.0 - (float)naivepairscore11( seq[i], seq[j], penalty * 10 ) / bunbo ); | |
2614 | |
2615 #if 1 | |
2616 if( mtxv > 9.0 || mtxv < 0.0 ) | |
2617 { | |
2618 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); | |
2619 exit( 1 ); | |
2620 } | |
2621 #else // CHUUI!!! 2012/05/16 | |
2622 if( mtxv > 2.0 ) | |
2623 { | |
2624 mtxv = 2.0; | |
2625 } | |
2626 if( mtxv < 0.0 ) | |
2627 { | |
2628 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); | |
2629 exit( 1 ); | |
2630 } | |
2631 #endif | |
2632 mtx[i][j-i] = mtxv; | |
2633 } | |
2634 } | |
2635 } | |
2636 | |
2637 #if TEST | |
2638 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) | |
2639 fprintf( stdout, "i=%d, j=%d, mtx[][] = %f\n", i, j, mtx[i][j] ); | |
2640 #endif | |
2641 free( selfscore ); | |
2642 | |
2643 } | |
2644 | |
2645 int main( int argc, char *argv[] ) | |
2646 { | |
2647 static int *nlen; | |
2648 static char **name, **seq; | |
2649 static char **tmpseq; | |
2650 static char *tmpseq1; | |
2651 // static char *check1, *check2; | |
2652 static float **iscore, **iscore_kozo; | |
2653 static double *eff_kozo, *eff_kozo_mapped = NULL; | |
2654 int i, j, f, ien; | |
2655 int iadd; | |
2656 static int ***topol_kozo; | |
2657 Treedep *dep; | |
2658 static float **len_kozo; | |
2659 FILE *prep; | |
2660 FILE *infp; | |
2661 FILE *hat2p; | |
2662 int alignmentlength; | |
2663 char c; | |
2664 int alloclen, fullseqlen, tmplen; | |
2665 LocalHom **localhomtable = NULL; | |
2666 static char *kozoarivec; | |
2667 int nkozo; | |
2668 int njobc, norg, lenfull; | |
2669 int **newgaplist_o; | |
2670 int *newgaplist_compact; | |
2671 int **follower; | |
2672 int *follows; | |
2673 int *istherenewgap; | |
2674 int zure; | |
2675 int *posmap; | |
2676 int *ordertable; | |
2677 FILE *orderfp; | |
2678 int tmpseqlen; | |
2679 Blocktorealign *realign; | |
2680 RNApair ***singlerna; | |
2681 int ***topol; | |
2682 float **len; | |
2683 float **iscoreo, **nscore; | |
2684 FILE *fp; | |
2685 Addtree *addtree; | |
2686 | |
2687 | |
2688 arguments( argc, argv ); | |
2689 #ifndef enablemultithread | |
2690 nthread = 0; | |
2691 #endif | |
2692 | |
2693 if( fastathreshold < 0.0001 ) constraint = 0; | |
2694 | |
2695 if( inputfile ) | |
2696 { | |
2697 infp = fopen( inputfile, "r" ); | |
2698 if( !infp ) | |
2699 { | |
2700 fprintf( stderr, "Cannot open %s\n", inputfile ); | |
2701 exit( 1 ); | |
2702 } | |
2703 } | |
2704 else | |
2705 infp = stdin; | |
2706 | |
2707 getnumlen( infp ); | |
2708 rewind( infp ); | |
2709 | |
2710 | |
2711 nkozo = 0; | |
2712 | |
2713 if( njob < 2 ) | |
2714 { | |
2715 fprintf( stderr, "At least 2 sequences should be input!\n" | |
2716 "Only %d sequence found.\n", njob ); | |
2717 exit( 1 ); | |
2718 } | |
2719 | |
2720 norg = njob-nadd; | |
2721 njobc = norg+1; | |
2722 fprintf( stderr, "norg = %d\n", norg ); | |
2723 fprintf( stderr, "njobc = %d\n", njobc ); | |
2724 if( norg > 1000 || nadd > 1000 ) use_fft = 0; | |
2725 | |
2726 fullseqlen = alloclen = nlenmax*4+1; //chuui! | |
2727 seq = AllocateCharMtx( njob, alloclen ); | |
2728 | |
2729 name = AllocateCharMtx( njob, B+1 ); | |
2730 nlen = AllocateIntVec( njob ); | |
2731 | |
2732 | |
2733 if( multidist || tuplesize > 0 ) | |
2734 { | |
2735 iscore = AllocateFloatHalfMtx( norg ); | |
2736 nscore = AllocateFloatMtx( norg, nadd ); | |
2737 } | |
2738 else | |
2739 { | |
2740 iscore = AllocateFloatHalfMtx( njob ); | |
2741 nscore = NULL; | |
2742 } | |
2743 | |
2744 kozoarivec = AllocateCharVec( njob ); | |
2745 | |
2746 | |
2747 ordertable = AllocateIntVec( norg+1 ); | |
2748 | |
2749 | |
2750 if( constraint ) | |
2751 { | |
2752 #if SMALLMEMORY | |
2753 if( multidist ) | |
2754 { | |
2755 localhomtable = (LocalHom **)calloc( norg, sizeof( LocalHom *) ); | |
2756 for( i=0; i<norg; i++) | |
2757 { | |
2758 localhomtable[i] = (LocalHom *)calloc( nadd, sizeof( LocalHom ) ); | |
2759 for( j=0; j<nadd; j++) | |
2760 { | |
2761 localhomtable[i][j].start1 = -1; | |
2762 localhomtable[i][j].end1 = -1; | |
2763 localhomtable[i][j].start2 = -1; | |
2764 localhomtable[i][j].end2 = -1; | |
2765 localhomtable[i][j].overlapaa = -1.0; | |
2766 localhomtable[i][j].opt = -1.0; | |
2767 localhomtable[i][j].importance = -1.0; | |
2768 localhomtable[i][j].next = NULL; | |
2769 localhomtable[i][j].korh = 'h'; | |
2770 } | |
2771 } | |
2772 // localhomtable = (LocalHom **)calloc( norg+nadd, sizeof( LocalHom *) ); | |
2773 // for( i=norg; i<norg+nadd; i++) // hontou ha iranai | |
2774 // { | |
2775 // localhomtable[i] = (LocalHom *)calloc( norg, sizeof( LocalHom ) ); | |
2776 // for( j=0; j<norg; j++) | |
2777 // { | |
2778 // localhomtable[i][j].start1 = -1; | |
2779 // localhomtable[i][j].end1 = -1; | |
2780 // localhomtable[i][j].start2 = -1; | |
2781 // localhomtable[i][j].end2 = -1; | |
2782 // localhomtable[i][j].overlapaa = -1.0; | |
2783 // localhomtable[i][j].opt = -1.0; | |
2784 // localhomtable[i][j].importance = -1.0; | |
2785 // localhomtable[i][j].next = NULL; | |
2786 // localhomtable[i][j].korh = 'h'; | |
2787 // } | |
2788 // } | |
2789 } | |
2790 else | |
2791 #endif | |
2792 { | |
2793 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); | |
2794 for( i=0; i<njob; i++) | |
2795 { | |
2796 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); | |
2797 for( j=0; j<njob; j++) | |
2798 { | |
2799 localhomtable[i][j].start1 = -1; | |
2800 localhomtable[i][j].end1 = -1; | |
2801 localhomtable[i][j].start2 = -1; | |
2802 localhomtable[i][j].end2 = -1; | |
2803 localhomtable[i][j].overlapaa = -1.0; | |
2804 localhomtable[i][j].opt = -1.0; | |
2805 localhomtable[i][j].importance = -1.0; | |
2806 localhomtable[i][j].next = NULL; | |
2807 localhomtable[i][j].korh = 'h'; | |
2808 } | |
2809 } | |
2810 } | |
2811 | |
2812 fprintf( stderr, "Loading 'hat3' ... " ); | |
2813 prep = fopen( "hat3", "r" ); | |
2814 if( prep == NULL ) ErrorExit( "Make hat3." ); | |
2815 #if SMALLMEMORY | |
2816 if( multidist ) | |
2817 { | |
2818 // readlocalhomtable_two( prep, norg, nadd, localhomtable, localhomtable+norg, kozoarivec ); | |
2819 readlocalhomtable_one( prep, norg, nadd, localhomtable, kozoarivec ); | |
2820 } | |
2821 else | |
2822 #endif | |
2823 { | |
2824 readlocalhomtable( prep, njob, localhomtable, kozoarivec ); | |
2825 } | |
2826 | |
2827 fclose( prep ); | |
2828 fprintf( stderr, "\ndone.\n" ); | |
2829 | |
2830 | |
2831 nkozo = 0; | |
2832 for( i=0; i<njob; i++ ) | |
2833 { | |
2834 // fprintf( stderr, "kozoarivec[%d] = %d\n", i, kozoarivec[i] ); | |
2835 if( kozoarivec[i] ) nkozo++; | |
2836 } | |
2837 if( nkozo ) | |
2838 { | |
2839 topol_kozo = AllocateIntCub( nkozo, 2, 0 ); | |
2840 len_kozo = AllocateFloatMtx( nkozo, 2 ); | |
2841 iscore_kozo = AllocateFloatHalfMtx( nkozo ); | |
2842 eff_kozo = AllocateDoubleVec( nkozo ); | |
2843 eff_kozo_mapped = AllocateDoubleVec( njob ); | |
2844 } | |
2845 | |
2846 | |
2847 #if SMALLMEMORY | |
2848 // outlocalhom_part( localhomtable, norg, nadd ); | |
2849 #else | |
2850 // outlocalhom( localhomtable, njob ); | |
2851 #endif | |
2852 | |
2853 #if 0 | |
2854 fprintf( stderr, "Extending localhom ... " ); | |
2855 extendlocalhom2( njob, localhomtable ); | |
2856 fprintf( stderr, "done.\n" ); | |
2857 #endif | |
2858 } | |
2859 | |
2860 #if 0 | |
2861 readData( infp, name, nlen, seq ); | |
2862 #else | |
2863 readData_pointer( infp, name, nlen, seq ); | |
2864 fclose( infp ); | |
2865 #endif | |
2866 | |
2867 constants( njob, seq ); | |
2868 | |
2869 #if 0 | |
2870 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); | |
2871 #endif | |
2872 | |
2873 initSignalSM(); | |
2874 | |
2875 initFiles(); | |
2876 | |
2877 // WriteOptions( trap_g ); | |
2878 | |
2879 c = seqcheck( seq ); | |
2880 if( c ) | |
2881 { | |
2882 fprintf( stderr, "Illegal character %c\n", c ); | |
2883 exit( 1 ); | |
2884 } | |
2885 | |
2886 alignmentlength = strlen( seq[0] ); | |
2887 for( i=0; i<norg; i++ ) | |
2888 { | |
2889 if( alignmentlength != strlen( seq[i] ) ) | |
2890 { | |
2891 fprintf( stderr, "#################################################################################\n" ); | |
2892 fprintf( stderr, "# ERROR! #\n" ); | |
2893 fprintf( stderr, "# The original%4d sequences must be aligned #\n", njob-nadd ); | |
2894 fprintf( stderr, "#################################################################################\n" ); | |
2895 exit( 1 ); | |
2896 } | |
2897 } | |
2898 if( addprofile ) | |
2899 { | |
2900 fprintf( stderr, "Not supported!\n" ); | |
2901 exit( 1 ); | |
2902 } | |
2903 | |
2904 if( tuplesize > 0 ) // if mtx is internally computed | |
2905 { | |
2906 if( multidist == 1 ) | |
2907 { | |
2908 ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); // iscore ha muda. | |
2909 | |
2910 // hat2p = fopen( "hat2-1", "w" ); | |
2911 // WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore ); | |
2912 // fclose( hat2p ); | |
2913 | |
2914 dndpre( norg, seq, iscore ); | |
2915 // fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); | |
2916 // prep = fopen( "hat2i", "r" ); | |
2917 // if( prep == NULL ) ErrorExit( "Make hat2i." ); | |
2918 // readhat2_floathalf_pointer( prep, njob-nadd, name, iscore ); | |
2919 // fclose( prep ); | |
2920 // fprintf( stderr, "done.\n" ); | |
2921 | |
2922 // hat2p = fopen( "hat2-2", "w" ); | |
2923 // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore ); | |
2924 // fclose( hat2p ); | |
2925 } | |
2926 else | |
2927 { | |
2928 ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); | |
2929 } | |
2930 } | |
2931 else | |
2932 { | |
2933 if( multidist == 1 ) | |
2934 { | |
2935 fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " ); | |
2936 prep = fopen( "hat2n", "r" ); | |
2937 if( prep == NULL ) ErrorExit( "Make hat2n." ); | |
2938 readhat2_floathalf_part_pointer( prep, njob, nadd, name, nscore ); | |
2939 fclose( prep ); | |
2940 fprintf( stderr, "done.\n" ); | |
2941 | |
2942 fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); | |
2943 prep = fopen( "hat2i", "r" ); | |
2944 if( prep == NULL ) ErrorExit( "Make hat2i." ); | |
2945 readhat2_floathalf_pointer( prep, njob-nadd, name, iscore ); | |
2946 fclose( prep ); | |
2947 fprintf( stderr, "done.\n" ); | |
2948 } | |
2949 else | |
2950 { | |
2951 fprintf( stderr, "Loading 'hat2' ... " ); | |
2952 prep = fopen( "hat2", "r" ); | |
2953 if( prep == NULL ) ErrorExit( "Make hat2." ); | |
2954 readhat2_floathalf_pointer( prep, njob, name, iscore ); | |
2955 fclose( prep ); | |
2956 fprintf( stderr, "done.\n" ); | |
2957 } | |
2958 } | |
2959 | |
2960 #if 1 | |
2961 if( distout ) | |
2962 { | |
2963 fprintf( stderr, "Writing distances between new sequences and existing msa.\n" ); | |
2964 hat2p = fopen( "hat2", "w" ); | |
2965 if( multidist || tuplesize > 0 ) | |
2966 { | |
2967 for( iadd=0; iadd<nadd; iadd++ ) | |
2968 { | |
2969 fprintf( hat2p, "Distance between new sequence %d and %d sequences in existing msa\n", iadd+1, norg ); | |
2970 for( i=0; i<norg; i++ ) | |
2971 { | |
2972 fprintf( hat2p, "%5.3f ", nscore[i][iadd] ); | |
2973 if( (i+1) % 12 == 0 ) fprintf( hat2p, "\n" ); | |
2974 } | |
2975 fprintf( hat2p, "\n\n" ); | |
2976 } | |
2977 } | |
2978 else | |
2979 { | |
2980 for( iadd=0; iadd<nadd; iadd++ ) | |
2981 { | |
2982 fprintf( hat2p, "Distance between new sequence %d and %d sequences in existing msa\n", iadd+1, norg ); | |
2983 for( i=0; i<norg; i++ ) | |
2984 { | |
2985 fprintf( hat2p, "%5.3f ", iscore[i][norg+iadd-i] ); | |
2986 if( (i+1) % 12 == 0 ) fprintf( hat2p, "\n" ); | |
2987 } | |
2988 fprintf( hat2p, "\n\n" ); | |
2989 } | |
2990 } | |
2991 fclose( hat2p ); | |
2992 // exit( 1 ); | |
2993 // hat2p = fopen( "hat2", "w" ); | |
2994 // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore ); | |
2995 // fclose( hat2p ); | |
2996 // exit( 1 ); | |
2997 } | |
2998 #endif | |
2999 | |
3000 | |
3001 singlerna = NULL; | |
3002 | |
3003 commongappick( norg, seq ); | |
3004 | |
3005 lenfull = strlen( seq[0] ); | |
3006 | |
3007 // newgaplist_o = AllocateIntMtx( nadd, alloclen ); //ookisugi | |
3008 newgaplist_o = AllocateIntMtx( nadd, lenfull*2 ); | |
3009 newgaplist_compact = AllocateIntVec( lenfull*2 ); | |
3010 istherenewgap = AllocateIntVec( nadd ); | |
3011 follower = AllocateIntMtx( norg, 1 ); | |
3012 for( i=0; i<norg; i++ ) follower[i][0] = -1; | |
3013 follows = AllocateIntVec( nadd ); | |
3014 | |
3015 dep = (Treedep *)calloc( norg, sizeof( Treedep ) ); | |
3016 topol = AllocateIntCub( norg, 2, 0 ); | |
3017 len = AllocateFloatMtx( norg, 2 ); | |
3018 // iscoreo = AllocateFloatHalfMtx( norg ); | |
3019 mtxcpy( norg, norg, &iscoreo, iscore ); | |
3020 | |
3021 if( treeout ) | |
3022 { | |
3023 addtree = (Addtree *)calloc( nadd, sizeof( Addtree ) ); | |
3024 if( !addtree ) | |
3025 { | |
3026 fprintf( stderr, "Cannot allocate addtree\n" ); | |
3027 exit( 1 ); | |
3028 } | |
3029 } | |
3030 | |
3031 | |
3032 // nlim = norg-1; | |
3033 // for( i=0; i<nlim; i++ ) | |
3034 // { | |
3035 // fptc = iscoreo[i]+1; | |
3036 // fpt = iscore[i]+1; | |
3037 // j = norg-i-1; | |
3038 // while( j-- ) | |
3039 // *fptc++ = *fpt++; | |
3040 //// for( j=i+1; j<norg; j++ ) | |
3041 //// iscoreo[i][j-i] = iscore[i][j-i]; | |
3042 // } | |
3043 | |
3044 // fprintf( stderr, "building a tree.." ); | |
3045 if( treein ) | |
3046 { | |
3047 reporterr( "Loading a tree ... " ); | |
3048 loadtop( norg, iscoreo, topol, len, name, NULL, dep ); // nogaplen? | |
3049 reporterr( "\ndone.\n\n" ); | |
3050 } | |
3051 else if( treeout ) | |
3052 fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( norg, iscoreo, topol, len, name, nlen, dep, 1 ); | |
3053 else | |
3054 fixed_musclesupg_float_realloc_nobk_halfmtx( norg, iscoreo, topol, len, dep, 0, 1 ); | |
3055 // fprintf( stderr, "done.\n" ); | |
3056 | |
3057 if( norg > 1 ) | |
3058 cnctintvec( ordertable, topol[norg-2][0], topol[norg-2][1] ); | |
3059 else | |
3060 { | |
3061 ordertable[0] = 0; ordertable[1] = -1; | |
3062 } | |
3063 FreeFloatHalfMtx( iscoreo, norg ); | |
3064 | |
3065 #ifdef enablemultithread | |
3066 if( nthread ) | |
3067 { | |
3068 pthread_t *handle; | |
3069 pthread_mutex_t mutex_counter; | |
3070 thread_arg_t *targ; | |
3071 int *iaddsharept; | |
3072 | |
3073 targ = calloc( nthread, sizeof( thread_arg_t ) ); | |
3074 handle = calloc( nthread, sizeof( pthread_t ) ); | |
3075 pthread_mutex_init( &mutex_counter, NULL ); | |
3076 iaddsharept = calloc( 1, sizeof(int) ); | |
3077 *iaddsharept = 0; | |
3078 | |
3079 for( i=0; i<nthread; i++ ) | |
3080 { | |
3081 targ[i].thread_no = i; | |
3082 targ[i].follows = follows; | |
3083 targ[i].njob = njob; | |
3084 targ[i].nadd = nadd; | |
3085 targ[i].nlen = nlen; | |
3086 targ[i].name = name; | |
3087 targ[i].seq = seq; | |
3088 targ[i].localhomtable = localhomtable; | |
3089 targ[i].iscore = iscore; | |
3090 targ[i].nscore = nscore; | |
3091 targ[i].istherenewgap = istherenewgap; | |
3092 targ[i].newgaplist = newgaplist_o; | |
3093 targ[i].singlerna = singlerna; | |
3094 targ[i].eff_kozo_mapped = eff_kozo_mapped; | |
3095 targ[i].alloclen = alloclen; | |
3096 targ[i].iaddshare = iaddsharept; | |
3097 targ[i].dep = dep; | |
3098 targ[i].topol = topol; | |
3099 targ[i].len = len; | |
3100 targ[i].addtree = addtree; | |
3101 targ[i].mutex_counter = &mutex_counter; | |
3102 pthread_create( handle+i, NULL, addsinglethread, (void *)(targ+i) ); | |
3103 } | |
3104 for( i=0; i<nthread; i++ ) | |
3105 { | |
3106 pthread_join( handle[i], NULL ); | |
3107 } | |
3108 pthread_mutex_destroy( &mutex_counter ); | |
3109 free( handle ); | |
3110 free( targ ); | |
3111 free( iaddsharept ); | |
3112 } | |
3113 else | |
3114 #endif | |
3115 { | |
3116 thread_arg_t *targ; | |
3117 targ = calloc( 1, sizeof( thread_arg_t ) ); | |
3118 targ[0].follows = follows; | |
3119 targ[0].njob = njob; | |
3120 targ[0].nadd = nadd; | |
3121 targ[0].nlen = nlen; | |
3122 targ[0].name = name; | |
3123 targ[0].seq = seq; | |
3124 targ[0].localhomtable = localhomtable; | |
3125 targ[0].iscore = iscore; | |
3126 targ[0].nscore = nscore; | |
3127 targ[0].istherenewgap = istherenewgap; | |
3128 targ[0].newgaplist = newgaplist_o; | |
3129 targ[0].singlerna = singlerna; | |
3130 targ[0].eff_kozo_mapped = eff_kozo_mapped; | |
3131 targ[0].alloclen = alloclen; | |
3132 targ[0].dep = dep; | |
3133 targ[0].topol = topol; | |
3134 targ[0].len = len; | |
3135 targ[0].addtree = addtree; | |
3136 addsinglethread( targ ); | |
3137 free( targ ); | |
3138 } | |
3139 free( dep ); | |
3140 FreeFloatMtx( len ); | |
3141 if( multidist || tuplesize > 0 ) FreeFloatMtx( nscore ); | |
3142 | |
3143 // for( i=0; i<nadd; i++ ) fprintf( stdout, ">%s (%d) \n%s\n", name[norg+i], norg+i, seq[norg+i] ); | |
3144 | |
3145 if( treeout ) | |
3146 { | |
3147 fp = fopen( "infile.tree", "a" ); | |
3148 if( fp == 0 ) | |
3149 { | |
3150 fprintf( stderr, "File error!\n" ); | |
3151 exit( 1 ); | |
3152 } | |
3153 for( i=0; i<nadd; i++ ) | |
3154 { | |
3155 fprintf( fp, "\n" ); | |
3156 fprintf( fp, "%8d: %s\n", norg+i+1, name[norg+i]+1 ); | |
3157 fprintf( fp, " nearest sequence: %d\n", addtree[i].nearest + 1 ); | |
3158 fprintf( fp, " approximate distance: %f\n", addtree[i].dist1 ); | |
3159 fprintf( fp, " sister group: %s\n", addtree[i].neighbors ); | |
3160 fprintf( fp, " approximate distance: %f\n", addtree[i].dist2 ); | |
3161 free( addtree[i].neighbors ); | |
3162 } | |
3163 fclose( fp ); | |
3164 free( addtree ); | |
3165 } | |
3166 | |
3167 for( iadd=0; iadd<nadd; iadd++ ) | |
3168 { | |
3169 f = follows[iadd]; | |
3170 for( i=0; follower[f][i]!=-1; i++ ) | |
3171 ; | |
3172 if( !(follower[f] = realloc( follower[f], (i+2)*sizeof(int) ) ) ) | |
3173 { | |
3174 fprintf( stderr, "Cannot reallocate follower[]" ); | |
3175 exit( 1 ); | |
3176 } | |
3177 follower[f][i] = iadd; | |
3178 follower[f][i+1] = -1; | |
3179 #if 0 | |
3180 fprintf( stderr, "\nfollowers of %d = ", f ); | |
3181 for( i=0; follower[f][i]!=-1; i++ ) | |
3182 fprintf( stderr, "%d ", follower[f][i] ); | |
3183 fprintf( stderr, "\n" ); | |
3184 #endif | |
3185 } | |
3186 | |
3187 orderfp = fopen( "order", "w" ); | |
3188 if( !orderfp ) | |
3189 { | |
3190 fprintf( stderr, "Cannot open 'order'\n" ); | |
3191 exit( 1 ); | |
3192 } | |
3193 for( i=0; ordertable[i]!=-1; i++ ) | |
3194 { | |
3195 fprintf( orderfp, "%d\n", ordertable[i] ); | |
3196 // for( j=0; follower[i][j]!=-1; j++ ) | |
3197 // fprintf( orderfp, "%d\n", follower[i][j]+norg ); | |
3198 for( j=0; follower[ordertable[i]][j]!=-1; j++ ) | |
3199 fprintf( orderfp, "%d\n", follower[ordertable[i]][j]+norg ); | |
3200 // fprintf( orderfp, "%d -> %d\n", follower[i][j]+norg, i ); | |
3201 } | |
3202 fclose( orderfp ); | |
3203 | |
3204 posmap = AllocateIntVec( lenfull+2 ); | |
3205 realign = calloc( lenfull+2, sizeof( Blocktorealign ) ); | |
3206 for( i=0; i<lenfull+1; i++ ) posmap[i] = i; | |
3207 for( i=0; i<lenfull+1; i++ ) | |
3208 { | |
3209 realign[i].nnewres = 0; | |
3210 realign[i].start = 0; | |
3211 realign[i].end = 0; | |
3212 } | |
3213 | |
3214 fprintf( stderr, "\n\nCombining ..\n" ); | |
3215 fflush( stderr ); | |
3216 tmpseqlen = alloclen * 100; | |
3217 tmpseq = AllocateCharMtx( 1, tmpseqlen ); | |
3218 // check1 = AllocateCharVec( tmpseqlen ); | |
3219 // check2 = AllocateCharVec( tmpseqlen ); | |
3220 // gappick0( check2, seq[0] ); | |
3221 for( iadd=0; iadd<nadd; iadd++ ) | |
3222 { | |
3223 // fprintf( stderr, "%d / %d\r", iadd, nadd ); | |
3224 fflush( stderr ); | |
3225 | |
3226 // fprintf( stderr, "\niadd == %d\n", iadd ); | |
3227 makegaplistcompact( lenfull, posmap, newgaplist_compact, newgaplist_o[iadd] ); | |
3228 if( iadd == 0 || istherenewgap[iadd] ) | |
3229 { | |
3230 tmpseq1 = tmpseq[0]; | |
3231 // gaplist2alnx( lenfull, tmpseq1, seq[0], newgaplist_o[iadd], posmap, tmpseqlen ); | |
3232 gaplist2alnx( lenfull, tmpseq1, seq[0], newgaplist_compact, posmap, tmpseqlen ); | |
3233 // fprintf( stderr, "len = %d ? %d\n", strlen( tmpseq1 ), alloclen ); | |
3234 if( ( tmplen = strlen( tmpseq1 ) ) >= fullseqlen ) | |
3235 { | |
3236 fullseqlen = tmplen * 2+1; | |
3237 // fprintf( stderr, "Length over!\n" ); | |
3238 // fprintf( stderr, "strlen(tmpseq1)=%d\n", (int)strlen( tmpseq1 ) ); | |
3239 fprintf( stderr, "reallocating..." ); | |
3240 // fprintf( stderr, "alloclen=%d\n", alloclen ); | |
3241 // fprintf( stderr, "Please recompile!\n" ); | |
3242 // exit( 1 ); | |
3243 for( i=0; i<njob; i++ ) | |
3244 { | |
3245 seq[i] = realloc( seq[i], fullseqlen * sizeof( char ) ); | |
3246 if( !seq[i] ) | |
3247 { | |
3248 fprintf( stderr, "Cannot reallocate seq[][]\n" ); | |
3249 exit( 1 ); | |
3250 } | |
3251 } | |
3252 fprintf( stderr, "done.\n" ); | |
3253 } | |
3254 strcpy( seq[0], tmpseq1 ); | |
3255 | |
3256 ien = norg+iadd; | |
3257 #ifdef enablemultithread | |
3258 if( nthread > 0 && ien > 500 ) | |
3259 { | |
3260 gaplist2alnxthread_arg_t *targ; | |
3261 int jobpos; | |
3262 pthread_t *handle; | |
3263 pthread_mutex_t mutex; | |
3264 fprintf( stderr, "%d / %d (threads %d-%d)\r", iadd, nadd, 0, nthread ); | |
3265 | |
3266 targ = calloc( nthread, sizeof( gaplist2alnxthread_arg_t ) ); | |
3267 handle = calloc( nthread, sizeof( pthread_t ) ); | |
3268 pthread_mutex_init( &mutex, NULL ); | |
3269 jobpos = 1; | |
3270 for( i=0; i<nthread; i++ ) | |
3271 { | |
3272 // targ[i].thread_no = i; | |
3273 targ[i].ncycle = ien; | |
3274 targ[i].jobpospt = &jobpos; | |
3275 targ[i].tmpseqlen = tmpseqlen; | |
3276 targ[i].lenfull = lenfull; | |
3277 targ[i].seq = seq; | |
3278 // targ[i].newgaplist = newgaplist_o[iadd]; | |
3279 targ[i].newgaplist = newgaplist_compact; | |
3280 targ[i].posmap = posmap; | |
3281 targ[i].mutex = &mutex; | |
3282 | |
3283 pthread_create( handle+i, NULL, gaplist2alnxthread, (void *)(targ+i) ); | |
3284 } | |
3285 for( i=0; i<nthread; i++ ) | |
3286 { | |
3287 pthread_join( handle[i], NULL ); | |
3288 } | |
3289 pthread_mutex_destroy( &mutex ); | |
3290 free( handle ); | |
3291 free( targ ); | |
3292 } | |
3293 else | |
3294 #endif | |
3295 { | |
3296 fprintf( stderr, "%d / %d\r", iadd, nadd ); | |
3297 for( i=1; i<ien; i++ ) | |
3298 { | |
3299 tmpseq1 = tmpseq[0]; | |
3300 if( i == 1 ) fprintf( stderr, " %d / %d\r", iadd, nadd ); | |
3301 // gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_o[iadd], posmap, tmpseqlen ); | |
3302 gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_compact, posmap, tmpseqlen ); | |
3303 // fprintf( stderr, ">%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 ); | |
3304 strcpy( seq[i], tmpseq1 ); | |
3305 } | |
3306 } | |
3307 } | |
3308 tmpseq1 = tmpseq[0]; | |
3309 // insertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); | |
3310 insertgapsbyotherfragments_compact( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); | |
3311 // fprintf( stderr, "%d = %s\n", iadd, tmpseq1 ); | |
3312 eq2dash( tmpseq1 ); | |
3313 strcpy( seq[norg+iadd], tmpseq1 ); | |
3314 | |
3315 // adjustposmap( lenfull, posmap, newgaplist_o[iadd] ); | |
3316 adjustposmap( lenfull, posmap, newgaplist_compact ); | |
3317 countnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // muda? | |
3318 // countnewres( lenfull, realign, posmap, newgaplist_compact ); // muda? | |
3319 | |
3320 } | |
3321 fprintf( stderr, "\r done. \n\n" ); | |
3322 | |
3323 #if 0 | |
3324 for( i=0; i<njob; i++ ) | |
3325 { | |
3326 fprintf( stdout, ">%s\n", name[i] ); | |
3327 fprintf( stdout, "%s\n", seq[i] ); | |
3328 } | |
3329 #endif | |
3330 | |
3331 #if 0 | |
3332 fprintf( stderr, "realign[].nnewres = " ); | |
3333 for( i=0; i<lenfull+1; i++ ) | |
3334 { | |
3335 fprintf( stderr, "%d ", realign[i].nnewres ); | |
3336 } | |
3337 fprintf( stderr, "\n" ); | |
3338 #endif | |
3339 | |
3340 for( i=0; i<lenfull+1; i++ ) | |
3341 { | |
3342 if( realign[i].nnewres > 1 ) | |
3343 { | |
3344 // fprintf( stderr, "i=%d: %d-%d\n", i, realign[i].start, realign[i].end ); | |
3345 fprintf( stderr, "\rRealigning %d/%d \r", i, lenfull ); | |
3346 // zure = dorealignment_compact( realign+i, seq, &fullseqlen, norg ); | |
3347 // zure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows ); | |
3348 zure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows ); | |
3349 #if 0 | |
3350 gappick0( check1, seq[0] ); | |
3351 fprintf( stderr, "check1 = %s\n", check1 ); | |
3352 if( strcmp( check1, check2 ) ) | |
3353 { | |
3354 fprintf( stderr, "CHANGED!!!!!\n" ); | |
3355 exit( 1 ); | |
3356 } | |
3357 #endif | |
3358 for( j=i+1; j<lenfull+1; j++ ) | |
3359 { | |
3360 if( realign[j].nnewres ) | |
3361 { | |
3362 realign[j].start -= zure; | |
3363 realign[j].end -= zure; | |
3364 } | |
3365 } | |
3366 } | |
3367 } | |
3368 FreeIntCub( topol ); | |
3369 fprintf( stderr, "\r done. \n\n" ); | |
3370 | |
3371 fflush( stderr ); | |
3372 | |
3373 | |
3374 FreeIntMtx( newgaplist_o ); | |
3375 FreeIntVec( newgaplist_compact ); | |
3376 FreeIntVec( posmap ); | |
3377 free( realign ); | |
3378 free( istherenewgap ); | |
3379 FreeIntMtx( follower ); | |
3380 free( follows ); | |
3381 free( ordertable ); | |
3382 FreeCharMtx( tmpseq ); | |
3383 | |
3384 | |
3385 writeData_pointer( prep_g, njob, name, nlen, seq ); | |
3386 #if 0 | |
3387 writeData( stdout, njob, name, nlen, bseq ); | |
3388 writePre( njob, name, nlen, bseq, !contin ); | |
3389 writeData_pointer( prep_g, njob, name, nlen, aseq ); | |
3390 #endif | |
3391 #if IODEBUG | |
3392 fprintf( stderr, "OSHIMAI\n" ); | |
3393 #endif | |
3394 | |
3395 #if SMALLMEMORY | |
3396 if( multidist ) | |
3397 { | |
3398 // if( constraint ) FreeLocalHomTable_two( localhomtable, norg, nadd ); | |
3399 if( constraint ) FreeLocalHomTable_one( localhomtable, norg, nadd ); | |
3400 } | |
3401 else | |
3402 #endif | |
3403 { | |
3404 if( constraint ) FreeLocalHomTable( localhomtable, njob ); | |
3405 } | |
3406 | |
3407 SHOWVERSION; | |
3408 return( 0 ); | |
3409 } |