comparison mafft/core/iteration.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 /* iteration ( algorithm C ) */
2 #include "mltaln.h"
3
4 #define DEBUG 0
5
6 static void Writeoptions( FILE *fp )
7 {
8 if( scoremtx == 1 )
9 fprintf( fp, "Dayhoff( ... )\n" );
10 else if( scoremtx == -1 )
11 fprintf( fp, "DNA\n" );
12 else if( scoremtx == 2 )
13 fprintf( fp, "Miyata-Yasunaga\n" );
14 else
15 fprintf( fp, "JTT %dPAM\n", pamN );
16
17 if( scoremtx == 0 )
18 fprintf( fp, "Gap Penalty = %+d, %+d\n", penalty, offset );
19 else
20 fprintf( fp, "Gap Penalty = %+d\n", penalty );
21
22 fprintf( fp, "marginal score to search : best - %f\n", cut );
23 if( scmtd == 3 )
24 fprintf( fp, "score of rnd or sco\n" );
25 else if( scmtd == 4 )
26 fprintf( fp, "score = sigma( score for a pair of homologous amino acids ) / ( number of amino acids pairs )\n" );
27 else if( scmtd == 5 )
28 fprintf( fp, "score : SP\n" );
29 if( mix )
30 fprintf( fp, "?\n" );
31 else
32 {
33 if( weight == 2 )
34 fprintf( fp, "weighted, geta2 = %f\n", geta2 );
35 else if( weight == 3 )
36 {
37 if( scmtd == 4 )
38 fprintf( fp, "reversely weighted in function 'align', unweighted in function 'score_calc'\n" );
39 else
40 fprintf( fp, "weighted like ClustalW," );
41 }
42 else
43 fprintf( fp, "unweighted\n" );
44 }
45 if( weight && utree )
46 {
47 fprintf( fp, "using tree defined by the file hat2 with simplified UPG method\n" );
48 }
49 if( weight && !utree )
50 fprintf( fp, "using temporary tree by simplified UPG method\n" );
51 fprintf( fp, "Algorithm %c\n", alg );
52 }
53
54
55
56
57 char **align0( float *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex )
58 {
59 char **result;
60
61 if( alg == 'B' )
62 {
63 ErrorExit( "Sorry!" );
64 /*
65 if( outgap == 0 )
66 {
67 result = alignm1_o( wm, aseq, seq, scmx, effarr, icyc, ex );
68 }
69 if( outgap == 1 )
70 {
71 result = alignm1( wm, aseq, seq, scmx, effarr, icyc, ex );
72 }
73 */
74 }
75 else if( alg == 'C' )
76 {
77 result = Calignm1( wm, aseq, seq, effarr, icyc, ex );
78 }
79 return( result );
80 }
81
82
83 double score_m_1_0( char **aseq, int locnjob, int s, double **eff, double effarr[M] )
84 {
85 double x;
86
87 if( alg == 'B' )
88 {
89 ErrorExit( "Sorry!" );
90 }
91 if( alg == 'C' )
92 {
93 x = Cscore_m_1( aseq, locnjob, s, eff );
94 }
95 fprintf( stderr, "in score_m_1_0 %f\n", x );
96 return( x );
97 }
98
99 int iteration( int locnjob, char name[M][B], int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **eff )
100 {
101 double tscore, mscore;
102 int identity;
103 static char *mseq1, **mseq2 = NULL;
104 static char **result;
105 int i, l;
106 static double effarr[M];
107 int s;
108 int sss[2];
109 char ou;
110 int alloclen;
111 int resultlen;
112 int nlenmax0 = nlenmax;
113 FILE *prep;
114 char sai[M];
115 char sai1[M];
116 char sai2[M];
117 #if 0
118 double his[2][M][MAXITERATION/locnjob+1];
119 #else
120 double ***his;
121 #endif
122 int cyc[2];
123 char shindou = 0;
124 float wm;
125 int returnvalue;
126
127 for( i=0; i<locnjob; i++ )
128 {
129 sai[i] = 1;
130 sai1[i] = 1;
131 sai2[i] = 2;
132 }
133 sai[locnjob] = sai1[locnjob] = sai2[locnjob] = 0;
134
135
136 Writeoptions( trap_g );
137
138 his = AllocateDoubleCub( 2, M, MAXITERATION/locnjob+1 );
139
140 if( mseq2 == NULL )
141 {
142 alloclen = nlenmax * 2.0;
143 AllocateTmpSeqs( &mseq2, &mseq1, alloclen );
144 }
145
146 if( !tbitr && !tbweight )
147 {
148 writePre( locnjob, name, nlen, aseq, 0 );
149
150 #if 0
151 prep = fopen( "best", "w" );
152 Write( prep, locnjob, name, nlen, aseq );
153 fclose( prep );
154 #endif
155 }
156
157
158
159
160 treeconstruction( aseq, locnjob, topol, len, eff );
161 tscore = score_calc0( aseq, locnjob, eff, 0 );
162
163 #if DEBUG
164 fprintf( stderr, "eff mtx in iteration\n" );
165 for( i=0; i<locnjob; i++ )
166 {
167 for( j=0; j<locnjob; j++ )
168 {
169 fprintf( stderr, "%5.3f ", eff[i][j] );
170 }
171 fprintf( stderr, "\n" );
172 }
173 #endif
174
175 fprintf( stderr, "\n" );
176 if( disp )
177 {
178 fprintf( stderr, "aseq(initial)\n" );
179 display( aseq, locnjob );
180 }
181 fprintf( stderr, "initial score = %f \n", tscore );
182 fprintf( stderr, "\n" );
183 for( i=0; i<locnjob; i++ ) strcpy( bseq[i], aseq[i] );
184 mscore = tscore;
185 srand( time(NULL) );
186
187 sss[1] = 0;
188 sss[0] = locnjob-1;
189 /*
190 sss[0] = (int)( (float)locnjob/2.0 );
191 */
192 ou = 1;
193 cyc[0] = 0; cyc[1] = 0;
194
195 for( s=-1, l=0; l<MAXITERATION; l++ )
196 {
197 int ss;
198 double tmpscore, tmpscore1;
199
200 if( strlen( aseq[0] ) > nlenmax )
201 nlenmax = strlen( aseq[0] );
202
203 /*
204 s = ( int )( rnd() * locnjob );
205 s++;
206 if( s == locnjob ) s = 0;
207 ou = 0;
208 */
209 if( ou == 0 )
210 {
211 ou = 1;
212 s = sss[0];
213 /*
214 sss[0]++;
215 if( sss[0] == locnjob )
216 {
217 sss[0] = 0;
218 cyc[0]++;
219 }
220 */
221 sss[0]--;
222 if( sss[0] == -1 )
223 {
224 sss[0] = locnjob-1;
225 cyc[0]++;
226 }
227 }
228 else
229 {
230 ou = 0;
231 s = sss[1];
232 sss[1]++;
233 if( sss[1] == locnjob )
234 {
235 sss[1] = 0;
236 cyc[1]++;
237 }
238 }
239 fprintf( trap_g, "%d ", weight );
240
241 /*
242 for( i=0, count=0; i<strlen( aseq[s] ); i++ )
243 {
244 if( aseq[s][i] != '-' )
245 {
246 mseq1[count] = aseq[s][i];
247 count++;
248 }
249 }
250 mseq1[count] = 0;
251 */
252 gappick0( mseq1, aseq[s] );
253
254 if( checkC )
255 tmpscore = score_m_1_0( aseq, locnjob, s, eff, effarr );
256
257 gappick( locnjob, s, aseq, mseq2, eff, effarr );
258
259 result = align0( &wm, mseq2, mseq1, effarr, locnjob-2, s );
260 resultlen = strlen( result[0] );
261 if( resultlen > alloclen )
262 {
263 if( resultlen > nlenmax0*3 || resultlen > N )
264 {
265 fprintf(stderr, "Error in main1\n");
266 exit( 1 );
267 }
268 FreeTmpSeqs( mseq2, mseq1 );
269 alloclen = strlen( result[0] ) * 2.0;
270 fprintf( stderr, "\n\ntrying to allocate TmpSeqs\n\n" );
271 AllocateTmpSeqs( &mseq2, &mseq1, alloclen );
272 }
273 for( i=0; i<locnjob; i++ ) strcpy( mseq2[i], result[i] );
274
275 if( checkC )
276 fprintf( stderr, "wm in iteration == %f\n", wm );
277
278 strcpy( mseq1, mseq2[locnjob-1] );
279 /*
280 Write( stdout, locnjob, name, nlen, mseq2 );
281 */
282 for( i=locnjob-2; i>=s; i-- ) strcpy( mseq2[i+1], mseq2[i] );
283 strcpy( mseq2[s], mseq1 );
284 if( checkC )
285 {
286 tmpscore1= score_m_1_0( mseq2, locnjob, s, eff, effarr );
287 fprintf( stderr, "pick up %d, before ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore );
288 fprintf( stderr, "pick up %d, after ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore1 );
289 if( tmpscore1 < tmpscore )
290 {
291 fprintf( stderr, "\7" );
292 fprintf( trap_g, ">>>>>>>n\n" );
293 }
294 if( fabs( wm - tmpscore1 ) / wm > 0.001 )
295 {
296 fprintf( stderr, "\7sorry\n" );
297 exit( 1 );
298 }
299 }
300
301 identity = !strcmp( mseq2[s], aseq[s] );
302 if( s == locnjob - 1 ) ss = 0; else ss=s+1;
303
304 identity *= !strcmp( mseq2[ss], aseq[ss] );
305
306 if( !identity )
307 {
308 tmpscore = score_calc0( mseq2, locnjob, eff, s );
309 }
310 else tmpscore = tscore;
311
312 if( disp )
313 {
314 fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 );
315 display( mseq2, locnjob );
316 }
317 fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 );
318 fprintf( stderr, "score = %f mscore = %f ", tmpscore, mscore );
319
320 fprintf( trap_g, "%#4d %#4d / the rest ", l+1, s+1 );
321 fprintf( trap_g, "score = %f mscore = %f ", tmpscore, mscore );
322
323 if( identity )
324 {
325 fprintf( stderr, "( identical )\n" );
326 fprintf( trap_g, "( identical )\n" );
327 sai[s] = 2;
328 }
329
330 else if( tmpscore > mscore - cut )
331 {
332 fprintf( stderr, "accepted\n" );
333 fprintf( trap_g, "accepted\n" );
334 for( i=0; i<locnjob; i++ ) strcpy( aseq[i], mseq2[i] );
335 strcpy( sai, sai1 ); /* kokoka ? */
336 if( !tbitr && !tbweight )
337 {
338 writePre( locnjob, name, nlen, aseq, 0 );
339 }
340 strcpy( sai, sai1 );
341 tscore = tmpscore;
342 /*
343 tscore = tmpscore = score_calc0( aseq, locnjob, eff, s ); * ? *
344 */
345 if( tmpscore > mscore )
346 {
347 for( i=0; i<locnjob; i++ ) strcpy( bseq[i], mseq2[i] );
348 treeconstruction( bseq, locnjob, topol, len, eff );
349 tscore = mscore = score_calc0( bseq, locnjob, eff, s );
350 fprintf( trap_g, " -> %f\n", mscore );
351 strcpy( sai, sai1 ); /* kokoka ? */
352 #if 0
353 if( !tbitr && !tbweight )
354 { prep = fopen( "best", "w" );
355 Write( prep, locnjob, name, nlen, bseq );
356 fclose( prep );
357 }
358 #endif
359 }
360 }
361
362 else
363 {
364 if( tmpscore == tscore )
365 {
366 fprintf( stderr, "occational coincidence \n" );
367 fprintf( trap_g, "occational coincidence\n" );
368 }
369 else
370 {
371 fprintf( stderr, "rejected\n" );
372 fprintf( trap_g, "rejected\n" );
373 }
374 for( i=0; i<locnjob; i++ ) strcpy( aseq[i], bseq[i] );
375 tscore = mscore;
376 sai[s] = 2;
377 }
378
379 /*
380 prep = fopen( "cur", "w" );
381 Write( prep, locnjob, name, nlen, mseq2 );
382 fclose( prep );
383 */
384
385 his[ou][s][cyc[ou]] = tmpscore;
386 if( !strcmp( sai, sai2 ) )
387 {
388 returnvalue = 0;
389 fprintf( trap_g, "converged\n" );
390 break;
391 }
392 for( i=cyc[ou]-1; i>0; i-- )
393 {
394 if( tmpscore == his[ou][s][i] )
395 {
396 shindou = 1;
397 break;
398 }
399 }
400 fprintf( stderr, "\n" );
401 if( shindou == 1 )
402 {
403 returnvalue = -1;
404 fprintf( trap_g, "oscillating\n" );
405 break;
406 }
407 }
408 if( l == MAXITERATION ) returnvalue = -2;
409 FreeDoubleCub( his );
410 return( returnvalue );
411 }
412