comparison mafft/core/setcore.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 1
6
7 double corethr;
8 int coreext;
9
10 void arguments( int argc, char *argv[] )
11 {
12 int c;
13
14 fftkeika = 1;
15 constraint = 0;
16 nblosum = 62;
17 fmodel = 0;
18 calledByXced = 0;
19 devide = 0;
20 use_fft = 0;
21 fftscore = 1;
22 fftRepeatStop = 0;
23 fftNoAnchStop = 0;
24 weight = 3;
25 utree = 1;
26 tbutree = 1;
27 refine = 0;
28 check = 1;
29 cut = 0.0;
30 disp = 0;
31 outgap = 1;
32 alg = 'A';
33 mix = 0;
34 tbitr = 0;
35 scmtd = 5;
36 tbweight = 0;
37 tbrweight = 3;
38 checkC = 0;
39 treemethod = 'x';
40 contin = 0;
41 scoremtx = 0;
42 kobetsubunkatsu = 0;
43 dorp = NOTSPECIFIED;
44 ppenalty = NOTSPECIFIED;
45 ppenalty_ex = NOTSPECIFIED;
46 poffset = NOTSPECIFIED;
47 kimuraR = NOTSPECIFIED;
48 pamN = NOTSPECIFIED;
49 geta2 = GETA2;
50 fftWinSize = NOTSPECIFIED;
51 fftThreshold = NOTSPECIFIED;
52 corethr = .5;
53 coreext = 0;
54
55 while( --argc > 0 && (*++argv)[0] == '-' )
56 {
57 while ( ( c = *++argv[0] ) )
58 {
59 switch( c )
60 {
61 case 'f':
62 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
63 fprintf( stderr, "ppenalty = %d\n", ppenalty );
64 --argc;
65 goto nextoption;
66 case 'g':
67 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
68 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
69 --argc;
70 goto nextoption;
71 case 'h':
72 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
73 fprintf( stderr, "poffset = %d\n", poffset );
74 --argc;
75 goto nextoption;
76 case 'k':
77 kimuraR = myatoi( *++argv );
78 fprintf( stderr, "kimuraR = %d\n", kimuraR );
79 --argc;
80 goto nextoption;
81 case 'b':
82 nblosum = myatoi( *++argv );
83 scoremtx = 1;
84 fprintf( stderr, "blosum %d\n", nblosum );
85 --argc;
86 goto nextoption;
87 case 'j':
88 pamN = myatoi( *++argv );
89 scoremtx = 0;
90 fprintf( stderr, "jtt %d\n", pamN );
91 --argc;
92 goto nextoption;
93 case 'l':
94 fastathreshold = atof( *++argv );
95 constraint = 2;
96 fprintf( stderr, "weighti = %f\n", fastathreshold );
97 --argc;
98 goto nextoption;
99 case 'i':
100 corethr = atof( *++argv );
101 fprintf( stderr, "corethr = %f\n", corethr );
102 --argc;
103 goto nextoption;
104 case 'm':
105 fmodel = 1;
106 break;
107 case 'c':
108 coreext = 1;
109 break;
110 case 'r':
111 fmodel = -1;
112 break;
113 case 'D':
114 dorp = 'd';
115 break;
116 case 'P':
117 dorp = 'p';
118 break;
119 case 'e':
120 fftscore = 0;
121 break;
122 case 'O':
123 fftNoAnchStop = 1;
124 break;
125 case 'R':
126 fftRepeatStop = 1;
127 break;
128 case 'Q':
129 calledByXced = 1;
130 break;
131 case 's':
132 treemethod = 's';
133 break;
134 case 'x':
135 treemethod = 'x';
136 break;
137 case 'p':
138 treemethod = 'p';
139 break;
140 case 'a':
141 alg = 'a';
142 break;
143 case 'A':
144 alg = 'A';
145 break;
146 case 'S':
147 alg = 'S';
148 break;
149 case 'C':
150 alg = 'C';
151 break;
152 case 'F':
153 use_fft = 1;
154 break;
155 case 'v':
156 tbrweight = 3;
157 break;
158 case 'd':
159 disp = 1;
160 break;
161 case 'o':
162 outgap = 0;
163 break;
164 /* Modified 01/08/27, default: user tree */
165 case 'J':
166 tbutree = 0;
167 break;
168 /* modification end. */
169 case 'z':
170 fftThreshold = myatoi( *++argv );
171 --argc;
172 goto nextoption;
173 case 'w':
174 fftWinSize = myatoi( *++argv );
175 --argc;
176 goto nextoption;
177 case 'Z':
178 checkC = 1;
179 break;
180 default:
181 fprintf( stderr, "illegal option %c\n", c );
182 argc = 0;
183 break;
184 }
185 }
186 nextoption:
187 ;
188 }
189 if( argc == 1 )
190 {
191 cut = atof( (*argv) );
192 argc--;
193 }
194 if( argc != 0 )
195 {
196 fprintf( stderr, "options: Check source file !\n" );
197 exit( 1 );
198 }
199 if( tbitr == 1 && outgap == 0 )
200 {
201 fprintf( stderr, "conflicting options : o, m or u\n" );
202 exit( 1 );
203 }
204 if( alg == 'C' && outgap == 0 )
205 {
206 fprintf( stderr, "conflicting options : C, o\n" );
207 exit( 1 );
208 }
209 }
210
211
212
213 static void WriteOptions( FILE *fp )
214 {
215
216 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
217 else
218 {
219 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
220 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
221 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
222 }
223 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
224 if( use_fft ) fprintf( fp, "FFT on\n" );
225
226 fprintf( fp, "tree-base method\n" );
227 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
228 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
229 if( tbitr || tbweight )
230 {
231 fprintf( fp, "iterate at each step\n" );
232 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
233 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
234 if( tbweight ) fprintf( fp, " weighted\n" );
235 fprintf( fp, "\n" );
236 }
237
238 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
239
240 if( alg == 'a' )
241 fprintf( fp, "Algorithm A\n" );
242 else if( alg == 'A' )
243 fprintf( fp, "Algorithm A+\n" );
244 else if( alg == 'S' )
245 fprintf( fp, "Apgorithm S\n" );
246 else if( alg == 'C' )
247 fprintf( fp, "Apgorithm A+/C\n" );
248 else
249 fprintf( fp, "Unknown algorithm\n" );
250
251 if( treemethod == 'x' )
252 fprintf( fp, "Tree = UPGMA (3).\n" );
253 else if( treemethod == 's' )
254 fprintf( fp, "Tree = UPGMA (2).\n" );
255 else if( treemethod == 'p' )
256 fprintf( fp, "Tree = UPGMA (1).\n" );
257 else
258 fprintf( fp, "Unknown tree.\n" );
259
260 if( use_fft )
261 {
262 fprintf( fp, "FFT on\n" );
263 if( dorp == 'd' )
264 fprintf( fp, "Basis : 4 nucleotides\n" );
265 else
266 {
267 if( fftscore )
268 fprintf( fp, "Basis : Polarity and Volume\n" );
269 else
270 fprintf( fp, "Basis : 20 amino acids\n" );
271 }
272 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
273 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
274 }
275 else
276 fprintf( fp, "FFT off\n" );
277 fflush( fp );
278 }
279
280
281 int main( int argc, char *argv[] )
282 {
283 static int nlen[M];
284 static char **name, **seq;
285 static char **oseq;
286 static double **pscore;
287 static double *eff;
288 static double **node0, **node1;
289 static double *gapc;
290 static double *avgap;
291 double tmpavgap;
292 int i, j, m, goffset;
293 static int ***topol;
294 static double **len;
295 FILE *prep;
296 char c;
297 int corestart, coreend;
298 int alloclen;
299 int winsize;
300 char *pt, *ot;
301 double gapmin;
302
303 arguments( argc, argv );
304
305 getnumlen( stdin );
306 rewind( stdin );
307
308 if( njob < 2 )
309 {
310 fprintf( stderr, "At least 2 sequences should be input!\n"
311 "Only %d sequence found.\n", njob );
312 exit( 1 );
313 }
314
315 seq = AllocateCharMtx( njob, nlenmax*9+1 );
316 name = AllocateCharMtx( njob, B+1 );
317 oseq = AllocateCharMtx( njob, nlenmax*9+1 );
318 alloclen = nlenmax*9;
319
320 topol = AllocateIntCub( njob, 2, njob );
321 len = AllocateDoubleMtx( njob, 2 );
322 pscore = AllocateDoubleMtx( njob, njob );
323 eff = AllocateDoubleVec( njob );
324 node0 = AllocateDoubleMtx( njob, njob );
325 node1 = AllocateDoubleMtx( njob, njob );
326 gapc = AllocateDoubleVec( alloclen );
327 avgap = AllocateDoubleVec( alloclen );
328
329 #if 0
330 Read( name, nlen, seq );
331 #else
332 readData_pointer( stdin, name, nlen, seq );
333 #endif
334
335 constants( njob, seq );
336
337 #if 0
338 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
339 #endif
340
341 initSignalSM();
342
343 initFiles();
344
345 WriteOptions( trap_g );
346
347 c = seqcheck( seq );
348 if( c )
349 {
350 fprintf( stderr, "Illeagal character %c\n", c );
351 exit( 1 );
352 }
353
354 writePre( njob, name, nlen, seq, 0 );
355
356 if( tbutree == 0 )
357 {
358 for( i=1; i<njob; i++ )
359 {
360 if( nlen[i] != nlen[0] )
361 {
362 fprintf( stderr, "Input pre-aligned seqences or make hat2.\n" );
363 exit( 1 );
364 }
365 }
366 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
367 {
368 /*
369 pscore[i][j] = (double)score_calc1( seq[i], seq[j] );
370 */
371 pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );
372 }
373 }
374 else
375 {
376 fprintf( stderr, "Loading 'hat2' ... " );
377 prep = fopen( "hat2", "r" );
378 if( prep == NULL ) ErrorExit( "Make hat2." );
379 readhat2_pointer( prep, njob, name, pscore );
380 fclose( prep );
381 fprintf( stderr, "done.\n" );
382
383 #if 0
384 prep = fopen( "hat2_check", "w" );
385 WriteHat2( prep, njob, name, pscore );
386 fclose( prep );
387 #endif
388
389 }
390
391 fprintf( stderr, "Constructing dendrogram ... " );
392 if( treemethod == 'x' )
393 supg( njob, pscore, topol, len );
394 else if( treemethod == 's' )
395 spg( njob, pscore, topol, len );
396 else if( treemethod == 'p' )
397 upg2( njob, pscore, topol, len );
398 else
399 ErrorExit( "Incorrect tree\n" );
400 fprintf( stderr, "done.\n" );
401
402 countnode( njob, topol, node0 );
403 if( tbrweight )
404 {
405 weight = 3;
406 #if 0
407 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
408 #else
409 counteff_simple( njob, topol, len, eff );
410 #endif
411 }
412 else
413 {
414 for( i=0; i<njob; i++ ) eff[i] = 1.0;
415 }
416
417
418 for( i=0; i<nlenmax; i++ )
419 {
420 gapc[i] = 0.0;
421 for( j=0; j<njob; j++ )
422 {
423 if( seq[j][i] == '-' ) gapc[i] += eff[j];
424 }
425 }
426
427 gapmin = 1.0;
428 winsize = fftWinSize;
429 goffset = winsize/2;
430 tmpavgap = 0.0;
431 corestart = coreend = -1;
432 for( i=0; i<winsize; i++ )
433 {
434 tmpavgap += gapc[i];
435 }
436 for( i=winsize; i<nlenmax; i++ )
437 {
438 m = i - goffset;
439 avgap[m] = tmpavgap / winsize;
440 // fprintf( stdout, "%d %f %f\n", m, avgap[m], gapc[i] );
441 if( avgap[m] < corethr )
442 {
443 if( corestart == -1 )
444 corestart = i - winsize;
445 // fprintf( stdout, "ok, gapmin = %f, corestart = %d, coreend = %d\n", gapmin, corestart, coreend );
446 if( avgap[m] < gapmin )
447 {
448 gapmin = avgap[m];
449 }
450 coreend = i;
451 }
452 tmpavgap -= gapc[i-winsize];
453 tmpavgap += gapc[i];
454 }
455 if( corestart == -1 || coreend == -1 )
456 {
457 corestart = 0;
458 coreend = nlenmax-1;
459 }
460
461 for( i=0; i<njob; i++ )
462 {
463 pt = oseq[i];
464 m = winsize;
465 while( m-- ) *pt++ = '-';
466 for( j=corestart; j<=coreend; j++ )
467 *pt++ = seq[i][j];
468 m = winsize;
469 while( m-- ) *pt++ = '-';
470 *pt = 0;
471
472 ot = oseq[i]+winsize-1;
473 pt = seq[i]+corestart-1;
474 if( coreext ) m = winsize;
475 else m = 0;
476 while( m && --pt > seq[i] )
477 if( *pt != '-' )
478 {
479 *ot-- = *pt;
480 m--;
481 }
482
483 ot = oseq[i]+winsize+coreend-corestart+1;
484 pt = seq[i]+coreend;
485 if( coreext ) m = winsize;
486 else m = 0;
487 while( m && *(++pt) )
488 {
489 if( *pt != '-' )
490 {
491 *ot++ = *pt;
492 m--;
493 }
494 }
495 fprintf( stdout, ">%s\n", name[i] );
496 fprintf( stdout, "%s\n", oseq[i] );
497 }
498
499 exit( 1 );
500
501 SHOWVERSION;
502 return( 0 );
503 }