comparison mafft/core/mafft-profile.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 #if DEBUG
6 #include <time.h>
7 #include <sys/time.h>
8 #include <sys/resource.h>
9 double getrusage_sec()
10 {
11 struct rusage t;
12 struct timeval tv;
13 getrusage(RUSAGE_SELF, &t);
14 tv = t.ru_utime;
15 return tv.tv_sec + (double)tv.tv_usec*1e-6;
16 }
17 #endif
18
19
20 int intcmp( int *str1, int *str2 )
21 {
22 while( *str1 != -1 && *str2 != -1 )
23 if( *str1++ != *str2++ ) return( 1 );
24 if( *str1 != *str2 ) return( 1 );
25 return( 0 );
26 }
27
28 char **arguments( int argc, char *argv[] )
29 {
30 int c = 0;
31
32 fmodel = 0;
33 nblosum = 62;
34 calledByXced = 0;
35 devide = 0;
36 fftscore = 1;
37 use_fft = 1;
38 alg = 'A';
39 weight = 0;
40 utree = 1;
41 tbutree = 0;
42 refine = 0;
43 check = 1;
44 cut = 0.0;
45 disp = 0;
46 outgap = 0;
47 mix = 0;
48 tbitr = 0;
49 scmtd = 5;
50 tbweight = 0;
51 tbrweight = 3;
52 checkC = 0;
53 scoremtx = 1;
54 dorp = NOTSPECIFIED;
55 ppenalty = NOTSPECIFIED;
56 ppenalty_ex = NOTSPECIFIED;
57 poffset = 0; // chokusetsu yobareru kara
58 kimuraR = NOTSPECIFIED;
59 pamN = NOTSPECIFIED;
60 fftWinSize = NOTSPECIFIED;
61 fftThreshold = NOTSPECIFIED;
62 TMorJTT = JTT;
63 treemethod = 'x';
64
65
66 while( --argc > 0 && (*++argv)[0] == '-' )
67 {
68 while ( ( c = *++argv[0] ) )
69 {
70 switch( c )
71 {
72 case 'P':
73 dorp = 'p';
74 break;
75 case 'D':
76 dorp = 'd';
77 break;
78 case 'F':
79 use_fft = 1;
80 break;
81 case 'N':
82 use_fft = 0;
83 break;
84 case 'e':
85 fftscore = 0;
86 break;
87 case 'Q':
88 alg = 'Q';
89 break;
90 case 'A':
91 alg = 'A';
92 break;
93 case 'M':
94 alg = 'M';
95 break;
96 case 'd':
97 disp = 1;
98 break;
99 case 'O':
100 outgap = 0;
101 break;
102 case 'a':
103 fmodel = 1;
104 break;
105 case 'u':
106 tbrweight = 0;
107 break;
108 case 'z':
109 fftThreshold = myatoi( *++argv );
110 --argc;
111 goto nextoption;
112 case 'w':
113 fftWinSize = myatoi( *++argv );
114 --argc;
115 goto nextoption;
116 case 'Z':
117 checkC = 1;
118 break;
119 case 'f':
120 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
121 fprintf( stderr, "ppenalty = %d\n", ppenalty );
122 --argc;
123 goto nextoption;
124 case 'g':
125 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
126 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
127 --argc;
128 goto nextoption;
129 case 'h':
130 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
131 fprintf( stderr, "poffset = %d\n", poffset );
132 --argc;
133 goto nextoption;
134 case 'k':
135 kimuraR = myatoi( *++argv );
136 fprintf( stderr, "kappa = %d\n", kimuraR );
137 --argc;
138 goto nextoption;
139 case 'b':
140 nblosum = myatoi( *++argv );
141 scoremtx = 1;
142 fprintf( stderr, "blosum %d\n", nblosum );
143 --argc;
144 goto nextoption;
145 case 'j':
146 pamN = myatoi( *++argv );
147 scoremtx = 0;
148 TMorJTT = JTT;
149 fprintf( stderr, "jtt %d\n", pamN );
150 --argc;
151 goto nextoption;
152 case 'm':
153 pamN = myatoi( *++argv );
154 scoremtx = 0;
155 TMorJTT = TM;
156 fprintf( stderr, "tm %d\n", pamN );
157 --argc;
158 goto nextoption;
159 default:
160 fprintf( stderr, "illegal option %c\n", c );
161 argc = 0;
162 break;
163 }
164 }
165 nextoption:
166 ;
167 }
168 if( argc != 2 )
169 {
170 fprintf( stderr, "options: Check source file ! %c ?\n", c );
171 exit( 1 );
172 }
173 fprintf( stderr, "tbitr = %d, tbrweight = %d, tbweight = %d\n", tbitr, tbrweight, tbweight );
174 // readOtherOptions( &ppid, &fftThreshold, &fftWinSize );
175 return( argv );
176
177 }
178
179 void GroupAlign( int nseq1, int nseq2, char **name, int *nlen, char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double **len, double *eff, int alloclen )
180 {
181 int i;
182 int clus1, clus2;
183 int s1, s2;
184 float pscore;
185 static char **name1, **name2;
186 double *effarr = eff;
187 double *effarr1 = NULL;
188 double *effarr2 = NULL;
189 static char *indication1, *indication2;
190 float dumfl = 0.0;
191 int intdum;
192 #if DEBUG
193 double time1, time2;
194 #endif
195
196
197 // fprintf( stderr, "in GroupAlign fftWinSize = %d\n", fftWinSize );
198 // fprintf( stderr, "in GroupAlign fftThreshold = %d\n", fftThreshold );
199
200 if( effarr1 == NULL )
201 {
202 name1 = AllocateCharMtx( nseq1, B );
203 name2 = AllocateCharMtx( nseq2, B );
204 indication1 = AllocateCharVec( 150 );
205 indication2 = AllocateCharVec( 150 );
206 effarr1 = AllocateDoubleVec( njob );
207 effarr2 = AllocateDoubleVec( njob );
208 #if 0
209 #else
210 #endif
211 }
212
213 for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
214
215
216
217 s1 = 0;
218 s2 = nseq1;
219 // fprintf( stdout, "nseq1 = %d\n", nseq1 );
220
221
222
223 clus1 = conjuctionforgaln( 0, nseq1, aseq, mseq1, effarr1, effarr, name, name1, indication1 );
224 clus2 = conjuctionforgaln( nseq1, njob, aseq, mseq2, effarr2, effarr, name, name2, indication2 );
225 /*
226 fprintf( stderr, "before align all\n" );
227 display( aseq, njob );
228 fprintf( stderr, "\n" );
229 fprintf( stderr, "before align 1 %s \n", indication1 );
230 display( mseq1, clus1 );
231 fprintf( stderr, "\n" );
232 fprintf( stderr, "before align 2 %s \n", indication2 );
233 display( mseq2, clus2 );
234 fprintf( stderr, "\n" );
235 */
236
237 commongappick( nseq1, mseq1 );
238 commongappick( nseq2, mseq2 );
239
240
241 #if DEBUG
242 time1 = getrusage_sec();
243 fprintf( stderr, "entering Falign\n" );
244 #endif
245 if( use_fft )
246 {
247 if( alg == 'M' )
248 pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum );
249 else
250 pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
251 }
252 else
253 {
254 if( alg == 'M' )
255 pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
256 else
257 pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
258 }
259 #if DEBUG
260 time2 = getrusage_sec();
261 fprintf( stdout, "### %d - %d, %f\n", clus1, clus2, time2-time1 );
262 fflush( stdout );
263 #endif
264
265
266 /*
267 fprintf( stderr, "after align 1 %s \n", indication1 );
268 display( mseq1, clus1 );
269 fprintf( stderr, "\n" );
270 fprintf( stderr, "after align 2 %s \n", indication2 );
271 display( mseq2, clus2 );
272 fprintf( stderr, "\n" );
273 */
274
275 fprintf( stderr, "group-to-group %s /%s %f\n", indication1, indication2, pscore );
276 if( disp ) display( aseq, njob );
277 fprintf( stderr, "\n" );
278
279 /*
280 trap = fopen( "pre", "r+" );
281 if( !trap ) ErrorExit( 1 );
282 WriteGapFill( trap, njob, name, nlen, aseq );
283 fclose( trap );
284 fprintf( stdout, "nseq1 = %d\n", nseq1 );
285 */
286 }
287
288 static void WriteOptions( FILE *fp )
289 {
290 fprintf( fp, "tree-base method\n" );
291 if( tbweight == 0 ) fprintf( fp, "unweighted\n" );
292 else if( tbweight == 3 ) fprintf( fp, "reversely weighted\n" );
293 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
294 else if( scoremtx == 1 ) fprintf( fp, "Dayhoff( machigai ga aru )\n" );
295 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
296 else if( scoremtx == -1 ) fprintf( fp, "DNA\n" );
297
298 if( scoremtx )
299 fprintf( fp, "Gap Penalty = %d, %d\n", penalty, offset );
300 else
301 fprintf( fp, "Gap Penalty = %d\n", penalty );
302 }
303
304
305 int main( int argc, char *argv[] )
306 {
307 char **argv2;
308 static int *nlen;
309 static char **name, **seq;
310 static char **seq1, **seq2;
311 static char **mseq1, **mseq2;
312 static char **aseq;
313 static char **bseq;
314 static double **pscore;
315 static double *eff;
316 int i, j, len1, len2;
317 static int ***topol;
318 static double **len;
319 FILE *gp1, *gp2;
320 char c;
321 int nlenmax1, nlenmax2, nseq1, nseq2;
322 int alloclen;
323
324 argv2 = arguments( argc, argv );
325
326 fprintf( stderr, "####### in galn\n" );
327
328 initFiles();
329
330 fprintf( stderr, "file1 = %s\n", argv2[0] );
331 fprintf( stderr, "file2 = %s\n", argv2[1] );
332
333 gp1 = fopen( argv2[0], "r" ); if( !gp1 ) ErrorExit( "cannot open file1" );
334 gp2 = fopen( argv2[1], "r" ); if( !gp2 ) ErrorExit( "cannot open file2" );
335
336 #if 0
337 PreRead( gp1, &nseq1, &nlenmax1 );
338 PreRead( gp2, &nseq2, &nlenmax2 );
339 #else
340 getnumlen( gp1 );
341 nseq1 = njob; nlenmax1 = nlenmax;
342 getnumlen( gp2 );
343 nseq2 = njob; nlenmax2 = nlenmax;
344 #endif
345
346 njob = nseq1 + nseq2;
347 nlenmax = MAX( nlenmax1, nlenmax2 );
348
349 rewind( gp1 );
350 rewind( gp2 );
351
352
353 name = AllocateCharMtx( njob, B );
354 nlen = AllocateIntVec( njob );
355 seq1 = AllocateCharMtx( nseq1, nlenmax*3 );
356 seq2 = AllocateCharMtx( nseq2, nlenmax*3 );
357 seq = AllocateCharMtx( njob, 1 );
358 aseq = AllocateCharMtx( njob, nlenmax*3 );
359 bseq = AllocateCharMtx( njob, nlenmax*3 );
360 mseq1 = AllocateCharMtx( njob, 1 );
361 mseq2 = AllocateCharMtx( njob, 1 );
362 alloclen = nlenmax * 3;
363
364 topol = AllocateIntCub( njob, 2, njob );
365 len = AllocateDoubleMtx( njob, 2 );
366 pscore = AllocateDoubleMtx( njob, njob );
367 eff = AllocateDoubleVec( njob );
368
369 #if 0
370 njob=nseq2; FRead( gp2, name+nseq1, nlen+nseq1, seq2 );
371 njob=nseq1; FRead( gp1, name, nlen, seq1 );
372 #else
373 njob=nseq2; readDataforgaln( gp2, name+nseq1, nlen+nseq1, seq2 );
374 njob=nseq1; readDataforgaln( gp1, name, nlen, seq1 );
375 #endif
376 njob = nseq1 + nseq2;
377
378
379 #if 0 // CHUUI
380 commongappick( nseq1, seq1 );
381 commongappick( nseq2, seq2 );
382 #endif
383
384 for( i=0; i<nseq1; i++ ) seq[i] = seq1[i];
385 for( i=nseq1; i<njob; i++ ) seq[i] = seq2[i-nseq1];
386 /*
387 Write( stdout, njob, name, nlen, seq );
388 */
389
390 constants( njob, seq );
391
392 WriteOptions( trap_g );
393
394 c = seqcheck( seq );
395 if( c )
396 {
397 fprintf( stderr, "Illeagal character %c\n", c );
398 exit( 1 );
399 }
400 for( i=1; i<nseq1; i++ )
401 {
402 if( nlen[i] != nlen[0] )
403 ErrorExit( "group1 is not aligned." );
404 }
405 for( i=nseq1+1; i<njob; i++ )
406 {
407 if( nlen[i] != nlen[nseq1] )
408 ErrorExit( "group2 is not aligned." );
409 }
410 if( tbutree == 0 )
411 {
412 for( i=0; i<nseq1; i++ )
413 {
414 for( j=i+1; j<nseq1; j++ )
415 {
416 pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );
417 // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] );
418 }
419 for( j=nseq1; j<njob; j++ )
420 {
421 pscore[i][j] = 3.0;
422 // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] );
423 }
424 }
425 for( i=nseq1; i<njob-1; i++ )
426 {
427 for( j=i+1; j<njob; j++ )
428 {
429 pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );
430 // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] );
431 }
432 }
433 // fprintf( stderr, "\n" );
434
435
436 }
437 else
438 {
439 fprintf( stderr, "Not supported\n" );
440 exit( 1 );
441 #if 0
442 prep = fopen( "hat2", "r" );
443 if( prep == NULL ) ErrorExit( "Make hat2." );
444 readhat2( prep, njob, name, pscore );
445 fclose( prep );
446 #endif
447 }
448 fprintf( stderr, "Constructing dendrogram ... " );
449 if( treemethod == 'x' )
450 veryfastsupg( njob, pscore, topol, len );
451 else
452 ErrorExit( "Incorrect tree\n" );
453 fprintf( stderr, "done.\n" );
454
455 if( tbrweight )
456 {
457 weight = 3;
458 counteff_simple( njob, topol, len, eff );
459 // for( i=0; i<njob; i++ ) fprintf( stderr, "eff[%d] = %f\n", i, eff[i] );
460 }
461 else
462 {
463 for( i=0; i<njob; i++ ) eff[i] = 1.0;
464 }
465
466 len1 = strlen( seq[0] );
467 len2 = strlen( seq[nseq1] );
468 if( len1 > 30000 || len2 > 30000 )
469 {
470 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
471 alg = 'M';
472 }
473
474
475
476
477 GroupAlign( nseq1, nseq2, name, nlen, seq, aseq, mseq1, mseq2, topol, len, eff, alloclen );
478
479 #if 0
480 writePre( njob, name, nlen, aseq, 1 );
481 #else
482 writeDataforgaln( stdout, njob, name, nlen, aseq );
483 #endif
484
485 SHOWVERSION;
486 return( 0 );
487 }