Mercurial > repos > nick > duplex
comparison mafft/core/mafft-profile.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #include "mltaln.h" | |
2 | |
3 #define DEBUG 0 | |
4 | |
5 #if DEBUG | |
6 #include <time.h> | |
7 #include <sys/time.h> | |
8 #include <sys/resource.h> | |
9 double getrusage_sec() | |
10 { | |
11 struct rusage t; | |
12 struct timeval tv; | |
13 getrusage(RUSAGE_SELF, &t); | |
14 tv = t.ru_utime; | |
15 return tv.tv_sec + (double)tv.tv_usec*1e-6; | |
16 } | |
17 #endif | |
18 | |
19 | |
20 int intcmp( int *str1, int *str2 ) | |
21 { | |
22 while( *str1 != -1 && *str2 != -1 ) | |
23 if( *str1++ != *str2++ ) return( 1 ); | |
24 if( *str1 != *str2 ) return( 1 ); | |
25 return( 0 ); | |
26 } | |
27 | |
28 char **arguments( int argc, char *argv[] ) | |
29 { | |
30 int c = 0; | |
31 | |
32 fmodel = 0; | |
33 nblosum = 62; | |
34 calledByXced = 0; | |
35 devide = 0; | |
36 fftscore = 1; | |
37 use_fft = 1; | |
38 alg = 'A'; | |
39 weight = 0; | |
40 utree = 1; | |
41 tbutree = 0; | |
42 refine = 0; | |
43 check = 1; | |
44 cut = 0.0; | |
45 disp = 0; | |
46 outgap = 0; | |
47 mix = 0; | |
48 tbitr = 0; | |
49 scmtd = 5; | |
50 tbweight = 0; | |
51 tbrweight = 3; | |
52 checkC = 0; | |
53 scoremtx = 1; | |
54 dorp = NOTSPECIFIED; | |
55 ppenalty = NOTSPECIFIED; | |
56 ppenalty_ex = NOTSPECIFIED; | |
57 poffset = 0; // chokusetsu yobareru kara | |
58 kimuraR = NOTSPECIFIED; | |
59 pamN = NOTSPECIFIED; | |
60 fftWinSize = NOTSPECIFIED; | |
61 fftThreshold = NOTSPECIFIED; | |
62 TMorJTT = JTT; | |
63 treemethod = 'x'; | |
64 | |
65 | |
66 while( --argc > 0 && (*++argv)[0] == '-' ) | |
67 { | |
68 while ( ( c = *++argv[0] ) ) | |
69 { | |
70 switch( c ) | |
71 { | |
72 case 'P': | |
73 dorp = 'p'; | |
74 break; | |
75 case 'D': | |
76 dorp = 'd'; | |
77 break; | |
78 case 'F': | |
79 use_fft = 1; | |
80 break; | |
81 case 'N': | |
82 use_fft = 0; | |
83 break; | |
84 case 'e': | |
85 fftscore = 0; | |
86 break; | |
87 case 'Q': | |
88 alg = 'Q'; | |
89 break; | |
90 case 'A': | |
91 alg = 'A'; | |
92 break; | |
93 case 'M': | |
94 alg = 'M'; | |
95 break; | |
96 case 'd': | |
97 disp = 1; | |
98 break; | |
99 case 'O': | |
100 outgap = 0; | |
101 break; | |
102 case 'a': | |
103 fmodel = 1; | |
104 break; | |
105 case 'u': | |
106 tbrweight = 0; | |
107 break; | |
108 case 'z': | |
109 fftThreshold = myatoi( *++argv ); | |
110 --argc; | |
111 goto nextoption; | |
112 case 'w': | |
113 fftWinSize = myatoi( *++argv ); | |
114 --argc; | |
115 goto nextoption; | |
116 case 'Z': | |
117 checkC = 1; | |
118 break; | |
119 case 'f': | |
120 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
121 fprintf( stderr, "ppenalty = %d\n", ppenalty ); | |
122 --argc; | |
123 goto nextoption; | |
124 case 'g': | |
125 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
126 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); | |
127 --argc; | |
128 goto nextoption; | |
129 case 'h': | |
130 poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); | |
131 fprintf( stderr, "poffset = %d\n", poffset ); | |
132 --argc; | |
133 goto nextoption; | |
134 case 'k': | |
135 kimuraR = myatoi( *++argv ); | |
136 fprintf( stderr, "kappa = %d\n", kimuraR ); | |
137 --argc; | |
138 goto nextoption; | |
139 case 'b': | |
140 nblosum = myatoi( *++argv ); | |
141 scoremtx = 1; | |
142 fprintf( stderr, "blosum %d\n", nblosum ); | |
143 --argc; | |
144 goto nextoption; | |
145 case 'j': | |
146 pamN = myatoi( *++argv ); | |
147 scoremtx = 0; | |
148 TMorJTT = JTT; | |
149 fprintf( stderr, "jtt %d\n", pamN ); | |
150 --argc; | |
151 goto nextoption; | |
152 case 'm': | |
153 pamN = myatoi( *++argv ); | |
154 scoremtx = 0; | |
155 TMorJTT = TM; | |
156 fprintf( stderr, "tm %d\n", pamN ); | |
157 --argc; | |
158 goto nextoption; | |
159 default: | |
160 fprintf( stderr, "illegal option %c\n", c ); | |
161 argc = 0; | |
162 break; | |
163 } | |
164 } | |
165 nextoption: | |
166 ; | |
167 } | |
168 if( argc != 2 ) | |
169 { | |
170 fprintf( stderr, "options: Check source file ! %c ?\n", c ); | |
171 exit( 1 ); | |
172 } | |
173 fprintf( stderr, "tbitr = %d, tbrweight = %d, tbweight = %d\n", tbitr, tbrweight, tbweight ); | |
174 // readOtherOptions( &ppid, &fftThreshold, &fftWinSize ); | |
175 return( argv ); | |
176 | |
177 } | |
178 | |
179 void GroupAlign( int nseq1, int nseq2, char **name, int *nlen, char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double **len, double *eff, int alloclen ) | |
180 { | |
181 int i; | |
182 int clus1, clus2; | |
183 int s1, s2; | |
184 float pscore; | |
185 static char **name1, **name2; | |
186 double *effarr = eff; | |
187 double *effarr1 = NULL; | |
188 double *effarr2 = NULL; | |
189 static char *indication1, *indication2; | |
190 float dumfl = 0.0; | |
191 int intdum; | |
192 #if DEBUG | |
193 double time1, time2; | |
194 #endif | |
195 | |
196 | |
197 // fprintf( stderr, "in GroupAlign fftWinSize = %d\n", fftWinSize ); | |
198 // fprintf( stderr, "in GroupAlign fftThreshold = %d\n", fftThreshold ); | |
199 | |
200 if( effarr1 == NULL ) | |
201 { | |
202 name1 = AllocateCharMtx( nseq1, B ); | |
203 name2 = AllocateCharMtx( nseq2, B ); | |
204 indication1 = AllocateCharVec( 150 ); | |
205 indication2 = AllocateCharVec( 150 ); | |
206 effarr1 = AllocateDoubleVec( njob ); | |
207 effarr2 = AllocateDoubleVec( njob ); | |
208 #if 0 | |
209 #else | |
210 #endif | |
211 } | |
212 | |
213 for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] ); | |
214 | |
215 | |
216 | |
217 s1 = 0; | |
218 s2 = nseq1; | |
219 // fprintf( stdout, "nseq1 = %d\n", nseq1 ); | |
220 | |
221 | |
222 | |
223 clus1 = conjuctionforgaln( 0, nseq1, aseq, mseq1, effarr1, effarr, name, name1, indication1 ); | |
224 clus2 = conjuctionforgaln( nseq1, njob, aseq, mseq2, effarr2, effarr, name, name2, indication2 ); | |
225 /* | |
226 fprintf( stderr, "before align all\n" ); | |
227 display( aseq, njob ); | |
228 fprintf( stderr, "\n" ); | |
229 fprintf( stderr, "before align 1 %s \n", indication1 ); | |
230 display( mseq1, clus1 ); | |
231 fprintf( stderr, "\n" ); | |
232 fprintf( stderr, "before align 2 %s \n", indication2 ); | |
233 display( mseq2, clus2 ); | |
234 fprintf( stderr, "\n" ); | |
235 */ | |
236 | |
237 commongappick( nseq1, mseq1 ); | |
238 commongappick( nseq2, mseq2 ); | |
239 | |
240 | |
241 #if DEBUG | |
242 time1 = getrusage_sec(); | |
243 fprintf( stderr, "entering Falign\n" ); | |
244 #endif | |
245 if( use_fft ) | |
246 { | |
247 if( alg == 'M' ) | |
248 pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum ); | |
249 else | |
250 pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum, NULL, 0, NULL ); | |
251 } | |
252 else | |
253 { | |
254 if( alg == 'M' ) | |
255 pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); | |
256 else | |
257 pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); | |
258 } | |
259 #if DEBUG | |
260 time2 = getrusage_sec(); | |
261 fprintf( stdout, "### %d - %d, %f\n", clus1, clus2, time2-time1 ); | |
262 fflush( stdout ); | |
263 #endif | |
264 | |
265 | |
266 /* | |
267 fprintf( stderr, "after align 1 %s \n", indication1 ); | |
268 display( mseq1, clus1 ); | |
269 fprintf( stderr, "\n" ); | |
270 fprintf( stderr, "after align 2 %s \n", indication2 ); | |
271 display( mseq2, clus2 ); | |
272 fprintf( stderr, "\n" ); | |
273 */ | |
274 | |
275 fprintf( stderr, "group-to-group %s /%s %f\n", indication1, indication2, pscore ); | |
276 if( disp ) display( aseq, njob ); | |
277 fprintf( stderr, "\n" ); | |
278 | |
279 /* | |
280 trap = fopen( "pre", "r+" ); | |
281 if( !trap ) ErrorExit( 1 ); | |
282 WriteGapFill( trap, njob, name, nlen, aseq ); | |
283 fclose( trap ); | |
284 fprintf( stdout, "nseq1 = %d\n", nseq1 ); | |
285 */ | |
286 } | |
287 | |
288 static void WriteOptions( FILE *fp ) | |
289 { | |
290 fprintf( fp, "tree-base method\n" ); | |
291 if( tbweight == 0 ) fprintf( fp, "unweighted\n" ); | |
292 else if( tbweight == 3 ) fprintf( fp, "reversely weighted\n" ); | |
293 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); | |
294 else if( scoremtx == 1 ) fprintf( fp, "Dayhoff( machigai ga aru )\n" ); | |
295 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); | |
296 else if( scoremtx == -1 ) fprintf( fp, "DNA\n" ); | |
297 | |
298 if( scoremtx ) | |
299 fprintf( fp, "Gap Penalty = %d, %d\n", penalty, offset ); | |
300 else | |
301 fprintf( fp, "Gap Penalty = %d\n", penalty ); | |
302 } | |
303 | |
304 | |
305 int main( int argc, char *argv[] ) | |
306 { | |
307 char **argv2; | |
308 static int *nlen; | |
309 static char **name, **seq; | |
310 static char **seq1, **seq2; | |
311 static char **mseq1, **mseq2; | |
312 static char **aseq; | |
313 static char **bseq; | |
314 static double **pscore; | |
315 static double *eff; | |
316 int i, j, len1, len2; | |
317 static int ***topol; | |
318 static double **len; | |
319 FILE *gp1, *gp2; | |
320 char c; | |
321 int nlenmax1, nlenmax2, nseq1, nseq2; | |
322 int alloclen; | |
323 | |
324 argv2 = arguments( argc, argv ); | |
325 | |
326 fprintf( stderr, "####### in galn\n" ); | |
327 | |
328 initFiles(); | |
329 | |
330 fprintf( stderr, "file1 = %s\n", argv2[0] ); | |
331 fprintf( stderr, "file2 = %s\n", argv2[1] ); | |
332 | |
333 gp1 = fopen( argv2[0], "r" ); if( !gp1 ) ErrorExit( "cannot open file1" ); | |
334 gp2 = fopen( argv2[1], "r" ); if( !gp2 ) ErrorExit( "cannot open file2" ); | |
335 | |
336 #if 0 | |
337 PreRead( gp1, &nseq1, &nlenmax1 ); | |
338 PreRead( gp2, &nseq2, &nlenmax2 ); | |
339 #else | |
340 getnumlen( gp1 ); | |
341 nseq1 = njob; nlenmax1 = nlenmax; | |
342 getnumlen( gp2 ); | |
343 nseq2 = njob; nlenmax2 = nlenmax; | |
344 #endif | |
345 | |
346 njob = nseq1 + nseq2; | |
347 nlenmax = MAX( nlenmax1, nlenmax2 ); | |
348 | |
349 rewind( gp1 ); | |
350 rewind( gp2 ); | |
351 | |
352 | |
353 name = AllocateCharMtx( njob, B ); | |
354 nlen = AllocateIntVec( njob ); | |
355 seq1 = AllocateCharMtx( nseq1, nlenmax*3 ); | |
356 seq2 = AllocateCharMtx( nseq2, nlenmax*3 ); | |
357 seq = AllocateCharMtx( njob, 1 ); | |
358 aseq = AllocateCharMtx( njob, nlenmax*3 ); | |
359 bseq = AllocateCharMtx( njob, nlenmax*3 ); | |
360 mseq1 = AllocateCharMtx( njob, 1 ); | |
361 mseq2 = AllocateCharMtx( njob, 1 ); | |
362 alloclen = nlenmax * 3; | |
363 | |
364 topol = AllocateIntCub( njob, 2, njob ); | |
365 len = AllocateDoubleMtx( njob, 2 ); | |
366 pscore = AllocateDoubleMtx( njob, njob ); | |
367 eff = AllocateDoubleVec( njob ); | |
368 | |
369 #if 0 | |
370 njob=nseq2; FRead( gp2, name+nseq1, nlen+nseq1, seq2 ); | |
371 njob=nseq1; FRead( gp1, name, nlen, seq1 ); | |
372 #else | |
373 njob=nseq2; readDataforgaln( gp2, name+nseq1, nlen+nseq1, seq2 ); | |
374 njob=nseq1; readDataforgaln( gp1, name, nlen, seq1 ); | |
375 #endif | |
376 njob = nseq1 + nseq2; | |
377 | |
378 | |
379 #if 0 // CHUUI | |
380 commongappick( nseq1, seq1 ); | |
381 commongappick( nseq2, seq2 ); | |
382 #endif | |
383 | |
384 for( i=0; i<nseq1; i++ ) seq[i] = seq1[i]; | |
385 for( i=nseq1; i<njob; i++ ) seq[i] = seq2[i-nseq1]; | |
386 /* | |
387 Write( stdout, njob, name, nlen, seq ); | |
388 */ | |
389 | |
390 constants( njob, seq ); | |
391 | |
392 WriteOptions( trap_g ); | |
393 | |
394 c = seqcheck( seq ); | |
395 if( c ) | |
396 { | |
397 fprintf( stderr, "Illeagal character %c\n", c ); | |
398 exit( 1 ); | |
399 } | |
400 for( i=1; i<nseq1; i++ ) | |
401 { | |
402 if( nlen[i] != nlen[0] ) | |
403 ErrorExit( "group1 is not aligned." ); | |
404 } | |
405 for( i=nseq1+1; i<njob; i++ ) | |
406 { | |
407 if( nlen[i] != nlen[nseq1] ) | |
408 ErrorExit( "group2 is not aligned." ); | |
409 } | |
410 if( tbutree == 0 ) | |
411 { | |
412 for( i=0; i<nseq1; i++ ) | |
413 { | |
414 for( j=i+1; j<nseq1; j++ ) | |
415 { | |
416 pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] ); | |
417 // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] ); | |
418 } | |
419 for( j=nseq1; j<njob; j++ ) | |
420 { | |
421 pscore[i][j] = 3.0; | |
422 // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] ); | |
423 } | |
424 } | |
425 for( i=nseq1; i<njob-1; i++ ) | |
426 { | |
427 for( j=i+1; j<njob; j++ ) | |
428 { | |
429 pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] ); | |
430 // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] ); | |
431 } | |
432 } | |
433 // fprintf( stderr, "\n" ); | |
434 | |
435 | |
436 } | |
437 else | |
438 { | |
439 fprintf( stderr, "Not supported\n" ); | |
440 exit( 1 ); | |
441 #if 0 | |
442 prep = fopen( "hat2", "r" ); | |
443 if( prep == NULL ) ErrorExit( "Make hat2." ); | |
444 readhat2( prep, njob, name, pscore ); | |
445 fclose( prep ); | |
446 #endif | |
447 } | |
448 fprintf( stderr, "Constructing dendrogram ... " ); | |
449 if( treemethod == 'x' ) | |
450 veryfastsupg( njob, pscore, topol, len ); | |
451 else | |
452 ErrorExit( "Incorrect tree\n" ); | |
453 fprintf( stderr, "done.\n" ); | |
454 | |
455 if( tbrweight ) | |
456 { | |
457 weight = 3; | |
458 counteff_simple( njob, topol, len, eff ); | |
459 // for( i=0; i<njob; i++ ) fprintf( stderr, "eff[%d] = %f\n", i, eff[i] ); | |
460 } | |
461 else | |
462 { | |
463 for( i=0; i<njob; i++ ) eff[i] = 1.0; | |
464 } | |
465 | |
466 len1 = strlen( seq[0] ); | |
467 len2 = strlen( seq[nseq1] ); | |
468 if( len1 > 30000 || len2 > 30000 ) | |
469 { | |
470 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); | |
471 alg = 'M'; | |
472 } | |
473 | |
474 | |
475 | |
476 | |
477 GroupAlign( nseq1, nseq2, name, nlen, seq, aseq, mseq1, mseq2, topol, len, eff, alloclen ); | |
478 | |
479 #if 0 | |
480 writePre( njob, name, nlen, aseq, 1 ); | |
481 #else | |
482 writeDataforgaln( stdout, njob, name, nlen, aseq ); | |
483 #endif | |
484 | |
485 SHOWVERSION; | |
486 return( 0 ); | |
487 } |