Mercurial > repos > nick > duplex
comparison mafft/core/pair2hat3s.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #include "mltaln.h" | |
2 | |
3 #define DEBUG 0 | |
4 #define IODEBUG 0 | |
5 #define SCOREOUT 1 | |
6 #define TSUYOSAFACTOR 100 | |
7 | |
8 | |
9 static char *pairfile; | |
10 static int nhomologs; | |
11 | |
12 void strip( char *s ) | |
13 { | |
14 char *pt = s; | |
15 while( *++pt ) | |
16 if( *pt == '\n' ) *pt = 0; | |
17 } | |
18 | |
19 int searchused( char *q, char **keys, int n ) | |
20 { | |
21 int i; | |
22 for( i=0; i<n; i++ ) | |
23 { | |
24 // fprintf( stderr, "%s ? %s\n", q, names[i] ); | |
25 if( !strcmp( q, keys[i] ) ) return( i ); | |
26 } | |
27 return( -1 ); | |
28 } | |
29 | |
30 void arguments( int argc, char *argv[] ) | |
31 { | |
32 int c; | |
33 | |
34 nhomologs = 2; | |
35 inputfile = NULL; | |
36 pairfile = NULL; | |
37 fftkeika = 0; | |
38 pslocal = -1000.0; | |
39 constraint = 0; | |
40 nblosum = 62; | |
41 fmodel = 0; | |
42 calledByXced = 0; | |
43 devide = 0; | |
44 use_fft = 0; | |
45 fftscore = 1; | |
46 fftRepeatStop = 0; | |
47 fftNoAnchStop = 0; | |
48 weight = 3; | |
49 utree = 1; | |
50 tbutree = 1; | |
51 refine = 0; | |
52 check = 1; | |
53 cut = 0.0; | |
54 disp = 0; | |
55 outgap = 1; | |
56 alg = 'A'; | |
57 mix = 0; | |
58 tbitr = 0; | |
59 scmtd = 5; | |
60 tbweight = 0; | |
61 tbrweight = 3; | |
62 checkC = 0; | |
63 treemethod = 'x'; | |
64 contin = 0; | |
65 scoremtx = 1; | |
66 kobetsubunkatsu = 0; | |
67 divpairscore = 0; | |
68 dorp = NOTSPECIFIED; | |
69 ppenalty = NOTSPECIFIED; | |
70 ppenalty_OP = NOTSPECIFIED; | |
71 ppenalty_ex = NOTSPECIFIED; | |
72 ppenalty_EX = NOTSPECIFIED; | |
73 poffset = NOTSPECIFIED; | |
74 kimuraR = NOTSPECIFIED; | |
75 pamN = NOTSPECIFIED; | |
76 geta2 = GETA2; | |
77 fftWinSize = NOTSPECIFIED; | |
78 fftThreshold = NOTSPECIFIED; | |
79 | |
80 while( --argc > 0 && (*++argv)[0] == '-' ) | |
81 { | |
82 while ( ( c = *++argv[0] ) ) | |
83 { | |
84 switch( c ) | |
85 { | |
86 case 'i': | |
87 inputfile = *++argv; | |
88 fprintf( stderr, "inputfile = %s\n", inputfile ); | |
89 --argc; | |
90 goto nextoption; | |
91 case 'p': | |
92 pairfile = *++argv; | |
93 fprintf( stderr, "pairfile = %s\n", pairfile ); | |
94 --argc; | |
95 goto nextoption; | |
96 case 't': | |
97 nhomologs = myatoi( *++argv ); | |
98 fprintf( stderr, "nhomologs = %d\n", nhomologs ); | |
99 --argc; | |
100 goto nextoption; | |
101 case 'D': | |
102 dorp = 'd'; | |
103 break; | |
104 case 'P': | |
105 dorp = 'p'; | |
106 break; | |
107 default: | |
108 fprintf( stderr, "illegal option %c\n", c ); | |
109 argc = 0; | |
110 break; | |
111 } | |
112 } | |
113 nextoption: | |
114 ; | |
115 } | |
116 if( argc == 1 ) | |
117 { | |
118 cut = atof( (*argv) ); | |
119 argc--; | |
120 } | |
121 if( argc != 0 ) | |
122 { | |
123 fprintf( stderr, "options: Check source file !\n" ); | |
124 exit( 1 ); | |
125 } | |
126 if( tbitr == 1 && outgap == 0 ) | |
127 { | |
128 fprintf( stderr, "conflicting options : o, m or u\n" ); | |
129 exit( 1 ); | |
130 } | |
131 if( alg == 'C' && outgap == 0 ) | |
132 { | |
133 fprintf( stderr, "conflicting options : C, o\n" ); | |
134 exit( 1 ); | |
135 } | |
136 } | |
137 | |
138 int countamino( char *s, int end ) | |
139 { | |
140 int val = 0; | |
141 while( end-- ) | |
142 if( *s++ != '-' ) val++; | |
143 return( val ); | |
144 } | |
145 | |
146 static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen ) | |
147 { | |
148 FILE *tmpfp; | |
149 static char dumm1[B], dumm0[B]; | |
150 int i, j; | |
151 char *res; | |
152 FILE *hat3p; | |
153 static double *effarr1 = NULL; | |
154 static double *effarr2 = NULL; | |
155 static char **pseq; | |
156 LocalHom **localhomtable, *tmpptr; | |
157 float pscore = 0.0; // by D.Mathog, aguess | |
158 char *aseq = NULL; // by D.Mathog | |
159 char **usedseqs = NULL; // by D.Mathog | |
160 char **usednames = NULL; // by D.Mathog | |
161 int nused; | |
162 double tsuyosa; | |
163 | |
164 tsuyosa = (double)nhomologs * (nhomologs-1) / njob * TSUYOSAFACTOR; | |
165 fprintf( stderr, "tsuyosa = %f\n", tsuyosa ); | |
166 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); | |
167 for( i=0; i<njob; i++) | |
168 { | |
169 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); | |
170 for( j=0; j<njob; j++) | |
171 { | |
172 localhomtable[i][j].start1 = -1; | |
173 localhomtable[i][j].end1 = -1; | |
174 localhomtable[i][j].start2 = -1; | |
175 localhomtable[i][j].end2 = -1; | |
176 localhomtable[i][j].opt = -1.0; | |
177 localhomtable[i][j].next = NULL; | |
178 } | |
179 } | |
180 | |
181 if( effarr1 == NULL ) | |
182 { | |
183 effarr1 = AllocateDoubleVec( njob ); | |
184 effarr2 = AllocateDoubleVec( njob ); | |
185 pseq = AllocateCharMtx( 2, nlenmax*9+1 ); | |
186 aseq = AllocateCharVec( nlenmax*9+1 ); | |
187 usedseqs = AllocateCharMtx( njob, nlenmax*9+1 ); | |
188 usednames = AllocateCharMtx( njob, B ); | |
189 #if 0 | |
190 #else | |
191 #endif | |
192 } | |
193 | |
194 #if 0 | |
195 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold ); | |
196 #endif | |
197 | |
198 #if 0 | |
199 for( i=0; i<njob; i++ ) | |
200 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] ); | |
201 #endif | |
202 | |
203 | |
204 // writePre( njob, name, nlen, aseq, 0 ); | |
205 | |
206 fprintf( stderr, "opening %s\n", pairfile ); | |
207 tmpfp = fopen( pairfile, "r" ); | |
208 if( !tmpfp ) | |
209 { | |
210 fprintf( stderr, "Cannot open %s\n", pairfile ); | |
211 exit( 1 ); | |
212 } | |
213 searchKUorWA( tmpfp ); | |
214 hat3p = fopen( "hat3", "w" ); | |
215 if( !hat3p ) ErrorExit( "Cannot open hat3." ); | |
216 nused = 0; | |
217 while( 1 ) | |
218 { | |
219 res = fgets( dumm0, B-1, tmpfp ); | |
220 strip( dumm0 ); | |
221 if( res == NULL ) | |
222 { | |
223 break; | |
224 } | |
225 load1SeqWithoutName_new( tmpfp, pseq[0] ); | |
226 gappick0( aseq, pseq[0] ); | |
227 i = searchused( aseq, usedseqs, nused ); | |
228 if( i == -1 ) | |
229 { | |
230 strcpy( usednames[nused], dumm0+1 ); | |
231 strcpy( usedseqs[nused], aseq ); | |
232 i = nused; | |
233 nused++; | |
234 } | |
235 fprintf( stderr, "i = %d\n", i ); | |
236 | |
237 res = fgets( dumm1, B-1, tmpfp ); | |
238 strip( dumm1 ); | |
239 if( res == NULL ) | |
240 { | |
241 fprintf( stderr, "ERROR: The number of sequences in %s must be even.\n", pairfile ); | |
242 exit( 1 ); | |
243 } | |
244 load1SeqWithoutName_new( tmpfp, pseq[1] ); | |
245 gappick0( aseq, pseq[1] ); | |
246 j = searchused( aseq, usedseqs, nused ); | |
247 if( j == -1 ) | |
248 { | |
249 strcpy( usednames[nused], dumm1+1 ); | |
250 strcpy( usedseqs[nused], aseq ); | |
251 j = nused; | |
252 nused++; | |
253 } | |
254 fprintf( stderr, "j = %d\n", j ); | |
255 | |
256 if( strlen( pseq[0] ) != strlen( pseq[1] ) ) | |
257 { | |
258 fprintf( stderr, "Not aligned, %s - %s\n", dumm0, dumm1 ); | |
259 exit( 1 ); | |
260 } | |
261 | |
262 | |
263 fprintf( stderr, "adding %d-%d\n", i, j ); | |
264 putlocalhom2( pseq[0], pseq[1], localhomtable[i]+j, 0, 0, (int)pscore, strlen( pseq[0] ) ); | |
265 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) | |
266 { | |
267 if( tmpptr->opt == -1.0 ) continue; | |
268 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); | |
269 } | |
270 } | |
271 fclose( tmpfp ); | |
272 fclose( hat3p ); | |
273 | |
274 for( i=0; i<nused; i++ ) | |
275 fprintf( stdout, ">%s\n%s\n", usednames[i], usedseqs[i] ); | |
276 | |
277 | |
278 #if 0 | |
279 fprintf( stderr, "##### writing hat3\n" ); | |
280 hat3p = fopen( "hat3", "w" ); | |
281 if( !hat3p ) ErrorExit( "Cannot open hat3." ); | |
282 ilim = njob-1; | |
283 for( i=0; i<ilim; i++ ) | |
284 { | |
285 for( j=i+1; j<njob; j++ ) | |
286 { | |
287 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) | |
288 { | |
289 if( tmpptr->opt == -1.0 ) continue; | |
290 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); | |
291 } | |
292 } | |
293 } | |
294 fclose( hat3p ); | |
295 #endif | |
296 #if DEBUG | |
297 fprintf( stderr, "calling FreeLocalHomTable\n" ); | |
298 #endif | |
299 FreeLocalHomTable( localhomtable, njob ); | |
300 #if DEBUG | |
301 fprintf( stderr, "done. FreeLocalHomTable\n" ); | |
302 #endif | |
303 } | |
304 | |
305 static void WriteOptions( FILE *fp ) | |
306 { | |
307 | |
308 if( dorp == 'd' ) fprintf( fp, "DNA\n" ); | |
309 else | |
310 { | |
311 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); | |
312 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); | |
313 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); | |
314 } | |
315 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); | |
316 if( use_fft ) fprintf( fp, "FFT on\n" ); | |
317 | |
318 fprintf( fp, "tree-base method\n" ); | |
319 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); | |
320 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); | |
321 if( tbitr || tbweight ) | |
322 { | |
323 fprintf( fp, "iterate at each step\n" ); | |
324 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); | |
325 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); | |
326 if( tbweight ) fprintf( fp, " weighted\n" ); | |
327 fprintf( fp, "\n" ); | |
328 } | |
329 | |
330 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); | |
331 | |
332 if( alg == 'a' ) | |
333 fprintf( fp, "Algorithm A\n" ); | |
334 else if( alg == 'A' ) | |
335 fprintf( fp, "Algorithm A+\n" ); | |
336 else if( alg == 'S' ) | |
337 fprintf( fp, "Apgorithm S\n" ); | |
338 else if( alg == 'C' ) | |
339 fprintf( fp, "Apgorithm A+/C\n" ); | |
340 else | |
341 fprintf( fp, "Unknown algorithm\n" ); | |
342 | |
343 if( treemethod == 'x' ) | |
344 fprintf( fp, "Tree = UPGMA (3).\n" ); | |
345 else if( treemethod == 's' ) | |
346 fprintf( fp, "Tree = UPGMA (2).\n" ); | |
347 else if( treemethod == 'p' ) | |
348 fprintf( fp, "Tree = UPGMA (1).\n" ); | |
349 else | |
350 fprintf( fp, "Unknown tree.\n" ); | |
351 | |
352 if( use_fft ) | |
353 { | |
354 fprintf( fp, "FFT on\n" ); | |
355 if( dorp == 'd' ) | |
356 fprintf( fp, "Basis : 4 nucleotides\n" ); | |
357 else | |
358 { | |
359 if( fftscore ) | |
360 fprintf( fp, "Basis : Polarity and Volume\n" ); | |
361 else | |
362 fprintf( fp, "Basis : 20 amino acids\n" ); | |
363 } | |
364 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); | |
365 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); | |
366 } | |
367 else | |
368 fprintf( fp, "FFT off\n" ); | |
369 fflush( fp ); | |
370 } | |
371 | |
372 | |
373 int main( int argc, char *argv[] ) | |
374 { | |
375 static int nlen[M]; | |
376 static char **name, **seq; | |
377 static char **bseq; | |
378 static double *eff; | |
379 int i; | |
380 char c; | |
381 int alloclen; | |
382 FILE *infp; | |
383 | |
384 arguments( argc, argv ); | |
385 | |
386 if( inputfile ) | |
387 { | |
388 infp = fopen( inputfile, "r" ); | |
389 if( !infp ) | |
390 { | |
391 fprintf( stderr, "Cannot open %s\n", inputfile ); | |
392 exit( 1 ); | |
393 } | |
394 } | |
395 else | |
396 infp = stdin; | |
397 | |
398 if( !pairfile ) | |
399 { | |
400 fprintf( stderr, "Usage: %s -p pairfile -i inputfile \n", argv[0] ); | |
401 exit( 1 ); | |
402 } | |
403 | |
404 getnumlen( infp ); | |
405 rewind( infp ); | |
406 | |
407 if( njob < 2 ) | |
408 { | |
409 fprintf( stderr, "At least 2 sequences should be input!\n" | |
410 "Only %d sequence found.\n", njob ); | |
411 exit( 1 ); | |
412 } | |
413 | |
414 name = AllocateCharMtx( njob, B+1 ); | |
415 seq = AllocateCharMtx( njob, nlenmax*9+1 ); | |
416 bseq = AllocateCharMtx( njob, nlenmax*9+1 ); | |
417 alloclen = nlenmax*9; | |
418 | |
419 eff = AllocateDoubleVec( njob ); | |
420 | |
421 #if 0 | |
422 Read( name, nlen, seq ); | |
423 #else | |
424 readData_pointer( infp, name, nlen, seq ); | |
425 #endif | |
426 fclose( infp ); | |
427 | |
428 constants( njob, seq ); | |
429 | |
430 #if 0 | |
431 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); | |
432 #endif | |
433 | |
434 initSignalSM(); | |
435 | |
436 initFiles(); | |
437 | |
438 WriteOptions( trap_g ); | |
439 | |
440 c = seqcheck( seq ); | |
441 if( c ) | |
442 { | |
443 fprintf( stderr, "Illeagal character %c\n", c ); | |
444 exit( 1 ); | |
445 } | |
446 | |
447 // writePre( njob, name, nlen, seq, 0 ); | |
448 | |
449 for( i=0; i<njob; i++ ) eff[i] = 1.0; | |
450 | |
451 | |
452 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] ); | |
453 | |
454 | |
455 pairalign( name, nlen, bseq, eff, alloclen ); | |
456 | |
457 fprintf( trap_g, "done.\n" ); | |
458 #if DEBUG | |
459 fprintf( stderr, "closing trap_g\n" ); | |
460 #endif | |
461 fclose( trap_g ); | |
462 | |
463 #if IODEBUG | |
464 fprintf( stderr, "OSHIMAI\n" ); | |
465 #endif | |
466 SHOWVERSION; | |
467 return( 0 ); | |
468 } |