comparison mafft/core/multi2hat3s.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 1
6 #define TSUYOSAFACTOR 100
7
8
9 static int nhomologs;
10 static int seedoffset;
11
12 void strip( char *s )
13 {
14 char *pt = s;
15 while( *++pt )
16 if( *pt == '\n' ) *pt = 0;
17 }
18
19
20 void arguments( int argc, char *argv[] )
21 {
22 int c;
23
24 seedoffset = 0;
25 nhomologs = 1;
26 inputfile = NULL;
27 fftkeika = 0;
28 pslocal = -1000.0;
29 constraint = 0;
30 nblosum = 62;
31 fmodel = 0;
32 calledByXced = 0;
33 devide = 0;
34 use_fft = 0;
35 fftscore = 1;
36 fftRepeatStop = 0;
37 fftNoAnchStop = 0;
38 weight = 3;
39 utree = 1;
40 tbutree = 1;
41 refine = 0;
42 check = 1;
43 cut = 0.0;
44 disp = 0;
45 outgap = 1;
46 alg = 'A';
47 mix = 0;
48 tbitr = 0;
49 scmtd = 5;
50 tbweight = 0;
51 tbrweight = 3;
52 checkC = 0;
53 treemethod = 'x';
54 contin = 0;
55 scoremtx = 1;
56 kobetsubunkatsu = 0;
57 divpairscore = 0;
58 dorp = NOTSPECIFIED;
59 ppenalty = NOTSPECIFIED;
60 ppenalty_OP = NOTSPECIFIED;
61 ppenalty_ex = NOTSPECIFIED;
62 ppenalty_EX = NOTSPECIFIED;
63 poffset = NOTSPECIFIED;
64 kimuraR = NOTSPECIFIED;
65 pamN = NOTSPECIFIED;
66 geta2 = GETA2;
67 fftWinSize = NOTSPECIFIED;
68 fftThreshold = NOTSPECIFIED;
69
70 while( --argc > 0 && (*++argv)[0] == '-' )
71 {
72 while ( ( c = *++argv[0] ) )
73 {
74 switch( c )
75 {
76 case 'i':
77 inputfile = *++argv;
78 fprintf( stderr, "seed = %s\n", inputfile );
79 --argc;
80 goto nextoption;
81 case 't':
82 nhomologs = myatoi( *++argv );
83 fprintf( stderr, "nhomologs = %d\n", nhomologs );
84 --argc;
85 goto nextoption;
86 case 'o':
87 seedoffset = myatoi( *++argv );
88 fprintf( stderr, "seedoffset = %d\n", seedoffset );
89 --argc;
90 goto nextoption;
91 case 'D':
92 dorp = 'd';
93 break;
94 case 'P':
95 dorp = 'p';
96 break;
97 default:
98 fprintf( stderr, "illegal option %c\n", c );
99 argc = 0;
100 break;
101 }
102 }
103 nextoption:
104 ;
105 }
106 if( argc == 1 )
107 {
108 cut = atof( (*argv) );
109 argc--;
110 }
111 if( argc != 0 )
112 {
113 fprintf( stderr, "options: Check source file !\n" );
114 exit( 1 );
115 }
116 if( tbitr == 1 && outgap == 0 )
117 {
118 fprintf( stderr, "conflicting options : o, m or u\n" );
119 exit( 1 );
120 }
121 if( alg == 'C' && outgap == 0 )
122 {
123 fprintf( stderr, "conflicting options : C, o\n" );
124 exit( 1 );
125 }
126 }
127
128 int countamino( char *s, int end )
129 {
130 int val = 0;
131 while( end-- )
132 if( *s++ != '-' ) val++;
133 return( val );
134 }
135
136 static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen )
137 {
138 int i, j;
139 FILE *hat3p;
140 float pscore = 0.0; // by D.Mathog
141 static double *effarr1 = NULL;
142 static double *effarr2 = NULL;
143 char *aseq;
144 static char **pseq;
145 LocalHom **localhomtable, *tmpptr;
146 double tsuyosa;
147
148 if( nhomologs < 1 ) nhomologs = 1; // tsuyosa=0.0 wo sakeru
149 tsuyosa = (double)nhomologs * nhomologs * TSUYOSAFACTOR;
150 fprintf( stderr, "tsuyosa = %f\n", tsuyosa );
151 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
152 for( i=0; i<njob; i++)
153 {
154 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
155 for( j=0; j<njob; j++)
156 {
157 localhomtable[i][j].start1 = -1;
158 localhomtable[i][j].end1 = -1;
159 localhomtable[i][j].start2 = -1;
160 localhomtable[i][j].end2 = -1;
161 localhomtable[i][j].opt = -1.0;
162 localhomtable[i][j].next = NULL;
163 }
164 }
165
166 if( effarr1 == NULL )
167 {
168 effarr1 = AllocateDoubleVec( njob );
169 effarr2 = AllocateDoubleVec( njob );
170 pseq = AllocateCharMtx( 2, 0 );
171 aseq = AllocateCharVec( nlenmax*9+1 );
172 #if 0
173 #else
174 #endif
175 }
176
177 #if 0
178 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
179 #endif
180
181 #if 0
182 for( i=0; i<njob; i++ )
183 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
184 #endif
185
186
187 // writePre( njob, name, nlen, aseq, 0 );
188
189 hat3p = fopen( "hat3", "w" );
190 if( !hat3p ) ErrorExit( "Cannot open hat3." );
191 fprintf( stderr, "\n" );
192 for( i=0; i<njob-1; i++ )
193 {
194 for( j=i+1; j<njob; j++ )
195 {
196 pseq[0] = seq[i];
197 pseq[1] = seq[j];
198
199 if( strlen( pseq[0] ) != strlen( pseq[1] ) )
200 {
201 fprintf( stderr, "## ERROR ###\n" );
202 fprintf( stderr, "Not aligned, %s - %s\n", name[i], name[j] );
203 fprintf( stderr, "## ERROR ###\n" );
204 exit( 1 );
205 }
206
207
208 fprintf( stderr, "adding %d-%d\r", i, j );
209 putlocalhom2( pseq[0], pseq[1], localhomtable[i]+j, 0, 0, (int)pscore, strlen( pseq[0] ) );
210 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
211 {
212 if( tmpptr->opt == -1.0 ) continue;
213 if( tmpptr->start1 == -1 ) continue;
214 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d k\n", i+seedoffset, j+seedoffset, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 );
215 }
216 }
217 }
218 fprintf( stderr, "\n" );
219 fclose( hat3p );
220
221 #if DEBUG
222 fprintf( stderr, "calling FreeLocalHomTable\n" );
223 #endif
224 FreeLocalHomTable( localhomtable, njob );
225 #if DEBUG
226 fprintf( stderr, "done. FreeLocalHomTable\n" );
227 #endif
228 }
229
230 static void WriteOptions( FILE *fp )
231 {
232
233 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
234 else
235 {
236 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
237 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
238 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
239 }
240 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
241 if( use_fft ) fprintf( fp, "FFT on\n" );
242
243 fprintf( fp, "tree-base method\n" );
244 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
245 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
246 if( tbitr || tbweight )
247 {
248 fprintf( fp, "iterate at each step\n" );
249 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
250 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
251 if( tbweight ) fprintf( fp, " weighted\n" );
252 fprintf( fp, "\n" );
253 }
254
255 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
256
257 if( alg == 'a' )
258 fprintf( fp, "Algorithm A\n" );
259 else if( alg == 'A' )
260 fprintf( fp, "Algorithm A+\n" );
261 else if( alg == 'S' )
262 fprintf( fp, "Apgorithm S\n" );
263 else if( alg == 'C' )
264 fprintf( fp, "Apgorithm A+/C\n" );
265 else
266 fprintf( fp, "Unknown algorithm\n" );
267
268 if( treemethod == 'x' )
269 fprintf( fp, "Tree = UPGMA (3).\n" );
270 else if( treemethod == 's' )
271 fprintf( fp, "Tree = UPGMA (2).\n" );
272 else if( treemethod == 'p' )
273 fprintf( fp, "Tree = UPGMA (1).\n" );
274 else
275 fprintf( fp, "Unknown tree.\n" );
276
277 if( use_fft )
278 {
279 fprintf( fp, "FFT on\n" );
280 if( dorp == 'd' )
281 fprintf( fp, "Basis : 4 nucleotides\n" );
282 else
283 {
284 if( fftscore )
285 fprintf( fp, "Basis : Polarity and Volume\n" );
286 else
287 fprintf( fp, "Basis : 20 amino acids\n" );
288 }
289 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
290 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
291 }
292 else
293 fprintf( fp, "FFT off\n" );
294 fflush( fp );
295 }
296
297
298 int main( int argc, char *argv[] )
299 {
300 static int nlen[M];
301 static char **name, **seq;
302 static char **bseq;
303 static double *eff;
304 int i;
305 char c;
306 int alloclen;
307 FILE *infp;
308
309 arguments( argc, argv );
310
311 if( inputfile )
312 {
313 infp = fopen( inputfile, "r" );
314 if( !infp )
315 {
316 fprintf( stderr, "Cannot open %s\n", inputfile );
317 exit( 1 );
318 }
319 }
320 else
321 infp = stdin;
322
323 getnumlen( infp );
324 rewind( infp );
325
326 if( njob < 2 )
327 {
328 fprintf( stderr, "At least 2 sequences should be input!\n"
329 "Only %d sequence found.\n", njob );
330 exit( 1 );
331 }
332
333 name = AllocateCharMtx( njob, B+1 );
334 seq = AllocateCharMtx( njob, nlenmax*9+1 );
335 bseq = AllocateCharMtx( njob, nlenmax*9+1 );
336 alloclen = nlenmax*9;
337
338 eff = AllocateDoubleVec( njob );
339
340 #if 0
341 Read( name, nlen, seq );
342 #else
343 readData_pointer( infp, name, nlen, seq );
344 #endif
345 fclose( infp );
346
347 constants( njob, seq );
348
349 #if 0
350 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
351 #endif
352
353 initSignalSM();
354
355 initFiles();
356
357 WriteOptions( trap_g );
358
359 c = seqcheck( seq );
360 if( c )
361 {
362 fprintf( stderr, "Illeagal character %c\n", c );
363 exit( 1 );
364 }
365
366 // writePre( njob, name, nlen, seq, 0 );
367
368 for( i=0; i<njob; i++ ) eff[i] = 1.0;
369
370
371 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
372
373
374 // for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, bseq[i] ); // CHUUI!!
375 for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, seq[i] );
376
377 pairalign( name, nlen, seq, eff, alloclen );
378
379 fprintf( trap_g, "done.\n" );
380 #if DEBUG
381 fprintf( stderr, "closing trap_g\n" );
382 #endif
383 fclose( trap_g );
384
385 #if IODEBUG
386 fprintf( stderr, "OSHIMAI\n" );
387 #endif
388 SHOWVERSION;
389 return( 0 );
390 }