comparison mafft/core/dndblast.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #include "mltaln.h"
2 #include <sys/types.h>
3 #include <unistd.h>
4 #define DEBUG 0
5 #define TEST 0
6
7
8 int howmanyx( char *s )
9 {
10 int val = 0;
11 if( scoremtx == -1 )
12 {
13 do
14 {
15 if( !strchr( "atgcuATGCU", *s ) ) val++;
16 } while( *++s );
17 }
18 else
19 {
20 do
21 {
22 if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++;
23 } while( *++s );
24 }
25 return( val );
26 }
27
28 void arguments( int argc, char *argv[] )
29 {
30 int c;
31
32 inputfile = NULL;
33 disopt = 0;
34 divpairscore = 0;
35
36 while( --argc > 0 && (*++argv)[0] == '-' )
37 {
38 while ( (c = *++argv[0]) )
39 {
40 switch( c )
41 {
42 case 'i':
43 inputfile = *++argv;
44 fprintf( stderr, "inputfile = %s\n", inputfile );
45 --argc;
46 goto nextoption;
47 case 'I':
48 disopt = 1;
49 break;
50 default:
51 fprintf( stderr, "illegal option %c\n", c );
52 argc = 0;
53 break;
54 }
55 }
56 nextoption:
57 ;
58
59 }
60 if( argc != 0 )
61 {
62 fprintf( stderr, "options: -i\n" );
63 exit( 1 );
64 }
65 }
66
67 int main( int argc, char *argv[] )
68 {
69 int ktuple;
70 int i, j;
71 FILE *infp;
72 FILE *hat2p;
73 FILE *hat3p;
74 char **seq = NULL; // by D.Mathog
75 char **seq1;
76 static char **name;
77 static char **name1;
78 static int nlen1[M];
79 double **mtx;
80 double **mtx2;
81 static int nlen[M];
82 char b[B];
83 double max;
84 char com[1000];
85 int opt[M];
86 int res;
87 char *home;
88 char queryfile[B];
89 char datafile[B];
90 char fastafile[B];
91 char hat2file[B];
92 int pid = (int)getpid();
93 LocalHom **localhomtable, *tmpptr;
94 #if 1
95 home = getenv( "HOME" );
96 #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */
97 home = NULL;
98 #endif
99
100 #if DEBUG
101 if( home ) fprintf( stderr, "home = %s\n", home );
102 #endif
103 if( !home ) home = "";
104 sprintf( queryfile, "%s/tmp/query-%d", home, pid );
105 sprintf( datafile, "%s/tmp/data-%d", home, pid );
106 sprintf( fastafile, "%s/tmp/fasta-%d", home, pid );
107 sprintf( hat2file, "hat2-%d", pid );
108
109
110 arguments( argc, argv );
111
112 if( inputfile )
113 {
114 infp = fopen( inputfile, "r" );
115 if( !infp )
116 {
117 fprintf( stderr, "Cannot open %s\n", inputfile );
118 exit( 1 );
119 }
120 }
121 else
122 infp = stdin;
123 #if 0
124 PreRead( infp, &njob, &nlenmax );
125 #else
126 dorp = NOTSPECIFIED;
127 getnumlen( infp );
128 #endif
129
130 if( dorp == 'd' )
131 {
132 scoremtx = -1;
133 pamN = NOTSPECIFIED;
134 }
135 else
136 {
137 nblosum = 62;
138 scoremtx = 1;
139 }
140 constants( njob, seq );
141
142 rewind( infp );
143
144 name = AllocateCharMtx( njob, B+1 );
145 name1 = AllocateCharMtx( njob, B+1 );
146 seq = AllocateCharMtx( njob, nlenmax+1 );
147 seq1 = AllocateCharMtx( 2, nlenmax+1 );
148 mtx = AllocateDoubleMtx( njob, njob );
149 mtx2 = AllocateDoubleMtx( njob, njob );
150 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
151 for( i=0; i<njob; i++)
152 {
153 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
154 for( j=0; j<njob; j++)
155 {
156 localhomtable[i][j].start1 = -1;
157 localhomtable[i][j].end1 = -1;
158 localhomtable[i][j].start2 = -1;
159 localhomtable[i][j].end2 = -1;
160 localhomtable[i][j].opt = -1.0;
161 localhomtable[i][j].next = NULL;
162
163 }
164 }
165
166 #if 0
167 FRead( infp, name, nlen, seq );
168 #else
169 readData_pointer( infp, name, nlen, seq );
170 #endif
171 fclose( infp );
172
173 if( scoremtx == -1 ) ktuple = 6;
174 else ktuple = 1;
175
176 for( i=0; i<njob; i++ )
177 {
178 gappick0( seq1[0], seq[i] );
179 strcpy( seq[i], seq1[0] );
180 }
181 for( j=0; j<njob; j++ )
182 {
183 sprintf( name1[j], "+==========+%d ", j );
184 nlen1[j] = nlen[j];
185 }
186
187 for( i=0; i<njob; i++ )
188 {
189 // fprintf( stderr, "### i = %d\n", i );
190
191 if( i % 10 == 0 )
192 {
193 fprintf( stderr, "formatting .. " );
194 hat2p = fopen( datafile, "w" );
195 if( !hat2p ) ErrorExit( "Cannot open datafile." );
196 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i );
197 fclose( hat2p );
198
199 if( scoremtx == -1 )
200 sprintf( com, "formatdb -p f -i %s -o F", datafile );
201 else
202 sprintf( com, "formatdb -i %s -o F", datafile );
203 system( com );
204 fprintf( stderr, "done.\n" );
205 }
206
207 hat2p = fopen( queryfile, "w" );
208 if( !hat2p ) ErrorExit( "Cannot open queryfile." );
209 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i );
210 fclose( hat2p );
211
212
213 if( scoremtx == -1 )
214 sprintf( com, "blastall -e 1e10 -p blastn -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile );
215 else
216 sprintf( com, "blastall -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile );
217 res = system( com );
218 if( res ) ErrorExit( "error in fasta" );
219
220
221 hat2p = fopen( fastafile, "r" );
222 if( hat2p == NULL )
223 ErrorExit( "file 'fasta.$$' does not exist\n" );
224 res = ReadBlastm7( hat2p, mtx[i], i, name1, localhomtable[i] );
225 fclose( hat2p );
226
227 #if 0
228 for( j=0; j<njob; j++ )
229 {
230 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
231 {
232 if( tmpptr->opt == -1.0 ) continue;
233 // fprintf( stderr, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
234 }
235 }
236 #endif
237
238 if( res < njob-i+i%10 )
239 {
240 fprintf( stderr, "WARNING: count (blast) = %d < %d\n", res, njob-i+i%10 );
241 }
242
243
244 #if 0
245 {
246 int ii, jj;
247 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ )
248 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] );
249 }
250 #endif
251 fprintf( stderr, "query : %4d / %d\n", i+1, njob );
252 }
253
254 #if 1
255 fprintf( stderr, "##### writing hat3\n" );
256 hat3p = fopen( "hat3", "w" );
257 if( !hat3p ) ErrorExit( "Cannot open hat3." );
258 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ )
259 {
260 // fprintf( stderr, "mtx[%d][%d] = %f, mtx[%d][%d] = %f\n", i, j, mtx[i][j], j, i, mtx[j][i] );
261 if( i == j ) continue;
262 if( mtx[i][j] == mtx[j][i] && i > j ) continue;
263 if( mtx[j][i] > mtx[i][j] ) continue;
264 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
265 {
266 if( tmpptr->opt == -1.0 ) continue;
267 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next );
268 }
269 }
270 fclose( hat3p );
271 #endif
272
273 for( i=0; i<njob; i++ )
274 {
275 // fprintf( stderr, "### i = %d\n", i );
276 hat2p = fopen( datafile, "w" );
277 if( !hat2p ) ErrorExit( "Cannot open datafile." );
278 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i );
279 fclose( hat2p );
280
281 // seq1[0] = seq[i];
282 // nlen1[0] = nlen[i];
283
284 hat2p = fopen( queryfile, "w" );
285 if( !hat2p ) ErrorExit( "Cannot open queryfile." );
286 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i );
287 fclose( hat2p );
288
289
290 if( scoremtx == -1 )
291 sprintf( com, "fasta34 -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile );
292 else
293 sprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile );
294 res = system( com );
295 if( res ) ErrorExit( "error in fasta" );
296
297
298 hat2p = fopen( fastafile, "r" );
299 if( hat2p == NULL )
300 ErrorExit( "file 'fasta.$$' does not exist\n" );
301 res = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] );
302 fclose( hat2p );
303 if( res < njob - i )
304 {
305 fprintf( stderr, "count (fasta34 -z 3) = %d\n", res );
306 exit( 1 );
307 }
308
309
310 if( i == 0 )
311 for( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j];
312
313
314 #if 0
315 {
316 int ii, jj;
317 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ )
318 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] );
319 }
320 #endif
321 fprintf( stderr, "query : %4d\r", i+1 );
322 }
323
324
325
326
327 for( i=0; i<njob; i++ )
328 {
329 max = mtx[i][i];
330 if( max == 0.0 )
331 {
332 for( j=0; j<njob; j++ )
333 mtx2[i][j] = 2.0;
334 }
335 else
336 {
337 for( j=0; j<njob; j++ )
338 {
339 mtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0;
340 // fprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] );
341 }
342 }
343 }
344 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
345 {
346 // fprintf( stdout, "mtx2[%d][%d] = %f, %f\n", i+1, j+1, mtx2[i][j], mtx2[j][i] );
347 mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] );
348 }
349
350 #if 0
351 {
352 int ii, jj;
353 if( i < njob-1 ) for( jj=i+1; jj<njob; jj++ )
354 fprintf( stderr, "mtx2[][] = %f\n", mtx2[i][jj] );
355 }
356 #endif
357
358 for( i=0; i<njob; i++ ) name[i][0] = '=';
359
360 if( disopt )
361 {
362 strcpy( b, name[0] );
363 sprintf( name[0], "=query====lgth=%04d-%04d %.*s", nlen[0], howmanyx( seq[0] ), B-30, b );
364 #if 0
365 strins( b, name[0] );
366 #endif
367 for( i=1; i<njob; i++ )
368 {
369 strcpy( b, name[i] );
370 sprintf( name[i], "=opt=%04d=lgth=%04d-%04d %.*s", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b );
371 #if 0
372 strins( b, name[i] );
373 #endif
374 }
375 }
376
377 hat2p = fopen( hat2file, "w" );
378 if( !hat2p ) ErrorExit( "Cannot open hat2." );
379 WriteHat2_pointer( hat2p, njob, name, mtx2 );
380 fclose( hat2p );
381
382
383 sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile );
384 system( com );
385
386 #if 0
387 sprintf( com, ALNDIR "/supgsdl < %s", hat2file );
388 res = system( com );
389 if( res ) ErrorExit( "error in spgsdl" );
390 #endif
391
392 sprintf( com, "mv %s hat2", hat2file );
393 res = system( com );
394 if( res ) ErrorExit( "error in mv" );
395
396 SHOWVERSION;
397 exit( 0 );
398 }