Mercurial > repos > nick > duplex
comparison mafft/core/dndblast.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #include "mltaln.h" | |
2 #include <sys/types.h> | |
3 #include <unistd.h> | |
4 #define DEBUG 0 | |
5 #define TEST 0 | |
6 | |
7 | |
8 int howmanyx( char *s ) | |
9 { | |
10 int val = 0; | |
11 if( scoremtx == -1 ) | |
12 { | |
13 do | |
14 { | |
15 if( !strchr( "atgcuATGCU", *s ) ) val++; | |
16 } while( *++s ); | |
17 } | |
18 else | |
19 { | |
20 do | |
21 { | |
22 if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; | |
23 } while( *++s ); | |
24 } | |
25 return( val ); | |
26 } | |
27 | |
28 void arguments( int argc, char *argv[] ) | |
29 { | |
30 int c; | |
31 | |
32 inputfile = NULL; | |
33 disopt = 0; | |
34 divpairscore = 0; | |
35 | |
36 while( --argc > 0 && (*++argv)[0] == '-' ) | |
37 { | |
38 while ( (c = *++argv[0]) ) | |
39 { | |
40 switch( c ) | |
41 { | |
42 case 'i': | |
43 inputfile = *++argv; | |
44 fprintf( stderr, "inputfile = %s\n", inputfile ); | |
45 --argc; | |
46 goto nextoption; | |
47 case 'I': | |
48 disopt = 1; | |
49 break; | |
50 default: | |
51 fprintf( stderr, "illegal option %c\n", c ); | |
52 argc = 0; | |
53 break; | |
54 } | |
55 } | |
56 nextoption: | |
57 ; | |
58 | |
59 } | |
60 if( argc != 0 ) | |
61 { | |
62 fprintf( stderr, "options: -i\n" ); | |
63 exit( 1 ); | |
64 } | |
65 } | |
66 | |
67 int main( int argc, char *argv[] ) | |
68 { | |
69 int ktuple; | |
70 int i, j; | |
71 FILE *infp; | |
72 FILE *hat2p; | |
73 FILE *hat3p; | |
74 char **seq = NULL; // by D.Mathog | |
75 char **seq1; | |
76 static char **name; | |
77 static char **name1; | |
78 static int nlen1[M]; | |
79 double **mtx; | |
80 double **mtx2; | |
81 static int nlen[M]; | |
82 char b[B]; | |
83 double max; | |
84 char com[1000]; | |
85 int opt[M]; | |
86 int res; | |
87 char *home; | |
88 char queryfile[B]; | |
89 char datafile[B]; | |
90 char fastafile[B]; | |
91 char hat2file[B]; | |
92 int pid = (int)getpid(); | |
93 LocalHom **localhomtable, *tmpptr; | |
94 #if 1 | |
95 home = getenv( "HOME" ); | |
96 #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ | |
97 home = NULL; | |
98 #endif | |
99 | |
100 #if DEBUG | |
101 if( home ) fprintf( stderr, "home = %s\n", home ); | |
102 #endif | |
103 if( !home ) home = ""; | |
104 sprintf( queryfile, "%s/tmp/query-%d", home, pid ); | |
105 sprintf( datafile, "%s/tmp/data-%d", home, pid ); | |
106 sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); | |
107 sprintf( hat2file, "hat2-%d", pid ); | |
108 | |
109 | |
110 arguments( argc, argv ); | |
111 | |
112 if( inputfile ) | |
113 { | |
114 infp = fopen( inputfile, "r" ); | |
115 if( !infp ) | |
116 { | |
117 fprintf( stderr, "Cannot open %s\n", inputfile ); | |
118 exit( 1 ); | |
119 } | |
120 } | |
121 else | |
122 infp = stdin; | |
123 #if 0 | |
124 PreRead( infp, &njob, &nlenmax ); | |
125 #else | |
126 dorp = NOTSPECIFIED; | |
127 getnumlen( infp ); | |
128 #endif | |
129 | |
130 if( dorp == 'd' ) | |
131 { | |
132 scoremtx = -1; | |
133 pamN = NOTSPECIFIED; | |
134 } | |
135 else | |
136 { | |
137 nblosum = 62; | |
138 scoremtx = 1; | |
139 } | |
140 constants( njob, seq ); | |
141 | |
142 rewind( infp ); | |
143 | |
144 name = AllocateCharMtx( njob, B+1 ); | |
145 name1 = AllocateCharMtx( njob, B+1 ); | |
146 seq = AllocateCharMtx( njob, nlenmax+1 ); | |
147 seq1 = AllocateCharMtx( 2, nlenmax+1 ); | |
148 mtx = AllocateDoubleMtx( njob, njob ); | |
149 mtx2 = AllocateDoubleMtx( njob, njob ); | |
150 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); | |
151 for( i=0; i<njob; i++) | |
152 { | |
153 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); | |
154 for( j=0; j<njob; j++) | |
155 { | |
156 localhomtable[i][j].start1 = -1; | |
157 localhomtable[i][j].end1 = -1; | |
158 localhomtable[i][j].start2 = -1; | |
159 localhomtable[i][j].end2 = -1; | |
160 localhomtable[i][j].opt = -1.0; | |
161 localhomtable[i][j].next = NULL; | |
162 | |
163 } | |
164 } | |
165 | |
166 #if 0 | |
167 FRead( infp, name, nlen, seq ); | |
168 #else | |
169 readData_pointer( infp, name, nlen, seq ); | |
170 #endif | |
171 fclose( infp ); | |
172 | |
173 if( scoremtx == -1 ) ktuple = 6; | |
174 else ktuple = 1; | |
175 | |
176 for( i=0; i<njob; i++ ) | |
177 { | |
178 gappick0( seq1[0], seq[i] ); | |
179 strcpy( seq[i], seq1[0] ); | |
180 } | |
181 for( j=0; j<njob; j++ ) | |
182 { | |
183 sprintf( name1[j], "+==========+%d ", j ); | |
184 nlen1[j] = nlen[j]; | |
185 } | |
186 | |
187 for( i=0; i<njob; i++ ) | |
188 { | |
189 // fprintf( stderr, "### i = %d\n", i ); | |
190 | |
191 if( i % 10 == 0 ) | |
192 { | |
193 fprintf( stderr, "formatting .. " ); | |
194 hat2p = fopen( datafile, "w" ); | |
195 if( !hat2p ) ErrorExit( "Cannot open datafile." ); | |
196 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i ); | |
197 fclose( hat2p ); | |
198 | |
199 if( scoremtx == -1 ) | |
200 sprintf( com, "formatdb -p f -i %s -o F", datafile ); | |
201 else | |
202 sprintf( com, "formatdb -i %s -o F", datafile ); | |
203 system( com ); | |
204 fprintf( stderr, "done.\n" ); | |
205 } | |
206 | |
207 hat2p = fopen( queryfile, "w" ); | |
208 if( !hat2p ) ErrorExit( "Cannot open queryfile." ); | |
209 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i ); | |
210 fclose( hat2p ); | |
211 | |
212 | |
213 if( scoremtx == -1 ) | |
214 sprintf( com, "blastall -e 1e10 -p blastn -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile ); | |
215 else | |
216 sprintf( com, "blastall -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile ); | |
217 res = system( com ); | |
218 if( res ) ErrorExit( "error in fasta" ); | |
219 | |
220 | |
221 hat2p = fopen( fastafile, "r" ); | |
222 if( hat2p == NULL ) | |
223 ErrorExit( "file 'fasta.$$' does not exist\n" ); | |
224 res = ReadBlastm7( hat2p, mtx[i], i, name1, localhomtable[i] ); | |
225 fclose( hat2p ); | |
226 | |
227 #if 0 | |
228 for( j=0; j<njob; j++ ) | |
229 { | |
230 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) | |
231 { | |
232 if( tmpptr->opt == -1.0 ) continue; | |
233 // fprintf( stderr, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); | |
234 } | |
235 } | |
236 #endif | |
237 | |
238 if( res < njob-i+i%10 ) | |
239 { | |
240 fprintf( stderr, "WARNING: count (blast) = %d < %d\n", res, njob-i+i%10 ); | |
241 } | |
242 | |
243 | |
244 #if 0 | |
245 { | |
246 int ii, jj; | |
247 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) | |
248 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] ); | |
249 } | |
250 #endif | |
251 fprintf( stderr, "query : %4d / %d\n", i+1, njob ); | |
252 } | |
253 | |
254 #if 1 | |
255 fprintf( stderr, "##### writing hat3\n" ); | |
256 hat3p = fopen( "hat3", "w" ); | |
257 if( !hat3p ) ErrorExit( "Cannot open hat3." ); | |
258 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) | |
259 { | |
260 // fprintf( stderr, "mtx[%d][%d] = %f, mtx[%d][%d] = %f\n", i, j, mtx[i][j], j, i, mtx[j][i] ); | |
261 if( i == j ) continue; | |
262 if( mtx[i][j] == mtx[j][i] && i > j ) continue; | |
263 if( mtx[j][i] > mtx[i][j] ) continue; | |
264 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) | |
265 { | |
266 if( tmpptr->opt == -1.0 ) continue; | |
267 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); | |
268 } | |
269 } | |
270 fclose( hat3p ); | |
271 #endif | |
272 | |
273 for( i=0; i<njob; i++ ) | |
274 { | |
275 // fprintf( stderr, "### i = %d\n", i ); | |
276 hat2p = fopen( datafile, "w" ); | |
277 if( !hat2p ) ErrorExit( "Cannot open datafile." ); | |
278 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i ); | |
279 fclose( hat2p ); | |
280 | |
281 // seq1[0] = seq[i]; | |
282 // nlen1[0] = nlen[i]; | |
283 | |
284 hat2p = fopen( queryfile, "w" ); | |
285 if( !hat2p ) ErrorExit( "Cannot open queryfile." ); | |
286 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i ); | |
287 fclose( hat2p ); | |
288 | |
289 | |
290 if( scoremtx == -1 ) | |
291 sprintf( com, "fasta34 -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); | |
292 else | |
293 sprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); | |
294 res = system( com ); | |
295 if( res ) ErrorExit( "error in fasta" ); | |
296 | |
297 | |
298 hat2p = fopen( fastafile, "r" ); | |
299 if( hat2p == NULL ) | |
300 ErrorExit( "file 'fasta.$$' does not exist\n" ); | |
301 res = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] ); | |
302 fclose( hat2p ); | |
303 if( res < njob - i ) | |
304 { | |
305 fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); | |
306 exit( 1 ); | |
307 } | |
308 | |
309 | |
310 if( i == 0 ) | |
311 for( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j]; | |
312 | |
313 | |
314 #if 0 | |
315 { | |
316 int ii, jj; | |
317 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) | |
318 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] ); | |
319 } | |
320 #endif | |
321 fprintf( stderr, "query : %4d\r", i+1 ); | |
322 } | |
323 | |
324 | |
325 | |
326 | |
327 for( i=0; i<njob; i++ ) | |
328 { | |
329 max = mtx[i][i]; | |
330 if( max == 0.0 ) | |
331 { | |
332 for( j=0; j<njob; j++ ) | |
333 mtx2[i][j] = 2.0; | |
334 } | |
335 else | |
336 { | |
337 for( j=0; j<njob; j++ ) | |
338 { | |
339 mtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0; | |
340 // fprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); | |
341 } | |
342 } | |
343 } | |
344 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) | |
345 { | |
346 // fprintf( stdout, "mtx2[%d][%d] = %f, %f\n", i+1, j+1, mtx2[i][j], mtx2[j][i] ); | |
347 mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] ); | |
348 } | |
349 | |
350 #if 0 | |
351 { | |
352 int ii, jj; | |
353 if( i < njob-1 ) for( jj=i+1; jj<njob; jj++ ) | |
354 fprintf( stderr, "mtx2[][] = %f\n", mtx2[i][jj] ); | |
355 } | |
356 #endif | |
357 | |
358 for( i=0; i<njob; i++ ) name[i][0] = '='; | |
359 | |
360 if( disopt ) | |
361 { | |
362 strcpy( b, name[0] ); | |
363 sprintf( name[0], "=query====lgth=%04d-%04d %.*s", nlen[0], howmanyx( seq[0] ), B-30, b ); | |
364 #if 0 | |
365 strins( b, name[0] ); | |
366 #endif | |
367 for( i=1; i<njob; i++ ) | |
368 { | |
369 strcpy( b, name[i] ); | |
370 sprintf( name[i], "=opt=%04d=lgth=%04d-%04d %.*s", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b ); | |
371 #if 0 | |
372 strins( b, name[i] ); | |
373 #endif | |
374 } | |
375 } | |
376 | |
377 hat2p = fopen( hat2file, "w" ); | |
378 if( !hat2p ) ErrorExit( "Cannot open hat2." ); | |
379 WriteHat2_pointer( hat2p, njob, name, mtx2 ); | |
380 fclose( hat2p ); | |
381 | |
382 | |
383 sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile ); | |
384 system( com ); | |
385 | |
386 #if 0 | |
387 sprintf( com, ALNDIR "/supgsdl < %s", hat2file ); | |
388 res = system( com ); | |
389 if( res ) ErrorExit( "error in spgsdl" ); | |
390 #endif | |
391 | |
392 sprintf( com, "mv %s hat2", hat2file ); | |
393 res = system( com ); | |
394 if( res ) ErrorExit( "error in mv" ); | |
395 | |
396 SHOWVERSION; | |
397 exit( 0 ); | |
398 } |