Mercurial > repos > nick > duplex
comparison mafft/core/dndblast.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 17:836fa4fe9494 | 18:e4d75f9efb90 |
|---|---|
| 1 #include "mltaln.h" | |
| 2 #include <sys/types.h> | |
| 3 #include <unistd.h> | |
| 4 #define DEBUG 0 | |
| 5 #define TEST 0 | |
| 6 | |
| 7 | |
| 8 int howmanyx( char *s ) | |
| 9 { | |
| 10 int val = 0; | |
| 11 if( scoremtx == -1 ) | |
| 12 { | |
| 13 do | |
| 14 { | |
| 15 if( !strchr( "atgcuATGCU", *s ) ) val++; | |
| 16 } while( *++s ); | |
| 17 } | |
| 18 else | |
| 19 { | |
| 20 do | |
| 21 { | |
| 22 if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; | |
| 23 } while( *++s ); | |
| 24 } | |
| 25 return( val ); | |
| 26 } | |
| 27 | |
| 28 void arguments( int argc, char *argv[] ) | |
| 29 { | |
| 30 int c; | |
| 31 | |
| 32 inputfile = NULL; | |
| 33 disopt = 0; | |
| 34 divpairscore = 0; | |
| 35 | |
| 36 while( --argc > 0 && (*++argv)[0] == '-' ) | |
| 37 { | |
| 38 while ( (c = *++argv[0]) ) | |
| 39 { | |
| 40 switch( c ) | |
| 41 { | |
| 42 case 'i': | |
| 43 inputfile = *++argv; | |
| 44 fprintf( stderr, "inputfile = %s\n", inputfile ); | |
| 45 --argc; | |
| 46 goto nextoption; | |
| 47 case 'I': | |
| 48 disopt = 1; | |
| 49 break; | |
| 50 default: | |
| 51 fprintf( stderr, "illegal option %c\n", c ); | |
| 52 argc = 0; | |
| 53 break; | |
| 54 } | |
| 55 } | |
| 56 nextoption: | |
| 57 ; | |
| 58 | |
| 59 } | |
| 60 if( argc != 0 ) | |
| 61 { | |
| 62 fprintf( stderr, "options: -i\n" ); | |
| 63 exit( 1 ); | |
| 64 } | |
| 65 } | |
| 66 | |
| 67 int main( int argc, char *argv[] ) | |
| 68 { | |
| 69 int ktuple; | |
| 70 int i, j; | |
| 71 FILE *infp; | |
| 72 FILE *hat2p; | |
| 73 FILE *hat3p; | |
| 74 char **seq = NULL; // by D.Mathog | |
| 75 char **seq1; | |
| 76 static char **name; | |
| 77 static char **name1; | |
| 78 static int nlen1[M]; | |
| 79 double **mtx; | |
| 80 double **mtx2; | |
| 81 static int nlen[M]; | |
| 82 char b[B]; | |
| 83 double max; | |
| 84 char com[1000]; | |
| 85 int opt[M]; | |
| 86 int res; | |
| 87 char *home; | |
| 88 char queryfile[B]; | |
| 89 char datafile[B]; | |
| 90 char fastafile[B]; | |
| 91 char hat2file[B]; | |
| 92 int pid = (int)getpid(); | |
| 93 LocalHom **localhomtable, *tmpptr; | |
| 94 #if 1 | |
| 95 home = getenv( "HOME" ); | |
| 96 #else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ | |
| 97 home = NULL; | |
| 98 #endif | |
| 99 | |
| 100 #if DEBUG | |
| 101 if( home ) fprintf( stderr, "home = %s\n", home ); | |
| 102 #endif | |
| 103 if( !home ) home = ""; | |
| 104 sprintf( queryfile, "%s/tmp/query-%d", home, pid ); | |
| 105 sprintf( datafile, "%s/tmp/data-%d", home, pid ); | |
| 106 sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); | |
| 107 sprintf( hat2file, "hat2-%d", pid ); | |
| 108 | |
| 109 | |
| 110 arguments( argc, argv ); | |
| 111 | |
| 112 if( inputfile ) | |
| 113 { | |
| 114 infp = fopen( inputfile, "r" ); | |
| 115 if( !infp ) | |
| 116 { | |
| 117 fprintf( stderr, "Cannot open %s\n", inputfile ); | |
| 118 exit( 1 ); | |
| 119 } | |
| 120 } | |
| 121 else | |
| 122 infp = stdin; | |
| 123 #if 0 | |
| 124 PreRead( infp, &njob, &nlenmax ); | |
| 125 #else | |
| 126 dorp = NOTSPECIFIED; | |
| 127 getnumlen( infp ); | |
| 128 #endif | |
| 129 | |
| 130 if( dorp == 'd' ) | |
| 131 { | |
| 132 scoremtx = -1; | |
| 133 pamN = NOTSPECIFIED; | |
| 134 } | |
| 135 else | |
| 136 { | |
| 137 nblosum = 62; | |
| 138 scoremtx = 1; | |
| 139 } | |
| 140 constants( njob, seq ); | |
| 141 | |
| 142 rewind( infp ); | |
| 143 | |
| 144 name = AllocateCharMtx( njob, B+1 ); | |
| 145 name1 = AllocateCharMtx( njob, B+1 ); | |
| 146 seq = AllocateCharMtx( njob, nlenmax+1 ); | |
| 147 seq1 = AllocateCharMtx( 2, nlenmax+1 ); | |
| 148 mtx = AllocateDoubleMtx( njob, njob ); | |
| 149 mtx2 = AllocateDoubleMtx( njob, njob ); | |
| 150 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); | |
| 151 for( i=0; i<njob; i++) | |
| 152 { | |
| 153 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); | |
| 154 for( j=0; j<njob; j++) | |
| 155 { | |
| 156 localhomtable[i][j].start1 = -1; | |
| 157 localhomtable[i][j].end1 = -1; | |
| 158 localhomtable[i][j].start2 = -1; | |
| 159 localhomtable[i][j].end2 = -1; | |
| 160 localhomtable[i][j].opt = -1.0; | |
| 161 localhomtable[i][j].next = NULL; | |
| 162 | |
| 163 } | |
| 164 } | |
| 165 | |
| 166 #if 0 | |
| 167 FRead( infp, name, nlen, seq ); | |
| 168 #else | |
| 169 readData_pointer( infp, name, nlen, seq ); | |
| 170 #endif | |
| 171 fclose( infp ); | |
| 172 | |
| 173 if( scoremtx == -1 ) ktuple = 6; | |
| 174 else ktuple = 1; | |
| 175 | |
| 176 for( i=0; i<njob; i++ ) | |
| 177 { | |
| 178 gappick0( seq1[0], seq[i] ); | |
| 179 strcpy( seq[i], seq1[0] ); | |
| 180 } | |
| 181 for( j=0; j<njob; j++ ) | |
| 182 { | |
| 183 sprintf( name1[j], "+==========+%d ", j ); | |
| 184 nlen1[j] = nlen[j]; | |
| 185 } | |
| 186 | |
| 187 for( i=0; i<njob; i++ ) | |
| 188 { | |
| 189 // fprintf( stderr, "### i = %d\n", i ); | |
| 190 | |
| 191 if( i % 10 == 0 ) | |
| 192 { | |
| 193 fprintf( stderr, "formatting .. " ); | |
| 194 hat2p = fopen( datafile, "w" ); | |
| 195 if( !hat2p ) ErrorExit( "Cannot open datafile." ); | |
| 196 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i ); | |
| 197 fclose( hat2p ); | |
| 198 | |
| 199 if( scoremtx == -1 ) | |
| 200 sprintf( com, "formatdb -p f -i %s -o F", datafile ); | |
| 201 else | |
| 202 sprintf( com, "formatdb -i %s -o F", datafile ); | |
| 203 system( com ); | |
| 204 fprintf( stderr, "done.\n" ); | |
| 205 } | |
| 206 | |
| 207 hat2p = fopen( queryfile, "w" ); | |
| 208 if( !hat2p ) ErrorExit( "Cannot open queryfile." ); | |
| 209 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i ); | |
| 210 fclose( hat2p ); | |
| 211 | |
| 212 | |
| 213 if( scoremtx == -1 ) | |
| 214 sprintf( com, "blastall -e 1e10 -p blastn -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile ); | |
| 215 else | |
| 216 sprintf( com, "blastall -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s", queryfile, datafile, fastafile ); | |
| 217 res = system( com ); | |
| 218 if( res ) ErrorExit( "error in fasta" ); | |
| 219 | |
| 220 | |
| 221 hat2p = fopen( fastafile, "r" ); | |
| 222 if( hat2p == NULL ) | |
| 223 ErrorExit( "file 'fasta.$$' does not exist\n" ); | |
| 224 res = ReadBlastm7( hat2p, mtx[i], i, name1, localhomtable[i] ); | |
| 225 fclose( hat2p ); | |
| 226 | |
| 227 #if 0 | |
| 228 for( j=0; j<njob; j++ ) | |
| 229 { | |
| 230 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) | |
| 231 { | |
| 232 if( tmpptr->opt == -1.0 ) continue; | |
| 233 // fprintf( stderr, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); | |
| 234 } | |
| 235 } | |
| 236 #endif | |
| 237 | |
| 238 if( res < njob-i+i%10 ) | |
| 239 { | |
| 240 fprintf( stderr, "WARNING: count (blast) = %d < %d\n", res, njob-i+i%10 ); | |
| 241 } | |
| 242 | |
| 243 | |
| 244 #if 0 | |
| 245 { | |
| 246 int ii, jj; | |
| 247 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) | |
| 248 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] ); | |
| 249 } | |
| 250 #endif | |
| 251 fprintf( stderr, "query : %4d / %d\n", i+1, njob ); | |
| 252 } | |
| 253 | |
| 254 #if 1 | |
| 255 fprintf( stderr, "##### writing hat3\n" ); | |
| 256 hat3p = fopen( "hat3", "w" ); | |
| 257 if( !hat3p ) ErrorExit( "Cannot open hat3." ); | |
| 258 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) | |
| 259 { | |
| 260 // fprintf( stderr, "mtx[%d][%d] = %f, mtx[%d][%d] = %f\n", i, j, mtx[i][j], j, i, mtx[j][i] ); | |
| 261 if( i == j ) continue; | |
| 262 if( mtx[i][j] == mtx[j][i] && i > j ) continue; | |
| 263 if( mtx[j][i] > mtx[i][j] ) continue; | |
| 264 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) | |
| 265 { | |
| 266 if( tmpptr->opt == -1.0 ) continue; | |
| 267 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); | |
| 268 } | |
| 269 } | |
| 270 fclose( hat3p ); | |
| 271 #endif | |
| 272 | |
| 273 for( i=0; i<njob; i++ ) | |
| 274 { | |
| 275 // fprintf( stderr, "### i = %d\n", i ); | |
| 276 hat2p = fopen( datafile, "w" ); | |
| 277 if( !hat2p ) ErrorExit( "Cannot open datafile." ); | |
| 278 WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i ); | |
| 279 fclose( hat2p ); | |
| 280 | |
| 281 // seq1[0] = seq[i]; | |
| 282 // nlen1[0] = nlen[i]; | |
| 283 | |
| 284 hat2p = fopen( queryfile, "w" ); | |
| 285 if( !hat2p ) ErrorExit( "Cannot open queryfile." ); | |
| 286 WriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i ); | |
| 287 fclose( hat2p ); | |
| 288 | |
| 289 | |
| 290 if( scoremtx == -1 ) | |
| 291 sprintf( com, "fasta34 -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); | |
| 292 else | |
| 293 sprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile ); | |
| 294 res = system( com ); | |
| 295 if( res ) ErrorExit( "error in fasta" ); | |
| 296 | |
| 297 | |
| 298 hat2p = fopen( fastafile, "r" ); | |
| 299 if( hat2p == NULL ) | |
| 300 ErrorExit( "file 'fasta.$$' does not exist\n" ); | |
| 301 res = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] ); | |
| 302 fclose( hat2p ); | |
| 303 if( res < njob - i ) | |
| 304 { | |
| 305 fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); | |
| 306 exit( 1 ); | |
| 307 } | |
| 308 | |
| 309 | |
| 310 if( i == 0 ) | |
| 311 for( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j]; | |
| 312 | |
| 313 | |
| 314 #if 0 | |
| 315 { | |
| 316 int ii, jj; | |
| 317 if( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) | |
| 318 fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] ); | |
| 319 } | |
| 320 #endif | |
| 321 fprintf( stderr, "query : %4d\r", i+1 ); | |
| 322 } | |
| 323 | |
| 324 | |
| 325 | |
| 326 | |
| 327 for( i=0; i<njob; i++ ) | |
| 328 { | |
| 329 max = mtx[i][i]; | |
| 330 if( max == 0.0 ) | |
| 331 { | |
| 332 for( j=0; j<njob; j++ ) | |
| 333 mtx2[i][j] = 2.0; | |
| 334 } | |
| 335 else | |
| 336 { | |
| 337 for( j=0; j<njob; j++ ) | |
| 338 { | |
| 339 mtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0; | |
| 340 // fprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); | |
| 341 } | |
| 342 } | |
| 343 } | |
| 344 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) | |
| 345 { | |
| 346 // fprintf( stdout, "mtx2[%d][%d] = %f, %f\n", i+1, j+1, mtx2[i][j], mtx2[j][i] ); | |
| 347 mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] ); | |
| 348 } | |
| 349 | |
| 350 #if 0 | |
| 351 { | |
| 352 int ii, jj; | |
| 353 if( i < njob-1 ) for( jj=i+1; jj<njob; jj++ ) | |
| 354 fprintf( stderr, "mtx2[][] = %f\n", mtx2[i][jj] ); | |
| 355 } | |
| 356 #endif | |
| 357 | |
| 358 for( i=0; i<njob; i++ ) name[i][0] = '='; | |
| 359 | |
| 360 if( disopt ) | |
| 361 { | |
| 362 strcpy( b, name[0] ); | |
| 363 sprintf( name[0], "=query====lgth=%04d-%04d %.*s", nlen[0], howmanyx( seq[0] ), B-30, b ); | |
| 364 #if 0 | |
| 365 strins( b, name[0] ); | |
| 366 #endif | |
| 367 for( i=1; i<njob; i++ ) | |
| 368 { | |
| 369 strcpy( b, name[i] ); | |
| 370 sprintf( name[i], "=opt=%04d=lgth=%04d-%04d %.*s", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b ); | |
| 371 #if 0 | |
| 372 strins( b, name[i] ); | |
| 373 #endif | |
| 374 } | |
| 375 } | |
| 376 | |
| 377 hat2p = fopen( hat2file, "w" ); | |
| 378 if( !hat2p ) ErrorExit( "Cannot open hat2." ); | |
| 379 WriteHat2_pointer( hat2p, njob, name, mtx2 ); | |
| 380 fclose( hat2p ); | |
| 381 | |
| 382 | |
| 383 sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile ); | |
| 384 system( com ); | |
| 385 | |
| 386 #if 0 | |
| 387 sprintf( com, ALNDIR "/supgsdl < %s", hat2file ); | |
| 388 res = system( com ); | |
| 389 if( res ) ErrorExit( "error in spgsdl" ); | |
| 390 #endif | |
| 391 | |
| 392 sprintf( com, "mv %s hat2", hat2file ); | |
| 393 res = system( com ); | |
| 394 if( res ) ErrorExit( "error in mv" ); | |
| 395 | |
| 396 SHOWVERSION; | |
| 397 exit( 0 ); | |
| 398 } |
