Mercurial > repos > nick > duplex
view mafft/core/mccaskillwrap.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line source
#include "mltaln.h" #define DEBUG 0 static char *whereismccaskillmea; #ifdef enablemultithread typedef struct _thread_arg { int thread_no; int njob; int *jobpospt; int **gapmap; char **nogap; int nlenmax; RNApair ***pairprob; pthread_mutex_t *mutex; } thread_arg_t; #endif void outmccaskill( FILE *fp, RNApair **pairprob, int length ) { int i; RNApair *pt; for( i=0; i<length; i++ ) for( pt=pairprob[i]; pt->bestpos!=-1; pt++ ) { if( pt->bestpos > i ) fprintf( fp, "%d %d %50.40f\n", i, pt->bestpos, pt->bestscore ); } } #if 1 static void readrawmccaskill( FILE *fp, RNApair **pairprob, int length ) { char gett[1000]; int *pairnum; int i; int left, right; float prob; pairnum = (int *)calloc( length, sizeof( int ) ); for( i=0; i<length; i++ ) pairnum[i] = 0; while( 1 ) { fgets( gett, 999, fp ); if( feof( fp ) ) break; if( gett[0] == '>' ) continue; sscanf( gett, "%d %d %f", &left, &right, &prob ); if( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou //fprintf( stderr, "gett = %s\n", gett ); if( left != right && prob > 0.0 ) { pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); pairprob[left][pairnum[left]].bestscore = prob; pairprob[left][pairnum[left]].bestpos = right; pairnum[left]++; pairprob[left][pairnum[left]].bestscore = -1.0; pairprob[left][pairnum[left]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", left, right, prob ); pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); pairprob[right][pairnum[right]].bestscore = prob; pairprob[right][pairnum[right]].bestpos = left; pairnum[right]++; pairprob[right][pairnum[right]].bestscore = -1.0; pairprob[right][pairnum[right]].bestpos = -1; // fprintf( stderr, "%d-%d, %f\n", right, left, prob ); } } free( pairnum ); } #endif #ifdef enablemultithread static void *athread( void *arg ) { thread_arg_t *targ = (thread_arg_t *)arg; int thread_no = targ->thread_no; int njob = targ->njob; int *jobpospt = targ->jobpospt; int **gapmap = targ->gapmap; char **nogap = targ->nogap; int nlenmax = targ->nlenmax; RNApair ***pairprob = targ->pairprob; int i, res; FILE *infp; char *com; char *dirname; dirname = calloc( 100, sizeof( char ) ); com = calloc( 1000, sizeof( char ) ); while( 1 ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob ) { pthread_mutex_unlock( targ->mutex ); // return( NULL ); break; } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); commongappick_record( 1, nogap+i, gapmap[i] ); if( strlen( nogap[i] ) == 0 ) continue; sprintf( dirname, "_%d", i ); sprintf( com, "rm -rf %s", dirname ); system( com ); sprintf( com, "mkdir %s", dirname ); system( com ); fprintf( stderr, "%d / %d (by thread %4d)\n", i+1, njob, thread_no ); sprintf( com, "%s/_mccaskillinorg", dirname ); infp = fopen( com, "w" ); // fprintf( infp, ">in\n%s\n", nogap[i] ); fprintf( infp, ">in\n" ); write1seq( infp, nogap[i] ); fclose( infp ); sprintf( com, "tr -d '\\r' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname ); system( com ); // for cygwin, wakaran if( alg == 'G' ) sprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea ); else sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea ); res = system( com ); if( res ) { fprintf( stderr, "ERROR IN mccaskill_mea\n" ); exit( 1 ); } sprintf( com, "%s/_mccaskillout", dirname ); infp = fopen( com, "r" ); readrawmccaskill( infp, pairprob[i], nlenmax ); fclose( infp ); sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); if( system( com ) ) { fprintf( stderr, "retrying to rmdir\n" ); // nanosleep( 100000 ); sleep( 1 ); system( com ); } } free( dirname ); free( com ); return( NULL ); } #endif void arguments( int argc, char *argv[] ) { int c; nthread = 1; inputfile = NULL; dorp = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; whereismccaskillmea = NULL; alg = 's'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'd': whereismccaskillmea = *++argv; fprintf( stderr, "whereismccaskillmea = %s\n", whereismccaskillmea ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 's': alg = 's'; // use scarna; default break; case 'G': alg = 'G'; // use dafs, instead of scarna break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } } int main( int argc, char *argv[] ) { static char com[10000]; static int *nlen; int left, right; int res; static char **name, **seq, **nogap; static int **gapmap; static int *order; int i, j; FILE *infp; RNApair ***pairprob; RNApair **alnpairprob; RNApair *pairprobpt; RNApair *pt; int *alnpairnum; float prob; int adpos; arguments( argc, argv ); #ifndef enablemultithread nthread = 0; #endif if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; if( !whereismccaskillmea ) whereismccaskillmea = ""; getnumlen( infp ); rewind( infp ); if( dorp != 'd' ) { fprintf( stderr, "nuc only\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*2+1 ); nogap = AllocateCharMtx( njob, nlenmax*2+1 ); gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); order = AllocateIntVec( njob ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); alnpairnum = AllocateIntVec( nlenmax ); for( i=0; i<nlenmax; i++ ) alnpairnum[i] = 0; readData_pointer( infp, name, nlen, seq ); fclose( infp ); for( i=0; i<njob; i++ ) { pairprob[i] = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); for( j=0; j<nlenmax; j++ ) { pairprob[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); pairprob[i][j][0].bestpos = -1; pairprob[i][j][0].bestscore = -1.0; } strcpy( nogap[i], seq[i] ); order[i] = i; } for( j=0; j<nlenmax; j++ ) { alnpairprob[j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); alnpairprob[j][0].bestpos = -1; alnpairprob[j][0].bestscore = -1.0; } constants( njob, seq ); if( alg == 'G' ) fprintf( stderr, "Running DAFS (Sato et al. 2012; http://www.ncrna.org/).\n" ); else fprintf( stderr, "Running mxscarna with the mccaskill_mea mode.\n" ); #ifdef enablemultithread if( nthread > 0 ) { int jobpos; pthread_t *handle; pthread_mutex_t mutex; thread_arg_t *targ; jobpos = 0; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i<nthread; i++ ) { targ[i].thread_no = i; targ[i].njob = njob; targ[i].jobpospt = &jobpos; targ[i].gapmap = gapmap; targ[i].nogap = nogap; targ[i].nlenmax = nlenmax; targ[i].pairprob = pairprob; targ[i].mutex = &mutex; // athread( targ ); pthread_create( handle+i, NULL, athread, (void *)(targ+i) ); } for( i=0; i<nthread; i++ ) { pthread_join( handle[i], NULL ); } pthread_mutex_destroy( &mutex ); free( handle ); free( targ ); for( i=0; i<njob; i++ ) { fprintf( stdout, ">%d\n", i ); outmccaskill( stdout, pairprob[i], nlenmax ); } } else #endif { for( i=0; i<njob; i++ ) { fprintf( stderr, "%d / %d\n", i+1, njob ); commongappick_record( 1, nogap+i, gapmap[i] ); if( strlen( nogap[i] ) == 0 ) { fprintf( stdout, ">%d\n", i ); continue; } infp = fopen( "_mccaskillinorg", "w" ); // fprintf( infp, ">in\n%s\n", nogap[i] ); fprintf( infp, ">in\n" ); write1seq( infp, nogap[i] ); fclose( infp ); system( "tr -d '\\r' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran if( alg == 'G' ) sprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea ); else sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea ); res = system( com ); if( res ) { fprintf( stderr, "ERROR IN mccaskill_mea\n" ); exit( 1 ); } infp = fopen( "_mccaskillout", "r" ); readrawmccaskill( infp, pairprob[i], nlenmax ); fclose( infp ); fprintf( stdout, ">%d\n", i ); outmccaskill( stdout, pairprob[i], nlenmax ); } } for( i=0; i<njob; i++ ) { for( j=0; j<nlen[i]; j++ ) for( pairprobpt=pairprob[i][j]; pairprobpt->bestpos!=-1; pairprobpt++ ) { left = gapmap[i][j]; right = gapmap[i][pairprobpt->bestpos]; prob = pairprobpt->bestscore; for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) if( pt->bestpos == right ) break; if( pt->bestpos == -1 ) { alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); adpos = alnpairnum[left]; alnpairnum[left]++; alnpairprob[left][adpos].bestscore = 0.0; alnpairprob[left][adpos].bestpos = right; alnpairprob[left][adpos+1].bestscore = -1.0; alnpairprob[left][adpos+1].bestpos = -1; pt = alnpairprob[left]+adpos; } else adpos = pt-alnpairprob[left]; pt->bestscore += prob; if( pt->bestpos != right ) { fprintf( stderr, "okashii!\n" ); exit( 1 ); } // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); } } for( i=0; i<njob; i++ ) { for( j=0; j<nlenmax; j++ ) free( pairprob[i][j] ); free( pairprob[i] ); } free( pairprob ); for( j=0; j<nlenmax; j++ ) free( alnpairprob[j] ); free( alnpairprob ); free( alnpairnum ); fprintf( stderr, "%d thread(s)\n", nthread ); return( 0 ); #if 0 fprintf( stdout, "result=\n" ); for( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ ) { pairprobpt->bestscore /= (float)njob; left = i; right = pairprobpt->bestpos; prob = pairprobpt->bestscore; fprintf( stdout, "%d-%d, %f\n", left, right, prob ); } return( 0 ); #endif }