Mercurial > repos > nick > duplex
diff mafft/core/mccaskillwrap.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mafft/core/mccaskillwrap.c Thu Feb 02 18:44:31 2017 -0500 @@ -0,0 +1,451 @@ +#include "mltaln.h" + +#define DEBUG 0 + +static char *whereismccaskillmea; + +#ifdef enablemultithread +typedef struct _thread_arg +{ + int thread_no; + int njob; + int *jobpospt; + int **gapmap; + char **nogap; + int nlenmax; + RNApair ***pairprob; + pthread_mutex_t *mutex; +} thread_arg_t; +#endif + +void outmccaskill( FILE *fp, RNApair **pairprob, int length ) +{ + int i; + RNApair *pt; + for( i=0; i<length; i++ ) for( pt=pairprob[i]; pt->bestpos!=-1; pt++ ) + { + if( pt->bestpos > i ) + fprintf( fp, "%d %d %50.40f\n", i, pt->bestpos, pt->bestscore ); + } +} + +#if 1 +static void readrawmccaskill( FILE *fp, RNApair **pairprob, int length ) +{ + char gett[1000]; + int *pairnum; + int i; + int left, right; + float prob; + + pairnum = (int *)calloc( length, sizeof( int ) ); + for( i=0; i<length; i++ ) pairnum[i] = 0; + + while( 1 ) + { + fgets( gett, 999, fp ); + if( feof( fp ) ) break; + if( gett[0] == '>' ) continue; + sscanf( gett, "%d %d %f", &left, &right, &prob ); + if( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou +//fprintf( stderr, "gett = %s\n", gett ); + + if( left != right && prob > 0.0 ) + { + pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); + pairprob[left][pairnum[left]].bestscore = prob; + pairprob[left][pairnum[left]].bestpos = right; + pairnum[left]++; + pairprob[left][pairnum[left]].bestscore = -1.0; + pairprob[left][pairnum[left]].bestpos = -1; +// fprintf( stderr, "%d-%d, %f\n", left, right, prob ); + + pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); + pairprob[right][pairnum[right]].bestscore = prob; + pairprob[right][pairnum[right]].bestpos = left; + pairnum[right]++; + pairprob[right][pairnum[right]].bestscore = -1.0; + pairprob[right][pairnum[right]].bestpos = -1; +// fprintf( stderr, "%d-%d, %f\n", right, left, prob ); + } + } + free( pairnum ); +} +#endif + +#ifdef enablemultithread +static void *athread( void *arg ) +{ + thread_arg_t *targ = (thread_arg_t *)arg; + int thread_no = targ->thread_no; + int njob = targ->njob; + int *jobpospt = targ->jobpospt; + int **gapmap = targ->gapmap; + char **nogap = targ->nogap; + int nlenmax = targ->nlenmax; + RNApair ***pairprob = targ->pairprob; + + int i, res; + FILE *infp; + char *com; + char *dirname; + + dirname = calloc( 100, sizeof( char ) ); + com = calloc( 1000, sizeof( char ) ); + + + while( 1 ) + { + pthread_mutex_lock( targ->mutex ); + i = *jobpospt; + if( i == njob ) + { + pthread_mutex_unlock( targ->mutex ); +// return( NULL ); + break; + } + *jobpospt = i+1; + pthread_mutex_unlock( targ->mutex ); + + commongappick_record( 1, nogap+i, gapmap[i] ); + if( strlen( nogap[i] ) == 0 ) continue; + + sprintf( dirname, "_%d", i ); + sprintf( com, "rm -rf %s", dirname ); + system( com ); + sprintf( com, "mkdir %s", dirname ); + system( com ); + + fprintf( stderr, "%d / %d (by thread %4d)\n", i+1, njob, thread_no ); + sprintf( com, "%s/_mccaskillinorg", dirname ); + infp = fopen( com, "w" ); +// fprintf( infp, ">in\n%s\n", nogap[i] ); + fprintf( infp, ">in\n" ); + write1seq( infp, nogap[i] ); + fclose( infp ); + + sprintf( com, "tr -d '\\r' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname ); + system( com ); // for cygwin, wakaran + if( alg == 'G' ) + sprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea ); + else + sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea ); + res = system( com ); + + if( res ) + { + fprintf( stderr, "ERROR IN mccaskill_mea\n" ); + exit( 1 ); + } + + sprintf( com, "%s/_mccaskillout", dirname ); + infp = fopen( com, "r" ); + readrawmccaskill( infp, pairprob[i], nlenmax ); + fclose( infp ); + + sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); + if( system( com ) ) + { + fprintf( stderr, "retrying to rmdir\n" ); +// nanosleep( 100000 ); + sleep( 1 ); + system( com ); + } + } + free( dirname ); + free( com ); + return( NULL ); +} +#endif + +void arguments( int argc, char *argv[] ) +{ + int c; + nthread = 1; + inputfile = NULL; + dorp = NOTSPECIFIED; + kimuraR = NOTSPECIFIED; + pamN = NOTSPECIFIED; + whereismccaskillmea = NULL; + alg = 's'; + + while( --argc > 0 && (*++argv)[0] == '-' ) + { + while ( (c = *++argv[0]) ) + { + switch( c ) + { + case 'i': + inputfile = *++argv; + fprintf( stderr, "inputfile = %s\n", inputfile ); + --argc; + goto nextoption; + case 'd': + whereismccaskillmea = *++argv; + fprintf( stderr, "whereismccaskillmea = %s\n", whereismccaskillmea ); + --argc; + goto nextoption; + case 'C': + nthread = myatoi( *++argv ); + fprintf( stderr, "nthread = %d\n", nthread ); + --argc; + goto nextoption; + case 's': + alg = 's'; // use scarna; default + break; + case 'G': + alg = 'G'; // use dafs, instead of scarna + break; + default: + fprintf( stderr, "illegal option %c\n", c ); + argc = 0; + break; + } + } + nextoption: + ; + } + if( argc != 0 ) + { + fprintf( stderr, "options: Check source file !\n" ); + exit( 1 ); + } +} + + +int main( int argc, char *argv[] ) +{ + static char com[10000]; + static int *nlen; + int left, right; + int res; + static char **name, **seq, **nogap; + static int **gapmap; + static int *order; + int i, j; + FILE *infp; + RNApair ***pairprob; + RNApair **alnpairprob; + RNApair *pairprobpt; + RNApair *pt; + int *alnpairnum; + float prob; + int adpos; + + arguments( argc, argv ); +#ifndef enablemultithread + nthread = 0; +#endif + + if( inputfile ) + { + infp = fopen( inputfile, "r" ); + if( !infp ) + { + fprintf( stderr, "Cannot open %s\n", inputfile ); + exit( 1 ); + } + } + else + infp = stdin; + + if( !whereismccaskillmea ) + whereismccaskillmea = ""; + + getnumlen( infp ); + rewind( infp ); + + if( dorp != 'd' ) + { + fprintf( stderr, "nuc only\n" ); + exit( 1 ); + } + + seq = AllocateCharMtx( njob, nlenmax*2+1 ); + nogap = AllocateCharMtx( njob, nlenmax*2+1 ); + gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); + order = AllocateIntVec( njob ); + name = AllocateCharMtx( njob, B+1 ); + nlen = AllocateIntVec( njob ); + pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); + alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); + alnpairnum = AllocateIntVec( nlenmax ); + + for( i=0; i<nlenmax; i++ ) alnpairnum[i] = 0; + + readData_pointer( infp, name, nlen, seq ); + fclose( infp ); + + for( i=0; i<njob; i++ ) + { + pairprob[i] = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); + for( j=0; j<nlenmax; j++ ) + { + pairprob[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); + pairprob[i][j][0].bestpos = -1; + pairprob[i][j][0].bestscore = -1.0; + } + strcpy( nogap[i], seq[i] ); + order[i] = i; + } + for( j=0; j<nlenmax; j++ ) + { + alnpairprob[j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); + alnpairprob[j][0].bestpos = -1; + alnpairprob[j][0].bestscore = -1.0; + } + + + constants( njob, seq ); + + if( alg == 'G' ) + fprintf( stderr, "Running DAFS (Sato et al. 2012; http://www.ncrna.org/).\n" ); + else + fprintf( stderr, "Running mxscarna with the mccaskill_mea mode.\n" ); +#ifdef enablemultithread + if( nthread > 0 ) + { + int jobpos; + pthread_t *handle; + pthread_mutex_t mutex; + thread_arg_t *targ; + jobpos = 0; + + targ = calloc( nthread, sizeof( thread_arg_t ) ); + handle = calloc( nthread, sizeof( pthread_t ) ); + pthread_mutex_init( &mutex, NULL ); + + for( i=0; i<nthread; i++ ) + { + targ[i].thread_no = i; + targ[i].njob = njob; + targ[i].jobpospt = &jobpos; + targ[i].gapmap = gapmap; + targ[i].nogap = nogap; + targ[i].nlenmax = nlenmax; + targ[i].pairprob = pairprob; + targ[i].mutex = &mutex; + +// athread( targ ); + pthread_create( handle+i, NULL, athread, (void *)(targ+i) ); + + } + + for( i=0; i<nthread; i++ ) + { + pthread_join( handle[i], NULL ); + } + pthread_mutex_destroy( &mutex ); + + free( handle ); + free( targ ); + + + for( i=0; i<njob; i++ ) + { + fprintf( stdout, ">%d\n", i ); + outmccaskill( stdout, pairprob[i], nlenmax ); + } + } + else +#endif + { + for( i=0; i<njob; i++ ) + { + fprintf( stderr, "%d / %d\n", i+1, njob ); + commongappick_record( 1, nogap+i, gapmap[i] ); + if( strlen( nogap[i] ) == 0 ) + { + fprintf( stdout, ">%d\n", i ); + continue; + } + + infp = fopen( "_mccaskillinorg", "w" ); +// fprintf( infp, ">in\n%s\n", nogap[i] ); + fprintf( infp, ">in\n" ); + write1seq( infp, nogap[i] ); + fclose( infp ); + + system( "tr -d '\\r' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran + if( alg == 'G' ) + sprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea ); + else + sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea ); + res = system( com ); + + if( res ) + { + fprintf( stderr, "ERROR IN mccaskill_mea\n" ); + exit( 1 ); + } + + infp = fopen( "_mccaskillout", "r" ); + readrawmccaskill( infp, pairprob[i], nlenmax ); + fclose( infp ); + fprintf( stdout, ">%d\n", i ); + outmccaskill( stdout, pairprob[i], nlenmax ); + } + } + + for( i=0; i<njob; i++ ) + { + for( j=0; j<nlen[i]; j++ ) for( pairprobpt=pairprob[i][j]; pairprobpt->bestpos!=-1; pairprobpt++ ) + { + left = gapmap[i][j]; + right = gapmap[i][pairprobpt->bestpos]; + prob = pairprobpt->bestscore; + + for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) + if( pt->bestpos == right ) break; + + if( pt->bestpos == -1 ) + { + alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); + adpos = alnpairnum[left]; + alnpairnum[left]++; + alnpairprob[left][adpos].bestscore = 0.0; + alnpairprob[left][adpos].bestpos = right; + alnpairprob[left][adpos+1].bestscore = -1.0; + alnpairprob[left][adpos+1].bestpos = -1; + pt = alnpairprob[left]+adpos; + } + else + adpos = pt-alnpairprob[left]; + + pt->bestscore += prob; + if( pt->bestpos != right ) + { + fprintf( stderr, "okashii!\n" ); + exit( 1 ); + } +// fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); + } + } + + for( i=0; i<njob; i++ ) + { + for( j=0; j<nlenmax; j++ ) free( pairprob[i][j] ); + free( pairprob[i] ); + } + free( pairprob ); + for( j=0; j<nlenmax; j++ ) free( alnpairprob[j] ); + free( alnpairprob ); + free( alnpairnum ); + fprintf( stderr, "%d thread(s)\n", nthread ); + return( 0 ); + +#if 0 + fprintf( stdout, "result=\n" ); + + for( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ ) + { + pairprobpt->bestscore /= (float)njob; + left = i; + right = pairprobpt->bestpos; + prob = pairprobpt->bestscore; + fprintf( stdout, "%d-%d, %f\n", left, right, prob ); + } + + return( 0 ); +#endif +}
