view mafft/core/makedirectionlist.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line source

#include "mltaln.h"

#define DEBUG 0
#define IODEBUG 0
#define SCOREOUT 0

#define END_OF_VEC -1

int nadd;
float thresholdtorev;
int dodp;
int addfragment;

typedef struct _thread_arg
{
	int iend; 
	char **seq;
	char *tmpseq;
	int *res;
	int **spointt;
	short *table1;
	int iq;
#ifdef enablemultithread
	int *jshare;
	int thread_no;
	pthread_mutex_t *mutex_counter;
#endif
} thread_arg_t;



void arguments( int argc, char *argv[] )
{
    int c;

	nthread = 1;
	inputfile = NULL;
	nadd = 0;
	dodp = 0;
	alg = 'a';
	alg = 'm';
	dorp = NOTSPECIFIED;
	fmodel = 0;
//	ppenalty = (int)( -2.0 * 1000 - 0.5 );
//	ppenalty_ex = (int)( -0.1 * 1000 - 0.5 );
//	poffset = (int)( 0.1 * 1000 - 0.5 ); 
	ppenalty = NOTSPECIFIED;
	ppenalty_ex = NOTSPECIFIED;
	poffset = NOTSPECIFIED;
	kimuraR = 2;
	pamN = 200;
	thresholdtorev = 0.1;
	addfragment = 0;


    while( --argc > 0 && (*++argv)[0] == '-' )
	{
        while ( (c = *++argv[0]) )
		{
            switch( c )
            {
				case 'i':
					inputfile = *++argv;
					fprintf( stderr, "inputfile = %s\n", inputfile );
					--argc;
					goto nextoption;
				case 'I':
					nadd = myatoi( *++argv );
					fprintf( stderr, "nadd = %d\n", nadd );
					--argc;
					goto nextoption;
				case 'C':
					nthread = myatoi( *++argv );
					fprintf( stderr, "nthread = %d\n", nthread );
					--argc; 
					goto nextoption;
				case 'f':
					ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
//					fprintf( stderr, "ppenalty = %d\n", ppenalty );
					--argc;
					goto nextoption;
				case 'g':
					ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
					fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
					--argc;
					goto nextoption;
				case 'h':
					poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
//					fprintf( stderr, "poffset = %d\n", poffset );
					--argc;
					goto nextoption;
				case 'k':
					kimuraR = myatoi( *++argv );
					fprintf( stderr, "kappa = %d\n", kimuraR );
					--argc;
					goto nextoption;
				case 'j':
					pamN = myatoi( *++argv );
					scoremtx = 0;
					TMorJTT = JTT;
					fprintf( stderr, "jtt/kimura %d\n", pamN );
					--argc;
					goto nextoption;
				case 't':
					thresholdtorev = atof( *++argv );
					fprintf( stderr, "thresholdtorev = %f\n", thresholdtorev );
					--argc; 
					goto nextoption;
				case 'd':
					dodp = 1;
					break;
				case 'F':
					addfragment = 1;
					break;
#if 1
				case 'a':
					fmodel = 1;
					break;
#endif
				case 'S':
					alg = 'S';
					break;
				case 'M':
					alg = 'M';
					break;
				case 'm':
					alg = 'm';
					break;
				case 'G':
					alg = 'G';
					break;
				case 'D':
					dorp = 'd';
					break;
				case 'P':
					dorp = 'p';
					break;
                default:
                    fprintf( stderr, "illegal option %c\n", c );
                    argc = 0;
                    break;
            }
		}
		nextoption:
			;
	}
    if( argc == 1 )
    {
        cut = atof( (*argv) );
        argc--;
    }
    if( argc != 0 ) 
    {
        fprintf( stderr, "options: Check source file !\n" );
        exit( 1 );
    }
	if( tbitr == 1 && outgap == 0 )
	{
		fprintf( stderr, "conflicting options : o, m or u\n" );
		exit( 1 );
	}
}




static int maxl;
static int tsize;

void seq_grp_nuc( int *grp, char *seq )
{
	int tmp;
	int *grpbk = grp;
	while( *seq )
	{
		tmp = amino_grp[(int)*seq++];
		if( tmp < 4 )
			*grp++ = tmp;
		else
//			fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
			;
	}
	*grp = END_OF_VEC;
	if( grp - grpbk < 6 )
	{
//		fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
//		exit( 1 );
		*grpbk = -1;
	}
}

void seq_grp( int *grp, char *seq )
{
	int tmp;
	int *grpbk = grp;
	while( *seq )
	{
		tmp = amino_grp[(int)*seq++];
		if( tmp < 6 )
			*grp++ = tmp;
		else
//			fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
			;
	}
	*grp = END_OF_VEC;
	if( grp - grpbk < 6 )
	{
//		fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
//		exit( 1 );
		*grpbk = -1;
	}
}

void makecompositiontable_p( short *table, int *pointt )
{
	int point;

	while( ( point = *pointt++ ) != END_OF_VEC )
		table[point]++;
}


void makepointtable_nuc( int *pointt, int *n )
{
	int point;
	register int *p;

	if( *n == -1 )
	{
		*pointt = -1;
		return;
	}

	p = n;
	point  = *n++ *  1024;
	point += *n++ *   256;
	point += *n++ *    64;
	point += *n++ *    16;
	point += *n++ *     4;
	point += *n++;
	*pointt++ = point;

	while( *n != END_OF_VEC )
	{
		point -= *p++ * 1024;
		point *= 4;
		point += *n++;
		*pointt++ = point;
	}
	*pointt = END_OF_VEC;
}

void makepointtable( int *pointt, int *n )
{
	int point;
	register int *p;

	if( *n == -1 )
	{
		*pointt = -1;
		return;
	}

	p = n;
	point  = *n++ *  7776;
	point += *n++ *  1296;
	point += *n++ *   216;
	point += *n++ *    36;
	point += *n++ *     6;
	point += *n++;
	*pointt++ = point;

	while( *n != END_OF_VEC )
	{
		point -= *p++ * 7776;
		point *= 6;
		point += *n++;
		*pointt++ = point;
	}
	*pointt = END_OF_VEC;
}

static int commonsextet_p2( short *table, int *pointt )
{
	int value = 0;
	short tmp;
	int point;
	short *memo;
	int *ct;
	int *cp;

	if( *pointt == -1 )
		return( 0 );

	memo = (short *)calloc( tsize, sizeof( short ) );
	if( !memo ) ErrorExit( "Cannot allocate memo\n" );
	ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // chuui!!
	if( !ct ) ErrorExit( "Cannot allocate memo\n" );

	cp = ct;
	while( ( point = *pointt++ ) != END_OF_VEC )
	{
		tmp = memo[point]++;
		if( tmp < table[point] )
			value++;
		if( tmp == 0 ) *cp++ = point;
	}
	*cp = END_OF_VEC;
	
	cp =  ct;
	while( *cp != END_OF_VEC )
		memo[*cp++] = 0;

	free( memo );
	free( ct );
	return( value );
}

static void	*directionthread( void *arg )
{
	thread_arg_t *targ = (thread_arg_t *)arg;
	int iend = targ->iend;
	char **seq = targ->seq;
	char *tmpseq = targ->tmpseq;
	int *res = targ->res;
	int **spointt = targ->spointt;
	short *table1 = targ->table1;
	int iq = targ->iq;
#ifdef enablemultithread
	int thread_no = targ->thread_no;
	int *jshare = targ->jshare; 
#endif
	int j;
	char **mseq1, **mseq2;


	if( dodp ) // nakuserukamo
	{
		mseq1 = AllocateCharMtx( 1, 0 );
		mseq2 = AllocateCharMtx( 1, 0 );
	}

	j = -1;
	while( 1 )
	{
#ifdef enablemultithread
		if( nthread )
		{
			pthread_mutex_lock( targ->mutex_counter );
			j = *jshare;
			if( j == iend )
			{
				fprintf( stderr, "\r %d / %d (thread %d)   \r", iq, njob, thread_no );
				pthread_mutex_unlock( targ->mutex_counter );
				break;
			}
			++(*jshare);
			pthread_mutex_unlock( targ->mutex_counter );
		}
		else
#endif
		{
			j++;
			if( j == iend ) 
			{
				fprintf( stderr, "\r %d / %d  \r", iq, njob );
				break;
			}
		}


		if( dodp )
		{
//			strcpy( mseq1[0], tmpseq );
//			strcpy( mseq2[0], seq[j] );
			mseq1[0] = tmpseq;
			mseq2[0] = seq[j];
//			res[j] = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, 0 );
			res[j] = L__align11_noalign( n_dis_consweight_multi, mseq1, mseq2 );
		}
		else
			res[j] = commonsextet_p2( table1, spointt[j] );
	}
	if( dodp ) // nakuserukamo
	{
		free( mseq1 );
		free( mseq2 );
//		G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 );
		L__align11_noalign( NULL, NULL, NULL );
	}
//	else
//		if( nthread )  // inthread == 0 no toki free suru to, error. nazeda
//			commonsextet_p( NULL, NULL );
	return( NULL );
}

int main( int argc, char *argv[] )
{
	static int  *nlen;	
	static int  *nogaplen;	
	static char **name, **seq;
	int i, j, istart, iend;
	FILE *infp;
//	FILE *adfp;
	char c;

	int *grpseq;
	char *tmpseq, *revseq;
	int  **pointt, **pointt_rev, **spointt;
	float res_forward, res_reverse, res_max;
	int ires, mres, mres2;
	int *res;
	static short *table1, *table1_rev;
	static char **mseq1f, **mseq1r, **mseq2;

	arguments( argc, argv );
#ifndef enablemultithread
	nthread = 0;
#endif

	if( inputfile )
	{
		infp = fopen( inputfile, "r" );
		if( !infp )
		{
			fprintf( stderr, "Cannot open %s\n", inputfile );
			exit( 1 );
		}
	}
	else
		infp = stdin;

	getnumlen( infp );
	rewind( infp );

	if( alg == 'a' )
	{
		if( nlenmax < 10000 )
			alg = 'G';
		else
			alg = 'S';
	}

	seq = AllocateCharMtx( njob, nlenmax*1+1 );

#if 0
	Read( name, nlen, seq );
	readData( infp, name, nlen, seq );
#else
    name = AllocateCharMtx( njob, B+1 );
    nlen = AllocateIntVec( njob ); 
    nogaplen = AllocateIntVec( njob ); 
	readData_pointer( infp, name, nlen, seq );
	fclose( infp );

	if( dorp != 'd' )
	{
		fprintf( stderr, "Not necessary!\n" );
		for( i=0; i<njob; i++ ) 
			fprintf( stdout, "_F_%-10.10s\n", name[i]+1 );
		exit( 1 );
	}
#endif

	constants( njob, seq );


#if 0
	fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
#endif

	initSignalSM();

	initFiles();

	c = seqcheck( seq );
	if( c )
	{
		fprintf( stderr, "Illegal character %c\n", c );
		exit( 1 );
	}

	fprintf( stderr, "\n" );
	if( alg == 'G' ) // do to the first sequence
	{
		mseq1f = AllocateCharMtx( 1, nlenmax+nlenmax );
		mseq1r = AllocateCharMtx( 1, nlenmax+nlenmax );
		mseq2 = AllocateCharMtx( 1, nlenmax+nlenmax );
	    tmpseq = AllocateCharVec( MAX( nlenmax, B ) +1 );

		gappick0( mseq1f[0], seq[0] );
		sreverse( mseq1r[0], mseq1f[0] );
		strcpy( seq[0], mseq1f[0] );

		if( nadd )
			istart = njob - nadd;
		else
			istart = 1;

		fprintf( stderr, "\n" );

		for( i=0; i<istart; i++ )
		{
			gappick0( tmpseq, seq[i] );
			strcpy( seq[i], tmpseq );
			strcpy( tmpseq, name[i] );
			strcpy( name[i], "_F_" );
			strncpy( name[i]+3, tmpseq+1, 10 );
			name[i][13] = 0;
		}
		for( i=istart; i<njob; i++ ) 
		{
			gappick0( mseq2[0], seq[i] );

//			res_forward = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1f, mseq2, 0 );
//			res_reverse = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1r, mseq2, 0 );

			res_forward = L__align11_noalign( n_dis_consweight_multi, mseq1f, mseq2 );
			res_reverse = L__align11_noalign( n_dis_consweight_multi, mseq1r, mseq2 );
#if 0

			strcpy( mseq2[0], seq[i] );
			strcpy( mseq1f[0], seq[0] );
			res_forward = G__align11( n_dis_consweight_multi, mseq1f, mseq2, nlenmax*2, 0, 0 );
			fprintf( stdout, "%s\n", mseq1f[0] );
			fprintf( stdout, "%s\n", mseq2[0] );

			strcpy( mseq2[0], seq[i] );
			sreverse( mseq1r[0], seq[0] );
			res_reverse = G__align11( n_dis_consweight_multi, mseq1r, mseq2, nlenmax*2, 0, 0 );
			fprintf( stdout, "%s\n", mseq1r[0] );
			fprintf( stdout, "%s\n", mseq2[0] );
#endif

//			fprintf( stdout, "\nscore_for(%d,%d) = %f\n", 0, i, res_forward );
//			fprintf( stdout, "score_rev(%d,%d) = %f\n", 0, i, res_reverse );
			res_max = MAX(res_reverse,res_forward);
			if( (res_reverse-res_forward)/res_max > thresholdtorev ) // tekitou
			{
//				fprintf( stderr, "REVERSE!!!\n" );
				sreverse( seq[i], mseq2[0] );

				strcpy( tmpseq, name[i] );
				strcpy( name[i], "_R_" );
				strncpy( name[i]+3, tmpseq+1, 10 );
				name[i][13] = 0;
			}
			else
			{
				strcpy( seq[i], mseq2[0] );

				strcpy( tmpseq, name[i] );
				strcpy( name[i], "_F_" );
				strncpy( name[i]+3, tmpseq+1, 10 );
				name[i][13] = 0;
			}
		}
		FreeCharMtx( mseq1f );
		FreeCharMtx( mseq1r );
		FreeCharMtx( mseq2 );
		free( tmpseq );
	}
	else if( alg == 'm' )
	{
		if( dodp ) // nakuserukamo
		{
			mseq1f = AllocateCharMtx( 1, nlenmax+1);
			mseq1r = AllocateCharMtx( 1, nlenmax+1 );
			mseq2 = AllocateCharMtx( 1, nlenmax+1 );
		}
		else
		{
			spointt = AllocateIntMtx( njob, 0 ); 
			pointt = AllocateIntMtx( njob, nlenmax+1 );
			pointt_rev = AllocateIntMtx( njob, nlenmax+1 );
		}
	    tmpseq = AllocateCharVec( MAX( nlenmax, B ) +1 );
	    revseq = AllocateCharVec( nlenmax+1 );
		grpseq = AllocateIntVec( nlenmax+1 );
		res = AllocateIntVec( njob );
		if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
		else              tsize = (int)pow( 6, 6 ); // iranai

		maxl = 0;
		for( i=0; i<njob; i++ )
		{
			gappick0( tmpseq, seq[i] );
			nogaplen[i] = strlen( tmpseq );
			if( nogaplen[i] > maxl ) maxl = nogaplen[i];
		}

		if( nadd )
			iend = njob - nadd;
		else
			iend = 1;

		for( i=0; i<iend; i++ )
		{
//			fprintf( stdout, "%d, SKIP\n", i );
			gappick0( tmpseq, seq[i] );
			strcpy( seq[i], tmpseq );
//			if( !nadd ) strcpy( seq[i], tmpseq ); // seq ha tsukawanaikara ii.

			if( !dodp )
			{
				seq_grp_nuc( grpseq, tmpseq );
				makepointtable_nuc( pointt[i], grpseq );
				spointt[i] = pointt[i];
			}

			strcpy( tmpseq, name[i] );
			strcpy( name[i], "_F_" );
			strncpy( name[i]+3, tmpseq+1, 10 );
			name[i][13] = 0;
		}

		if( nadd )
			istart = njob - nadd;
		else
			istart = 1;

		fprintf( stderr, "\n" );
		for( i=istart; i<njob; i++ ) 
		{
//			fprintf( stderr, "\r %d / %d ", i, njob );
			gappick0( tmpseq, seq[i] );
			strcpy( seq[i], tmpseq );
			sreverse( revseq, tmpseq );

			if( !dodp )
			{
				table1 = (short *)calloc( tsize, sizeof( short ) );
				if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
				table1_rev = (short *)calloc( tsize, sizeof( short ) );
				if( !table1_rev ) ErrorExit( "Cannot allocate table1_rev\n" );
				seq_grp_nuc( grpseq, tmpseq );
				makepointtable_nuc( pointt[i], grpseq );
				makecompositiontable_p( table1, pointt[i] );
				seq_grp_nuc( grpseq, revseq );
				makepointtable_nuc( pointt_rev[i], grpseq );
				makecompositiontable_p( table1_rev, pointt_rev[i] );
			}

			if( nadd && addfragment )
				iend = njob-nadd;
			else
				iend = i;


#ifdef enablemultithread
			if( nthread )
			{
				pthread_t *handle;
				pthread_mutex_t mutex_counter;
				thread_arg_t *targ;
				int *jsharept;
		
				targ = calloc( nthread, sizeof( thread_arg_t ) );
				handle = calloc( nthread, sizeof( pthread_t ) );
				pthread_mutex_init( &mutex_counter, NULL );
				jsharept = calloc( 1, sizeof(int) );
				*jsharept = 0;
		
				for( j=0; j<nthread; j++ )
				{
					targ[j].iend = iend;
					targ[j].seq = seq;
					targ[j].tmpseq = tmpseq; 
					targ[j].res = res; 
					targ[j].spointt = spointt; 
					targ[j].table1 = table1; 
					targ[j].jshare = jsharept;
					targ[j].iq = i;
					targ[j].mutex_counter = &mutex_counter;
					targ[j].thread_no = j;
					pthread_create( handle+j, NULL, directionthread, (void *)(targ+j) );
				}
				for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
				pthread_mutex_destroy( &mutex_counter );
				free( handle );
				free( targ );
				free( jsharept );
			}
			else
#endif
			{
				thread_arg_t *targ;
				targ = calloc( 1, sizeof( thread_arg_t ) );
				targ[0].iend = iend;
				targ[0].seq = seq;
				targ[0].tmpseq = tmpseq; 
				targ[0].res = res; 
				targ[0].spointt = spointt; 
				targ[0].table1 = table1; 
				targ[0].iq = i; 
				directionthread( targ );
				free( targ );
			}


			mres = mres2 = 0;
			for( j=0; j<iend; j++ )
			{
				ires = res[j];
//				fprintf( stdout, "ires (%d,%d) = %d\n", i, j, ires );
//				fflush( stdout );
				if( ires>mres2 ) 
				{
					if( ires>mres ) 
					{
						mres2 = mres;
						mres = ires;
					}
					else
						mres2 = ires;
				}
			}
			res_forward = (float)( mres + mres2 ) / 2;

#ifdef enablemultithread
			if( nthread )
			{
				pthread_t *handle;
				pthread_mutex_t mutex_counter;
				thread_arg_t *targ;
				int *jsharept;
		
				targ = calloc( nthread, sizeof( thread_arg_t ) );
				handle = calloc( nthread, sizeof( pthread_t ) );
				pthread_mutex_init( &mutex_counter, NULL );
				jsharept = calloc( 1, sizeof(int) );
				*jsharept = 0;
		
				for( j=0; j<nthread; j++ )
				{
					targ[j].iend = iend;
					targ[j].seq = seq;
					targ[j].tmpseq = revseq; 
					targ[j].res = res; 
					targ[j].spointt = spointt; 
					targ[j].table1 = table1_rev; 
					targ[j].jshare = jsharept;
					targ[j].iq = i;
					targ[j].mutex_counter = &mutex_counter;
					targ[j].thread_no = j;
					pthread_create( handle+j, NULL, directionthread, (void *)(targ+j) );
				}
				for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
				pthread_mutex_destroy( &mutex_counter );
				free( handle );
				free( targ );
				free( jsharept );
			}
			else
#endif
			{
				thread_arg_t *targ;
				targ = calloc( 1, sizeof( thread_arg_t ) );
				targ[0].iend = iend;
				targ[0].seq = seq;
				targ[0].tmpseq = revseq; 
				targ[0].res = res; 
				targ[0].spointt = spointt;
				targ[0].table1 = table1_rev; 
				targ[0].iq = i; 
				directionthread( targ );
				free( targ );
			}

			mres = mres2 = 0;
			for( j=0; j<iend; j++ )
			{
				ires = res[j];
				if( ires>mres2 )
				{
					if( ires>mres ) 
					{
						mres2 = mres;
						mres = ires;
					}
					else
						mres2 = ires;
				}
			}
			res_reverse = (float)( mres + mres2 ) / 2;

//			fprintf( stdout, "\n" );
//			fprintf( stdout, "score_for(%d,%d) = %f\n", 0, i, res_forward );
//			fprintf( stdout, "score_rev(%d,%d) = %f\n", 0, i, res_reverse );
//			fflush( stdout );
			res_max = MAX(res_reverse,res_forward);
			if( (res_reverse-res_forward)/res_max > thresholdtorev ) // tekitou

			{
				strcpy( seq[i], revseq );

				strcpy( tmpseq, name[i] );
				strcpy( name[i], "_R_" );
				strncpy( name[i]+3, tmpseq+1, 10 );
				name[i][13] = 0;
				if( !dodp ) spointt[i] = pointt_rev[i];
			}
			else
			{
				strcpy( tmpseq, name[i] );
				strcpy( name[i], "_F_" );
				strncpy( name[i]+3, tmpseq+1, 10 );
				name[i][13] = 0;
				if( !dodp ) spointt[i] = pointt[i];
			}

			if( !dodp )
			{
				free( table1 );
				free( table1_rev );
			}
		}

		free( grpseq );
		free( tmpseq );
		free( revseq );
		free( res );
		if( dodp )
		{
			FreeCharMtx( mseq1f );
			FreeCharMtx( mseq1r );
			FreeCharMtx( mseq2 );
		}
		else
		{
			FreeIntMtx( pointt );
			FreeIntMtx( pointt_rev );
			free( spointt );
		}
	}
	else
	{
		fprintf( stderr, "Unknown alg %c\n", alg );
		exit( 1 );
	}
//	writeData_pointer( stdout, njob, name, nlen, seq );
	for( i=0; i<njob; i++ ) 
	{
//		fprintf( stdout, ">%s\n", name[i] );
//		fprintf( stdout, "%s\n", seq[i] );
		fprintf( stdout, "%s\n", name[i] );
	}

	fprintf( stderr, "\n" );
	SHOWVERSION;
	return( 0 );
}