diff mafft/core/addfunctions.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mafft/core/addfunctions.c	Thu Feb 02 18:44:31 2017 -0500
@@ -0,0 +1,571 @@
+#include "mltaln.h"
+
+
+void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap
+{
+	int i, newlen;
+	double *effarr0, *effarr2;
+	float dumfl;
+	double eff;
+	effarr0 = AllocateDoubleVec( n0 );
+	effarr2 = AllocateDoubleVec( n2 );
+
+//	reporterr( "profilealignment!\n" );
+
+	commongappick( n0, aln0 );
+	commongappick( n2, aln2 );
+
+	eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
+	eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
+
+	newgapstr = "-";
+	if( alg == 'M' )
+		MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
+	else
+		A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
+
+	newlen = strlen( aln0[0] );
+
+#if 0 // tabun hitsuyou
+	for( j=0; j<newlen; j++ )
+	{
+//		fprintf( stderr, "j=%d\n", j );
+		for( i=0; i<n0; i++ )
+		{
+			if( aln0[i][j] != '-' ) break;
+		}
+		if( i == n0 ) 
+		{
+			for( i=0; i<n1; i++ ) 
+			{
+				if( aln1[i][j] != '-' ) break;
+			}
+		}
+		else i = -1;
+
+		if( i == n1 ) 
+		{
+			for( i=0; i<n1; i++ ) aln1[i][j] = '=';
+		}
+	}
+	fprintf( stderr, "in profilealignment,\n" );
+	for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] );
+	for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] );
+	for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] );
+#endif
+
+	free( effarr0 );
+	free( effarr2 );
+}
+
+void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap
+{
+	int i, j, newlen;
+	double *effarr0, *effarr2;
+	float dumfl;
+	double eff;
+	effarr0 = AllocateDoubleVec( n0 );
+	effarr2 = AllocateDoubleVec( n2 );
+
+
+//	reporterr( "profilealignment!\n" );
+
+	commongappick( n0, aln0 );
+	commongappick( n2, aln2 );
+
+	eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
+	eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
+
+	newgapstr = "-";
+	if( alg == 'M' )
+		MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
+	else
+		A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
+
+	newlen = strlen( aln0[0] );
+
+	for( i=0; i<newlen; i++ ) aln1[0][i] = '-';
+	aln1[0][i] = 0;
+	for( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] );
+
+	for( j=0; j<newlen; j++ )
+	{
+//		fprintf( stderr, "j=%d\n", j );
+		for( i=0; i<n0; i++ )
+		{
+			if( aln0[i][j] != '-' ) break;
+		}
+		if( i == n0 ) 
+		{
+			for( i=0; i<n1; i++ ) 
+			{
+				if( aln1[i][j] != '-' ) break;
+			}
+		}
+		else i = -1;
+
+		if( i == n1 ) 
+		{
+			for( i=0; i<n1; i++ ) aln1[i][j] = '=';
+		}
+	}
+#if 0
+	fprintf( stderr, "in profilealignment,\n" );
+	for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] );
+	for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] );
+	for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] );
+#endif
+
+	free( effarr0 );
+	free( effarr2 );
+}
+
+void eq2dashmatomete( char **s, int n )
+{
+	int i, j;
+	char sj;
+
+	for( j=0; (sj=s[0][j]); j++ )
+	{
+		if( sj == '=' )
+		{
+			for( i=0; i<n; i++ )
+			{
+				s[i][j] = '-';
+			}
+		}
+	}
+}
+
+void eq2dashmatometehayaku( char **s, int n )
+{
+	int i, j, c;
+	int *tobechanged;
+	int len = strlen( s[0] );
+
+	tobechanged = calloc( len, sizeof( int ) );
+	c = 0;
+	for( j=0; j<len; j++ )
+	{
+		if( s[0][j] == '=' ) tobechanged[c++] = j;
+	}
+	tobechanged[c] = -1;
+
+	for( i=0; i<n; i++ )
+	{
+		for( c=0; (j=tobechanged[c])!=-1; c++ )
+			s[i][j] = '-';
+	}
+	free( tobechanged );
+}
+
+void eq2dash( char *s )
+{
+	while( *s )
+	{
+		if( *s == '=' ) 
+		{
+			*s = '-';
+		}
+		s++;
+	}
+}
+
+void findnewgaps( int n, int rep, char **seq, int *gaplen )
+{
+	int i, pos, len, len1;
+
+	len = strlen( seq[0] );	
+//	for( i=0; i<len; i++ ) gaplen[i] = 0; // calloc de shokika sareteirukara hontou ha iranai
+	len1 = len + 1;
+	for( i=0; i<len1; i++ ) gaplen[i] = 0; // reallo de shokika sareteirukara iru!
+	
+	pos = 0;
+	for( i=0; i<len; i++ )
+	{
+		if( seq[rep][i] == '=' ) 
+		{
+//			fprintf( stderr, "Newgap! pos = %d\n", pos );
+			gaplen[pos]++;
+		}
+		else
+			pos++;
+	}
+}
+
+void findcommongaps( int n, char **seq, int *gaplen )
+{
+	int i, j, pos, len, len1;
+	len = strlen( seq[0] );	
+	len1 = len+1;
+
+//	fprintf( stderr, "seq[0] = %s\n", seq[0] );
+	for( i=0; i<len1; i++ ) gaplen[i] = 0;
+	
+	pos = 0;
+	for( i=0; i<len; i++ )
+	{
+		for( j=0; j<n; j++ )
+			if( seq[j][i] != '-' ) break;
+
+		if( j == n ) gaplen[pos]++;
+		else
+			pos++;
+	}
+#if 0
+	for( i=0; i<pos; i++ )
+	{
+		fprintf( stderr, "vec[%d] = %d\n", i, gaplen[i] );
+	}
+#endif
+}
+
+void adjustgapmap( int newlen, int *gapmap, char *seq )
+{
+	int j;
+	int pos;
+	int newlen1 = newlen+1;
+	int *tmpmap;
+
+
+	tmpmap = AllocateIntVec( newlen+2 );
+	j = 0;
+	pos = 0;
+	while( *seq )
+	{
+//		fprintf( stderr, "j=%d *seq = %c\n", j, *seq );
+		if( *seq++ == '=' )
+			tmpmap[j++] = 0;
+		else
+		{
+			tmpmap[j++] = gapmap[pos++];
+		}
+	}
+	tmpmap[j++] = gapmap[pos];
+
+	for(j=0; j<newlen1; j++)
+		gapmap[j] = tmpmap[j];
+
+	free( tmpmap );
+}
+
+static void strncpy0( char *s1, char *s2, int n )
+{
+	while( n-- ) *s1++ = *s2++;
+	*s1 = 0;
+}
+
+static int countnogaplen( int *gaplen, int *term )
+{
+	int v = 0;
+	while( gaplen < term )
+	{
+		if( *gaplen++ == 0 ) v++;
+		else break;
+	}
+	return( v );
+}
+
+void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar )
+{
+	int *mar;
+	char *gaps;
+	char *cptr;
+	int i, j, k, len, rep, len0, lp, blocklen;
+	char **mseq2, **mseq0, **mseq1;
+	char **aseq, *newchar;
+	int ngroup2, ngroup0, ngroup1;
+	int *list0, *list1, *list2;
+	int posin12, gapshift, newpos;
+	int mlen1, mlen0, mlen2;
+	int tmptmptmpmark = 0;
+
+	mar = calloc( njob, sizeof( int ) );
+	list0 = calloc( njob, sizeof( int ) );
+	list1 = calloc( njob, sizeof( int ) );
+	list2 = calloc( njob, sizeof( int ) );
+
+	for( i=0; i<njob; i++ ) mar[i] = 0;
+	for( i=0; i<njob; i++ ) 
+	{
+		if( alreadyaligned[i]==0 ) mar[i] = 3;
+	}
+	for( i=0; (k=ex1[i])>-1; i++ ) 
+	{
+		mar[k] = 1;
+//		fprintf( stderr, "excluding %d\n", ex1[i] );
+	}
+	for( i=0; (k=ex2[i])>-1; i++ ) 
+	{
+		mar[k] = 2;
+//		fprintf( stderr, "excluding %d\n", ex2[i] );
+	}
+
+	ngroup2 = ngroup1 = ngroup0 = 0;
+	for( i=0; i<njob; i++ )
+	{
+		if( mar[i] == 2 ) 
+		{
+			list2[ngroup2] = i;
+			ngroup2++;
+		}
+		if( mar[i] == 1 ) 
+		{
+			list1[ngroup1] = i;
+			ngroup1++;
+		}
+		if( mar[i] == 0 ) 
+		{
+			list0[ngroup0] = i;
+//			fprintf( stderr, "inserting new gaps to %d\n", i );
+			ngroup0++;
+		}
+	}
+	list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1;
+	if( ngroup0 == 0 )
+	{
+//		fprintf( stderr, "Nothing to do\n" );
+		free( mar );
+		free( list0 );
+		free( list1 );
+		free( list2 );
+		return;
+	}
+
+	for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break;
+	rep = i;
+	len = strlen( seq[rep] );
+	len0 = len+1;
+
+//
+//	if( i == njob )
+//	{
+////		fprintf( stderr, "Nothing to do\n" );
+//		free( mar );
+//		return;
+//	}
+
+	mseq2 = AllocateCharMtx( ngroup2, alloclen );
+	mseq1 = AllocateCharMtx( ngroup1, alloclen );
+	mseq0 = AllocateCharMtx( ngroup0, alloclen );
+	aseq = AllocateCharMtx( njob, alloclen );
+	gaps = calloc( alloclen, sizeof( char ) );
+
+
+	for( i=0; i<njob; i++ ) aseq[i][0] = 0;
+	newpos = 0;
+	posin12 = 0;
+//	fprintf( stderr, "\ngaplen[] = \n" );
+//	for(i=0; i<len0; i++ ) fprintf( stderr, "%d ", gaplen[i] );
+//	fprintf( stderr, "\n" );
+
+	for( j=0; j<len0; j++ )
+	{
+//		fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] );
+		if( gaplen[j] )
+		{
+//			fprintf( stderr, "j=%d GAP!\n", j );
+			tmptmptmpmark = 1;
+			for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0;
+			for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0;
+			for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0;
+			mlen0 = mlen1 = mlen2 = 0;
+
+			gapshift = gaplen[j];
+			cptr = gaps;
+			while( gapshift-- ) *cptr++ = gapchar;
+			*cptr = 0;
+			gapshift = gaplen[j];
+
+			for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshift );
+			for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
+			for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
+			posin12 += gapshift;
+			mlen0 += gapshift;
+			mlen1 += gapshift;
+			mlen2 += gapshift;
+
+			gapshift = gapmap[posin12];
+//			fprintf( stderr, "gapmap[%d] kouho = %d\n", posin12, gapmap[posin12] );
+
+
+			for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshift );
+			for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
+			for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
+			mlen0 += gapshift;
+			mlen1 += gapshift;
+			mlen2 += gapshift;
+#if 0
+			for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] );
+			for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] );
+			for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] );
+#endif
+
+			if( gapshift ) 
+				profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
+
+			j += gapshift;
+			posin12 += gapshift;
+
+			newpos = strlen( aseq[rep] ); // kufuu?
+			for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] );
+			for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] );
+			for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] );
+
+//			fprintf( stderr, "gapshift = %d\n", gapshift );
+		}
+		blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );
+//		fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 );
+//		blocklen = 1;
+//		if( tmptmptmpmark ) exit( 1 );
+
+		newpos = strlen( aseq[rep] );
+
+#if 0
+		for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];
+		for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];
+		for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];
+		for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;
+		for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;
+		for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;
+#else
+
+		for( i=0; i<ngroup0; i++ )
+		{
+			lp = list0[i];
+			newchar = aseq[lp] + newpos;
+			strncpy0( newchar, seq[lp]+j, blocklen );
+		}
+		for( i=0; i<ngroup1; i++ )
+		{
+			lp = list1[i];
+			newchar = aseq[lp] + newpos;
+			strncpy0( newchar, seq[lp]+posin12, blocklen );
+		}
+		for( i=0; i<ngroup2; i++ )
+		{
+			lp = list2[i];
+			newchar = aseq[lp] + newpos;
+			strncpy0( newchar, seq[lp]+posin12, blocklen );
+		}
+//		fprintf( stderr, "### aseq[l0] = %s\n", aseq[list0[0]] );
+//		fprintf( stderr, "### aseq[l1] = %s\n", aseq[list1[0]] );
+//		fprintf( stderr, "### aseq[l2] = %s\n", aseq[list2[0]] );
+//		exit( 1 );
+#endif
+
+//		fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 );
+		j += (blocklen-1);
+
+
+		posin12 += (blocklen-1);
+
+
+		posin12++;
+	}
+#if 0
+	fprintf( stderr, "\n" );
+	fprintf( stderr, " seq[l0] = %s\n", seq[list0[0]] );
+	fprintf( stderr, " seq[l1] = %s\n", seq[list1[0]] );
+	fprintf( stderr, " seq[l2] = %s\n", seq[list2[0]] );
+	fprintf( stderr, "=====>\n" );
+	fprintf( stderr, "aseq[l0] = %s\n", aseq[list0[0]] );
+	fprintf( stderr, "aseq[l1] = %s\n", aseq[list1[0]] );
+	fprintf( stderr, "aseq[l2] = %s\n", aseq[list2[0]] );
+//if( tmptmptmpmark ) exit( 1 );
+#endif
+
+//	for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );
+	for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );
+	for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );
+	for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );
+
+
+	free( mar );
+	free( gaps );
+	free( list0 );
+	free( list1 );
+	free( list2 );
+	FreeCharMtx( mseq2 );
+	FreeCharMtx( mseq1 ); // ? added 2012/02/12
+	FreeCharMtx( mseq0 );
+	FreeCharMtx( aseq ); // ? added 2012/02/12
+}
+
+
+void restorecommongaps( int njob, char **seq, int *ex1, int *ex2, int *gaplen, int alloclen, char gapchar )
+{
+	int *mar;
+	char *tmpseq;
+	char *cptr;
+	int *iptr;
+	int *tmpgaplen;
+	int i, j, k, len, rep, len1, klim;
+
+	mar = calloc( njob, sizeof( int ) );
+	tmpseq = calloc( alloclen, sizeof( char ) );
+	tmpgaplen = calloc( alloclen, sizeof( int ) );
+//	tmpseq = calloc( alloclen+2, sizeof( char ) );
+//	tmpgaplen = calloc( alloclen+2, sizeof( int ) );
+
+
+	for( i=0; i<njob; i++ ) mar[i] = 0;
+	for( i=0; (k=ex1[i])>-1; i++ ) 
+	{
+		mar[k] = 1;
+//		fprintf( stderr, "excluding %d\n", ex1[i] );
+	}
+	for( i=0; (k=ex2[i])>-1; i++ ) 
+	{
+		mar[k] = 1;
+//		fprintf( stderr, "excluding %d\n", ex2[i] );
+	}
+
+	for( i=0; i<njob; i++ )
+		if( mar[i] ) break;
+
+	if( i == njob )
+	{
+//		fprintf( stderr, "Nothing to do\n" );
+		free( mar );
+		free( tmpseq );
+		free( tmpgaplen );
+		return;
+	}
+	rep = i;
+	len = strlen( seq[rep] );
+	len1 = len+1;
+
+
+	for( i=0; i<njob; i++ )
+	{
+		if( mar[i] == 0 ) continue;
+		cptr = tmpseq;
+		for( j=0; j<len1; j++ )
+		{
+			klim = gaplen[j];
+//			for( k=0; k<gaplen[j]; k++ )
+			while( klim-- )
+				*(cptr++) = gapchar; // ???
+			*(cptr++) = seq[i][j];
+		}
+		*cptr = 0;
+		strcpy( seq[i], tmpseq );
+	}
+
+	iptr = tmpgaplen;
+	for( j=0; j<len1; j++ )
+	{
+		*(iptr++) = gaplen[j];
+		for( k=0; k<gaplen[j]; k++ )
+			*(iptr++) = 0;
+	}
+	*iptr = -1;
+
+	iptr = tmpgaplen;
+	while( *iptr != -1 ) *gaplen++ = *iptr++;
+
+	free( mar );
+	free( tmpseq );
+	free( tmpgaplen );
+}