diff mafft/core/iteration.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mafft/core/iteration.c	Thu Feb 02 18:44:31 2017 -0500
@@ -0,0 +1,412 @@
+ /* iteration  ( algorithm C ) */
+#include "mltaln.h"
+
+#define DEBUG 0
+
+static void Writeoptions( FILE *fp )
+{
+    if( scoremtx == 1 )
+        fprintf( fp, "Dayhoff( ... )\n" );
+    else if( scoremtx == -1 )
+        fprintf( fp, "DNA\n" );
+    else if( scoremtx == 2 )
+        fprintf( fp, "Miyata-Yasunaga\n" );
+	else
+		fprintf( fp, "JTT %dPAM\n", pamN );
+
+	if( scoremtx == 0 )
+	    fprintf( fp, "Gap Penalty = %+d, %+d\n", penalty, offset );
+	else
+	    fprintf( fp, "Gap Penalty = %+d\n", penalty );
+
+    fprintf( fp, "marginal score to search : best - %f\n", cut );
+    if( scmtd == 3 )
+        fprintf( fp, "score of rnd or sco\n" );
+    else if( scmtd == 4 )
+        fprintf( fp, "score = sigma( score for a pair of homologous amino acids ) / ( number of amino acids pairs )\n" );
+    else if( scmtd == 5 )
+        fprintf( fp, "score : SP\n" );
+    if( mix )
+        fprintf( fp, "?\n" );
+    else
+    { 
+        if( weight == 2 )
+            fprintf( fp, "weighted,  geta2 = %f\n", geta2 );
+        else if( weight == 3 )
+        {
+            if( scmtd == 4 )
+                fprintf( fp, "reversely weighted in function 'align', unweighted in function 'score_calc'\n" );
+            else
+                fprintf( fp, "weighted like ClustalW," );
+        }
+        else
+            fprintf( fp, "unweighted\n" );
+    }
+    if( weight && utree )
+    {
+        fprintf( fp, "using tree defined by the file hat2 with simplified UPG method\n" );
+    }
+    if( weight && !utree )
+        fprintf( fp, "using temporary tree by simplified UPG method\n" );
+    fprintf( fp, "Algorithm %c\n", alg );
+}
+
+
+
+
+char **align0( float *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex )
+{
+    char **result;
+
+    if( alg == 'B' )
+    {
+		ErrorExit( "Sorry!" );
+	/*
+        if( outgap == 0 )
+        {
+            result = alignm1_o( wm, aseq, seq, scmx, effarr, icyc, ex );
+        }
+        if( outgap == 1 )
+        {
+            result = alignm1( wm, aseq, seq, scmx, effarr, icyc, ex );
+        }
+	*/
+    }
+    else if( alg == 'C' )
+    {
+        result = Calignm1( wm, aseq, seq, effarr, icyc, ex );
+    }
+    return( result );
+}
+    
+
+double score_m_1_0( char **aseq, int locnjob, int s, double **eff, double effarr[M] )
+{
+    double x;
+
+    if( alg == 'B' )
+    {
+		ErrorExit( "Sorry!" );
+    }
+    if( alg == 'C' )
+    {
+        x = Cscore_m_1( aseq, locnjob, s, eff );
+    }
+    fprintf( stderr, "in score_m_1_0 %f\n", x );
+    return( x );
+}
+
+int iteration( int locnjob, char name[M][B], int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **eff ) 
+{
+    double tscore, mscore;
+    int identity;
+    static char *mseq1, **mseq2 = NULL;
+	static char **result;
+	int i, l;
+	static double effarr[M];
+	int s;
+	int sss[2];
+	char ou;
+	int alloclen; 
+	int resultlen;
+	int nlenmax0 = nlenmax;
+	FILE *prep;
+	char sai[M];
+	char sai1[M];
+	char sai2[M];
+#if 0
+	double his[2][M][MAXITERATION/locnjob+1];
+#else
+	double ***his;
+#endif
+	int cyc[2];
+	char shindou = 0;
+	float wm;
+	int returnvalue;
+
+    for( i=0; i<locnjob; i++ ) 
+    {
+		sai[i] = 1;
+        sai1[i] = 1;
+        sai2[i] = 2;
+    }
+    sai[locnjob] = sai1[locnjob] = sai2[locnjob] = 0;
+
+
+	Writeoptions( trap_g );
+
+	his = AllocateDoubleCub( 2, M, MAXITERATION/locnjob+1 );
+
+	if( mseq2 == NULL )
+	{
+    	alloclen = nlenmax * 2.0;
+   		AllocateTmpSeqs( &mseq2, &mseq1, alloclen );
+	}
+
+	if( !tbitr && !tbweight )
+	{
+		writePre( locnjob, name, nlen, aseq, 0 );
+
+#if 0
+		prep = fopen( "best", "w" );
+		Write( prep, locnjob, name, nlen, aseq );
+		fclose( prep );
+#endif
+	}
+	
+
+
+
+	treeconstruction( aseq, locnjob, topol, len, eff );
+	tscore = score_calc0( aseq, locnjob, eff, 0 );
+
+#if DEBUG
+    fprintf( stderr, "eff mtx in iteration\n" );
+    for( i=0; i<locnjob; i++ )
+    {
+        for( j=0; j<locnjob; j++ ) 
+        {
+            fprintf( stderr, "%5.3f ", eff[i][j] );
+        }
+        fprintf( stderr, "\n" );
+    }
+#endif
+
+    fprintf( stderr, "\n" );
+	if( disp )
+	{
+    	fprintf( stderr, "aseq(initial)\n" );
+		display( aseq, locnjob );
+	}
+	fprintf( stderr, "initial score = %f     \n", tscore );
+	fprintf( stderr, "\n" );
+	for( i=0; i<locnjob; i++ ) strcpy( bseq[i], aseq[i] );
+	mscore = tscore;
+    srand( time(NULL) );
+
+	sss[1] = 0;
+	sss[0] = locnjob-1;
+/*
+	sss[0] = (int)( (float)locnjob/2.0 );
+*/
+	ou = 1;
+	cyc[0] = 0; cyc[1] = 0;
+
+    for( s=-1, l=0; l<MAXITERATION; l++ )
+    {
+        int ss;
+        double tmpscore, tmpscore1;
+
+		if( strlen( aseq[0] ) > nlenmax )
+			nlenmax = strlen( aseq[0] );
+
+		/*
+        s = ( int )( rnd() * locnjob );
+		s++; 
+		if( s == locnjob ) s = 0;
+		ou = 0;
+		*/
+		if( ou == 0 )
+		{
+			ou = 1;
+			s = sss[0];
+			/*
+			sss[0]++;
+			if( sss[0] == locnjob )
+			{
+				sss[0] = 0;
+				cyc[0]++;
+			}
+			*/
+			sss[0]--;
+			if( sss[0] == -1 )
+			{
+				sss[0] = locnjob-1;
+				cyc[0]++;
+			}
+		}
+		else
+		{
+			ou = 0;
+			s = sss[1];
+			sss[1]++;
+			if( sss[1] == locnjob ) 
+			{
+				sss[1] = 0;
+				cyc[1]++;
+			}
+		}
+		fprintf( trap_g, "%d  ", weight );
+
+/*
+        for( i=0, count=0; i<strlen( aseq[s] ); i++ ) 
+        {
+            if( aseq[s][i] != '-' )
+            {
+                mseq1[count] = aseq[s][i];
+                count++;
+            }
+        }
+        mseq1[count] = 0;
+*/
+		gappick0( mseq1, aseq[s] );
+
+		if( checkC )
+			tmpscore = score_m_1_0( aseq, locnjob, s, eff, effarr );
+
+		gappick( locnjob, s, aseq, mseq2, eff, effarr );
+
+        result = align0( &wm, mseq2, mseq1, effarr, locnjob-2, s );
+		resultlen = strlen( result[0] );
+		if( resultlen > alloclen )
+		{
+			if( resultlen > nlenmax0*3 || resultlen > N )
+			{
+				fprintf(stderr, "Error in main1\n");
+				exit( 1 );
+			}
+			FreeTmpSeqs( mseq2, mseq1 );
+			alloclen = strlen( result[0] ) * 2.0;
+			fprintf( stderr, "\n\ntrying to allocate TmpSeqs\n\n" );
+			AllocateTmpSeqs( &mseq2, &mseq1, alloclen );
+		}
+		for( i=0; i<locnjob; i++ ) strcpy( mseq2[i], result[i] ); 
+
+		if( checkC  )
+			fprintf( stderr, "wm in iteration == %f\n", wm );
+
+		strcpy( mseq1, mseq2[locnjob-1] );
+/*
+		Write( stdout, locnjob, name, nlen, mseq2 );
+*/
+        for( i=locnjob-2; i>=s; i-- ) strcpy( mseq2[i+1], mseq2[i] );
+        strcpy( mseq2[s], mseq1 );
+		if( checkC )
+		{
+			tmpscore1= score_m_1_0( mseq2, locnjob, s, eff, effarr );
+			fprintf( stderr, "pick up %d, before ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore );
+			fprintf( stderr, "pick up %d,  after ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore1 );
+			if( tmpscore1 < tmpscore ) 
+			{
+				fprintf( stderr, "\7" );
+				fprintf( trap_g, ">>>>>>>n\n" );
+			}
+			if( fabs( wm - tmpscore1 ) / wm  > 0.001 ) 
+			{
+				fprintf( stderr, "\7sorry\n" );
+				exit( 1 );
+			}
+		}
+
+        identity = !strcmp( mseq2[s], aseq[s] );
+        if( s == locnjob - 1 ) ss = 0; else ss=s+1;
+
+        identity *= !strcmp( mseq2[ss], aseq[ss] );
+
+   	    if( !identity ) 
+		{
+			tmpscore = score_calc0( mseq2, locnjob, eff, s );
+		}
+		else tmpscore = tscore;
+
+		if( disp )
+		{
+       		fprintf( stderr, "% 3d    % 3d / the rest   \n", l+1, s+1 );
+       		display( mseq2, locnjob );
+		}
+       	fprintf( stderr, "% 3d    % 3d / the rest   \n", l+1, s+1 );
+       	fprintf( stderr, "score = %f    mscore =  %f  ", tmpscore, mscore );
+
+       	fprintf( trap_g, "%#4d    %#4d / the rest     ", l+1, s+1 );
+       	fprintf( trap_g, "score = %f    mscore =  %f  ", tmpscore, mscore );
+
+		if( identity ) 
+		{
+			fprintf( stderr, "( identical )\n" );
+			fprintf( trap_g, "( identical )\n" );
+			sai[s] = 2;
+		}
+
+        else if( tmpscore > mscore - cut )
+        {
+            fprintf( stderr, "accepted\n" );
+            fprintf( trap_g, "accepted\n" );
+            for( i=0; i<locnjob; i++ ) strcpy( aseq[i], mseq2[i] );
+			strcpy( sai, sai1 );   /* kokoka ? */
+			if( !tbitr && !tbweight )
+			{
+				writePre( locnjob, name, nlen, aseq, 0 );
+			}
+			strcpy( sai, sai1 );
+			tscore = tmpscore;
+			/*
+			tscore = tmpscore = score_calc0( aseq, locnjob, eff, s );   * ? *
+			*/
+    		if( tmpscore > mscore ) 
+			{
+            	for( i=0; i<locnjob; i++ ) strcpy( bseq[i], mseq2[i] );
+				treeconstruction( bseq, locnjob, topol, len, eff );
+				tscore = mscore = score_calc0( bseq, locnjob, eff, s );
+				fprintf( trap_g, "                                    -> %f\n", mscore );
+				strcpy( sai, sai1 );   /* kokoka ? */
+#if 0
+				if( !tbitr && !tbweight )
+				{	prep = fopen( "best", "w" );
+					Write( prep, locnjob, name, nlen, bseq );
+					fclose( prep );
+				}
+#endif
+			}
+        }
+
+		else
+		{
+			if( tmpscore == tscore )
+			{
+				fprintf( stderr, "occational coincidence \n" );
+				fprintf( trap_g, "occational coincidence\n" );
+			}
+			else
+			{
+				fprintf( stderr, "rejected\n" );
+           	    fprintf( trap_g, "rejected\n" );
+			}
+			for( i=0; i<locnjob; i++ ) strcpy( aseq[i], bseq[i] );
+			tscore = mscore;
+			sai[s] = 2;
+		}
+
+/*
+		prep = fopen( "cur", "w" );
+		Write( prep, locnjob, name, nlen, mseq2 );
+		fclose( prep );
+*/
+
+		his[ou][s][cyc[ou]] = tmpscore;
+		if( !strcmp( sai, sai2 ) )
+		{
+			returnvalue = 0;
+			fprintf( trap_g, "converged\n" );
+			break;
+		}
+		for( i=cyc[ou]-1; i>0; i-- ) 
+		{
+			if( tmpscore == his[ou][s][i] ) 
+			{
+				shindou = 1;
+				break;
+			}
+		}
+		fprintf( stderr, "\n" );
+		if( shindou == 1 )
+		{
+			returnvalue = -1;
+			fprintf( trap_g, "oscillating\n" );
+			break;
+		}
+	}
+	if( l == MAXITERATION ) returnvalue = -2;
+	FreeDoubleCub( his );
+	return( returnvalue );
+}
+