Mercurial > repos > nick > duplex
diff mafft/core/suboptalign11.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mafft/core/suboptalign11.c Thu Feb 02 18:44:31 2017 -0500 @@ -0,0 +1,678 @@ +#include "mltaln.h" +#include "dp.h" + +#define DEBUG 0 +#define DEBUG2 0 +#define XXXXXXX 0 +#define USE_PENALTY_EX 1 + +typedef struct _shuryoten +{ + int i; + int j; + float wm; + struct _shuryoten *next; + struct _shuryoten *prev; +} Shuryoten; + + +static int localstop; + +static int compshuryo( Shuryoten *s1_arg, Shuryoten *s2_arg ) +{ + Shuryoten *s1 = (Shuryoten *)s1_arg; + Shuryoten *s2 = (Shuryoten *)s2_arg; + if ( s1->wm > s2->wm ) return( -1 ); + else if ( s1->wm < s2->wm ) return( 1 ); + else return( 0 ); +} + +static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) +{ + int j; + + for( j=0; j<lgth2; j++ ) + match[j] = amino_dis[(int)(*s1)[i1]][(int)(*s2)[j]]; +} + +static float gentracking( int **used, + char **seq1, char **seq2, + char **mseq1, char **mseq2, + float **cpmx1, float **cpmx2, + int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj ) +{ + int l, iin, jin, lgth1, lgth2, k, limk; + int ifi=0, jfi=0; +// char gap[] = "-"; + char *gap; + gap = newgapstr; + static char *res1 = NULL, *res2 = NULL; + char *mspt1, *mspt2; + if( res1 == NULL ) + { + res1 = (char *)calloc( N, sizeof( char ) ); + res2 = (char *)calloc( N, sizeof( char ) ); + } + + lgth1 = strlen( seq1[0] ); + lgth2 = strlen( seq2[0] ); + + mspt1 = res1 + lgth1+lgth2; + *mspt1 = 0; + mspt2 = res2 + lgth1+lgth2; + *mspt2 = 0; + iin = endi; jin = endj; + + limk = lgth1+lgth2; + if( used[iin][jin] ) return( -1.0 ); + for( k=0; k<=limk; k++ ) + { + ifi = ( ijpi[iin][jin] ); + jfi = ( ijpj[iin][jin] ); + + if( used[ifi][jfi] ) return( -1.0 ); + + l = iin - ifi; + while( --l ) + { + *--mspt1 = seq1[0][ifi+l]; + *--mspt2 = *gap; + k++; + } + l= jin - jfi; + while( --l ) + { + *--mspt1 = *gap; + *--mspt2 = seq2[0][jfi+l]; + k++; + } + + if( iin <= 0 || jin <= 0 ) break; + *--mspt1 = seq1[0][ifi]; + *--mspt2 = seq2[0][jfi]; + if( ijpi[ifi][jfi] == localstop ) break; + if( ijpj[ifi][jfi] == localstop ) break; + k++; + iin = ifi; jin = jfi; + } + if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi; + if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi; + +// fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi ); + + iin = endi; jin = endj; + limk = lgth1+lgth2; + for( k=0; k<=limk; k++ ) + { + ifi = ( ijpi[iin][jin] ); + jfi = ( ijpj[iin][jin] ); + + used[ifi][jfi] = 1; + if( iin <= 0 || jin <= 0 ) break; + if( ijpi[ifi][jfi] == localstop ) break; + if( ijpj[ifi][jfi] == localstop ) break; + + k++; + iin = ifi; jin = jfi; + } + + + strcpy( mseq1[0], mspt1 ); + strcpy( mseq2[0], mspt2 ); + + fprintf( stderr, "mseq1=%s\nmseq2=%s\n", mspt1, mspt2 ); + + return( 0.0 ); +} + + +float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt ) +/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ +{ + int k; + static int **used; + register int i, j; + int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ + int lgth1, lgth2; + int resultlen; + float wm = 0.0; // by D.Mathog, + float g; + float *currentw, *previousw; +#if 1 + float *wtmp; + int *ijpipt; + int *ijpjpt; + float *mjpt, *Mjpt, *prept, *curpt; + int *mpjpt, *Mpjpt; +#endif + static float mi, *m; + static float Mi, *largeM; + static int **ijpi; + static int **ijpj; + static int mpi, *mp; + static int Mpi, *Mp; + static float *w1, *w2; +// static float *match; + static float *initverticalw; /* kufuu sureba iranai */ + static float *lastverticalw; /* kufuu sureba iranai */ + static char **mseq1; + static char **mseq2; + static float **cpmx1; + static float **cpmx2; + static int **intwork; + static float **floatwork; + static int orlgth1 = 0, orlgth2 = 0; + float maxwm; + float tbk; + int tbki, tbkj; + int endali, endalj; +// float localthr = 0.0; +// float localthr2 = 0.0; + float fpenalty = (float)penalty; + float fpenalty_OP = (float)penalty_OP; + float fpenalty_ex = (float)penalty_ex; +// float fpenalty_EX = (float)penalty_EX; + float foffset = (float)offset; + float localthr = -foffset; + float localthr2 = -foffset; + static Shuryoten *shuryo = NULL; + int numshuryo; + float minshuryowm = 0.0; // by D.Mathog + int minshuryopos = 0; // by D.Mathog + float resf; + + +// fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty ); + + fprintf( stderr, "in suboptalign11\n" ); + if( !shuryo ) + { + shuryo = (Shuryoten *)calloc( 100, sizeof( Shuryoten ) ); + } + for( i=0; i<100; i++ ) + { + shuryo[i].i = -1; + shuryo[i].j = -1; + shuryo[i].wm = 0.0; + } + numshuryo = 0; + + if( orlgth1 == 0 ) + { + } + + + lgth1 = strlen( seq1[0] ); + lgth2 = strlen( seq2[0] ); + + fprintf( stderr, "in suboptalign11 step 1\n" ); + + if( lgth1 > orlgth1 || lgth2 > orlgth2 ) + { + int ll1, ll2; + + fprintf( stderr, "in suboptalign11 step 1.3\n" ); + if( orlgth1 > 0 && orlgth2 > 0 ) + { + fprintf( stderr, "in suboptalign11 step 1.4\n" ); + FreeFloatVec( w1 ); + FreeFloatVec( w2 ); +// FreeFloatVec( match ); + FreeFloatVec( initverticalw ); + FreeFloatVec( lastverticalw ); + fprintf( stderr, "in suboptalign11 step 1.5\n" ); + + FreeFloatVec( m ); + FreeIntVec( mp ); + FreeFloatVec( largeM ); + FreeIntVec( Mp ); + fprintf( stderr, "in suboptalign11 step 1.6\n" ); + + + FreeFloatMtx( cpmx1 ); + FreeFloatMtx( cpmx2 ); + + fprintf( stderr, "in suboptalign11 step 1.7\n" ); + FreeFloatMtx( floatwork ); + FreeIntMtx( intwork ); + } + + ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; + ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; + +#if DEBUG + fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); +#endif + + w1 = AllocateFloatVec( ll2+2 ); + w2 = AllocateFloatVec( ll2+2 ); +// match = AllocateFloatVec( ll2+2 ); + + initverticalw = AllocateFloatVec( ll1+2 ); + lastverticalw = AllocateFloatVec( ll1+2 ); + + m = AllocateFloatVec( ll2+2 ); + mp = AllocateIntVec( ll2+2 ); + largeM = AllocateFloatVec( ll2+2 ); + Mp = AllocateIntVec( ll2+2 ); + + cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); + cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); + + floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); + intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); + + mseq1 = AllocateCharMtx( njob, ll1+ll2 ); + mseq2 = AllocateCharMtx( njob, ll1+ll2 ); + +#if DEBUG + fprintf( stderr, "succeeded\n" ); +#endif + + orlgth1 = ll1 - 100; + orlgth2 = ll2 - 100; + } + fprintf( stderr, "in suboptalign11 step 1.6\n" ); + + + + fprintf( stderr, "in suboptalign11 step 2\n" ); + + if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) + { + int ll1, ll2; + + if( commonAlloc1 && commonAlloc2 ) + { + FreeIntMtx( commonIP ); + FreeIntMtx( commonJP ); + FreeIntMtx( used ); + } + + ll1 = MAX( orlgth1, commonAlloc1 ); + ll2 = MAX( orlgth2, commonAlloc2 ); + +#if DEBUG + fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); +#endif + + used = AllocateIntMtx( ll1+10, ll2+10 ); + commonIP = AllocateIntMtx( ll1+10, ll2+10 ); + commonJP = AllocateIntMtx( ll1+10, ll2+10 ); + +#if DEBUG + fprintf( stderr, "succeeded\n\n" ); +#endif + + commonAlloc1 = ll1; + commonAlloc2 = ll2; + } + ijpi = commonIP; + ijpj = commonJP; + + +#if 0 + for( i=0; i<lgth1; i++ ) + fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] ); +#endif + + fprintf( stderr, "in suboptalign11 step 3\n" ); + currentw = w1; + previousw = w2; + + match_calc( initverticalw, seq2, seq1, 0, lgth1 ); + + match_calc( currentw, seq1, seq2, 0, lgth2 ); + + + lasti = lgth2+1; + for( j=1; j<lasti; ++j ) + { + m[j] = currentw[j-1]; mp[j] = 0; + largeM[j] = currentw[j-1]; Mp[j] = 0; + } + + lastverticalw[0] = currentw[lgth2-1]; + + +#if 0 +fprintf( stderr, "currentw = \n" ); +for( i=0; i<lgth1+1; i++ ) +{ + fprintf( stderr, "%5.2f ", currentw[i] ); +} +fprintf( stderr, "\n" ); +fprintf( stderr, "initverticalw = \n" ); +for( i=0; i<lgth2+1; i++ ) +{ + fprintf( stderr, "%5.2f ", initverticalw[i] ); +} +fprintf( stderr, "\n" ); +#endif +#if DEBUG2 + fprintf( stderr, "\n" ); + fprintf( stderr, " " ); + for( j=0; j<lgth2+1; j++ ) + fprintf( stderr, "%c ", seq2[0][j] ); + fprintf( stderr, "\n" ); +#endif + + localstop = lgth1+lgth2+1; + maxwm = -999.9; + endali = endalj = 0; +#if DEBUG2 + fprintf( stderr, "\n" ); + fprintf( stderr, "%c ", seq1[0][0] ); + + for( j=0; j<lgth2+1; j++ ) + fprintf( stderr, "%5.0f ", currentw[j] ); + fprintf( stderr, "\n" ); +#endif + + lasti = lgth1+1; + for( i=1; i<lasti; i++ ) + { + wtmp = previousw; + previousw = currentw; + currentw = wtmp; + + previousw[0] = initverticalw[i-1]; + + match_calc( currentw, seq1, seq2, i, lgth2 ); +#if DEBUG2 + fprintf( stderr, "%c ", seq1[0][i] ); + fprintf( stderr, "%5.0f ", currentw[0] ); +#endif + +#if XXXXXXX +fprintf( stderr, "\n" ); +fprintf( stderr, "i=%d\n", i ); +fprintf( stderr, "currentw = \n" ); +for( j=0; j<lgth2; j++ ) +{ + fprintf( stderr, "%5.2f ", currentw[j] ); +} +fprintf( stderr, "\n" ); +#endif +#if XXXXXXX +fprintf( stderr, "\n" ); +fprintf( stderr, "i=%d\n", i ); +fprintf( stderr, "currentw = \n" ); +for( j=0; j<lgth2; j++ ) +{ + fprintf( stderr, "%5.2f ", currentw[j] ); +} +fprintf( stderr, "\n" ); +#endif + currentw[0] = initverticalw[i]; + + mi = previousw[0]; mpi = 0; + Mi = previousw[0]; Mpi = 0; + +#if 0 + if( mi < localthr ) mi = localthr2; +#endif + + ijpipt = ijpi[i] + 1; + ijpjpt = ijpj[i] + 1; + mjpt = m + 1; + Mjpt = largeM + 1; + prept = previousw; + curpt = currentw + 1; + mpjpt = mp + 1; + Mpjpt = Mp + 1; + tbk = -999999.9; + tbki = 0; + tbkj = 0; + lastj = lgth2+1; + for( j=1; j<lastj; j++ ) + { + wm = *prept; + *ijpipt = i-1; + *ijpjpt = j-1; + + +// fprintf( stderr, "i,j=%d,%d %c-%c\n", i, j, seq1[0][i], seq2[0][j] ); +// fprintf( stderr, "wm=%f\n", wm ); +#if 0 + fprintf( stderr, "%5.0f->", wm ); +#endif + g = mi + fpenalty; +#if 0 + fprintf( stderr, "%5.0f?", g ); +#endif + if( g > wm ) + { + wm = g; +// *ijpipt = i - 1; + *ijpjpt = mpi; + } + g = *prept; + if( g > mi ) + { + mi = g; + mpi = j-1; + } + +#if USE_PENALTY_EX + mi += fpenalty_ex; +#endif + +#if 0 + fprintf( stderr, "%5.0f->", wm ); +#endif + g = *mjpt + fpenalty; +#if 0 + fprintf( stderr, "m%5.0f?", g ); +#endif + if( g > wm ) + { + wm = g; + *ijpipt = *mpjpt; +// *ijpjpt = j - 1; + } + g = *prept; + if( g > *mjpt ) + { + *mjpt = g; + *mpjpt = i-1; + } +#if USE_PENALTY_EX + *mjpt += fpenalty_ex; +#endif + + + g = tbk + fpenalty_OP; +// g = tbk; + if( g > wm ) + { + wm = g; + *ijpipt = tbki; + *ijpjpt = tbkj; +// fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); + } + g = Mi; + if( g > tbk ) + { + tbk = g; + tbki = i-1; + tbkj = Mpi; + } + g = *Mjpt; + if( g > tbk ) + { + tbk = g; + tbki = *Mpjpt; + tbkj = j-1; + } +// tbk += fpenalty_EX;// + foffset; + + g = *prept; + if( g > *Mjpt ) + { + *Mjpt = g; + *Mpjpt = i-1; + } +// *Mjpt += fpenalty_EX;// + foffset; + + g = *prept; + if( g > Mi ) + { + Mi = g; + Mpi = j-1; + } +// Mi += fpenalty_EX;// + foffset; + + +// fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt ); +// fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] ); + + + if( maxwm < wm ) + { + maxwm = wm; + endali = i; + endalj = j; + } + +#if 1 + if( numshuryo < 100 ) + { + shuryo[numshuryo].i = i; + shuryo[numshuryo].j = j; + shuryo[numshuryo].wm = wm; + + if( minshuryowm > wm ) + { + minshuryowm = wm; + minshuryopos = numshuryo; + } + numshuryo++; + } + else + { + if( wm > minshuryowm ) + { + shuryo[minshuryopos].i = i; + shuryo[minshuryopos].j = j; + shuryo[minshuryopos].wm = wm; + minshuryowm = wm; + for( k=0; k<100; k++ ) // muda + { + if( shuryo[k].wm < minshuryowm ) + { + minshuryowm = shuryo[k].wm; + minshuryopos = k; + break; + } + } + } + } +#endif +#if 1 + if( wm < localthr ) + { +// fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); + *ijpipt = localstop; +// *ijpjpt = localstop; + wm = localthr2; + } +#endif +#if 0 + fprintf( stderr, "%5.0f ", *curpt ); +#endif +#if DEBUG2 + fprintf( stderr, "%5.0f ", wm ); +// fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); +#endif + + *curpt += wm; + ijpipt++; + ijpjpt++; + mjpt++; + Mjpt++; + prept++; + mpjpt++; + Mpjpt++; + curpt++; + } +#if DEBUG2 + fprintf( stderr, "\n" ); +#endif + + lastverticalw[i] = currentw[lgth2-1]; + } + + for( k=0; k<100; k++ ) + { + fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); + } + + +#if 1 + fprintf( stderr, "maxwm = %f\n", maxwm ); + fprintf( stderr, "endali = %d\n", endali ); + fprintf( stderr, "endalj = %d\n", endalj ); +#endif + + qsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo ); + for( k=0; k<100; k++ ) + { + fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); + } + + + lasti = lgth1+1; + for( i=0; i<lasti; i++ ) + { + ijpi[i][0] = localstop; + ijpj[i][0] = localstop; + } + lastj = lgth2+1; + for( j=0; j<lastj; j++ ) + { + ijpi[0][j] = localstop; + ijpj[0][j] = localstop; + } + + for( i=0; i<lasti; i++ ) for( j=0; j<lastj; j++ ) used[i][j] = 0; + + for( k=0; k<numshuryo; k++ ) + { + if( shuryo[k].wm < shuryo[0].wm * 0.3 ) break; + fprintf( stderr, "k=%d, shuryo[k].i,j,wm=%d,%d,%f go\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); + resf = gentracking( used, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, shuryo[k].i, shuryo[k].j ); + if( resf == -1.0 ) continue; + putlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ) ); +#if 0 + fprintf( stderr, "\n" ); + fprintf( stderr, ">\n%s\n", mseq1[0] ); + fprintf( stderr, ">\n%s\n", mseq2[0] ); +#endif + } + for( i=0; i<20; i++ ) + { + for( j=0; j<20; j++ ) + { + fprintf( stderr, "%2d ", used[i][j] ); + } + fprintf( stderr, "\n" ); + } + + +// fprintf( stderr, "### impmatch = %f\n", *impmatch ); + + resultlen = strlen( mseq1[0] ); + if( alloclen < resultlen || resultlen > N ) + { + fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); + ErrorExit( "LENGTH OVER!\n" ); + } + + + + + + return( wm ); +} +