Mercurial > repos > nick > duplex
view mafft/core/suboptalign11.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line source
#include "mltaln.h" #include "dp.h" #define DEBUG 0 #define DEBUG2 0 #define XXXXXXX 0 #define USE_PENALTY_EX 1 typedef struct _shuryoten { int i; int j; float wm; struct _shuryoten *next; struct _shuryoten *prev; } Shuryoten; static int localstop; static int compshuryo( Shuryoten *s1_arg, Shuryoten *s2_arg ) { Shuryoten *s1 = (Shuryoten *)s1_arg; Shuryoten *s2 = (Shuryoten *)s2_arg; if ( s1->wm > s2->wm ) return( -1 ); else if ( s1->wm < s2->wm ) return( 1 ); else return( 0 ); } static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) { int j; for( j=0; j<lgth2; j++ ) match[j] = amino_dis[(int)(*s1)[i1]][(int)(*s2)[j]]; } static float gentracking( int **used, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj ) { int l, iin, jin, lgth1, lgth2, k, limk; int ifi=0, jfi=0; // char gap[] = "-"; char *gap; gap = newgapstr; static char *res1 = NULL, *res2 = NULL; char *mspt1, *mspt2; if( res1 == NULL ) { res1 = (char *)calloc( N, sizeof( char ) ); res2 = (char *)calloc( N, sizeof( char ) ); } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); mspt1 = res1 + lgth1+lgth2; *mspt1 = 0; mspt2 = res2 + lgth1+lgth2; *mspt2 = 0; iin = endi; jin = endj; limk = lgth1+lgth2; if( used[iin][jin] ) return( -1.0 ); for( k=0; k<=limk; k++ ) { ifi = ( ijpi[iin][jin] ); jfi = ( ijpj[iin][jin] ); if( used[ifi][jfi] ) return( -1.0 ); l = iin - ifi; while( --l ) { *--mspt1 = seq1[0][ifi+l]; *--mspt2 = *gap; k++; } l= jin - jfi; while( --l ) { *--mspt1 = *gap; *--mspt2 = seq2[0][jfi+l]; k++; } if( iin <= 0 || jin <= 0 ) break; *--mspt1 = seq1[0][ifi]; *--mspt2 = seq2[0][jfi]; if( ijpi[ifi][jfi] == localstop ) break; if( ijpj[ifi][jfi] == localstop ) break; k++; iin = ifi; jin = jfi; } if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi; if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi; // fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi ); iin = endi; jin = endj; limk = lgth1+lgth2; for( k=0; k<=limk; k++ ) { ifi = ( ijpi[iin][jin] ); jfi = ( ijpj[iin][jin] ); used[ifi][jfi] = 1; if( iin <= 0 || jin <= 0 ) break; if( ijpi[ifi][jfi] == localstop ) break; if( ijpj[ifi][jfi] == localstop ) break; k++; iin = ifi; jin = jfi; } strcpy( mseq1[0], mspt1 ); strcpy( mseq2[0], mspt2 ); fprintf( stderr, "mseq1=%s\nmseq2=%s\n", mspt1, mspt2 ); return( 0.0 ); } float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt ) /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */ { int k; static int **used; register int i, j; int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; // by D.Mathog, float g; float *currentw, *previousw; #if 1 float *wtmp; int *ijpipt; int *ijpjpt; float *mjpt, *Mjpt, *prept, *curpt; int *mpjpt, *Mpjpt; #endif static float mi, *m; static float Mi, *largeM; static int **ijpi; static int **ijpj; static int mpi, *mp; static int Mpi, *Mp; static float *w1, *w2; // static float *match; static float *initverticalw; /* kufuu sureba iranai */ static float *lastverticalw; /* kufuu sureba iranai */ static char **mseq1; static char **mseq2; static float **cpmx1; static float **cpmx2; static int **intwork; static float **floatwork; static int orlgth1 = 0, orlgth2 = 0; float maxwm; float tbk; int tbki, tbkj; int endali, endalj; // float localthr = 0.0; // float localthr2 = 0.0; float fpenalty = (float)penalty; float fpenalty_OP = (float)penalty_OP; float fpenalty_ex = (float)penalty_ex; // float fpenalty_EX = (float)penalty_EX; float foffset = (float)offset; float localthr = -foffset; float localthr2 = -foffset; static Shuryoten *shuryo = NULL; int numshuryo; float minshuryowm = 0.0; // by D.Mathog int minshuryopos = 0; // by D.Mathog float resf; // fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty ); fprintf( stderr, "in suboptalign11\n" ); if( !shuryo ) { shuryo = (Shuryoten *)calloc( 100, sizeof( Shuryoten ) ); } for( i=0; i<100; i++ ) { shuryo[i].i = -1; shuryo[i].j = -1; shuryo[i].wm = 0.0; } numshuryo = 0; if( orlgth1 == 0 ) { } lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); fprintf( stderr, "in suboptalign11 step 1\n" ); if( lgth1 > orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; fprintf( stderr, "in suboptalign11 step 1.3\n" ); if( orlgth1 > 0 && orlgth2 > 0 ) { fprintf( stderr, "in suboptalign11 step 1.4\n" ); FreeFloatVec( w1 ); FreeFloatVec( w2 ); // FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); fprintf( stderr, "in suboptalign11 step 1.5\n" ); FreeFloatVec( m ); FreeIntVec( mp ); FreeFloatVec( largeM ); FreeIntVec( Mp ); fprintf( stderr, "in suboptalign11 step 1.6\n" ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); fprintf( stderr, "in suboptalign11 step 1.7\n" ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); // match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); largeM = AllocateFloatVec( ll2+2 ); Mp = AllocateIntVec( ll2+2 ); cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 ); cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 ); floatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); mseq1 = AllocateCharMtx( njob, ll1+ll2 ); mseq2 = AllocateCharMtx( njob, ll1+ll2 ); #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } fprintf( stderr, "in suboptalign11 step 1.6\n" ); fprintf( stderr, "in suboptalign11 step 2\n" ); if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeIntMtx( commonIP ); FreeIntMtx( commonJP ); FreeIntMtx( used ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif used = AllocateIntMtx( ll1+10, ll2+10 ); commonIP = AllocateIntMtx( ll1+10, ll2+10 ); commonJP = AllocateIntMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijpi = commonIP; ijpj = commonJP; #if 0 for( i=0; i<lgth1; i++ ) fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] ); #endif fprintf( stderr, "in suboptalign11 step 3\n" ); currentw = w1; previousw = w2; match_calc( initverticalw, seq2, seq1, 0, lgth1 ); match_calc( currentw, seq1, seq2, 0, lgth2 ); lasti = lgth2+1; for( j=1; j<lasti; ++j ) { m[j] = currentw[j-1]; mp[j] = 0; largeM[j] = currentw[j-1]; Mp[j] = 0; } lastverticalw[0] = currentw[lgth2-1]; #if 0 fprintf( stderr, "currentw = \n" ); for( i=0; i<lgth1+1; i++ ) { fprintf( stderr, "%5.2f ", currentw[i] ); } fprintf( stderr, "\n" ); fprintf( stderr, "initverticalw = \n" ); for( i=0; i<lgth2+1; i++ ) { fprintf( stderr, "%5.2f ", initverticalw[i] ); } fprintf( stderr, "\n" ); #endif #if DEBUG2 fprintf( stderr, "\n" ); fprintf( stderr, " " ); for( j=0; j<lgth2+1; j++ ) fprintf( stderr, "%c ", seq2[0][j] ); fprintf( stderr, "\n" ); #endif localstop = lgth1+lgth2+1; maxwm = -999.9; endali = endalj = 0; #if DEBUG2 fprintf( stderr, "\n" ); fprintf( stderr, "%c ", seq1[0][0] ); for( j=0; j<lgth2+1; j++ ) fprintf( stderr, "%5.0f ", currentw[j] ); fprintf( stderr, "\n" ); #endif lasti = lgth1+1; for( i=1; i<lasti; i++ ) { wtmp = previousw; previousw = currentw; currentw = wtmp; previousw[0] = initverticalw[i-1]; match_calc( currentw, seq1, seq2, i, lgth2 ); #if DEBUG2 fprintf( stderr, "%c ", seq1[0][i] ); fprintf( stderr, "%5.0f ", currentw[0] ); #endif #if XXXXXXX fprintf( stderr, "\n" ); fprintf( stderr, "i=%d\n", i ); fprintf( stderr, "currentw = \n" ); for( j=0; j<lgth2; j++ ) { fprintf( stderr, "%5.2f ", currentw[j] ); } fprintf( stderr, "\n" ); #endif #if XXXXXXX fprintf( stderr, "\n" ); fprintf( stderr, "i=%d\n", i ); fprintf( stderr, "currentw = \n" ); for( j=0; j<lgth2; j++ ) { fprintf( stderr, "%5.2f ", currentw[j] ); } fprintf( stderr, "\n" ); #endif currentw[0] = initverticalw[i]; mi = previousw[0]; mpi = 0; Mi = previousw[0]; Mpi = 0; #if 0 if( mi < localthr ) mi = localthr2; #endif ijpipt = ijpi[i] + 1; ijpjpt = ijpj[i] + 1; mjpt = m + 1; Mjpt = largeM + 1; prept = previousw; curpt = currentw + 1; mpjpt = mp + 1; Mpjpt = Mp + 1; tbk = -999999.9; tbki = 0; tbkj = 0; lastj = lgth2+1; for( j=1; j<lastj; j++ ) { wm = *prept; *ijpipt = i-1; *ijpjpt = j-1; // fprintf( stderr, "i,j=%d,%d %c-%c\n", i, j, seq1[0][i], seq2[0][j] ); // fprintf( stderr, "wm=%f\n", wm ); #if 0 fprintf( stderr, "%5.0f->", wm ); #endif g = mi + fpenalty; #if 0 fprintf( stderr, "%5.0f?", g ); #endif if( g > wm ) { wm = g; // *ijpipt = i - 1; *ijpjpt = mpi; } g = *prept; if( g > mi ) { mi = g; mpi = j-1; } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f->", wm ); #endif g = *mjpt + fpenalty; #if 0 fprintf( stderr, "m%5.0f?", g ); #endif if( g > wm ) { wm = g; *ijpipt = *mpjpt; // *ijpjpt = j - 1; } g = *prept; if( g > *mjpt ) { *mjpt = g; *mpjpt = i-1; } #if USE_PENALTY_EX *mjpt += fpenalty_ex; #endif g = tbk + fpenalty_OP; // g = tbk; if( g > wm ) { wm = g; *ijpipt = tbki; *ijpjpt = tbkj; // fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt ); } g = Mi; if( g > tbk ) { tbk = g; tbki = i-1; tbkj = Mpi; } g = *Mjpt; if( g > tbk ) { tbk = g; tbki = *Mpjpt; tbkj = j-1; } // tbk += fpenalty_EX;// + foffset; g = *prept; if( g > *Mjpt ) { *Mjpt = g; *Mpjpt = i-1; } // *Mjpt += fpenalty_EX;// + foffset; g = *prept; if( g > Mi ) { Mi = g; Mpi = j-1; } // Mi += fpenalty_EX;// + foffset; // fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt ); // fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] ); if( maxwm < wm ) { maxwm = wm; endali = i; endalj = j; } #if 1 if( numshuryo < 100 ) { shuryo[numshuryo].i = i; shuryo[numshuryo].j = j; shuryo[numshuryo].wm = wm; if( minshuryowm > wm ) { minshuryowm = wm; minshuryopos = numshuryo; } numshuryo++; } else { if( wm > minshuryowm ) { shuryo[minshuryopos].i = i; shuryo[minshuryopos].j = j; shuryo[minshuryopos].wm = wm; minshuryowm = wm; for( k=0; k<100; k++ ) // muda { if( shuryo[k].wm < minshuryowm ) { minshuryowm = shuryo[k].wm; minshuryopos = k; break; } } } } #endif #if 1 if( wm < localthr ) { // fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt ); *ijpipt = localstop; // *ijpjpt = localstop; wm = localthr2; } #endif #if 0 fprintf( stderr, "%5.0f ", *curpt ); #endif #if DEBUG2 fprintf( stderr, "%5.0f ", wm ); // fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop ); #endif *curpt += wm; ijpipt++; ijpjpt++; mjpt++; Mjpt++; prept++; mpjpt++; Mpjpt++; curpt++; } #if DEBUG2 fprintf( stderr, "\n" ); #endif lastverticalw[i] = currentw[lgth2-1]; } for( k=0; k<100; k++ ) { fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); } #if 1 fprintf( stderr, "maxwm = %f\n", maxwm ); fprintf( stderr, "endali = %d\n", endali ); fprintf( stderr, "endalj = %d\n", endalj ); #endif qsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo ); for( k=0; k<100; k++ ) { fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); } lasti = lgth1+1; for( i=0; i<lasti; i++ ) { ijpi[i][0] = localstop; ijpj[i][0] = localstop; } lastj = lgth2+1; for( j=0; j<lastj; j++ ) { ijpi[0][j] = localstop; ijpj[0][j] = localstop; } for( i=0; i<lasti; i++ ) for( j=0; j<lastj; j++ ) used[i][j] = 0; for( k=0; k<numshuryo; k++ ) { if( shuryo[k].wm < shuryo[0].wm * 0.3 ) break; fprintf( stderr, "k=%d, shuryo[k].i,j,wm=%d,%d,%f go\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm ); resf = gentracking( used, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, shuryo[k].i, shuryo[k].j ); if( resf == -1.0 ) continue; putlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ) ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, ">\n%s\n", mseq1[0] ); fprintf( stderr, ">\n%s\n", mseq2[0] ); #endif } for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stderr, "%2d ", used[i][j] ); } fprintf( stderr, "\n" ); } // fprintf( stderr, "### impmatch = %f\n", *impmatch ); resultlen = strlen( mseq1[0] ); if( alloclen < resultlen || resultlen > N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } return( wm ); }