Mercurial > repos > nick > duplex
diff mafft/core/addfunctions.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mafft/core/addfunctions.c Thu Feb 02 18:44:31 2017 -0500 @@ -0,0 +1,571 @@ +#include "mltaln.h" + + +void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap +{ + int i, newlen; + double *effarr0, *effarr2; + float dumfl; + double eff; + effarr0 = AllocateDoubleVec( n0 ); + effarr2 = AllocateDoubleVec( n2 ); + +// reporterr( "profilealignment!\n" ); + + commongappick( n0, aln0 ); + commongappick( n2, aln2 ); + + eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff; + eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff; + + newgapstr = "-"; + if( alg == 'M' ) + MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 + else + A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 + + newlen = strlen( aln0[0] ); + +#if 0 // tabun hitsuyou + for( j=0; j<newlen; j++ ) + { +// fprintf( stderr, "j=%d\n", j ); + for( i=0; i<n0; i++ ) + { + if( aln0[i][j] != '-' ) break; + } + if( i == n0 ) + { + for( i=0; i<n1; i++ ) + { + if( aln1[i][j] != '-' ) break; + } + } + else i = -1; + + if( i == n1 ) + { + for( i=0; i<n1; i++ ) aln1[i][j] = '='; + } + } + fprintf( stderr, "in profilealignment,\n" ); + for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] ); + for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] ); + for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] ); +#endif + + free( effarr0 ); + free( effarr2 ); +} + +void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap +{ + int i, j, newlen; + double *effarr0, *effarr2; + float dumfl; + double eff; + effarr0 = AllocateDoubleVec( n0 ); + effarr2 = AllocateDoubleVec( n2 ); + + +// reporterr( "profilealignment!\n" ); + + commongappick( n0, aln0 ); + commongappick( n2, aln2 ); + + eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff; + eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff; + + newgapstr = "-"; + if( alg == 'M' ) + MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 + else + A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 + + newlen = strlen( aln0[0] ); + + for( i=0; i<newlen; i++ ) aln1[0][i] = '-'; + aln1[0][i] = 0; + for( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] ); + + for( j=0; j<newlen; j++ ) + { +// fprintf( stderr, "j=%d\n", j ); + for( i=0; i<n0; i++ ) + { + if( aln0[i][j] != '-' ) break; + } + if( i == n0 ) + { + for( i=0; i<n1; i++ ) + { + if( aln1[i][j] != '-' ) break; + } + } + else i = -1; + + if( i == n1 ) + { + for( i=0; i<n1; i++ ) aln1[i][j] = '='; + } + } +#if 0 + fprintf( stderr, "in profilealignment,\n" ); + for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] ); + for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] ); + for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] ); +#endif + + free( effarr0 ); + free( effarr2 ); +} + +void eq2dashmatomete( char **s, int n ) +{ + int i, j; + char sj; + + for( j=0; (sj=s[0][j]); j++ ) + { + if( sj == '=' ) + { + for( i=0; i<n; i++ ) + { + s[i][j] = '-'; + } + } + } +} + +void eq2dashmatometehayaku( char **s, int n ) +{ + int i, j, c; + int *tobechanged; + int len = strlen( s[0] ); + + tobechanged = calloc( len, sizeof( int ) ); + c = 0; + for( j=0; j<len; j++ ) + { + if( s[0][j] == '=' ) tobechanged[c++] = j; + } + tobechanged[c] = -1; + + for( i=0; i<n; i++ ) + { + for( c=0; (j=tobechanged[c])!=-1; c++ ) + s[i][j] = '-'; + } + free( tobechanged ); +} + +void eq2dash( char *s ) +{ + while( *s ) + { + if( *s == '=' ) + { + *s = '-'; + } + s++; + } +} + +void findnewgaps( int n, int rep, char **seq, int *gaplen ) +{ + int i, pos, len, len1; + + len = strlen( seq[0] ); +// for( i=0; i<len; i++ ) gaplen[i] = 0; // calloc de shokika sareteirukara hontou ha iranai + len1 = len + 1; + for( i=0; i<len1; i++ ) gaplen[i] = 0; // reallo de shokika sareteirukara iru! + + pos = 0; + for( i=0; i<len; i++ ) + { + if( seq[rep][i] == '=' ) + { +// fprintf( stderr, "Newgap! pos = %d\n", pos ); + gaplen[pos]++; + } + else + pos++; + } +} + +void findcommongaps( int n, char **seq, int *gaplen ) +{ + int i, j, pos, len, len1; + len = strlen( seq[0] ); + len1 = len+1; + +// fprintf( stderr, "seq[0] = %s\n", seq[0] ); + for( i=0; i<len1; i++ ) gaplen[i] = 0; + + pos = 0; + for( i=0; i<len; i++ ) + { + for( j=0; j<n; j++ ) + if( seq[j][i] != '-' ) break; + + if( j == n ) gaplen[pos]++; + else + pos++; + } +#if 0 + for( i=0; i<pos; i++ ) + { + fprintf( stderr, "vec[%d] = %d\n", i, gaplen[i] ); + } +#endif +} + +void adjustgapmap( int newlen, int *gapmap, char *seq ) +{ + int j; + int pos; + int newlen1 = newlen+1; + int *tmpmap; + + + tmpmap = AllocateIntVec( newlen+2 ); + j = 0; + pos = 0; + while( *seq ) + { +// fprintf( stderr, "j=%d *seq = %c\n", j, *seq ); + if( *seq++ == '=' ) + tmpmap[j++] = 0; + else + { + tmpmap[j++] = gapmap[pos++]; + } + } + tmpmap[j++] = gapmap[pos]; + + for(j=0; j<newlen1; j++) + gapmap[j] = tmpmap[j]; + + free( tmpmap ); +} + +static void strncpy0( char *s1, char *s2, int n ) +{ + while( n-- ) *s1++ = *s2++; + *s1 = 0; +} + +static int countnogaplen( int *gaplen, int *term ) +{ + int v = 0; + while( gaplen < term ) + { + if( *gaplen++ == 0 ) v++; + else break; + } + return( v ); +} + +void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar ) +{ + int *mar; + char *gaps; + char *cptr; + int i, j, k, len, rep, len0, lp, blocklen; + char **mseq2, **mseq0, **mseq1; + char **aseq, *newchar; + int ngroup2, ngroup0, ngroup1; + int *list0, *list1, *list2; + int posin12, gapshift, newpos; + int mlen1, mlen0, mlen2; + int tmptmptmpmark = 0; + + mar = calloc( njob, sizeof( int ) ); + list0 = calloc( njob, sizeof( int ) ); + list1 = calloc( njob, sizeof( int ) ); + list2 = calloc( njob, sizeof( int ) ); + + for( i=0; i<njob; i++ ) mar[i] = 0; + for( i=0; i<njob; i++ ) + { + if( alreadyaligned[i]==0 ) mar[i] = 3; + } + for( i=0; (k=ex1[i])>-1; i++ ) + { + mar[k] = 1; +// fprintf( stderr, "excluding %d\n", ex1[i] ); + } + for( i=0; (k=ex2[i])>-1; i++ ) + { + mar[k] = 2; +// fprintf( stderr, "excluding %d\n", ex2[i] ); + } + + ngroup2 = ngroup1 = ngroup0 = 0; + for( i=0; i<njob; i++ ) + { + if( mar[i] == 2 ) + { + list2[ngroup2] = i; + ngroup2++; + } + if( mar[i] == 1 ) + { + list1[ngroup1] = i; + ngroup1++; + } + if( mar[i] == 0 ) + { + list0[ngroup0] = i; +// fprintf( stderr, "inserting new gaps to %d\n", i ); + ngroup0++; + } + } + list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1; + if( ngroup0 == 0 ) + { +// fprintf( stderr, "Nothing to do\n" ); + free( mar ); + free( list0 ); + free( list1 ); + free( list2 ); + return; + } + + for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break; + rep = i; + len = strlen( seq[rep] ); + len0 = len+1; + +// +// if( i == njob ) +// { +//// fprintf( stderr, "Nothing to do\n" ); +// free( mar ); +// return; +// } + + mseq2 = AllocateCharMtx( ngroup2, alloclen ); + mseq1 = AllocateCharMtx( ngroup1, alloclen ); + mseq0 = AllocateCharMtx( ngroup0, alloclen ); + aseq = AllocateCharMtx( njob, alloclen ); + gaps = calloc( alloclen, sizeof( char ) ); + + + for( i=0; i<njob; i++ ) aseq[i][0] = 0; + newpos = 0; + posin12 = 0; +// fprintf( stderr, "\ngaplen[] = \n" ); +// for(i=0; i<len0; i++ ) fprintf( stderr, "%d ", gaplen[i] ); +// fprintf( stderr, "\n" ); + + for( j=0; j<len0; j++ ) + { +// fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] ); + if( gaplen[j] ) + { +// fprintf( stderr, "j=%d GAP!\n", j ); + tmptmptmpmark = 1; + for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0; + for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0; + for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0; + mlen0 = mlen1 = mlen2 = 0; + + gapshift = gaplen[j]; + cptr = gaps; + while( gapshift-- ) *cptr++ = gapchar; + *cptr = 0; + gapshift = gaplen[j]; + + for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshift ); + for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift ); + for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift ); + posin12 += gapshift; + mlen0 += gapshift; + mlen1 += gapshift; + mlen2 += gapshift; + + gapshift = gapmap[posin12]; +// fprintf( stderr, "gapmap[%d] kouho = %d\n", posin12, gapmap[posin12] ); + + + for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshift ); + for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift ); + for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift ); + mlen0 += gapshift; + mlen1 += gapshift; + mlen2 += gapshift; +#if 0 + for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] ); + for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] ); + for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] ); +#endif + + if( gapshift ) + profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg ); + + j += gapshift; + posin12 += gapshift; + + newpos = strlen( aseq[rep] ); // kufuu? + for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] ); + for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] ); + for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] ); + +// fprintf( stderr, "gapshift = %d\n", gapshift ); + } + blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 ); +// fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 ); +// blocklen = 1; +// if( tmptmptmpmark ) exit( 1 ); + + newpos = strlen( aseq[rep] ); + +#if 0 + for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j]; + for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12]; + for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12]; + for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0; + for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0; + for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0; +#else + + for( i=0; i<ngroup0; i++ ) + { + lp = list0[i]; + newchar = aseq[lp] + newpos; + strncpy0( newchar, seq[lp]+j, blocklen ); + } + for( i=0; i<ngroup1; i++ ) + { + lp = list1[i]; + newchar = aseq[lp] + newpos; + strncpy0( newchar, seq[lp]+posin12, blocklen ); + } + for( i=0; i<ngroup2; i++ ) + { + lp = list2[i]; + newchar = aseq[lp] + newpos; + strncpy0( newchar, seq[lp]+posin12, blocklen ); + } +// fprintf( stderr, "### aseq[l0] = %s\n", aseq[list0[0]] ); +// fprintf( stderr, "### aseq[l1] = %s\n", aseq[list1[0]] ); +// fprintf( stderr, "### aseq[l2] = %s\n", aseq[list2[0]] ); +// exit( 1 ); +#endif + +// fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 ); + j += (blocklen-1); + + + posin12 += (blocklen-1); + + + posin12++; + } +#if 0 + fprintf( stderr, "\n" ); + fprintf( stderr, " seq[l0] = %s\n", seq[list0[0]] ); + fprintf( stderr, " seq[l1] = %s\n", seq[list1[0]] ); + fprintf( stderr, " seq[l2] = %s\n", seq[list2[0]] ); + fprintf( stderr, "=====>\n" ); + fprintf( stderr, "aseq[l0] = %s\n", aseq[list0[0]] ); + fprintf( stderr, "aseq[l1] = %s\n", aseq[list1[0]] ); + fprintf( stderr, "aseq[l2] = %s\n", aseq[list2[0]] ); +//if( tmptmptmpmark ) exit( 1 ); +#endif + +// for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] ); + for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] ); + for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] ); + for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] ); + + + free( mar ); + free( gaps ); + free( list0 ); + free( list1 ); + free( list2 ); + FreeCharMtx( mseq2 ); + FreeCharMtx( mseq1 ); // ? added 2012/02/12 + FreeCharMtx( mseq0 ); + FreeCharMtx( aseq ); // ? added 2012/02/12 +} + + +void restorecommongaps( int njob, char **seq, int *ex1, int *ex2, int *gaplen, int alloclen, char gapchar ) +{ + int *mar; + char *tmpseq; + char *cptr; + int *iptr; + int *tmpgaplen; + int i, j, k, len, rep, len1, klim; + + mar = calloc( njob, sizeof( int ) ); + tmpseq = calloc( alloclen, sizeof( char ) ); + tmpgaplen = calloc( alloclen, sizeof( int ) ); +// tmpseq = calloc( alloclen+2, sizeof( char ) ); +// tmpgaplen = calloc( alloclen+2, sizeof( int ) ); + + + for( i=0; i<njob; i++ ) mar[i] = 0; + for( i=0; (k=ex1[i])>-1; i++ ) + { + mar[k] = 1; +// fprintf( stderr, "excluding %d\n", ex1[i] ); + } + for( i=0; (k=ex2[i])>-1; i++ ) + { + mar[k] = 1; +// fprintf( stderr, "excluding %d\n", ex2[i] ); + } + + for( i=0; i<njob; i++ ) + if( mar[i] ) break; + + if( i == njob ) + { +// fprintf( stderr, "Nothing to do\n" ); + free( mar ); + free( tmpseq ); + free( tmpgaplen ); + return; + } + rep = i; + len = strlen( seq[rep] ); + len1 = len+1; + + + for( i=0; i<njob; i++ ) + { + if( mar[i] == 0 ) continue; + cptr = tmpseq; + for( j=0; j<len1; j++ ) + { + klim = gaplen[j]; +// for( k=0; k<gaplen[j]; k++ ) + while( klim-- ) + *(cptr++) = gapchar; // ??? + *(cptr++) = seq[i][j]; + } + *cptr = 0; + strcpy( seq[i], tmpseq ); + } + + iptr = tmpgaplen; + for( j=0; j<len1; j++ ) + { + *(iptr++) = gaplen[j]; + for( k=0; k<gaplen[j]; k++ ) + *(iptr++) = 0; + } + *iptr = -1; + + iptr = tmpgaplen; + while( *iptr != -1 ) *gaplen++ = *iptr++; + + free( mar ); + free( tmpseq ); + free( tmpgaplen ); +}