Mercurial > repos > nick > duplex
view mafft/core/addfunctions.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line source
#include "mltaln.h" void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap { int i, newlen; double *effarr0, *effarr2; float dumfl; double eff; effarr0 = AllocateDoubleVec( n0 ); effarr2 = AllocateDoubleVec( n2 ); // reporterr( "profilealignment!\n" ); commongappick( n0, aln0 ); commongappick( n2, aln2 ); eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff; eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff; newgapstr = "-"; if( alg == 'M' ) MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 else A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 newlen = strlen( aln0[0] ); #if 0 // tabun hitsuyou for( j=0; j<newlen; j++ ) { // fprintf( stderr, "j=%d\n", j ); for( i=0; i<n0; i++ ) { if( aln0[i][j] != '-' ) break; } if( i == n0 ) { for( i=0; i<n1; i++ ) { if( aln1[i][j] != '-' ) break; } } else i = -1; if( i == n1 ) { for( i=0; i<n1; i++ ) aln1[i][j] = '='; } } fprintf( stderr, "in profilealignment,\n" ); for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] ); for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] ); for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] ); #endif free( effarr0 ); free( effarr2 ); } void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap { int i, j, newlen; double *effarr0, *effarr2; float dumfl; double eff; effarr0 = AllocateDoubleVec( n0 ); effarr2 = AllocateDoubleVec( n2 ); // reporterr( "profilealignment!\n" ); commongappick( n0, aln0 ); commongappick( n2, aln2 ); eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff; eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff; newgapstr = "-"; if( alg == 'M' ) MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 else A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1 newlen = strlen( aln0[0] ); for( i=0; i<newlen; i++ ) aln1[0][i] = '-'; aln1[0][i] = 0; for( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] ); for( j=0; j<newlen; j++ ) { // fprintf( stderr, "j=%d\n", j ); for( i=0; i<n0; i++ ) { if( aln0[i][j] != '-' ) break; } if( i == n0 ) { for( i=0; i<n1; i++ ) { if( aln1[i][j] != '-' ) break; } } else i = -1; if( i == n1 ) { for( i=0; i<n1; i++ ) aln1[i][j] = '='; } } #if 0 fprintf( stderr, "in profilealignment,\n" ); for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] ); for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] ); for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] ); #endif free( effarr0 ); free( effarr2 ); } void eq2dashmatomete( char **s, int n ) { int i, j; char sj; for( j=0; (sj=s[0][j]); j++ ) { if( sj == '=' ) { for( i=0; i<n; i++ ) { s[i][j] = '-'; } } } } void eq2dashmatometehayaku( char **s, int n ) { int i, j, c; int *tobechanged; int len = strlen( s[0] ); tobechanged = calloc( len, sizeof( int ) ); c = 0; for( j=0; j<len; j++ ) { if( s[0][j] == '=' ) tobechanged[c++] = j; } tobechanged[c] = -1; for( i=0; i<n; i++ ) { for( c=0; (j=tobechanged[c])!=-1; c++ ) s[i][j] = '-'; } free( tobechanged ); } void eq2dash( char *s ) { while( *s ) { if( *s == '=' ) { *s = '-'; } s++; } } void findnewgaps( int n, int rep, char **seq, int *gaplen ) { int i, pos, len, len1; len = strlen( seq[0] ); // for( i=0; i<len; i++ ) gaplen[i] = 0; // calloc de shokika sareteirukara hontou ha iranai len1 = len + 1; for( i=0; i<len1; i++ ) gaplen[i] = 0; // reallo de shokika sareteirukara iru! pos = 0; for( i=0; i<len; i++ ) { if( seq[rep][i] == '=' ) { // fprintf( stderr, "Newgap! pos = %d\n", pos ); gaplen[pos]++; } else pos++; } } void findcommongaps( int n, char **seq, int *gaplen ) { int i, j, pos, len, len1; len = strlen( seq[0] ); len1 = len+1; // fprintf( stderr, "seq[0] = %s\n", seq[0] ); for( i=0; i<len1; i++ ) gaplen[i] = 0; pos = 0; for( i=0; i<len; i++ ) { for( j=0; j<n; j++ ) if( seq[j][i] != '-' ) break; if( j == n ) gaplen[pos]++; else pos++; } #if 0 for( i=0; i<pos; i++ ) { fprintf( stderr, "vec[%d] = %d\n", i, gaplen[i] ); } #endif } void adjustgapmap( int newlen, int *gapmap, char *seq ) { int j; int pos; int newlen1 = newlen+1; int *tmpmap; tmpmap = AllocateIntVec( newlen+2 ); j = 0; pos = 0; while( *seq ) { // fprintf( stderr, "j=%d *seq = %c\n", j, *seq ); if( *seq++ == '=' ) tmpmap[j++] = 0; else { tmpmap[j++] = gapmap[pos++]; } } tmpmap[j++] = gapmap[pos]; for(j=0; j<newlen1; j++) gapmap[j] = tmpmap[j]; free( tmpmap ); } static void strncpy0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; } static int countnogaplen( int *gaplen, int *term ) { int v = 0; while( gaplen < term ) { if( *gaplen++ == 0 ) v++; else break; } return( v ); } void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar ) { int *mar; char *gaps; char *cptr; int i, j, k, len, rep, len0, lp, blocklen; char **mseq2, **mseq0, **mseq1; char **aseq, *newchar; int ngroup2, ngroup0, ngroup1; int *list0, *list1, *list2; int posin12, gapshift, newpos; int mlen1, mlen0, mlen2; int tmptmptmpmark = 0; mar = calloc( njob, sizeof( int ) ); list0 = calloc( njob, sizeof( int ) ); list1 = calloc( njob, sizeof( int ) ); list2 = calloc( njob, sizeof( int ) ); for( i=0; i<njob; i++ ) mar[i] = 0; for( i=0; i<njob; i++ ) { if( alreadyaligned[i]==0 ) mar[i] = 3; } for( i=0; (k=ex1[i])>-1; i++ ) { mar[k] = 1; // fprintf( stderr, "excluding %d\n", ex1[i] ); } for( i=0; (k=ex2[i])>-1; i++ ) { mar[k] = 2; // fprintf( stderr, "excluding %d\n", ex2[i] ); } ngroup2 = ngroup1 = ngroup0 = 0; for( i=0; i<njob; i++ ) { if( mar[i] == 2 ) { list2[ngroup2] = i; ngroup2++; } if( mar[i] == 1 ) { list1[ngroup1] = i; ngroup1++; } if( mar[i] == 0 ) { list0[ngroup0] = i; // fprintf( stderr, "inserting new gaps to %d\n", i ); ngroup0++; } } list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1; if( ngroup0 == 0 ) { // fprintf( stderr, "Nothing to do\n" ); free( mar ); free( list0 ); free( list1 ); free( list2 ); return; } for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break; rep = i; len = strlen( seq[rep] ); len0 = len+1; // // if( i == njob ) // { //// fprintf( stderr, "Nothing to do\n" ); // free( mar ); // return; // } mseq2 = AllocateCharMtx( ngroup2, alloclen ); mseq1 = AllocateCharMtx( ngroup1, alloclen ); mseq0 = AllocateCharMtx( ngroup0, alloclen ); aseq = AllocateCharMtx( njob, alloclen ); gaps = calloc( alloclen, sizeof( char ) ); for( i=0; i<njob; i++ ) aseq[i][0] = 0; newpos = 0; posin12 = 0; // fprintf( stderr, "\ngaplen[] = \n" ); // for(i=0; i<len0; i++ ) fprintf( stderr, "%d ", gaplen[i] ); // fprintf( stderr, "\n" ); for( j=0; j<len0; j++ ) { // fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] ); if( gaplen[j] ) { // fprintf( stderr, "j=%d GAP!\n", j ); tmptmptmpmark = 1; for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0; for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0; for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0; mlen0 = mlen1 = mlen2 = 0; gapshift = gaplen[j]; cptr = gaps; while( gapshift-- ) *cptr++ = gapchar; *cptr = 0; gapshift = gaplen[j]; for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshift ); for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift ); for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift ); posin12 += gapshift; mlen0 += gapshift; mlen1 += gapshift; mlen2 += gapshift; gapshift = gapmap[posin12]; // fprintf( stderr, "gapmap[%d] kouho = %d\n", posin12, gapmap[posin12] ); for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshift ); for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift ); for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift ); mlen0 += gapshift; mlen1 += gapshift; mlen2 += gapshift; #if 0 for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] ); for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] ); for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] ); #endif if( gapshift ) profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg ); j += gapshift; posin12 += gapshift; newpos = strlen( aseq[rep] ); // kufuu? for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] ); for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] ); for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] ); // fprintf( stderr, "gapshift = %d\n", gapshift ); } blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 ); // fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 ); // blocklen = 1; // if( tmptmptmpmark ) exit( 1 ); newpos = strlen( aseq[rep] ); #if 0 for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j]; for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12]; for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12]; for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0; for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0; for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0; #else for( i=0; i<ngroup0; i++ ) { lp = list0[i]; newchar = aseq[lp] + newpos; strncpy0( newchar, seq[lp]+j, blocklen ); } for( i=0; i<ngroup1; i++ ) { lp = list1[i]; newchar = aseq[lp] + newpos; strncpy0( newchar, seq[lp]+posin12, blocklen ); } for( i=0; i<ngroup2; i++ ) { lp = list2[i]; newchar = aseq[lp] + newpos; strncpy0( newchar, seq[lp]+posin12, blocklen ); } // fprintf( stderr, "### aseq[l0] = %s\n", aseq[list0[0]] ); // fprintf( stderr, "### aseq[l1] = %s\n", aseq[list1[0]] ); // fprintf( stderr, "### aseq[l2] = %s\n", aseq[list2[0]] ); // exit( 1 ); #endif // fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 ); j += (blocklen-1); posin12 += (blocklen-1); posin12++; } #if 0 fprintf( stderr, "\n" ); fprintf( stderr, " seq[l0] = %s\n", seq[list0[0]] ); fprintf( stderr, " seq[l1] = %s\n", seq[list1[0]] ); fprintf( stderr, " seq[l2] = %s\n", seq[list2[0]] ); fprintf( stderr, "=====>\n" ); fprintf( stderr, "aseq[l0] = %s\n", aseq[list0[0]] ); fprintf( stderr, "aseq[l1] = %s\n", aseq[list1[0]] ); fprintf( stderr, "aseq[l2] = %s\n", aseq[list2[0]] ); //if( tmptmptmpmark ) exit( 1 ); #endif // for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] ); for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] ); for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] ); for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] ); free( mar ); free( gaps ); free( list0 ); free( list1 ); free( list2 ); FreeCharMtx( mseq2 ); FreeCharMtx( mseq1 ); // ? added 2012/02/12 FreeCharMtx( mseq0 ); FreeCharMtx( aseq ); // ? added 2012/02/12 } void restorecommongaps( int njob, char **seq, int *ex1, int *ex2, int *gaplen, int alloclen, char gapchar ) { int *mar; char *tmpseq; char *cptr; int *iptr; int *tmpgaplen; int i, j, k, len, rep, len1, klim; mar = calloc( njob, sizeof( int ) ); tmpseq = calloc( alloclen, sizeof( char ) ); tmpgaplen = calloc( alloclen, sizeof( int ) ); // tmpseq = calloc( alloclen+2, sizeof( char ) ); // tmpgaplen = calloc( alloclen+2, sizeof( int ) ); for( i=0; i<njob; i++ ) mar[i] = 0; for( i=0; (k=ex1[i])>-1; i++ ) { mar[k] = 1; // fprintf( stderr, "excluding %d\n", ex1[i] ); } for( i=0; (k=ex2[i])>-1; i++ ) { mar[k] = 1; // fprintf( stderr, "excluding %d\n", ex2[i] ); } for( i=0; i<njob; i++ ) if( mar[i] ) break; if( i == njob ) { // fprintf( stderr, "Nothing to do\n" ); free( mar ); free( tmpseq ); free( tmpgaplen ); return; } rep = i; len = strlen( seq[rep] ); len1 = len+1; for( i=0; i<njob; i++ ) { if( mar[i] == 0 ) continue; cptr = tmpseq; for( j=0; j<len1; j++ ) { klim = gaplen[j]; // for( k=0; k<gaplen[j]; k++ ) while( klim-- ) *(cptr++) = gapchar; // ??? *(cptr++) = seq[i][j]; } *cptr = 0; strcpy( seq[i], tmpseq ); } iptr = tmpgaplen; for( j=0; j<len1; j++ ) { *(iptr++) = gaplen[j]; for( k=0; k<gaplen[j]; k++ ) *(iptr++) = 0; } *iptr = -1; iptr = tmpgaplen; while( *iptr != -1 ) *gaplen++ = *iptr++; free( mar ); free( tmpseq ); free( tmpgaplen ); }