Mercurial > repos > nick > duplex
view mafft/core/mafft-profile.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line source
#include "mltaln.h" #define DEBUG 0 #if DEBUG #include <time.h> #include <sys/time.h> #include <sys/resource.h> double getrusage_sec() { struct rusage t; struct timeval tv; getrusage(RUSAGE_SELF, &t); tv = t.ru_utime; return tv.tv_sec + (double)tv.tv_usec*1e-6; } #endif int intcmp( int *str1, int *str2 ) { while( *str1 != -1 && *str2 != -1 ) if( *str1++ != *str2++ ) return( 1 ); if( *str1 != *str2 ) return( 1 ); return( 0 ); } char **arguments( int argc, char *argv[] ) { int c = 0; fmodel = 0; nblosum = 62; calledByXced = 0; devide = 0; fftscore = 1; use_fft = 1; alg = 'A'; weight = 0; utree = 1; tbutree = 0; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 0; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; scoremtx = 1; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; poffset = 0; // chokusetsu yobareru kara kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; TMorJTT = JTT; treemethod = 'x'; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'P': dorp = 'p'; break; case 'D': dorp = 'd'; break; case 'F': use_fft = 1; break; case 'N': use_fft = 0; break; case 'e': fftscore = 0; break; case 'Q': alg = 'Q'; break; case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'd': disp = 1; break; case 'O': outgap = 0; break; case 'a': fmodel = 1; break; case 'u': tbrweight = 0; break; case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc != 2 ) { fprintf( stderr, "options: Check source file ! %c ?\n", c ); exit( 1 ); } fprintf( stderr, "tbitr = %d, tbrweight = %d, tbweight = %d\n", tbitr, tbrweight, tbweight ); // readOtherOptions( &ppid, &fftThreshold, &fftWinSize ); return( argv ); } void GroupAlign( int nseq1, int nseq2, char **name, int *nlen, char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double **len, double *eff, int alloclen ) { int i; int clus1, clus2; int s1, s2; float pscore; static char **name1, **name2; double *effarr = eff; double *effarr1 = NULL; double *effarr2 = NULL; static char *indication1, *indication2; float dumfl = 0.0; int intdum; #if DEBUG double time1, time2; #endif // fprintf( stderr, "in GroupAlign fftWinSize = %d\n", fftWinSize ); // fprintf( stderr, "in GroupAlign fftThreshold = %d\n", fftThreshold ); if( effarr1 == NULL ) { name1 = AllocateCharMtx( nseq1, B ); name2 = AllocateCharMtx( nseq2, B ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); #if 0 #else #endif } for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] ); s1 = 0; s2 = nseq1; // fprintf( stdout, "nseq1 = %d\n", nseq1 ); clus1 = conjuctionforgaln( 0, nseq1, aseq, mseq1, effarr1, effarr, name, name1, indication1 ); clus2 = conjuctionforgaln( nseq1, njob, aseq, mseq2, effarr2, effarr, name, name2, indication2 ); /* fprintf( stderr, "before align all\n" ); display( aseq, njob ); fprintf( stderr, "\n" ); fprintf( stderr, "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ commongappick( nseq1, mseq1 ); commongappick( nseq2, mseq2 ); #if DEBUG time1 = getrusage_sec(); fprintf( stderr, "entering Falign\n" ); #endif if( use_fft ) { if( alg == 'M' ) pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum ); else pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum, NULL, 0, NULL ); } else { if( alg == 'M' ) pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); else pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } #if DEBUG time2 = getrusage_sec(); fprintf( stdout, "### %d - %d, %f\n", clus1, clus2, time2-time1 ); fflush( stdout ); #endif /* fprintf( stderr, "after align 1 %s \n", indication1 ); display( mseq1, clus1 ); fprintf( stderr, "\n" ); fprintf( stderr, "after align 2 %s \n", indication2 ); display( mseq2, clus2 ); fprintf( stderr, "\n" ); */ fprintf( stderr, "group-to-group %s /%s %f\n", indication1, indication2, pscore ); if( disp ) display( aseq, njob ); fprintf( stderr, "\n" ); /* trap = fopen( "pre", "r+" ); if( !trap ) ErrorExit( 1 ); WriteGapFill( trap, njob, name, nlen, aseq ); fclose( trap ); fprintf( stdout, "nseq1 = %d\n", nseq1 ); */ } static void WriteOptions( FILE *fp ) { fprintf( fp, "tree-base method\n" ); if( tbweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbweight == 3 ) fprintf( fp, "reversely weighted\n" ); if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "Dayhoff( machigai ga aru )\n" ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); else if( scoremtx == -1 ) fprintf( fp, "DNA\n" ); if( scoremtx ) fprintf( fp, "Gap Penalty = %d, %d\n", penalty, offset ); else fprintf( fp, "Gap Penalty = %d\n", penalty ); } int main( int argc, char *argv[] ) { char **argv2; static int *nlen; static char **name, **seq; static char **seq1, **seq2; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double **pscore; static double *eff; int i, j, len1, len2; static int ***topol; static double **len; FILE *gp1, *gp2; char c; int nlenmax1, nlenmax2, nseq1, nseq2; int alloclen; argv2 = arguments( argc, argv ); fprintf( stderr, "####### in galn\n" ); initFiles(); fprintf( stderr, "file1 = %s\n", argv2[0] ); fprintf( stderr, "file2 = %s\n", argv2[1] ); gp1 = fopen( argv2[0], "r" ); if( !gp1 ) ErrorExit( "cannot open file1" ); gp2 = fopen( argv2[1], "r" ); if( !gp2 ) ErrorExit( "cannot open file2" ); #if 0 PreRead( gp1, &nseq1, &nlenmax1 ); PreRead( gp2, &nseq2, &nlenmax2 ); #else getnumlen( gp1 ); nseq1 = njob; nlenmax1 = nlenmax; getnumlen( gp2 ); nseq2 = njob; nlenmax2 = nlenmax; #endif njob = nseq1 + nseq2; nlenmax = MAX( nlenmax1, nlenmax2 ); rewind( gp1 ); rewind( gp2 ); name = AllocateCharMtx( njob, B ); nlen = AllocateIntVec( njob ); seq1 = AllocateCharMtx( nseq1, nlenmax*3 ); seq2 = AllocateCharMtx( nseq2, nlenmax*3 ); seq = AllocateCharMtx( njob, 1 ); aseq = AllocateCharMtx( njob, nlenmax*3 ); bseq = AllocateCharMtx( njob, nlenmax*3 ); mseq1 = AllocateCharMtx( njob, 1 ); mseq2 = AllocateCharMtx( njob, 1 ); alloclen = nlenmax * 3; topol = AllocateIntCub( njob, 2, njob ); len = AllocateDoubleMtx( njob, 2 ); pscore = AllocateDoubleMtx( njob, njob ); eff = AllocateDoubleVec( njob ); #if 0 njob=nseq2; FRead( gp2, name+nseq1, nlen+nseq1, seq2 ); njob=nseq1; FRead( gp1, name, nlen, seq1 ); #else njob=nseq2; readDataforgaln( gp2, name+nseq1, nlen+nseq1, seq2 ); njob=nseq1; readDataforgaln( gp1, name, nlen, seq1 ); #endif njob = nseq1 + nseq2; #if 0 // CHUUI commongappick( nseq1, seq1 ); commongappick( nseq2, seq2 ); #endif for( i=0; i<nseq1; i++ ) seq[i] = seq1[i]; for( i=nseq1; i<njob; i++ ) seq[i] = seq2[i-nseq1]; /* Write( stdout, njob, name, nlen, seq ); */ constants( njob, seq ); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } for( i=1; i<nseq1; i++ ) { if( nlen[i] != nlen[0] ) ErrorExit( "group1 is not aligned." ); } for( i=nseq1+1; i<njob; i++ ) { if( nlen[i] != nlen[nseq1] ) ErrorExit( "group2 is not aligned." ); } if( tbutree == 0 ) { for( i=0; i<nseq1; i++ ) { for( j=i+1; j<nseq1; j++ ) { pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] ); // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] ); } for( j=nseq1; j<njob; j++ ) { pscore[i][j] = 3.0; // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] ); } } for( i=nseq1; i<njob-1; i++ ) { for( j=i+1; j<njob; j++ ) { pscore[i][j] = (double)substitution_hosei( seq[i], seq[j] ); // fprintf( stderr, "%d-%d, %5.1f \n", i, j, pscore[i][j] ); } } // fprintf( stderr, "\n" ); } else { fprintf( stderr, "Not supported\n" ); exit( 1 ); #if 0 prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2( prep, njob, name, pscore ); fclose( prep ); #endif } fprintf( stderr, "Constructing dendrogram ... " ); if( treemethod == 'x' ) veryfastsupg( njob, pscore, topol, len ); else ErrorExit( "Incorrect tree\n" ); fprintf( stderr, "done.\n" ); if( tbrweight ) { weight = 3; counteff_simple( njob, topol, len, eff ); // for( i=0; i<njob; i++ ) fprintf( stderr, "eff[%d] = %f\n", i, eff[i] ); } else { for( i=0; i<njob; i++ ) eff[i] = 1.0; } len1 = strlen( seq[0] ); len2 = strlen( seq[nseq1] ); if( len1 > 30000 || len2 > 30000 ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; } GroupAlign( nseq1, nseq2, name, nlen, seq, aseq, mseq1, mseq2, topol, len, eff, alloclen ); #if 0 writePre( njob, name, nlen, aseq, 1 ); #else writeDataforgaln( stdout, njob, name, nlen, aseq ); #endif SHOWVERSION; return( 0 ); }