Mercurial > repos > nick > duplex
diff mafft/core/f2cl.c @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mafft/core/f2cl.c Thu Feb 02 18:44:31 2017 -0500 @@ -0,0 +1,318 @@ +#include "mltaln.h" + +#define DEBUG 0 + + +static char *comment; +static char *orderfile; +static int format; +static int namelen; +static int extendedalphabet; + +static void fillspace( char *seq, int lenmax ) +{ + int len = strlen( seq ); + seq += len; + lenmax -= len; + while( lenmax-- ) *seq++ = ' '; + *seq = 0; +} + +void setmark_clustal( int nlen, int nseq, char **seq, char *mark ) +{ + int i, j, k, nalpha; + char firstletter; + char *strong[9]; + char *weaker[11]; + int nstrong, nweaker; + char s; + + if( dorp == 'd' ) + { + strong[0] = "TU"; + nstrong = 1; + weaker[0] = "AG"; + weaker[1] = "CT"; + nweaker = 2; + nalpha = 10; + } + else + { + strong[0] = "STA"; + strong[1] = "NEQK"; + strong[2] = "NHQK"; + strong[3] = "NDEQ"; + strong[4] = "QHRK"; + strong[5] = "MILV"; + strong[6] = "MILF"; + strong[7] = "HY"; + strong[8] = "FYW"; + nstrong = 9; + weaker[0] = "CSA"; + weaker[1] = "ATV"; + weaker[2] = "SAG"; + weaker[3] = "STNK"; + weaker[4] = "STPA"; + weaker[5] = "SGND"; + weaker[6] = "SNDEQK"; + weaker[7] = "NDEQHK"; + weaker[8] = "NEQHRK"; + weaker[9] = "FVLIM"; + weaker[10] = "HFY"; + nweaker = 11; + nalpha = 20; + } + + for( i=0; i<nlen; i++ ) + { + mark[i] = ' '; + for( j=0; j<nseq; j++ ) + { + s = seq[j][i]; + if( '-' == s || ' ' == s ) break; + } + if( j != nseq ) + { + continue; + } + if( extendedalphabet ) + { + firstletter = seq[0][i]; + if( amino_n[(int)firstletter] < 0 ) continue; + + for( j=0; j<nseq; j++ ) + if( seq[j][i] != firstletter ) break; + if( j == nseq ) + { + mark[i] = '*'; + continue; + } + } + else + { + firstletter = toupper( seq[0][i] ); + if( amino_n[(int)firstletter] >= nalpha || amino_n[(int)firstletter] < 0 ) continue; + + for( j=0; j<nseq; j++ ) + if( toupper( seq[j][i] ) != firstletter ) break; + if( j == nseq ) + { + mark[i] = '*'; + continue; + } + for( k=0; k<nstrong; k++ ) + { + for( j=0; j<nseq; j++ ) + { + if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break; + } + if( j == nseq ) break; + } + if( k < nstrong ) + { + mark[i] = ':'; + continue; + } + for( k=0; k<nweaker; k++ ) + { + for( j=0; j<nseq; j++ ) + { + if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break; + } + if( j == nseq ) break; + } + if( k < nweaker ) + { + mark[i] = '.'; + continue; + } + } + } + mark[nlen] = 0; +} + +void setmark( int nlen, int nseq, char **seq, char *mark ) +{ + int i, j; + + for( i=0; i<nlen; i++ ) + { + mark[i] = ' '; + for( j=0; j<nseq; j++ ) + if( '-' == seq[j][i] ) break; + if( j != nseq ) + { + continue; + } + for( j=0; j<nseq; j++ ) + if( seq[0][i] != seq[j][i] ) break; + if( j == nseq ) + { + mark[i] = '*'; + continue; + } + for( j=0; j<nseq; j++ ) + if( amino_grp[(int)seq[0][i]] != amino_grp[(int)seq[j][i]] ) break; + if( j == nseq ) + { + mark[i] = '.'; + continue; + } + } + mark[nlen] = 0; +} + +void arguments( int argc, char *argv[] ) +{ + int c; + namelen = -1; + scoremtx = 1; + nblosum = 62; + dorp = NOTSPECIFIED; + kimuraR = NOTSPECIFIED; + pamN = NOTSPECIFIED; + inputfile = NULL; + comment = NULL; + orderfile = NULL; + format = 'c'; + extendedalphabet = 0; + + while( --argc > 0 && (*++argv)[0] == '-' ) + { + while ( (c = *++argv[0]) ) + { + switch( c ) + { + case 'i': + inputfile = *++argv; + fprintf( stderr, "inputfile = %s\n", inputfile ); + --argc; + goto nextoption; + case 'c': + comment = *++argv; + fprintf( stderr, "comment = %s\n", comment ); + --argc; + goto nextoption; + case 'r': + orderfile = *++argv; + fprintf( stderr, "orderfile = %s\n", orderfile ); + --argc; + goto nextoption; + case 'n': + namelen = myatoi( *++argv ); + fprintf( stderr, "namelen = %d\n", namelen ); + --argc; + goto nextoption; + case 'f': + format = 'f'; + break; + case 'y': + format = 'y'; + break; + case 'E': + extendedalphabet = 1; + nblosum = -2; + break; + case 'N': + extendedalphabet = 0; + break; + default: + fprintf( stderr, "illegal option %c\n", c ); + argc = 0; + break; + } + } + nextoption: + ; + } + if( argc != 0 ) + { + fprintf( stderr, "options: Check source file !\n" ); + exit( 1 ); + } +} + + +int main( int argc, char *argv[] ) +{ + static int *nlen; + static char **name, **seq, *mark; + static int *order; + int i; + FILE *infp; + FILE *orderfp; + char gett[B]; + int nlenmin; + + arguments( argc, argv ); + + + if( inputfile ) + { + infp = fopen( inputfile, "r" ); + if( !infp ) + { + fprintf( stderr, "Cannot open %s\n", inputfile ); + exit( 1 ); + } + } + else + infp = stdin; + + getnumlen_casepreserve( infp, &nlenmin ); + rewind( infp ); + + seq = AllocateCharMtx( njob, nlenmax*2+1 ); + mark = AllocateCharVec( nlenmax*2+1 ); + order = AllocateIntVec( njob ); + name = AllocateCharMtx( njob, B+1 ); + nlen = AllocateIntVec( njob ); + + + if( orderfile ) + { + orderfp = fopen( orderfile, "r" ); + if( !orderfp ) + { + fprintf( stderr, "Cannot open %s\n", orderfile ); + exit( 1 ); + } + for( i=0; i<njob; i++ ) + { + fgets( gett, B-1, orderfp ); + order[i] = atoi( gett ); + } + fclose( orderfp ); + } + else + { + for( i=0; i<njob; i++ ) order[i] = i; + } + + readData_pointer_casepreserve( infp, name, nlen, seq ); + fclose( infp ); + + if( format == 'c' || format == 'y' ) for( i=0; i<njob; i++ ) fillspace( seq[i], nlenmax ); + constants( njob, seq ); + +// initSignalSM(); + +// initFiles(); + + + +// setmark( nlenmax, njob, seq, mark ); + setmark_clustal( nlenmax, njob, seq, mark ); + + if( format == 'f' ) + writeData_reorder_pointer( stdout, njob, name, nlen, seq, order ); + else if( format == 'c' ) + clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen ); + else if( format == 'y' ) + phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen ); + else + fprintf( stderr, "Unknown format\n" ); + +// SHOWVERSION; + return( 0 ); +}