diff mafft/core/f2cl.c @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mafft/core/f2cl.c	Thu Feb 02 18:44:31 2017 -0500
@@ -0,0 +1,318 @@
+#include "mltaln.h"
+
+#define DEBUG 0
+
+
+/* Text given with -c (NULL if absent); main() passes it to clustalout_pointer(). */
+static char *comment;
+/* Path given with -r (NULL if absent); main() reads one integer per sequence from it into order[]. */
+static char *orderfile;
+/* Output selector set in arguments(): 'c' (default), 'f' after -f, 'y' after -y. */
+static int format;
+/* Value given with -n (default -1); main() forwards it to clustalout_pointer()/phylipout_pointer(). */
+static int namelen;
+/* 1 after -E, 0 by default or after -N; setmark_clustal() compares raw bytes when it is set. */
+static int extendedalphabet;
+
+/*
+ * Pad the NUL-terminated string seq with trailing spaces until it is
+ * lenmax characters long, then terminate it again.
+ *
+ * seq    - writable buffer; the caller must provide room for lenmax+1 bytes.
+ * lenmax - target length in characters.
+ *
+ * A string that already has lenmax characters or more is left unchanged.
+ */
+static void fillspace( char *seq, int lenmax )
+{
+	int len = strlen( seq );
+	seq += len;
+	lenmax -= len;
+	/*
+	 * "> 0" is required: with a bare "lenmax--" a negative count (string
+	 * longer than lenmax) never reaches zero and the loop keeps writing
+	 * spaces past the end of the buffer.
+	 */
+	while( lenmax-- > 0 ) *seq++ = ' ';
+	*seq = 0;
+}
+
+/*
+ * Return 1 when, for at least one string in groups[0..ngroups-1], the
+ * upper-cased character of every sequence at column col occurs in that
+ * string; return 0 otherwise.
+ */
+static int column_in_some_group( char **groups, int ngroups, int nseq, char **seq, int col )
+{
+	int j, k;
+
+	for( k=0; k<ngroups; k++ )
+	{
+		for( j=0; j<nseq; j++ )
+		{
+			/* toupper() is only defined for unsigned char values (or EOF). */
+			if( !strchr( groups[k], toupper( (unsigned char)seq[j][col] ) ) ) break;
+		}
+		if( j == nseq ) return( 1 );
+	}
+	return( 0 );
+}
+
+/*
+ * Fill mark[0..nlen-1] with one conservation symbol per alignment column
+ * and terminate it with NUL at mark[nlen].
+ *
+ * nlen - number of columns to examine.
+ * nseq - number of sequences in seq.
+ * seq  - aligned sequences; seq[j][i] is read for every j < nseq, i < nlen.
+ * mark - output buffer with room for nlen+1 bytes.
+ *
+ * Symbols written:
+ *   ' '  a sequence has '-' or ' ' in the column, the first sequence's
+ *        character is rejected by the amino_n[] test, or nothing below holds
+ *   '*'  every sequence has the same character (compared case-insensitively
+ *        unless extendedalphabet is set, in which case bytes are compared as is)
+ *   ':'  all characters fall into one of the "strong" groups
+ *   '.'  all characters fall into one of the "weaker" groups
+ * The ':' and '.' classes are only evaluated when extendedalphabet is 0.
+ * The group tables and the alphabet size depend on the global dorp
+ * ('d' selects the short nucleotide tables).
+ */
+void setmark_clustal( int nlen, int nseq, char **seq, char *mark )
+{
+	int i, j, nalpha;
+	int firstletter;	/* kept as int so values >= 0x80 stay non-negative */
+	char *strong[9];
+	char *weaker[11];
+	int nstrong, nweaker;
+	char s;
+
+	if( dorp == 'd' ) 
+	{
+		strong[0] = "TU";
+		nstrong = 1;
+		weaker[0] = "AG";
+		weaker[1] = "CT";
+		nweaker = 2;
+		nalpha = 10;
+	}
+	else
+	{
+		strong[0] = "STA";
+		strong[1] = "NEQK";
+		strong[2] = "NHQK";
+		strong[3] = "NDEQ";
+		strong[4] = "QHRK";
+		strong[5] = "MILV";
+		strong[6] = "MILF";
+		strong[7] = "HY";
+		strong[8] = "FYW";
+		nstrong = 9;
+		weaker[0] = "CSA";
+		weaker[1] = "ATV";
+		weaker[2] = "SAG";
+		weaker[3] = "STNK";
+		weaker[4] = "STPA";
+		weaker[5] = "SGND";
+		weaker[6] = "SNDEQK";
+		weaker[7] = "NDEQHK";
+		weaker[8] = "NEQHRK";
+		weaker[9] = "FVLIM";
+		weaker[10] = "HFY";
+		nweaker = 11;
+		nalpha = 20;
+	}
+
+	for( i=0; i<nlen; i++ )
+	{
+		mark[i] = ' ';
+
+		/* A gap or padding blank in any sequence leaves the column unmarked. */
+		for( j=0; j<nseq; j++ )
+		{
+			s = seq[j][i];
+			if( '-' == s || ' ' == s ) break;
+		}
+		if( j != nseq ) continue;
+
+		if( extendedalphabet )
+		{
+			/*
+			 * Index through unsigned char: a plain (int) cast of a signed
+			 * char >= 0x80 would be a negative subscript.
+			 * NOTE(review): assumes amino_n has an entry for every
+			 * unsigned char value - confirm against its declaration.
+			 */
+			firstletter = (unsigned char)seq[0][i];
+			if( amino_n[firstletter] < 0 ) continue;
+
+			for( j=0; j<nseq; j++ )
+				if( (unsigned char)seq[j][i] != firstletter ) break;
+			if( j == nseq ) mark[i] = '*';
+		}
+		else 
+		{
+			firstletter = toupper( (unsigned char)seq[0][i] );
+			if( amino_n[firstletter] >= nalpha || amino_n[firstletter] < 0 ) continue;
+
+			for( j=0; j<nseq; j++ )
+				if( toupper( (unsigned char)seq[j][i] ) != firstletter ) break;
+
+			if( j == nseq ) 
+				mark[i] = '*';
+			else if( column_in_some_group( strong, nstrong, nseq, seq, i ) )
+				mark[i] = ':';
+			else if( column_in_some_group( weaker, nweaker, nseq, seq, i ) )
+				mark[i] = '.';
+		}
+	}
+	mark[nlen] = 0;
+}
+
+/*
+ * Simpler column marker: fill mark[0..nlen-1] and terminate with NUL.
+ *
+ * nlen - number of columns to examine.
+ * nseq - number of sequences in seq.
+ * seq  - aligned sequences; seq[j][i] is read for every j < nseq, i < nlen.
+ * mark - output buffer with room for nlen+1 bytes.
+ *
+ * Symbols written:
+ *   ' '  some sequence has '-' in the column, or neither rule below holds
+ *   '*'  every sequence has exactly the same character as the first one
+ *   '.'  every character maps to the same amino_grp[] value as the first one
+ */
+void setmark( int nlen, int nseq, char **seq, char *mark )
+{
+	int i, j;
+	char first;
+
+	for( i=0; i<nlen; i++ )
+	{
+		mark[i] = ' ';
+
+		for( j=0; j<nseq; j++ )
+			if( '-' == seq[j][i] ) break;
+		if( j != nseq ) continue;
+
+		first = seq[0][i];
+
+		for( j=0; j<nseq; j++ )
+			if( first != seq[j][i] ) break;
+		if( j == nseq ) 
+		{
+			mark[i] = '*';
+			continue;
+		}
+
+		/*
+		 * Index through unsigned char so that bytes >= 0x80 cannot become
+		 * negative subscripts where char is signed.
+		 * NOTE(review): assumes amino_grp has an entry for every unsigned
+		 * char value - confirm against its declaration.
+		 */
+		for( j=0; j<nseq; j++ )
+			if( amino_grp[(unsigned char)first] != amino_grp[(unsigned char)seq[j][i]] ) break;
+		if( j == nseq ) mark[i] = '.';
+	}
+	mark[nlen] = 0;
+}
+
+/*
+ * Consume and return the value that follows option letter opt.
+ *
+ * *argcp counts the arguments from the current option token to the end,
+ * so a value is only available when it is at least 2. On success the
+ * argument cursor is advanced by one and the count decremented; otherwise
+ * a message is printed to stderr and the program exits with status 1.
+ */
+static char *option_value( int *argcp, char ***argvp, int opt )
+{
+	if( *argcp < 2 )
+	{
+		fprintf( stderr, "option -%c requires an argument\n", opt );
+		exit( 1 );
+	}
+	--*argcp;
+	return( *++*argvp );
+}
+
+/*
+ * Reset the option variables to their defaults and parse the command line.
+ *
+ * Options taking a value (the value is the next argument):
+ *   -i FILE  input file          -c TEXT  comment
+ *   -r FILE  order file          -n INT   name length
+ * Flags (may be combined in one token, e.g. "-fE"):
+ *   -f  format 'f'               -y  format 'y'
+ *   -E  extendedalphabet = 1 and nblosum = -2
+ *   -N  extendedalphabet = 0
+ *
+ * Parsing stops at the first argument that does not start with '-'.
+ * An unknown option letter, or any argument left over after the options,
+ * prints "options: Check source file !" and exits with status 1.
+ */
+void arguments( int argc, char *argv[] )
+{
+    int c;
+	namelen = -1;
+	scoremtx = 1;
+	nblosum = 62;
+	dorp = NOTSPECIFIED;
+	kimuraR = NOTSPECIFIED;
+	pamN = NOTSPECIFIED;
+	inputfile = NULL;
+	comment = NULL;
+	orderfile = NULL;
+	format = 'c';
+	extendedalphabet = 0;
+
+    while( --argc > 0 && (*++argv)[0] == '-' )
+	{
+		/* ++argv[0] steps through the letters of the current option token. */
+        while ( (c = *++argv[0]) )
+		{
+            switch( c )
+            {
+				case 'i':
+					inputfile = option_value( &argc, &argv, c );
+					fprintf( stderr, "inputfile = %s\n", inputfile );
+					goto nextoption;
+				case 'c':
+					comment = option_value( &argc, &argv, c );
+					fprintf( stderr, "comment = %s\n", comment );
+					goto nextoption;
+				case 'r':
+					orderfile = option_value( &argc, &argv, c );
+					fprintf( stderr, "orderfile = %s\n", orderfile );
+					goto nextoption;
+				case 'n':
+					namelen = myatoi( option_value( &argc, &argv, c ) );
+					fprintf( stderr, "namelen = %d\n", namelen );
+					goto nextoption;
+				case 'f':
+					format = 'f';
+					break;
+				case 'y':
+					format = 'y';
+					break;
+				case 'E':
+					extendedalphabet = 1;
+					nblosum = -2;
+					break;
+				case 'N':
+					extendedalphabet = 0;
+					break;
+                default:
+                    fprintf( stderr, "illegal option %c\n", c );
+					/* Forces the outer loop to stop and the check below to fail. */
+                    argc = 0;
+                    break;
+            }
+		}
+		nextoption:
+			;
+	}
+    if( argc != 0 ) 
+    {
+        fprintf( stderr, "options: Check source file !\n" );
+        exit( 1 );
+    }
+}
+
+
+/*
+ * Read an alignment and write it to stdout in the selected format.
+ *
+ * Steps:
+ *   1. parse options with arguments();
+ *   2. open the -i file, or use stdin when none was given;
+ *   3. scan the input once with getnumlen_casepreserve(), rewind, and
+ *      allocate buffers sized from njob and nlenmax
+ *      (presumably set by that scan - not visible in this file);
+ *   4. fill order[] from the -r file (one integer per line) or with 0..njob-1;
+ *   5. read names and sequences, pad sequences with blanks for the 'c' and
+ *      'y' formats, compute the conservation line and write the result.
+ *
+ * Exits with status 1 when a file cannot be opened or the order file is
+ * too short or holds an index outside 0..njob-1; returns 0 otherwise.
+ */
+int main( int argc, char *argv[] )
+{
+	static int  *nlen;	
+	static char **name, **seq, *mark;
+	static int *order;
+	int i;
+	FILE *infp;
+	FILE *orderfp;
+	char gett[B];
+	int nlenmin;
+
+	arguments( argc, argv );
+
+
+	if( inputfile )
+	{
+		infp = fopen( inputfile, "r" );
+		if( !infp )
+		{
+			fprintf( stderr, "Cannot open %s\n", inputfile );
+			exit( 1 );
+		}
+	}
+	else
+		infp = stdin;
+
+	getnumlen_casepreserve( infp, &nlenmin );
+	rewind( infp );
+
+	seq = AllocateCharMtx( njob, nlenmax*2+1 );
+	mark = AllocateCharVec( nlenmax*2+1 );
+	order = AllocateIntVec( njob );
+	name = AllocateCharMtx( njob, B+1 );
+    nlen = AllocateIntVec( njob );
+
+
+	if( orderfile )
+	{
+		orderfp = fopen( orderfile, "r" );
+		if( !orderfp )
+		{
+			fprintf( stderr, "Cannot open %s\n", orderfile );
+			exit( 1 );
+		}
+		for( i=0; i<njob; i++ )
+		{
+			/* Without this check a short file would feed stale buffer contents to atoi(). */
+			if( !fgets( gett, B-1, orderfp ) )
+			{
+				fprintf( stderr, "%s has fewer than %d lines\n", orderfile, njob );
+				exit( 1 );
+			}
+			order[i] = atoi( gett );
+			/*
+			 * The default below uses 0..njob-1, so anything else cannot
+			 * name a sequence; the writers presumably index with it.
+			 */
+			if( order[i] < 0 || order[i] >= njob )
+			{
+				fprintf( stderr, "Illegal sequence index %d in %s\n", order[i], orderfile );
+				exit( 1 );
+			}
+		}
+		fclose( orderfp );
+	}
+	else
+	{
+		for( i=0; i<njob; i++ ) order[i] = i;
+	}
+
+	readData_pointer_casepreserve( infp, name, nlen, seq );
+	fclose( infp );
+
+	/* Column-oriented formats need every sequence padded to nlenmax. */
+	if( format == 'c' || format == 'y' ) for( i=0; i<njob; i++ ) fillspace( seq[i], nlenmax );
+	constants( njob, seq );
+
+	setmark_clustal( nlenmax, njob, seq, mark );
+
+	if( format == 'f' )
+		writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
+	else if( format == 'c' )
+		clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen );
+	else if( format == 'y' )
+		phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen );
+	else
+		fprintf( stderr, "Unknown format\n" );
+
+	return( 0 );
+}