annotate utils/fasta_to_len.py @ 0:86f00ebe7be7 draft

Imported from capsule None
author devteam
date Mon, 19 May 2014 10:59:55 -0400
parents
children 79cd53e23207
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
2 """
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
3 Input: fasta, int
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
4 Output: tabular
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
5 Write each sequence title together with the length of its sequence
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
6 """
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
7
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
8 import sys, os
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
9
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
# Import-time guard: stop immediately on interpreters older than Python 2.4.
# NOTE: assert statements are skipped when Python runs with -O.
10 assert sys.version_info[:2] >= ( 2, 4 )
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
11
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
    """
    Write one "title<TAB>length" line to out_file for each record in fasta_file.

    fasta_file      -- path of the FASTA file to read
    out_file        -- path of the tabular file to write (created/truncated)
    keep_first_char -- int, or a string convertible with int(): how many
                       characters of the title (not counting the leading
                       '>') to keep; 0 keeps the whole title
    keep_first_word -- if true, the header is cut at its first whitespace
                       before keep_first_char is applied

    Blank lines and lines starting with '#' are skipped.  Sequence lines
    are stripped of surrounding whitespace before being counted.  Lines
    that appear before the first '>' header do not belong to any record
    and are ignored; an input without any header yields an empty output
    file.

    Raises ValueError if keep_first_char is not an integer, and IOError /
    OSError if either file cannot be opened.
    """
    keep_first_char = int( keep_first_char )

    # The stored header still carries its leading '>', so the slice that
    # extracts the title ends one past the requested character count.
    # None as the slice end means "keep everything".
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    def format_record( header, length ):
        # Build the tabular row for one finished FASTA record.
        if keep_first_word:
            header = header.split()[0]
        return "%s\t%d\n" % ( header[ 1:title_end ], length )

    fasta_title = None  # header line of the record being read; None until the first '>'
    seq_len = 0

    # try/finally instead of 'with': the module declares Python 2.4 as its
    # minimum version.  The output is opened first, as before, so it is
    # created even when the input turns out to be unreadable.
    out = open( out_file, 'w' )
    try:
        infile = open( fasta_file )
        try:
            for line in infile:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                if line[0] == '>':
                    # A new header closes the previous record, if any.
                    if fasta_title is not None:
                        out.write( format_record( fasta_title, seq_len ) )
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len( line )
        finally:
            infile.close()

        # Flush the last record; skipped when no header was ever seen.
        if fasta_title is not None:
            out.write( format_record( fasta_title, seq_len ) )
    finally:
        out.close()
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
50
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
if __name__ == "__main__":
    # Usage: fasta_to_len.py <input.fasta> <output.tabular> <keep_first_char>
    # The command-line entry point always reduces titles to their first word.
    fasta_path = sys.argv[1]
    tabular_path = sys.argv[2]
    title_chars = sys.argv[3]
    compute_fasta_length( fasta_path, tabular_path, title_chars, True )