comparison fasta_concatenate_by_species.py @ 0:a63b082a26eb

Imported from capsule None
author devteam
date Mon, 19 May 2014 10:59:37 -0400
parents
children c5311b7718d1
comparison
equal deleted inserted replaced
-1:000000000000 0:a63b082a26eb
1 #!/usr/bin/env python
2 #Dan Blankenberg
3 """
4 Takes a Multiple Alignment FASTA file and concatenates
5 sequences for each species, resulting in one sequence
6 alignment per species.
7 """
8
9 import sys, tempfile
10 from utils.maf_utilities import iter_fasta_alignment
11 from utils.odict import odict
12
def __main__():
    """
    Concatenate the alignment blocks of a multiple-alignment FASTA file
    into a single sequence per species.

    Command-line arguments:
      sys.argv[1] -- path of the input FASTA alignment file
      sys.argv[2] -- path of the output FASTA file (created/overwritten)

    Each species' text is accumulated in its own temporary file.  A species
    that is absent from a block is padded with '-' for that block, and a
    species first seen part-way through the input is left-padded with '-'
    for all of the columns emitted before it appeared.
    """
    # Function-scope import so this block stays self-contained.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()  # species name -> temporary file holding its sequence
    cur_size = 0  # total number of columns accumulated so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # Defensive: an empty block contributes no columns.
                continue
            # NOTE(review): the block width is taken from the first component;
            # this assumes every component in a block has the same text
            # length -- confirm against iter_fasta_alignment.
            block_size = len( components[0].text )
            # Explicit copy: entries are removed below as species are seen,
            # and this must never alias the mapping's own key storage.
            species_not_written = list( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: pad the columns that preceded it.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                try:
                    species_not_written.remove( component.species )
                except ValueError:
                    # Not in the snapshot: this is a new species (or one
                    # already handled earlier in this same block).
                    pass
            # Pad every known species that this block did not mention.
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        out = open( output_filename, 'wb' )
        try:
            for spec, f in species.items():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                # Stream the sequence instead of loading it all into memory.
                shutil.copyfileobj( f, out )
                out.write( "\n" )
        finally:
            out.close()
    finally:
        # Release the temporary files even if processing failed part-way.
        for f in species.values():
            f.close()
38
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()