0
|
1 #!/usr/bin/env python
|
|
2 #Dan Blankenberg
|
|
3 """
|
|
4 Takes a Multiple Alignment FASTA file and concatenates
|
|
5 sequences for each species, resulting in one sequence
|
|
6 alignment per species.
|
|
7 """
|
|
8
|
|
9 import sys, tempfile
|
|
10 from utils.maf_utilities import iter_fasta_alignment
|
|
11 from utils.odict import odict
|
|
12
|
|
def __main__():
    """
    Concatenate the alignment blocks of a FASTA multiple alignment per species.

    Command-line arguments:
        sys.argv[1] -- path of the input multiple alignment FASTA file
        sys.argv[2] -- path of the output FASTA file

    Each block yielded by iter_fasta_alignment is appended to one temporary
    file per species. A species missing from a block is padded with "-" for
    the width of that block, and a species first seen in a later block is
    padded with "-" for all columns written before it appeared. The output
    contains one ">species" record per species, in the order the species
    were first encountered.

    The width of a block is taken from the text of its first component.
    NOTE(review): iteritems() and writing str to binary-mode files suggest
    this script targets Python 2 -- confirm before running under Python 3.
    """
    # Local import so this function does not depend on the file-level imports changing.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()  # species name -> temporary file with its sequence so far
    cur_size = 0  # total number of alignment columns written so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # An empty block contributes no columns; skip it instead of
                # failing on components[0] below.
                continue
            block_size = len( components[0].text )
            # Species already known before this block; whatever is left in
            # the set after the loop below is absent from this block.
            species_not_written = set( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: back-fill the columns of all earlier blocks.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                species_not_written.discard( component.species )
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        out = open( output_filename, 'wb' )
        try:
            for spec, f in species.iteritems():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                # Stream the sequence in chunks rather than loading the whole
                # concatenated alignment into memory.
                shutil.copyfileobj( f, out )
                out.write( "\n" )
        finally:
            out.close()
    finally:
        # Release the temporary files even if reading or writing failed.
        for f in species.values():
            f.close()
|
|
38
|
|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
|