comparison fasta_concatenate_by_species.py @ 2:c5311b7718d1 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 12:22:35 +0000
parents a63b082a26eb
children
comparison
legend: equal | deleted | inserted | replaced
left column: 1:d9f0a11824e9 | right column: 2:c5311b7718d1
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #Dan Blankenberg 2 # Dan Blankenberg
3 """ 3 """
4 Takes a Multiple Alignment FASTA file and concatenates 4 Takes a Multiple Alignment FASTA file and concatenates
5 sequences for each species, resulting in one sequence 5 sequences for each species, resulting in one sequence
6 alignment per species. 6 alignment per species.
7 """ 7 """
8 8
9 import sys, tempfile 9 import sys
10 import tempfile
11 from collections import OrderedDict
12
10 from utils.maf_utilities import iter_fasta_alignment 13 from utils.maf_utilities import iter_fasta_alignment
11 from utils.odict import odict 14
12 15
13 def __main__(): 16 def __main__():
14 input_filename = sys.argv[1] 17 input_filename = sys.argv[1]
15 output_filename = sys.argv[2] 18 output_filename = sys.argv[2]
16 species = odict() 19 species = OrderedDict()
17 cur_size = 0 20 cur_size = 0
18 for components in iter_fasta_alignment( input_filename ): 21 for components in iter_fasta_alignment(input_filename):
19 species_not_written = species.keys() 22 species_not_written = list(species.keys())
20 for component in components: 23 for component in components:
21 if component.species not in species: 24 if component.species not in species:
22 species[component.species] = tempfile.TemporaryFile() 25 species[component.species] = tempfile.TemporaryFile(mode="r+")
23 species[component.species].write( "-" * cur_size ) 26 species[component.species].write("-" * cur_size)
24 species[component.species].write( component.text ) 27 species[component.species].write(component.text)
25 try: 28 try:
26 species_not_written.remove( component.species ) 29 species_not_written.remove(component.species)
27 except ValueError: 30 except ValueError:
28 #this is a new species 31 # this is a new species
29 pass 32 pass
30 for spec in species_not_written: 33 for spec in species_not_written:
31 species[spec].write( "-" * len( components[0].text ) ) 34 species[spec].write("-" * len(components[0].text))
32 cur_size += len( components[0].text ) 35 cur_size += len(components[0].text)
33 out = open( output_filename, 'wb' ) 36 with open(output_filename, 'w') as out:
34 for spec, f in species.iteritems(): 37 for spec, f in species.items():
35 f.seek( 0 ) 38 f.seek(0)
36 out.write( ">%s\n%s\n" % ( spec, f.read() ) ) 39 out.write(">%s\n%s\n" % (spec, f.read()))
37 out.close()
38 40
39 if __name__ == "__main__" : __main__() 41
42 if __name__ == "__main__":
43 __main__()