Mercurial > repos > devteam > fasta_concatenate_by_species
comparison fasta_concatenate_by_species.py @ 2:c5311b7718d1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
| author | devteam |
|---|---|
| date | Sun, 01 Mar 2020 12:22:35 +0000 |
| parents | a63b082a26eb |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:d9f0a11824e9 | 2:c5311b7718d1 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 #Dan Blankenberg | 2 # Dan Blankenberg |
| 3 """ | 3 """ |
| 4 Takes a Multiple Alignment FASTA file and concatenates | 4 Takes a Multiple Alignment FASTA file and concatenates |
| 5 sequences for each species, resulting in one sequence | 5 sequences for each species, resulting in one sequence |
| 6 alignment per species. | 6 alignment per species. |
| 7 """ | 7 """ |
| 8 | 8 |
| 9 import sys, tempfile | 9 import sys |
| | 10 import tempfile |
| | 11 from collections import OrderedDict |
| | 12 |
| 10 from utils.maf_utilities import iter_fasta_alignment | 13 from utils.maf_utilities import iter_fasta_alignment |
| 11 from utils.odict import odict | 14 |
| 12 | 15 |
| 13 def __main__(): | 16 def __main__(): |
| 14 input_filename = sys.argv[1] | 17 input_filename = sys.argv[1] |
| 15 output_filename = sys.argv[2] | 18 output_filename = sys.argv[2] |
| 16 species = odict() | 19 species = OrderedDict() |
| 17 cur_size = 0 | 20 cur_size = 0 |
| 18 for components in iter_fasta_alignment( input_filename ): | 21 for components in iter_fasta_alignment(input_filename): |
| 19 species_not_written = species.keys() | 22 species_not_written = list(species.keys()) |
| 20 for component in components: | 23 for component in components: |
| 21 if component.species not in species: | 24 if component.species not in species: |
| 22 species[component.species] = tempfile.TemporaryFile() | 25 species[component.species] = tempfile.TemporaryFile(mode="r+") |
| 23 species[component.species].write( "-" * cur_size ) | 26 species[component.species].write("-" * cur_size) |
| 24 species[component.species].write( component.text ) | 27 species[component.species].write(component.text) |
| 25 try: | 28 try: |
| 26 species_not_written.remove( component.species ) | 29 species_not_written.remove(component.species) |
| 27 except ValueError: | 30 except ValueError: |
| 28 #this is a new species | 31 # this is a new species |
| 29 pass | 32 pass |
| 30 for spec in species_not_written: | 33 for spec in species_not_written: |
| 31 species[spec].write( "-" * len( components[0].text ) ) | 34 species[spec].write("-" * len(components[0].text)) |
| 32 cur_size += len( components[0].text ) | 35 cur_size += len(components[0].text) |
| 33 out = open( output_filename, 'wb' ) | 36 with open(output_filename, 'w') as out: |
| 34 for spec, f in species.iteritems(): | 37 for spec, f in species.items(): |
| 35 f.seek( 0 ) | 38 f.seek(0) |
| 36 out.write( ">%s\n%s\n" % ( spec, f.read() ) ) | 39 out.write(">%s\n%s\n" % (spec, f.read())) |
| 37 out.close() | |
| 38 | 40 |
| 39 if __name__ == "__main__" : __main__() | 41 |
| | 42 if __name__ == "__main__": |
| | 43 __main__() |
