Mercurial > repos > devteam > fasta_concatenate_by_species
annotate fasta_concatenate_by_species.py @ 3:49e91b42881b draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit 34a6c9f94a5722bb7d2f887618aafa410a770e91"
author | devteam |
---|---|
date | Mon, 02 Mar 2020 11:46:08 +0000 |
parents | c5311b7718d1 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
2 # Dan Blankenberg |
0 | 3 """ |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
4 Takes a Multiple Alignment FASTA file and concatenates |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
5 sequences for each species, resulting in one sequence |
0 | 6 alignment per species. |
7 """ | |
8 | |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
9 import sys |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
10 import tempfile |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
11 from collections import OrderedDict |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
12 |
0 | 13 from utils.maf_utilities import iter_fasta_alignment |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
14 |
0 | 15 |
16 def __main__(): | |
17 input_filename = sys.argv[1] | |
18 output_filename = sys.argv[2] | |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
19 species = OrderedDict() |
0 | 20 cur_size = 0 |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
21 for components in iter_fasta_alignment(input_filename): |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
22 species_not_written = list(species.keys()) |
0 | 23 for component in components: |
24 if component.species not in species: | |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
25 species[component.species] = tempfile.TemporaryFile(mode="r+") |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
26 species[component.species].write("-" * cur_size) |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
27 species[component.species].write(component.text) |
0 | 28 try: |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
29 species_not_written.remove(component.species) |
0 | 30 except ValueError: |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
31 # this is a new species |
0 | 32 pass |
33 for spec in species_not_written: | |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
34 species[spec].write("-" * len(components[0].text)) |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
35 cur_size += len(components[0].text) |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
36 with open(output_filename, 'w') as out: |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
37 for spec, f in species.items(): |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
38 f.seek(0) |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
39 out.write(">%s\n%s\n" % (spec, f.read())) |
0 | 40 |
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
41 |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
42 if __name__ == "__main__": |
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
43 __main__() |