annotate fasta_concatenate_by_species.py @ 3:49e91b42881b draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit 34a6c9f94a5722bb7d2f887618aafa410a770e91"
author devteam
date Mon, 02 Mar 2020 11:46:08 +0000
parents c5311b7718d1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
2
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
2 # Dan Blankenberg
0
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
3 """
2
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
4 Takes a Multiple Alignment FASTA file and concatenates
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
5 sequences for each species, resulting in one sequence
0
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
6 alignment per species.
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
7 """
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
8
2
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
9 import sys
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
10 import tempfile
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
11 from collections import OrderedDict
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
12
0
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
13 from utils.maf_utilities import iter_fasta_alignment
2
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
14
0
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
15
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
def __main__():
    """Concatenate the sequences of a multiple-alignment FASTA file by species.

    Command-line arguments:
        sys.argv[1]: path of the input multiple-alignment FASTA file.
        sys.argv[2]: path of the output FASTA file.

    Each group of components yielded by ``iter_fasta_alignment`` is treated
    as one alignment block. The text of every component is appended to a
    temporary file belonging to its species. A species that is absent from
    a block is padded with "-" so that all species stay the same length.
    The output holds one ``>species`` record per species, in order of first
    appearance.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import section.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # species name -> temporary file accumulating its concatenated sequence;
    # ordered so the output follows first-appearance order.
    species = OrderedDict()
    # Total alignment length written so far for every known species.
    cur_size = 0
    try:
        for components in iter_fasta_alignment(input_filename):
            # NOTE(review): assumes every yielded group is non-empty and that
            # all components of a block have the same text length as the
            # first one -- confirm against iter_fasta_alignment.
            block_size = len(components[0].text)
            # Species known before this block that have not (yet) received
            # text from it.
            species_not_written = set(species)
            for component in components:
                name = component.species
                if name not in species:
                    # First occurrence: back-fill gaps for all earlier blocks.
                    species[name] = tempfile.TemporaryFile(mode="r+")
                    species[name].write("-" * cur_size)
                species[name].write(component.text)
                # No-op for a species that is new in this block.
                species_not_written.discard(name)
            gap = "-" * block_size
            for name in species_not_written:
                species[name].write(gap)
            cur_size += block_size
        with open(output_filename, 'w') as out:
            for name, handle in species.items():
                handle.seek(0)
                out.write(">%s\n" % name)
                # Stream the sequence instead of loading it all into memory.
                shutil.copyfileobj(handle, out)
                out.write("\n")
    finally:
        # Release every temporary file, even if processing failed part-way.
        for handle in species.values():
            handle.close()
0
a63b082a26eb Imported from capsule None
devteam
parents:
diff changeset
40
2
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
41
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
42 if __name__ == "__main__":
c5311b7718d1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
43 __main__()