diff fasta_concatenate_by_species.py @ 2:c5311b7718d1 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 12:22:35 +0000
parents a63b082a26eb
children
line wrap: on
line diff
--- a/fasta_concatenate_by_species.py	Mon Nov 17 10:08:37 2014 -0500
+++ b/fasta_concatenate_by_species.py	Sun Mar 01 12:22:35 2020 +0000
@@ -1,39 +1,43 @@
 #!/usr/bin/env python
-#Dan Blankenberg
+# Dan Blankenberg
 """
-Takes a Multiple Alignment FASTA file and concatenates 
-sequences for each species, resulting in one sequence 
+Takes a Multiple Alignment FASTA file and concatenates
+sequences for each species, resulting in one sequence
 alignment per species.
 """
 
-import sys, tempfile
+import sys
+import tempfile
+from collections import OrderedDict
+
 from utils.maf_utilities import iter_fasta_alignment
-from utils.odict import odict
+
 
 def __main__():
     input_filename = sys.argv[1]
     output_filename = sys.argv[2]
-    species = odict()
+    species = OrderedDict()
     cur_size = 0
-    for components in iter_fasta_alignment( input_filename ):
-        species_not_written = species.keys()
+    for components in iter_fasta_alignment(input_filename):
+        species_not_written = list(species.keys())
         for component in components:
             if component.species not in species:
-                species[component.species] = tempfile.TemporaryFile()
-                species[component.species].write( "-" * cur_size )
-            species[component.species].write( component.text )
+                species[component.species] = tempfile.TemporaryFile(mode="r+")
+                species[component.species].write("-" * cur_size)
+            species[component.species].write(component.text)
             try:
-                species_not_written.remove( component.species )
+                species_not_written.remove(component.species)
             except ValueError:
-                #this is a new species
+                # this is a new species
                 pass
         for spec in species_not_written:
-            species[spec].write( "-" * len( components[0].text ) )
-        cur_size += len( components[0].text )
-    out = open( output_filename, 'wb' )
-    for spec, f in species.iteritems():
-        f.seek( 0 )
-        out.write( ">%s\n%s\n" % ( spec, f.read() ) )
-    out.close()
+            species[spec].write("-" * len(components[0].text))
+        cur_size += len(components[0].text)
+    with open(output_filename, 'w') as out:
+        for spec, f in species.items():
+            f.seek(0)
+            out.write(">%s\n%s\n" % (spec, f.read()))
 
-if __name__ == "__main__" : __main__()
+
+if __name__ == "__main__":
+    __main__()