comparison abims_fasta2phylip.py @ 0:851e52325ffc draft

Uploaded
author mish
date Wed, 24 Jul 2013 08:35:01 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:851e52325ffc
1 #!/usr/bin/env python
2
3 """
4 Convert fasta alignments to relaxed phylip ones in constant memory.
5 Written by Lucas Sinclair.
6 Kopimi.
7
8 You can use this script from the shell like this::
9 $ fasta_to_phylip seqs.fasta seqs.phylip
10 """
11
12 ###############################################################################
class Sequence(object):
    """A single FASTA record: an identifier and its sequence string.

    The identifier stored in *header* is the first run of non-whitespace
    characters immediately following the leading ``>`` of the FASTA header
    line; anything after it on that line is discarded.

    Args:
        header: The raw FASTA header line, starting with ``>``.
        seq: The sequence as a single string.

    Raises:
        ValueError: If *header* does not start with ``>`` immediately
            followed by at least one non-whitespace character.
    """

    def __init__(self, header, seq):
        # Raw string literal: '\S' in a plain literal is an invalid escape.
        # `re` is looked up at call time, so it only needs to be imported
        # somewhere in the module before the first Sequence is built.
        match = re.match(r'^>(\S+)', header)
        if match is None:
            raise ValueError("Malformed FASTA header: %r" % (header,))
        self.header = match.group(1)
        self.seq = seq

    def __len__(self):
        """Return the number of characters in the sequence string."""
        return len(self.seq)

    @property
    def phylip(self):
        """One phylip line: identifier, a space, then the sequence.

        Every ``.`` in the sequence is written as ``-``.
        """
        return self.header + " " + self.seq.replace('.', '-') + "\n"

    @property
    def fasta(self):
        """The record as two FASTA lines: ``>identifier`` and the sequence."""
        return ">" + self.header + "\n" + self.seq + "\n"
31
def fasta_parse(path):
    """Read the FASTA file at *path* and lazily yield Sequence objects.

    Only the record currently being assembled is held in memory. Sequence
    data spread over several lines is concatenated into one string. Lines
    that appear before the first ``>`` header are ignored, and a file
    containing no header at all yields nothing.

    Args:
        path: Path of the FASTA file to read.

    Yields:
        One Sequence per ``>`` header line, in file order.
    """
    header = None
    chunks = []
    with open(path) as f:
        for line in f:
            # rstrip() rather than strip('\n'): also drops the '\r' of CRLF
            # files, which would otherwise end up inside the sequence.
            line = line.rstrip()
            if line.startswith('>'):
                if header is not None:
                    yield Sequence(header, ''.join(chunks))
                header = line
                chunks = []
                continue
            if header is not None:
                chunks.append(line)
    # Emit the last record, but only if a header was ever seen; otherwise
    # there is nothing valid to build a Sequence from.
    if header is not None:
        yield Sequence(header, ''.join(chunks))
47
###############################################################################
# The libraries we need #
import os
import random
import re
import string
import sys
import tempfile


def main(argv=None):
    """Convert a FASTA alignment to a relaxed phylip file.

    The phylip header line needs the number of sequences, which is only
    known once the whole input has been read. The records are therefore
    streamed to a temporary file first, then copied behind the header line
    into the final output.

    Args:
        argv: Two-item list ``[fasta_path, phylip_path]``. Defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 2 when the wrong number of arguments was given.

    Raises:
        Exception: If the input file does not exist or holds no sequences.
    """
    # Get the shell arguments #
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.stderr.write("Usage: fasta_to_phylip seqs.fasta seqs.phylip\n")
        return 2
    fa_path, ph_path = args
    # Check that the path is valid #
    if not os.path.exists(fa_path):
        raise Exception("No file at %s." % fa_path)
    # Write the output to a temporary file next to the final output #
    out_dir = os.path.dirname(os.path.abspath(ph_path))
    fd, tm_path = tempfile.mkstemp(prefix=os.path.basename(ph_path) + '.',
                                   dir=out_dir)
    try:
        # Count the sequences and remember the length of the last one #
        count = 0
        length = 0
        ragged = False
        with os.fdopen(fd, 'w') as tmp:
            for record in fasta_parse(fa_path):
                tmp.write(record.phylip)
                if count and len(record) != length:
                    ragged = True
                length = len(record)
                count += 1
        if not count:
            raise Exception("No sequences found in %s." % fa_path)
        if ragged:
            sys.stderr.write("Warning: sequences differ in length; the "
                             "header reports the length of the last one "
                             "(%d).\n" % length)
        # Add number of entries and length at the top #
        with open(tm_path, 'r') as old:
            with open(ph_path, 'w') as new:
                new.write(" " + str(count) + " " + str(length) + "\n")
                new.writelines(old)
    finally:
        # Clean up, even when the conversion failed part-way #
        os.remove(tm_path)
    return 0


if __name__ == "__main__":
    sys.exit(main())