0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Convert fasta alignments to relaxed phylip ones in constant memory.
|
|
5 Written by Lucas Sinclair.
|
|
6 Kopimi.
|
|
7
|
|
8 You can use this script from the shell like this::
|
|
9 $ fasta_to_phylip seqs.fasta seqs.phylip
|
|
10 """
|
|
11
|
|
12 ###############################################################################
|
|
class Sequence(object):
    """A single FASTA record: an identifier plus its sequence string.

    The *header* attribute holds the first whitespace-delimited token that
    follows the leading ``>`` of the FASTA header line; *seq* holds the
    sequence exactly as it was given. The record can be rendered either as
    a relaxed phylip line or as a FASTA entry.
    """

    def __init__(self, header, seq):
        """Build a record from a raw FASTA header line and its sequence.

        *header* must start with ``>`` immediately followed by at least one
        non-whitespace character; otherwise an IndexError is raised.
        """
        # Raw string: '\S' inside a plain literal is an invalid escape
        # sequence. The pattern is kept inside the method so that `re` is
        # only looked up when a Sequence is actually created.
        self.header = re.findall(r'^>(\S+)', header)[0]
        self.seq = seq

    def __len__(self):
        """Return the number of characters in the sequence string."""
        return len(self.seq)

    @property
    def phylip(self):
        """The record as one phylip line, with '.' gaps rewritten as '-'."""
        return self.header + " " + self.seq.replace('.', '-') + "\n"

    @property
    def fasta(self):
        """The record as a two-line FASTA entry (header line, sequence line)."""
        return ">" + self.header + "\n" + self.seq + "\n"
|
|
31
|
|
def fasta_parse(path):
    """Read the FASTA file at *path* and lazily yield Sequence objects.

    One Sequence is yielded per ``>`` header line; the sequence lines that
    follow a header are concatenated without their line endings. Lines
    that appear before the first header are ignored. A file containing no
    header at all yields nothing.
    """
    header = ''
    chunks = []
    with open(path) as f:
        for line in f:
            # Drop the line ending, including the '\r' of CRLF files, so it
            # never ends up inside a sequence.
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                # A new record starts: emit the previous one, if any.
                if header:
                    yield Sequence(header, ''.join(chunks))
                header = line
                chunks = []
                continue
            chunks.append(line)
    # Emit the last record, but only if at least one header was seen;
    # otherwise there is no identifier to build a Sequence from.
    if header:
        yield Sequence(header, ''.join(chunks))
|
|
47
|
|
48 ###############################################################################
|
|
49 # The libraries we need #
|
|
50 import sys, os, random, string, re
|
|
# Get the shell arguments #
if len(sys.argv) < 3:
    sys.exit("Usage: fasta_to_phylip seqs.fasta seqs.phylip")
fa_path = sys.argv[1]
ph_path = sys.argv[2]
# Check that the path is valid #
if not os.path.exists(fa_path): raise Exception("No file at %s." % fa_path)
# Parse the input lazily: nothing is read until we iterate #
seqs = fasta_parse(fa_path)
# The phylip header needs the sequence count, which is only known once the
# whole input has been read. So the records go to a temporary file first
# and are copied behind the header afterwards. #
# `string.ascii_letters` and `range` exist on both Python 2 and 3 #
tm_path = ph_path + '.' + ''.join(random.choice(string.ascii_letters) for i in range(10))
# Count the sequences and remember the length of the last one #
count = 0
length = 0
try:
    with open(tm_path, 'w') as f:
        for seq in seqs:
            f.write(seq.phylip)
            count += 1
            length = len(seq)
    # Fail before touching the output path if there was nothing to convert #
    if count == 0: raise Exception("No sequences found in %s." % fa_path)
    # Add number of entries and length at the top #
    with open(tm_path, 'r') as old, open(ph_path, 'w') as new:
        new.write(" " + str(count) + " " + str(length) + "\n")
        new.writelines(old)
finally:
    # Clean up, also when something above failed #
    if os.path.exists(tm_path): os.remove(tm_path)