Mercurial > repos > mish > fasta_to_phylip
comparison abims_fasta2phylip.py @ 0:851e52325ffc draft
Uploaded
author | mish |
---|---|
date | Wed, 24 Jul 2013 08:35:01 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:851e52325ffc |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Convert fasta alignments to relaxed phylip ones in constant memory. | |
5 Written by Lucas Sinclair. | |
6 Kopimi. | |
7 | |
8 You can use this script from the shell like this:: | |
9 $ fasta_to_phylip seqs.fasta seqs.phylip | |
10 """ | |
11 | |
12 ############################################################################### | |
class Sequence(object):
    """A single FASTA record: an identifier and its sequence string.

    *header* is the raw FASTA header line, starting with ``>``. Only the
    first whitespace-delimited token after ``>`` is kept as ``self.header``;
    anything after it (a free-text description) is discarded.
    *seq* is the sequence string and is stored unchanged.

    Raises ValueError if *header* does not start with ``>`` immediately
    followed by at least one non-whitespace character.
    """

    def __init__(self, header, seq):
        # Raw string: '\S' in a plain literal is an invalid escape sequence.
        match = re.match(r'>(\S+)', header)
        if match is None:
            # Fail with a message naming the bad header instead of the bare
            # IndexError that indexing an empty findall() result would give.
            raise ValueError("Malformed FASTA header: %r" % (header,))
        self.header = match.group(1)
        self.seq = seq

    def __len__(self):
        """Return the number of characters in the sequence string."""
        return len(self.seq)

    @property
    def phylip(self):
        """The record as one phylip line: name, a space, then the sequence
        with every '.' replaced by '-', terminated by a newline."""
        return self.header + " " + self.seq.replace('.','-') + "\n"

    @property
    def fasta(self):
        """The record as a two-line FASTA entry (header line, sequence line)."""
        return ">" + self.header + "\n" + self.seq + "\n"
31 | |
def fasta_parse(path):
    """Read the FASTA file at *path* and lazily yield Sequence objects.

    A record starts at a line beginning with '>' and runs until the next
    such line or the end of the file; its sequence lines are concatenated
    with line endings removed. Lines that appear before the first header
    are ignored. A file containing no header line yields nothing.
    """
    header = ''
    chunks = []
    with open(path) as f:
        for line in f:
            # Drop both LF and CRLF endings so that a stray '\r' never
            # ends up inside the sequence data.
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                if header:
                    yield Sequence(header, ''.join(chunks))
                header = line
                chunks = []
                continue
            chunks.append(line)
    # Only emit the trailing record if a header was actually seen; an
    # empty or header-less file must not produce a bogus record.
    if header:
        yield Sequence(header, ''.join(chunks))
47 | |
48 ############################################################################### | |
49 # The libraries we need # | |
50 import sys, os, random, string, re | |
# Get the shell arguments #
if len(sys.argv) < 3:
    sys.exit("Usage: %s <input.fasta> <output.phylip>" % sys.argv[0])
fa_path = sys.argv[1]
ph_path = sys.argv[2]
# Check that the path is valid #
if not os.path.exists(fa_path): raise Exception("No file at %s." % fa_path)
# Parse the input lazily, one record at a time #
seqs = fasta_parse(fa_path)
# The phylip header line needs the record count, which is only known after #
# a full pass, so the records go to a temporary file next to the output first #
# (ascii_letters and range work on both Python 2 and Python 3) #
tm_path = ph_path + '.' + ''.join(random.choice(string.ascii_letters) for i in range(10))
# Count the sequences and remember the length of the last one #
count = 0
length = 0
try:
    with open(tm_path, 'w') as f:
        for seq in seqs:
            f.write(seq.phylip)
            count += 1
            length = len(seq)
    # Without any record there is no alignment to describe #
    if count == 0:
        raise Exception("No sequences found in %s." % fa_path)
    # Add number of entries and length at the top #
    with open(tm_path, 'r') as old, open(ph_path, 'w') as new:
        new.write(" " + str(count) + " " + str(length) + "\n")
        new.writelines(old)
finally:
    # Clean up, also when something above failed #
    if os.path.exists(tm_path):
        os.remove(tm_path)