Mercurial > repos > mish > fasta_to_phylip
view abims_fasta2phylip.py @ 0:851e52325ffc draft
Uploaded
author | mish |
---|---|
date | Wed, 24 Jul 2013 08:35:01 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python """ Convert fasta alignemnts to relaxed phylip ones in constant memory. Written by Lucas Sinclair. Kopimi. You can use this script from the shell like this:: $ fasta_to_phylip seqs.fasta seqs.phylip """ ############################################################################### class Sequence(object): """The Sequence object has a string *header* and various representations.""" def __init__(self, header, seq): self.header = re.findall('^>(\S+)', header)[0] self.seq = seq def __len__(self): return len(self.seq) @property def phylip(self): return self.header + " " + self.seq.replace('.','-') + "\n" @property def fasta(self): return ">" + self.header + "\n" + self.seq + "\n" def fasta_parse(path): """Reads the file at *path* and yields Sequence objects in a lazy fashion""" header = '' seq = '' with open(path) as f: for line in f: line = line.strip('\n') if line.startswith('>'): if header: yield Sequence(header, seq) header = line seq = '' continue seq += line yield Sequence(header, seq) ############################################################################### # The libraries we need # import sys, os, random, string, re # Get the shell arguments # fa_path = sys.argv[1] ph_path = sys.argv[2] # Check that the path is valid # if not os.path.exists(fa_path): raise Exception("No file at %s." % fa_path) # Use our two functions # seqs = fasta_parse(fa_path) # Write the output to temporary file # tm_path = ph_path + '.' + ''.join(random.choice(string.letters) for i in xrange(10)) # Count the sequences # count = 0 with open(tm_path, 'w') as f: for seq in seqs: f.write(seq.phylip) count += 1 # Add number of entries and length at the top # with open(tm_path, 'r') as old, open(ph_path, 'w') as new: new.write(" " + str(count) + " " + str(len(seq)) + "\n") new.writelines(old) # Clean up # os.remove(tm_path)