Mercurial > repos > mish > fasta_to_phylip
changeset 0:851e52325ffc draft
Uploaded
author | mish |
---|---|
date | Wed, 24 Jul 2013 08:35:01 -0400 |
parents | |
children | 1c14c7a6539f |
files | README.txt abims_fasta2phylip.py abims_fasta2phylip.xml |
diffstat | 3 files changed, 102 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.txt Wed Jul 24 08:35:01 2013 -0400 @@ -0,0 +1,1 @@ +README about the fasta2phylip.xml and the fasta2phylip.py
#!/usr/bin/env python

"""
Convert fasta alignments to relaxed phylip ones in constant memory.
Written by Lucas Sinclair.
Kopimi.

You can use this script from the shell like this::
$ fasta_to_phylip seqs.fasta seqs.phylip
"""

# The libraries we need #
import os
import re
import shutil
import sys
import tempfile

# Captures the identifier: the first run of non-blank characters after '>' #
HEADER_PATTERN = re.compile(r'^>(\S+)')


###############################################################################
class Sequence(object):
    """The Sequence object has a string *header* and
    various representations.

    *header* is the raw fasta header line (starting with '>'); only the
    identifier up to the first whitespace is kept. *seq* is the sequence
    as a single string. Raises ValueError when *header* has no identifier
    directly after the '>'."""

    def __init__(self, header, seq):
        match = HEADER_PATTERN.match(header)
        if match is None:
            # The original indexed an empty findall() result here and
            # died with an uninformative IndexError.
            raise ValueError("Invalid fasta header: %r" % header)
        self.header = match.group(1)
        self.seq = seq

    def __len__(self):
        """Number of characters (alignment columns) in the sequence."""
        return len(self.seq)

    @property
    def phylip(self):
        """One relaxed-phylip record line: identifier, a space, then the
        sequence with '.' gap characters rewritten as '-'."""
        return self.header + " " + self.seq.replace('.', '-') + "\n"

    @property
    def fasta(self):
        """The record in fasta form, with the sequence on a single line."""
        return ">" + self.header + "\n" + self.seq + "\n"


def fasta_parse(path):
    """Reads the file at *path* and yields
    Sequence objects in a lazy fashion.

    Only one record is held in memory at a time. Lines that appear before
    the first '>' header are ignored, and a file without any header
    yields nothing."""
    header = None
    chunks = []
    with open(path) as handle:
        for line in handle:
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                if header is not None:
                    yield Sequence(header, ''.join(chunks))
                header = line
                chunks = []
            elif header is not None:
                # Collect the pieces and join once per record instead of
                # growing a string with repeated concatenation.
                chunks.append(line)
    # Emit the trailing record only if a header was actually seen #
    if header is not None:
        yield Sequence(header, ''.join(chunks))


def fasta_to_phylip(fa_path, ph_path):
    """Convert the fasta alignment at *fa_path* into a relaxed phylip file
    written to *ph_path*, and return the number of sequences converted.

    The phylip header line needs the sequence count, which is only known
    once the whole input has been read. The records are therefore streamed
    to a temporary file first and then copied behind the header line. The
    length written in the header is the length of the last sequence; the
    sequences are not checked for equal length.

    Raises IOError if *fa_path* does not exist and ValueError if it
    contains no sequences."""
    # Check that the path is valid #
    if not os.path.exists(fa_path):
        raise IOError("No file at %s." % fa_path)
    # Write the records to a temporary file next to the output #
    out_dir = os.path.dirname(os.path.abspath(ph_path))
    fd, tm_path = tempfile.mkstemp(prefix=os.path.basename(ph_path) + '.',
                                   dir=out_dir)
    try:
        # Count the sequences #
        count = 0
        length = 0
        with os.fdopen(fd, 'w') as tmp:
            for seq in fasta_parse(fa_path):
                tmp.write(seq.phylip)
                length = len(seq)
                count += 1
        if count == 0:
            raise ValueError("No sequences found in %s." % fa_path)
        # Add number of entries and length at the top #
        with open(tm_path, 'r') as old, open(ph_path, 'w') as new:
            new.write(" " + str(count) + " " + str(length) + "\n")
            shutil.copyfileobj(old, new)
    finally:
        # Clean up, also when the conversion failed half-way #
        os.remove(tm_path)
    return count


def main(argv=None):
    """Command line entry point: ``fasta_to_phylip seqs.fasta seqs.phylip``.

    *argv* defaults to the shell arguments. Returns the process exit
    status: 0 on success, 2 when fewer than two paths were given."""
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        sys.stderr.write("Usage: fasta_to_phylip seqs.fasta seqs.phylip\n")
        return 2
    fasta_to_phylip(argv[0], argv[1])
    return 0


###############################################################################
if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_fasta2phylip.xml Wed Jul 24 08:35:01 2013 -0400 @@ -0,0 +1,30 @@ +<tool id="abims_fasta_to_phylip" name="Fasta_to_Phylip" version="1.0.0"> + + <description>Convert fasta multiple alignment to Phylip format</description> + + <command interpreter="python"> + abims_fasta2phylip.py $input $output + </command> + + <inputs> + <param name="input" type="data" format="fasta" label="Fasta file to convert" help="Fasta multiple alignment format"/> + </inputs> + + <outputs> + <data name="output" format="phy" label="${input.name}.phy"/> + </outputs> + + <stdio> + <exit_code range="1:" level="fatal"/> + </stdio> + + <help> +**What it does?** + + +Convert fasta alignments to relaxed phylip ones in constant memory. + +Written by **Lucas Sinclair**. + </help> + +</tool>