changeset 0:851e52325ffc draft

Uploaded
author mish
date Wed, 24 Jul 2013 08:35:01 -0400
parents
children 1c14c7a6539f
files README.txt abims_fasta2phylip.py abims_fasta2phylip.xml
diffstat 3 files changed, 102 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt	Wed Jul 24 08:35:01 2013 -0400
@@ -0,0 +1,1 @@
+README about the fasta2phylip.xml and the fasta2phylip.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_fasta2phylip.py	Wed Jul 24 08:35:01 2013 -0400
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+"""
+Convert fasta alignments to relaxed phylip ones in constant memory.
+Written by Lucas Sinclair.
+Kopimi.
+
+You can use this script from the shell like this::
+$ python abims_fasta2phylip.py seqs.fasta seqs.phylip
+"""
+
+###############################################################################
+class Sequence(object):
+    """One alignment record: a *header* name and its *seq* string."""
+
+    def __init__(self, header, seq):
+        name = re.match(r'>(\S+)', header)  # raw string keeps '\S' a regex class
+        if name is None: raise ValueError("Bad fasta header: %r" % header)
+        self.header, self.seq = name.group(1), seq  # name = first token after '>'
+
+    def __len__(self):
+        return len(self.seq)  # length of the sequence string
+
+    @property
+    def phylip(self):  # one output row: header, a space, seq with '.' turned to '-'
+        return self.header + " " + self.seq.replace('.', '-') + "\n"
+
+    @property
+    def fasta(self):  # the record as a two-line fasta entry
+        return ">" + self.header + "\n" + self.seq + "\n"
+
+def fasta_parse(path):
+    """Lazily yield one Sequence per '>' record of the fasta file at *path*.
+    Yields nothing (instead of failing) when the file has no header line."""
+    header, chunks = '', []
+    with open(path) as f:
+        for line in f:
+            line = line.rstrip()  # also drops the '\r' of CRLF files
+            if line.startswith('>'):
+                # A new record starts: emit the one collected so far.
+                if header: yield Sequence(header, ''.join(chunks))
+                header, chunks = line, []
+            elif header:  # text before the first header belongs to no record
+                chunks.append(line)
+    # The last record has no following header to trigger its emission.
+    if header: yield Sequence(header, ''.join(chunks))
+
+###############################################################################
+# The libraries we need #
+import sys, os, random, string, re
+# Get the shell arguments: input fasta path, then output phylip path #
+fa_path, ph_path = sys.argv[1], sys.argv[2]
+# Check that the path is valid #
+if not os.path.exists(fa_path): raise Exception("No file at %s." % fa_path)
+# Parse lazily: fasta_parse is a generator, so reading starts in the loop below #
+seqs = fasta_parse(fa_path)
+# Rows go to a temporary file first (its name uses Python 2/3-portable calls) #
+tm_path = ph_path + '.' + ''.join(random.choice(string.ascii_letters) for i in range(10))
+count = 0  # Count the sequences #
+try:
+    with open(tm_path, 'w') as f:
+        for seq in seqs:
+            f.write(seq.phylip)
+            count += 1
+    if not count: raise Exception("No sequence found in %s." % fa_path)
+    # Add number of entries and length (of the last record) at the top #
+    with open(tm_path, 'r') as old, open(ph_path, 'w') as new:
+        new.write(" " + str(count) + " " + str(len(seq)) + "\n")
+        new.writelines(old)
+finally:  # Clean up, even when the conversion failed half-way #
+    if os.path.exists(tm_path): os.remove(tm_path)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_fasta2phylip.xml	Wed Jul 24 08:35:01 2013 -0400
@@ -0,0 +1,30 @@
+<tool id="abims_fasta_to_phylip" name="Fasta_to_Phylip" version="1.0.0">
+
+  <description>Convert fasta multiple alignment to Phylip format</description>
+
+  <command interpreter="python">
+    abims_fasta2phylip.py '$input' '$output'
+  </command>
+  <!-- Paths are quoted above so the shell passes each one as a single argument. -->
+  <inputs>
+    <param name="input" type="data" format="fasta" label="Fasta file to convert" help="Fasta multiple alignment format"/>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="phy" label="${input.name}.phy"/>
+  </outputs>
+  
+  <stdio>
+    <exit_code range="1:" level="fatal"/>
+  </stdio>
+    
+  <help>
+**What it does**
+
+
+Convert fasta alignments to relaxed phylip ones in constant memory.
+
+Written by **Lucas Sinclair**.
+  </help>
+    
+</tool>