annotate fasta_utils/__init__.py @ 0:d04fa5201f51 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
author rnateam
date Thu, 28 Jul 2016 05:56:54 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
1 """Util functions for FASTA format."""
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
2
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
3 __author__ = "Gianluca Corrado"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
4 __copyright__ = "Copyright 2016, Gianluca Corrado"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
5 __license__ = "MIT"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
6 __maintainer__ = "Gianluca Corrado"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
7 __email__ = "gianluca.corrado@unitn.it"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
8 __status__ = "Production"
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
9
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
10
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
def import_fasta(fasta_file):
    """Import a fasta file as a dictionary.

    Args:
        fasta_file: path of the FASTA file to read.

    Returns:
        A dict mapping each header (the text between ``>`` and the end of
        that line) to its sequence, with the line breaks of multi-line
        sequences removed. If the same header occurs more than once, the
        last occurrence wins. An empty file yields an empty dict.
    """
    # The context manager guarantees the handle is closed even if read()
    # raises (e.g. on a decoding error).
    with open(fasta_file) as handle:
        fasta = handle.read().strip()

    dic = {}
    for record in fasta.split('>'):
        # Split once: the first line is the header, the rest the sequence.
        lines = record.split('\n')
        name = lines[0]
        # The chunk before the first '>' is normally empty; skip it (and
        # any other record without a header).
        if name != '':
            dic[name] = ''.join(lines[1:])
    return dic
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
23
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
24
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
def export_fasta(dic):
    """Export a dictionary as a FASTA-formatted string.

    Args:
        dic: mapping from sequence name to sequence.

    Returns:
        A string with one ``>name`` header line followed by one sequence
        line per entry, in the iteration order of ``dic``. An empty mapping
        yields an empty string.
    """
    # items() exists on both Python 2 and Python 3 dicts, whereas
    # iteritems() is Python 2 only. join() avoids repeated string copies.
    return "".join(">%s\n%s\n" % (name, seq) for (name, seq) in dic.items())
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
31
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
32
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
def seq_names(fasta_file):
    """Get sequence names from fasta file.

    Args:
        fasta_file: path of the FASTA file to read.

    Returns:
        A list with the header (the text between ``>`` and the end of that
        line) of every record, in file order. Duplicated headers are kept.
    """
    # The context manager guarantees the handle is closed even if read()
    # raises.
    with open(fasta_file) as handle:
        # Strip surrounding whitespace exactly as import_fasta does, so the
        # names returned here match the keys of the imported dictionary
        # (e.g. a whitespace-only first line is not reported as a name).
        fasta = handle.read().strip()

    headers = (record.split('\n')[0] for record in fasta.split('>'))
    # The chunk before the first '>' has an empty header; drop it.
    return [name for name in headers if name != '']
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
42
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
43
d04fa5201f51 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
diff changeset
def stockholm2fasta(stockholm):
    """Convert alignment in stockholm format to fasta format.

    Args:
        stockholm: the content of a Stockholm alignment as a single string.

    Returns:
        A FASTA-formatted string with one record per alignment line met
        before the ``//`` terminator (or before the end of the input if the
        terminator is missing). Only ``.`` gap characters are removed from
        the aligned sequences; any other character is kept as is.

    Raises:
        ValueError: if an alignment line does not consist of exactly two
            whitespace-separated fields (name and aligned sequence).
    """
    records = []
    for raw_line in stockholm.split("\n"):
        # Tolerate trailing '\r' or spaces around markers and fields.
        line = raw_line.strip()
        # Blank lines (used as block separators) and '#' markup/comment
        # lines carry no sequence data.
        if not line or line.startswith("#"):
            continue
        # termination line
        if line == "//":
            break
        # alignment line
        name, align = line.split()
        records.append(">%s\n%s\n" % (name, align.replace(".", "")))
    return "".join(records)