annotate xmfa2gff3.py @ 1:272c6e61dd0c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
author iuc
date Fri, 22 May 2015 22:50:24 -0400
parents
children 71e6dd9198dc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
1 #!/usr/bin/env python
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
2 import sys
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
3 from Bio import SeqIO
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
4 from Bio.Seq import Seq
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
5 from Bio.SeqRecord import SeqRecord
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
6 from Bio.SeqFeature import SeqFeature, FeatureLocation
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
7 import argparse
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
8 from BCBio import GFF
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
9 import logging
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
10 logging.basicConfig(level=logging.INFO)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
11 log = logging.getLogger(__name__)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
12
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
13
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
def parse_xmfa(xmfa):
    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544

    Reads an XMFA alignment line by line and yields one alignment block
    (LCB) each time a ``=`` separator line is seen.

    :param xmfa: open text-mode handle (or any iterable of lines) holding
                 XMFA data.
    :returns: generator of blocks; each block is a list of dicts with the
              keys ``rid``, ``id``, ``start``, ``end``, ``strand`` and
              ``seq``, one dict per ``>`` header in the block.

    Lines starting with ``#`` are skipped. Blank lines, and sequence text
    that appears before any ``>`` header, are ignored instead of raising
    ``KeyError``. Records that are not followed by a ``=`` line are never
    yielded.
    """
    current_lcb = []
    current_seq = None
    # Sequence text is collected in pieces and joined once per record, which
    # avoids repeated string concatenation on long alignments.
    chunks = []
    for raw_line in xmfa:
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        if line == '=':
            # End of block: flush the record in progress, then emit the block.
            if current_seq is not None:
                current_seq['seq'] = ''.join(chunks)
                current_lcb.append(current_seq)
                current_seq = None
            yield current_lcb
            current_lcb = []
        elif line.startswith('>'):
            # A new header closes the previous record of the same block.
            if current_seq is not None:
                current_seq['seq'] = ''.join(chunks)
                current_lcb.append(current_seq)
            # Dropping the '>' before splitting accepts both "> 1:1-10 + f"
            # and ">1:1-10 + f".
            fields = line[1:].split()
            seq_id, loc = fields[0].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(fields),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if fields[1] == '+' else -1,
                'seq': ''
            }
            chunks = []
        elif line and current_seq is not None:
            chunks.append(line)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
48
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
49
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
50 def _percent_identity(a, b):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
51 """Calculate % identity, ignoring gaps in the host sequence
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
52 """
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
53 match = 0
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
54 mismatch = 0
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
55 for char_a, char_b in zip(list(a), list(b)):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
56 if char_a == '-':
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
57 continue
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
58 if char_a == char_b:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
59 match += 1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
60 else:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
61 mismatch += 1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
62
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
63 if match + mismatch == 0:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
64 return 0
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
65 return 100 * float(match) / (match + mismatch)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
66
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
67
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
68 def _id_tn_dict(sequences):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
69 """Figure out sequence IDs
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
70 """
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
71 label_convert = {}
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
72 if sequences is not None:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
73 if len(sequences) == 1:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
74 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
75 label_convert[str(i + 1)] = record.id
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
76 else:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
77 for i, sequence in enumerate(sequences):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
78 for record in SeqIO.parse(sequence, 'fasta'):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
79 label_convert[str(i + 1)] = record.id
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
80 continue
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
81 return label_convert
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
82
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
83
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
    """Turn XMFA alignment blocks into match features on one reference sequence

    For every block that contains the ``relative_to`` sequence plus at least
    one other sequence, a ``match`` feature is created per other sequence,
    spanning the reference's interval in that block. The block is then cut
    into windows of ``window_size`` alignment columns, and each window with a
    non-zero percent identity is attached as a ``match_part`` sub-feature
    whose ``score`` qualifier is that percent identity.

    :param xmfa_file: open handle on the XMFA file, passed to ``parse_xmfa``.
    :param relative_to: XMFA sequence number (string) used as the reference.
    :param sequences: optional list of FASTA inputs used to translate
                      sequence numbers into record ids.
    :param window_size: number of alignment columns per ``match_part``.
    :returns: list holding a single ``SeqRecord`` carrying all features.
    """
    label_convert = _id_tn_dict(sequences)

    # The record carries a one-base dummy sequence; only its id and features
    # are filled in here.
    records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
    for lcb in parse_xmfa(xmfa_file):
        parents = [seq for seq in lcb if seq['id'] == relative_to]
        others = [seq for seq in lcb if seq['id'] != relative_to]

        # Skip blocks that do not involve the reference, and blocks that
        # contain nothing but the reference.
        if not parents or not others:
            continue
        parent = parents[0]

        for other in others:
            other['feature'] = SeqFeature(
                FeatureLocation(parent['start'], parent['end'] + 1),
                type="match", strand=parent['strand'],
                qualifiers={
                    "source": "progressiveMauve",
                    "target": label_convert.get(other['id'], other['id']),
                    "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
                }
            )

        # NOTE(review): match_part strand is taken from the sign of the start
        # coordinate, while the parent feature uses parent['strand']; confirm
        # which one is intended for reverse-strand blocks.
        strand = -1 if parent['start'] < 0 else 1

        parent_seq = parent['seq']
        # Gap columns seen in earlier windows, kept as a running total so the
        # alignment prefix is not rescanned for every window.
        gaps_before = 0
        for i in range(0, len(parent_seq), window_size):
            block_seq = parent_seq[i:i + window_size]
            block_gaps = block_seq.count('-')
            real_window_size = len(block_seq)
            # Convert alignment columns into reference coordinates by
            # discounting the gap columns of the reference row.
            real_start = abs(parent['start']) - gaps_before + i
            real_end = real_start + real_window_size - block_gaps
            gaps_before += block_gaps

            # Windows covering fewer than 10 reference bases are dropped.
            if (real_end - real_start) < 10:
                continue

            for other in others:
                pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
                # Ignore 0% identity sequences
                if pid == 0:
                    continue
                other['feature'].sub_features.append(
                    SeqFeature(
                        FeatureLocation(real_start, real_end),
                        type="match_part", strand=strand,
                        qualifiers={
                            "source": "progressiveMauve",
                            'score': pid
                        }
                    )
                )

        for other in others:
            records[0].features.append(other['feature'])
    return records
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
148
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
149
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, run the conversion and
    # write the resulting records to stdout.
    parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3')
    # argparse.FileType is used instead of the ``file`` builtin, which only
    # exists on Python 2; FileType works on both Python 2.7 and Python 3 and
    # reports unreadable paths as regular argparse usage errors.
    parser.add_argument('xmfa_file', type=argparse.FileType('r'), help='XMFA File')
    parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000)
    parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1')
    parser.add_argument('--sequences', type=argparse.FileType('r'), nargs='+',
                        help='Fasta files (in same order) passed to parent for reconstructing proper IDs')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    # The parsed argument names are forwarded verbatim as keyword arguments,
    # so they must stay in sync with convert_xmfa_to_gff3's parameters.
    result = convert_xmfa_to_gff3(**vars(args))
    GFF.write(result, sys.stdout)