annotate xmfa2gff3.py @ 13:71e6dd9198dc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
author iuc
date Fri, 07 Sep 2018 10:55:36 -0400
parents 272c6e61dd0c
children 7ea1fa36f3f4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
1 #!/usr/bin/env python
13
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
2 import argparse
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
3 import logging
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
4 import sys
13
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
5
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
6 from BCBio import GFF
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
7 from Bio import SeqIO
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
8 from Bio.Seq import Seq
13
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
9 from Bio.SeqFeature import (
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
10 FeatureLocation,
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
11 SeqFeature
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
12 )
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
13 from Bio.SeqRecord import SeqRecord
13
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
14
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
# Configure the root logger at import time with an INFO threshold.
logging.basicConfig(level=logging.INFO)
# Logger named after this module.
log = logging.getLogger(__name__)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
17
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
18
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
def parse_xmfa(xmfa):
    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544

    Lazily yields one list per alignment block (LCB). A block is the group
    of entries that precedes a line containing only ``=``.

    Each entry starts with a header line of whitespace-separated fields,
    ``> <id>:<start>-<end> <strand> [...]``, followed by any number of
    sequence lines, which are concatenated.

    :param xmfa: iterable of text lines, e.g. an open XMFA file handle.
    :returns: generator of lists of dicts with the keys ``rid`` (header
        fields after ``>`` joined by ``_``), ``id`` (str), ``start`` and
        ``end`` (int), ``strand`` (1 for ``+``, otherwise -1) and ``seq``
        (str, gap characters included).
    :raises ValueError: if sequence data appears before any ``>`` header,
        or if a header cannot be split into id/start/end.

    Entries that follow the last ``=`` terminator are not yielded.
    """
    current_lcb = []
    current_seq = {}
    # Sequence lines are collected here and joined once per entry; repeated
    # ``+=`` on the dict value re-copies the whole string for every line.
    seq_chunks = []

    # Iterate the handle directly instead of readlines() so the whole file
    # is never held in memory at once.
    for raw_line in xmfa:
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        if not line:
            # Blank lines carry no data. Without this guard a blank line
            # outside an entry hit ``current_seq['seq']`` on an empty dict.
            continue

        is_terminator = line == '='
        if is_terminator or line.startswith('>'):
            # Both markers close the entry that is currently being read.
            if 'id' in current_seq:
                current_seq['seq'] = ''.join(seq_chunks)
                current_lcb.append(current_seq)
            current_seq = {}
            seq_chunks = []

            if is_terminator:
                yield current_lcb
                current_lcb = []
                continue

            fields = line.split()
            seq_id, loc = fields[1].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(fields[1:]),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if fields[2] == '+' else -1,
                'seq': '',
            }
        elif 'id' in current_seq:
            seq_chunks.append(line)
        else:
            raise ValueError(
                "XMFA sequence data found before any '>' header: %r" % line
            )
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
53
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
54
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
def _percent_identity(a, b):
    """Calculate % identity, ignoring gaps in the host sequence

    The two inputs are compared position by position, up to the length of
    the shorter one. Positions where ``a`` (the host) holds a gap (``-``)
    are skipped and count neither as a match nor as a mismatch.

    :param a: aligned host sequence.
    :param b: aligned sequence compared against ``a``.
    :returns: float percentage (0-100) of compared positions that are
        identical; ``0.0`` when no position could be compared.
    """
    match = 0
    compared = 0
    # zip() already iterates both inputs in lockstep; copying them into
    # lists first only costs memory.
    for char_a, char_b in zip(a, b):
        if char_a == '-':
            continue
        compared += 1
        if char_a == char_b:
            match += 1

    if compared == 0:
        # Return a float here as well so the result type is consistent.
        return 0.0
    return 100 * float(match) / compared
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
71
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
72
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
def _id_tn_dict(sequences):
    """Figure out sequence IDs

    Builds a mapping from 1-based sequence numbers (as strings) to FASTA
    record IDs.

    :param sequences: ``None``, or a list of FASTA inputs accepted by
        ``SeqIO.parse``.
    :returns: dict of ``str`` number -> record id; empty when ``sequences``
        is ``None`` or an empty list.
    """
    if sequences is None:
        return {}

    if len(sequences) == 1:
        # A single file: its records are numbered 1, 2, 3, ...
        return {
            str(number): rec.id
            for number, rec in enumerate(SeqIO.parse(sequences[0], 'fasta'), start=1)
        }

    # Several files: one number per file.
    # NOTE(review): every record overwrites the file's entry, so the LAST
    # record id of each file is kept. If the first id was intended, this
    # loop should stop after the first record - confirm before changing.
    mapping = {}
    for number, fasta in enumerate(sequences, start=1):
        for rec in SeqIO.parse(fasta, 'fasta'):
            mapping[str(number)] = rec.id
    return mapping
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
87
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
88
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
89 def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
90 label_convert = _id_tn_dict(sequences)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
91
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
92 lcbs = parse_xmfa(xmfa_file)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
93
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
94 records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
95 for lcb in lcbs:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
96 ids = [seq['id'] for seq in lcb]
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
97
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
98 # Doesn't match part of our sequence
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
99 if relative_to not in ids:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
100 continue
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
101
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
102 # Skip sequences that are JUST our "relative_to" genome
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
103 if len(ids) == 1:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
104 continue
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
105
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
106 parent = [seq for seq in lcb if seq['id'] == relative_to][0]
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
107 others = [seq for seq in lcb if seq['id'] != relative_to]
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
108
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
109 for other in others:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
110 other['feature'] = SeqFeature(
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
111 FeatureLocation(parent['start'], parent['end'] + 1),
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
112 type="match", strand=parent['strand'],
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
113 qualifiers={
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
114 "source": "progressiveMauve",
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
115 "target": label_convert.get(other['id'], other['id']),
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
116 "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
117 }
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
118 )
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
119
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
120 for i in range(0, len(lcb[0]['seq']), window_size):
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
121 block_seq = parent['seq'][i:i + window_size]
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
122 real_window_size = len(block_seq)
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
123 real_start = abs(parent['start']) - parent['seq'][0:i].count('-') + i
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
124 real_end = real_start + real_window_size - block_seq.count('-')
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
125
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
126 if (real_end - real_start) < 10:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
127 continue
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
128
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
129 if parent['start'] < 0:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
130 strand = -1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
131 else:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
132 strand = 1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
133
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
134 for other in others:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
135 pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
136 # Ignore 0% identity sequences
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
137 if pid == 0:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
138 continue
13
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
139
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
140 # Support for Biopython 1.68 and above, which removed sub_features
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
141 if not hasattr(other['feature'], "sub_features"):
71e6dd9198dc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 1
diff changeset
142 other['feature'].sub_features = []
1
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
143 other['feature'].sub_features.append(
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
144 SeqFeature(
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
145 FeatureLocation(real_start, real_end),
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
146 type="match_part", strand=strand,
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
147 qualifiers={
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
148 "source": "progressiveMauve",
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
149 'score': pid
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
150 }
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
151 )
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
152 )
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
153
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
154 for other in others:
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
155 records[0].features.append(other['feature'])
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
156 return records
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
157
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
158
272c6e61dd0c planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff changeset
def build_parser():
    """Build the command-line parser for the XMFA-to-GFF3 converter.

    File-valued arguments are declared with ``argparse.FileType('r')``
    instead of the ``file`` builtin: ``file`` only exists on Python 2, so
    referencing it raises ``NameError`` on Python 3 before any argument is
    parsed. ``FileType('r')`` yields an open, read-mode handle on both.

    Returns:
        argparse.ArgumentParser: parser exposing ``xmfa_file``,
        ``--window_size``, ``--relative_to``, ``--sequences`` and
        ``--version``.
    """
    parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3')
    parser.add_argument('xmfa_file', type=argparse.FileType('r'), help='XMFA File')
    parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000)
    parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1')
    parser.add_argument('--sequences', type=argparse.FileType('r'), nargs='+',
                        help='Fasta files (in same order) passed to parent for reconstructing proper IDs')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')
    return parser


if __name__ == '__main__':
    args = build_parser().parse_args()

    # The parsed namespace is forwarded as keyword arguments, so the option
    # names above must stay in sync with convert_xmfa_to_gff3's parameters.
    result = convert_xmfa_to_gff3(**vars(args))
    # Emit the resulting records as GFF3 on standard output.
    GFF.write(result, sys.stdout)