Mercurial > repos > iuc > progressivemauve
annotate xmfa2gff3.py @ 1:272c6e61dd0c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
author | iuc |
---|---|
date | Fri, 22 May 2015 22:50:24 -0400 |
parents | |
children | 71e6dd9198dc |
rev | line source |
---|---|
1
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
2 import sys |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
3 from Bio import SeqIO |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
4 from Bio.Seq import Seq |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
5 from Bio.SeqRecord import SeqRecord |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
6 from Bio.SeqFeature import SeqFeature, FeatureLocation |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
7 import argparse |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
8 from BCBio import GFF |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
9 import logging |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
10 logging.basicConfig(level=logging.INFO) |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
11 log = logging.getLogger(__name__) |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
12 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
13 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
def parse_xmfa(xmfa):
    """Simple XMFA parser until https://github.com/biopython/biopython/pull/544

    Lazily yields one locally collinear block (LCB) at a time.

    :param xmfa: iterable of text lines, e.g. an open XMFA file handle.
    :returns: generator of LCBs. Each LCB is a list holding one dict per
        aligned sequence, with the keys:

        * ``rid``: all header fields after ``>`` joined with ``_``
        * ``id``: the text before ``:`` in the first header field (a string)
        * ``start`` / ``end``: the integers around ``-`` in that field
        * ``strand``: 1 when the second header field is ``+``, otherwise -1
        * ``seq``: the alignment row, concatenated across its lines

    Lines starting with ``#`` and blank lines are ignored. A block is emitted
    for every ``=`` line (an empty list if no sequence preceded it), and a
    final block that is not terminated by ``=`` is emitted at end of input
    instead of being dropped.

    :raises KeyError: if sequence text appears before any ``>`` header.
    :raises IndexError, ValueError: if a ``>`` header is malformed.
    """
    current_lcb = []
    current_seq = {}
    # Iterate the handle directly so the whole file is never held in memory.
    for line in xmfa:
        if line.startswith('#'):
            continue

        line = line.strip()
        if not line:
            # Blank lines carry no data; without this guard they fell through
            # to the sequence branch and raised KeyError between blocks.
            continue

        if line == '=':
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            yield current_lcb
            current_lcb = []
        elif line.startswith('>'):
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            # Drop the marker before splitting so that both "> 1:1-10 + x"
            # and ">1:1-10 + x" produce the same fields.
            fields = line[1:].split()
            seq_id, loc = fields[0].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(fields),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if fields[1] == '+' else -1,
                'seq': ''
            }
        else:
            current_seq['seq'] += line

    # Flush whatever is pending when the input lacks a closing '='.
    if 'id' in current_seq:
        current_lcb.append(current_seq)
    if current_lcb:
        yield current_lcb
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
48 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
49 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
50 def _percent_identity(a, b): |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
51 """Calculate % identity, ignoring gaps in the host sequence |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
52 """ |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
53 match = 0 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
54 mismatch = 0 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
55 for char_a, char_b in zip(list(a), list(b)): |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
56 if char_a == '-': |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
57 continue |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
58 if char_a == char_b: |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
59 match += 1 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
60 else: |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
61 mismatch += 1 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
62 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
63 if match + mismatch == 0: |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
64 return 0 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
65 return 100 * float(match) / (match + mismatch) |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
66 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
67 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
68 def _id_tn_dict(sequences): |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
69 """Figure out sequence IDs |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
70 """ |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
71 label_convert = {} |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
72 if sequences is not None: |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
73 if len(sequences) == 1: |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
74 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')): |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
75 label_convert[str(i + 1)] = record.id |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
76 else: |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
77 for i, sequence in enumerate(sequences): |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
78 for record in SeqIO.parse(sequence, 'fasta'): |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
79 label_convert[str(i + 1)] = record.id |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
80 continue |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
81 return label_convert |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
82 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
83 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
    """Turn the LCBs of an XMFA alignment into features on one genome.

    :param xmfa_file: XMFA input handed to ``parse_xmfa``.
    :param relative_to: sequence number (as a string) of the genome the
        features are placed on.
    :param sequences: optional FASTA inputs handed to ``_id_tn_dict`` to
        translate sequence numbers into record IDs.
    :param window_size: number of alignment columns per ``match_part``.
    :returns: a one-element list holding a ``SeqRecord`` for the
        ``relative_to`` genome. For every LCB that contains that genome and
        at least one other sequence, one ``match`` feature per other
        sequence is appended, with one ``match_part`` sub-feature per window
        whose ``score`` qualifier is the percent identity of that window.

    Windows covering fewer than 10 ungapped bases of the ``relative_to``
    sequence are skipped, as are windows with 0% identity.
    """
    label_convert = _id_tn_dict(sequences)

    lcbs = parse_xmfa(xmfa_file)

    records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
    for lcb in lcbs:
        ids = [seq['id'] for seq in lcb]

        # Doesn't match part of our sequence
        if relative_to not in ids:
            continue

        # Skip sequences that are JUST our "relative_to" genome
        if len(ids) == 1:
            continue

        parent = [seq for seq in lcb if seq['id'] == relative_to][0]
        others = [seq for seq in lcb if seq['id'] != relative_to]

        # NOTE(review): start/end are used exactly as parsed from the XMFA
        # header; confirm whether a coordinate-base adjustment is needed
        # before they are passed to FeatureLocation.
        for other in others:
            other['feature'] = SeqFeature(
                FeatureLocation(parent['start'], parent['end'] + 1),
                type="match", strand=parent['strand'],
                qualifiers={
                    "source": "progressiveMauve",
                    "target": label_convert.get(other['id'], other['id']),
                    "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
                }
            )

        # The strand is the same for every window of this LCB. A negative
        # start is still honoured as a reverse-strand marker, but otherwise
        # the strand parsed from the header is used so that match_part
        # features agree with their parent match feature.
        if parent['start'] < 0:
            strand = -1
        else:
            strand = parent['strand']

        parent_seq = parent['seq']
        # Running count of gap columns before the current window, kept up to
        # date instead of re-counting the whole prefix for every window.
        gaps_before = 0
        for i in range(0, len(parent_seq), window_size):
            block_seq = parent_seq[i:i + window_size]
            real_window_size = len(block_seq)
            block_gaps = block_seq.count('-')
            real_start = abs(parent['start']) - gaps_before + i
            real_end = real_start + real_window_size - block_gaps
            # Must be updated before any `continue` below.
            gaps_before += block_gaps

            if (real_end - real_start) < 10:
                continue

            for other in others:
                pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
                # Ignore 0% identity sequences
                if pid == 0:
                    continue
                other['feature'].sub_features.append(
                    SeqFeature(
                        FeatureLocation(real_start, real_end),
                        type="match_part", strand=strand,
                        qualifiers={
                            "source": "progressiveMauve",
                            'score': pid
                        }
                    )
                )

        for other in others:
            records[0].features.append(other['feature'])
    return records
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
148 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
149 |
272c6e61dd0c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 936ca4b4641eae2bdfa93885f35f5a9263f9dae1
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, run the XMFA -> GFF3
    # conversion, and write the resulting records to stdout.
    parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3')
    # argparse.FileType('r') replaces the Python 2-only ``file`` builtin,
    # which does not exist on Python 3 (NameError at parser construction).
    # It opens the path for reading on both interpreters and makes argparse
    # emit a usage error if the file cannot be opened.
    parser.add_argument('xmfa_file', type=argparse.FileType('r'), help='XMFA File')
    parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000)
    parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1')
    parser.add_argument('--sequences', type=argparse.FileType('r'), nargs='+',
                        help='Fasta files (in same order) passed to parent for reconstructing proper IDs')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    # The argument dest names (xmfa_file, window_size, relative_to, sequences)
    # are forwarded verbatim as keyword arguments to the converter.
    result = convert_xmfa_to_gff3(**vars(args))
    GFF.write(result, sys.stdout)