Mercurial > repos > devteam > fasta_concatenate_by_species
annotate utils/maf_utilities.py @ 2:c5311b7718d1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
| author | devteam |
|---|---|
| date | Sun, 01 Mar 2020 12:22:35 +0000 |
| parents | d9f0a11824e9 |
| children | 49e91b42881b |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 """ | |
| 3 Provides wrappers and utilities for working with MAF files and alignments. | |
| 4 """ | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
5 # Dan Blankenberg |
| 0 | 6 import bx.align.maf |
| 7 import bx.intervals | |
| 8 import bx.interval_index_file | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
9 import sys |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
10 import os |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
11 import tempfile |
| 0 | 12 import logging |
| 13 from copy import deepcopy | |
| 14 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
# Python 2/3 compatibility: use ``str.maketrans`` when the interpreter
# provides it, otherwise fall back to the ``string`` module's function.
maketrans = getattr(str, "maketrans", None)
if maketrans is None:
    from string import maketrans

# Refuse to load on interpreters older than 2.4.
assert sys.version_info >= (2, 4)

# Module-level logger named after this module.
log = logging.getLogger(__name__)
| 23 | |
| 24 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
# Characters treated as alignment gaps.
GAP_CHARS = ['-']
# Separator between the species and chromosome parts of a source name.
SRC_SPLIT_CHAR = '.'


def src_split(src):
    """Split a source name into a ``(spec, chrom)`` tuple.

    The name is cut at the first ``SRC_SPLIT_CHAR``. When the separator is
    absent, the whole name is returned for both elements.
    """
    pieces = src.split(SRC_SPLIT_CHAR, 1)
    if len(pieces) == 1:
        # No separator: the single piece serves as both species and chrom.
        return pieces[0], pieces[0]
    return pieces[0], pieces[1]
| 37 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
38 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def src_merge(spec, chrom, contig=None):
    """Build a source name by delegating to ``bx.align.maf.src_merge``.

    If either ``spec`` or ``chrom`` is ``None``, both are first set to
    ``spec or chrom`` so the delegate receives the same value for each.
    """
    if None in (spec, chrom):
        fallback = spec or chrom
        spec = fallback
        chrom = fallback
    return bx.align.maf.src_merge(spec, chrom, contig)
| 0 | 43 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
44 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_species_in_block(block):
    """Return the distinct species of ``block.components`` in first-seen order.

    The species of a component is the first element that ``src_split``
    yields for the component's ``src``.
    """
    seen = []
    for component in block.components:
        name, _chrom = src_split(component.src)
        if name in seen:
            continue
        seen.append(name)
    return seen
| 52 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
53 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def tool_fail(msg="Unknown Error"):
    """Terminate the process via ``sys.exit`` with a "Fatal Error" message.

    :param msg: text appended after the ``Fatal Error: `` prefix.
    :raises SystemExit: always; the exit payload is the prefixed message.
    """
    sys.exit("Fatal Error: %s" % msg)
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
57 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
58 # an object corresponding to a reference layered alignment |
| 0 | 59 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
60 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
class RegionAlignment(object):
    """Fixed-width alignment storing one sequence per species.

    Every species gets its own temporary file that is pre-filled with
    ``size`` gap characters (``-``); ranges are then overwritten in place.
    The temporary files are binary, so text is converted to bytes on the way
    in and back to ``str`` on the way out. This keeps the class usable on
    both Python 2 and Python 3.
    """

    DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")

    def __init__(self, size, species=None):
        """Create an alignment of ``size`` columns.

        :param size: number of columns in the alignment.
        :param species: a species name or a list of names to add
            immediately; ``None`` (the default) adds none.
        """
        self.size = size
        # Maps species name -> open temporary file holding its sequence.
        self.sequences = {}
        if species is None:
            species = []
        elif not isinstance(species, list):
            species = [species]
        for spec in species:
            self.add_species(spec)

    @staticmethod
    def _to_bytes(text):
        """Return ``text`` as bytes (a no-op when it already is bytes)."""
        if isinstance(text, bytes):
            return text
        return text.encode("ascii")

    @staticmethod
    def _to_text(data):
        """Return ``data`` as the native ``str`` type."""
        if isinstance(data, str):
            # Python 2: bytes read from the file already are ``str``.
            return data
        return data.decode("ascii")

    # add a species to the alignment
    def add_species(self, species):
        """Create (or replace) the gap-filled sequence file for ``species``."""
        # make temporary sequence files
        handle = tempfile.TemporaryFile()
        # TemporaryFile defaults to binary mode, so write bytes, not str.
        handle.write(self._to_bytes("-" * self.size))
        self.sequences[species] = handle

    # returns the names for species found in alignment, skipping names as requested
    def get_species_names(self, skip=None):
        """Return a list of species names, leaving out those in ``skip``.

        :param skip: a name or list of names to omit; names that are not
            present are ignored.
        """
        if skip is None:
            skip = []
        elif not isinstance(skip, list):
            skip = [skip]
        # Build a real list: on Python 3 ``dict.keys()`` is a view without
        # ``remove``, so removing from it would fail and ``skip`` was ignored.
        return [name for name in self.sequences if name not in skip]

    # returns the sequence for a species
    def get_sequence(self, species):
        """Return the full sequence stored for ``species`` as ``str``.

        :raises KeyError: if ``species`` has not been added.
        """
        handle = self.sequences[species]
        handle.seek(0)
        return self._to_text(handle.read())

    # returns the reverse complement of the sequence for a species
    def get_sequence_reverse_complement(self, species):
        """Return the sequence for ``species`` complemented and reversed."""
        return self.get_sequence(species).translate(self.DNA_COMPLEMENT)[::-1]

    # sets a position for a species
    def set_position(self, index, species, base):
        """Overwrite the single column ``index`` of ``species`` with ``base``.

        :raises Exception: if ``base`` is not exactly one character long, or
            for any reason ``set_range`` raises.
        """
        if len(base) != 1:
            raise Exception("A genomic position can only have a length of 1.")
        return self.set_range(index, species, base)

    # sets a range for a species
    def set_range(self, index, species, bases):
        """Overwrite columns starting at ``index`` of ``species`` with ``bases``.

        A species that is not yet present is added first.

        NOTE: only the start ``index`` is validated; nothing here checks that
        ``index + len(bases)`` stays within ``size``.

        :raises Exception: if ``index`` is outside ``0 .. size - 1`` or
            ``bases`` is empty.
        """
        if index >= self.size or index < 0:
            raise Exception("Your index (%i) is out of range (0 - %i)." % (index, self.size - 1))
        if len(bases) == 0:
            raise Exception("A set of genomic positions can only have a positive length.")
        if species not in self.sequences:
            self.add_species(species)
        handle = self.sequences[species]
        handle.seek(index)
        handle.write(self._to_bytes(bases))

    # Flush temp file of specified species, or all species
    def flush(self, species=None):
        """Flush the temp file of one species, a list of species, or all.

        :param species: a name, a list of names, or ``None`` for every species.
        :raises KeyError: if a requested species has not been added.
        """
        if species is None:
            species = list(self.sequences)
        elif not isinstance(species, list):
            species = [species]
        for spec in species:
            self.sequences[spec].flush()
| 127 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
128 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
class GenomicRegionAlignment(RegionAlignment):
    """A ``RegionAlignment`` that also records its ``start`` and ``end``."""

    def __init__(self, start, end, species=[]):
        """Size the alignment as ``end - start`` and remember both bounds."""
        super(GenomicRegionAlignment, self).__init__(end - start, species)
        self.start, self.end = start, end
| 135 | |
| 136 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
class SplicedAlignment(object):
    """Alignment made of consecutive exons, each a ``GenomicRegionAlignment``.

    Sequences are produced by concatenating the per-exon sequences in the
    order the exons were given.
    """

    DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")

    def __init__(self, exon_starts, exon_ends, species=None):
        """Create one ``GenomicRegionAlignment`` per (start, end) pair.

        :param exon_starts: an exon start or a list of starts.
        :param exon_ends: an exon end or a list of ends, same length as
            ``exon_starts``.
        :param species: species to pre-populate in every exon; ``None``
            (the default) means none.
        :raises AssertionError: if the numbers of starts and ends differ.
        """
        if not isinstance(exon_starts, list):
            exon_starts = [exon_starts]
        if not isinstance(exon_ends, list):
            exon_ends = [exon_ends]
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        if len(exon_starts) != len(exon_ends):
            raise AssertionError("The number of exon starts does not match the number of exon ends.")
        if species is None:
            species = []
        self.exons = [
            GenomicRegionAlignment(exon_start, exon_end, species)
            for exon_start, exon_end in zip(exon_starts, exon_ends)
        ]

    # returns the names for species found in alignment, skipping names as requested
    def get_species_names(self, skip=None):
        """Return the distinct species names across all exons, in first-seen order.

        :param skip: a name or list of names to omit.
        """
        if skip is None:
            skip = []
        elif not isinstance(skip, list):
            skip = [skip]
        names = []
        for exon in self.exons:
            for name in exon.get_species_names(skip=skip):
                if name not in names:
                    names.append(name)
        return names

    # returns the sequence for a species
    def get_sequence(self, species):
        """Return the concatenated exon sequences for ``species``.

        An exon that does not contain ``species`` contributes a run of ``-``
        as long as that exon.
        """
        # Joined in memory: the previous temporary file was written with str
        # in binary mode (a TypeError on Python 3) and was never closed.
        parts = []
        for exon in self.exons:
            if species in exon.get_species_names():
                parts.append(exon.get_sequence(species))
            else:
                parts.append("-" * exon.size)
        return "".join(parts)

    # returns the reverse complement of the sequence for a species
    def get_sequence_reverse_complement(self, species):
        """Return the spliced sequence for ``species`` complemented and reversed."""
        return self.get_sequence(species).translate(self.DNA_COMPLEMENT)[::-1]

    # Start and end of coding region
    @property
    def start(self):
        """Start of the first exon."""
        return self.exons[0].start

    @property
    def end(self):
        """End of the last exon."""
        return self.exons[-1].end
| 187 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
188 # Open a MAF index using a UID |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
189 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
190 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def maf_index_by_uid(maf_uid, index_location_file):
    """
    Open the MAF index registered under ``maf_uid`` in a location file.

    ``index_location_file`` is read as tab-separated text. Lines starting
    with ``#`` and lines with fewer than two fields are skipped. The first
    line whose second field equals ``maf_uid`` is used: its fifth field is
    split on commas into a list of file names, which is handed to
    ``bx.align.maf.MultiIndexed``.

    Returns the ``MultiIndexed`` object, or ``None`` when no line matches.

    Raises ``Exception`` when a matching line is found but its file list
    cannot be read or the index cannot be opened.
    """
    # The with-block closes the location file on every exit path, including
    # the early return and the raise below.
    with open(index_location_file) as location_handle:
        for line in location_handle:
            if line.startswith("#"):
                continue
            fields = line.split('\t')
            # not enough fields to carry a UID, or a different UID: go to next line
            if len(fields) < 2 or maf_uid != fields[1]:
                continue
            try:
                maf_files = fields[4].replace("\n", "").replace("\r", "").split(",")
                return bx.align.maf.MultiIndexed(maf_files, keep_open=True, parse_e_rows=False)
            except Exception as e:
                # This error must reach the caller; it is raised outside any
                # catch-all handler so it is not silently discarded.
                raise Exception('MAF UID (%s) found, but configuration appears to be malformed: %s' % (maf_uid, e))
    return None
| 207 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
208 # return ( index, temp_index_filename ) for user maf, if available, or build one and return it, return None when no tempfile is created |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
209 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
210 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def open_or_build_maf_index(maf_file, index_filename, species=None):
    """
    Open ``maf_file`` with the index at ``index_filename``, building one on failure.

    Returns ``(index, None)`` when ``bx.align.maf.Indexed`` opens with the
    supplied index file. If that raises any ``Exception``, the result of
    ``build_maf_index(maf_file, species=species)`` is returned instead.
    """
    try:
        existing_index = bx.align.maf.Indexed(maf_file, index_filename=index_filename, keep_open=True, parse_e_rows=False)
    except Exception:
        # the supplied index could not be used; fall back to building one
        return build_maf_index(maf_file, species=species)
    return (existing_index, None)
| 0 | 216 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
217 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def build_maf_index_species_chromosomes(filename, index_species=None):
    """
    Scan a MAF file once, building an interval index and species summary.

    Each component ``src`` is split on the first ``.`` into a species name
    and an optional chromosome name. A component is added to the index only
    when ``index_species`` is ``None`` or contains its species, its forward
    strand start/end convert to ``int``, and the end is greater than the
    start.

    Returns ``(indexes, species, species_chromosomes, blocks)``:
    the ``bx.interval_index_file.Indexes`` object, the species names in
    order of first appearance, a dict mapping each species to its list of
    chromosome names, and the number of blocks read.

    If any exception occurs while reading, it is logged at debug level and
    ``(None, [], {}, 0)`` is returned.
    """
    species = []
    species_chromosomes = {}
    indexes = bx.interval_index_file.Indexes()
    blocks = 0
    try:
        # The reader is not returned, so the file can be closed as soon as
        # the scan finishes (or fails).
        with open(filename) as maf_handle:
            maf_reader = bx.align.maf.Reader(maf_handle)
            while True:
                # offset of the block about to be read; stored in the index
                pos = maf_reader.file.tell()
                block = maf_reader.next()
                if block is None:
                    break
                blocks += 1
                for c in block.components:
                    spec = c.src
                    chrom = None
                    if "." in spec:
                        spec, chrom = spec.split(".", 1)
                    # species_chromosomes gains a key exactly when species
                    # gains an entry, so the dict serves as the membership test
                    if spec not in species_chromosomes:
                        species.append(spec)
                        species_chromosomes[spec] = []
                    if chrom and chrom not in species_chromosomes[spec]:
                        species_chromosomes[spec].append(chrom)
                    if index_species is None or spec in index_species:
                        forward_strand_start = c.forward_strand_start
                        forward_strand_end = c.forward_strand_end
                        try:
                            forward_strand_start = int(forward_strand_start)
                            forward_strand_end = int(forward_strand_end)
                        except ValueError:
                            # start and end are not integers, can't add component to index, go to next component
                            # this likely only occurs when parse_e_rows is True?
                            continue
                        if forward_strand_end > forward_strand_start:
                            # require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
                            indexes.add(c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size)
    except Exception as e:
        # most likely a bad MAF
        log.debug('Building MAF index on %s failed: %s' % (filename, e))
        return (None, [], {}, 0)
    return (indexes, species, species_chromosomes, blocks)
| 0 | 259 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
260 # builds and returns ( index, index_filename ) for specified maf_file |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
261 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
262 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def build_maf_index(maf_file, species=None):
    """
    Build an index for ``maf_file`` in a temporary file and open it.

    The index is produced by ``build_maf_index_species_chromosomes`` and
    written to a file created with ``tempfile.mkstemp``.

    Returns ``(index, index_filename)`` where ``index`` is a
    ``bx.align.maf.Indexed`` object and ``index_filename`` is the path of
    the temporary index file, or ``(None, None)`` when no index could be
    built. The temporary file is not removed here on success.
    """
    indexes = build_maf_index_species_chromosomes(maf_file, species)[0]
    if indexes is None:
        return (None, None)
    fd, index_filename = tempfile.mkstemp()
    try:
        # Binary mode: the index writer emits packed bytes, which a text-mode
        # handle rejects on Python 3. The with-block guarantees the handle is
        # flushed and closed before the index is opened for reading.
        with os.fdopen(fd, 'wb') as out:
            indexes.write(out)
    except Exception:
        # do not leave a partial index file behind
        os.remove(index_filename)
        raise
    return (bx.align.maf.Indexed(maf_file, index_filename=index_filename, keep_open=True, parse_e_rows=False), index_filename)
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
272 |
| 0 | 273 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def component_overlaps_region(c, region):
    """
    Tell whether component ``c`` overlaps ``region``.

    Returns ``False`` when ``c`` is ``None``. Otherwise the component's
    forward strand start/end are compared with ``region.start`` and
    ``region.end``; intervals that merely touch do not count as overlapping.
    """
    if c is None:
        return False
    comp_start = c.get_forward_strand_start()
    comp_end = c.get_forward_strand_end()
    return not (region.start >= comp_end or region.end <= comp_start)
| 281 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
282 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def chop_block_by_region(block, src, region, species=None, mincols=0):
    """
    Slice ``block`` to the columns where its ``src`` components overlap ``region``.

    Every component matching ``src`` that overlaps ``region`` and has text
    contributes a column range; the slice spans the smallest start and the
    largest end of those ranges. The sliced block gets the original block's
    score back. When ``species`` is given the result is limited to those
    species and all-gap columns are removed.

    Returns the chopped block, or ``None`` when there is nothing to slice or
    the sliced block's ``text_size`` is not greater than ``mincols``.
    """
    # This chopping method keeps start/end padding gaps consistent with how
    # they have worked in Galaxy so far: the behavior seen when forcing blocks
    # to be '+' relative to the src sequence (ref) and using
    # block.slice_by_component( ref, slice_start, slice_end ).
    # Whether this is the 'correct' behavior is questionable, but it is consistent.
    first_col = block.text_size  # upper bound, lowered by min() below
    last_col = 0  # lower bound, raised by max() below
    original_score = block.score  # restored on the sliced block later
    # src may occur more than once per block, so every occurrence is checked
    for comp in iter_components_by_src(block, src):
        if not component_overlaps_region(comp, region):
            continue
        if comp.text is None:
            continue
        was_reverse = comp.strand == "-"
        if was_reverse:
            # coord_to_col coordinates should come from a positive stranded component
            comp = comp.reverse_complement()
        col_from = comp.coord_to_col(max(region.start, comp.start))
        col_to = comp.coord_to_col(min(region.end, comp.end))
        if was_reverse:
            # orient the slice coordinates back to the original block direction
            span = col_to - col_from
            col_to = len(comp.text) - col_from
            col_from = col_to - span
        first_col = min(col_from, first_col)
        last_col = max(col_to, last_col)

    if not (first_col < last_col):
        return None
    chopped = block.slice(first_col, last_col)
    if not (chopped.text_size > mincols):
        return None
    # restore old score, may not be accurate, but it is better than 0 for everything?
    chopped.score = original_score
    if species is not None:
        chopped = chopped.limit_to_species(species)
        chopped.remove_all_gap_columns()
    return chopped
| 322 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
323 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def orient_block_by_region(block, src, region, force_strand=None):
    """
    Reverse complement ``block`` when no overlapping ``src`` component has the wanted strand.

    The wanted strand is ``force_strand`` when given, otherwise
    ``region.strand``. The strands of all components matching ``src`` that
    overlap ``region`` are collected; the block is reverse complemented only
    when at least one such component exists and none of them is on the
    wanted strand.

    Two components with the same src can overlap the region on different
    strands; in that case the wanted strand is always present and the block
    is returned as is.

    Returns the (possibly reverse complemented) block.
    """
    strands = [c.strand for c in iter_components_by_src(block, src) if component_overlaps_region(c, region)]
    # The emptiness guard has to cover both the region.strand and the
    # force_strand case: with no overlapping component there is nothing to
    # orient against, so the block is left untouched.
    if strands:
        wanted_strand = region.strand if force_strand is None else force_strand
        if wanted_strand not in strands:
            block = block.reverse_complement()
    return block
| 334 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
335 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_oriented_chopped_blocks_for_region(index, src, region, species=None, mincols=0, force_strand=None):
    """
    Yield only the blocks from ``get_oriented_chopped_blocks_with_index_offset_for_region``.

    All arguments are passed through unchanged; the index and offset that
    accompany each block are dropped.
    """
    block_triples = get_oriented_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols, force_strand)
    for oriented_block, _idx, _offset in block_triples:
        yield oriented_block
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
339 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
340 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_oriented_chopped_blocks_with_index_offset_for_region(index, src, region, species=None, mincols=0, force_strand=None):
    """
    Yield ``(block, idx, offset)`` with each chopped block oriented for ``region``.

    Triples come from ``get_chopped_blocks_with_index_offset_for_region``;
    each block is passed through ``orient_block_by_region`` with
    ``force_strand`` before being yielded with its unchanged ``idx`` and
    ``offset``.
    """
    chopped_triples = get_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols)
    for chopped_block, idx, offset in chopped_triples:
        oriented_block = orient_block_by_region(chopped_block, src, region, force_strand)
        yield oriented_block, idx, offset
| 0 | 344 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
345 # split a block with multiple occurrences of src into one block per src |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
346 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
347 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def iter_blocks_split_by_src(block, src):
    """
    Yield one new alignment per component of ``block`` that matches ``src``.

    Each yielded alignment copies the score, attributes and ``text_size`` of
    ``block`` and holds deep copies of that one ``src`` component plus every
    component whose ``src`` differs from ``src``.
    """
    for kept_src_component in iter_components_by_src(block, src):
        split_block = bx.align.Alignment(score=block.score, attributes=deepcopy(block.attributes))
        split_block.text_size = block.text_size
        retained = [c for c in block.components if c == kept_src_component or c.src != src]
        for component in retained:
            # components hold a reference to their alignment; copying keeps
            # the original components attached to the original block
            split_block.add_component(deepcopy(component))
        yield split_block
| 356 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
357 # split a block into multiple blocks with all combinations of a species appearing only once per block |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
358 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
359 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
360 def iter_blocks_split_by_species(block, species=None): |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
361 def __split_components_by_species(components_by_species, new_block): |
| 0 | 362 if components_by_species: |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
363 # more species with components to add to this block |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
364 components_by_species = deepcopy(components_by_species) |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
365 spec_comps = components_by_species.pop(0) |
| 0 | 366 for c in spec_comps: |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
367 newer_block = deepcopy(new_block) |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
368 newer_block.add_component(deepcopy(c)) |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
369 for value in __split_components_by_species(components_by_species, newer_block): |
| 0 | 370 yield value |
| 371 else: | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
372 # no more components to add, yield this block |
| 0 | 373 yield new_block |
| 374 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
375 # divide components by species |
| 0 | 376 spec_dict = {} |
| 377 if not species: | |
| 378 species = [] | |
| 379 for c in block.components: | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
380 spec, chrom = src_split(c.src) |
| 0 | 381 if spec not in spec_dict: |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
382 spec_dict[spec] = [] |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
383 species.append(spec) |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
384 spec_dict[spec].append(c) |
| 0 | 385 else: |
| 386 for spec in species: | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
387 spec_dict[spec] = [] |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
388 for c in iter_components_by_src_start(block, spec): |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
389 spec_dict[spec].append(c) |
| 0 | 390 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
391 empty_block = bx.align.Alignment(score=block.score, attributes=deepcopy(block.attributes)) # should we copy attributes? |
| 0 | 392 empty_block.text_size = block.text_size |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
393 # call recursive function to split into each combo of spec/blocks |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
394 for value in __split_components_by_species(spec_dict.values(), empty_block): |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
395 sort_block_components_by_block(value, block) # restore original component order |
| 0 | 396 yield value |
| 397 | |
| 398 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
399 # generator yielding only chopped and valid blocks for a specified region |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_chopped_blocks_for_region(index, src, region, species=None, mincols=0):
    """Yield only the chopped blocks for a region.

    Delegates to get_chopped_blocks_with_index_offset_for_region() with the
    same arguments and drops the index and offset that accompany each block.
    """
    chopped = get_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols)
    for chopped_block, _idx, _offset in chopped:
        yield chopped_block
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
403 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
404 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_chopped_blocks_with_index_offset_for_region(index, src, region, species=None, mincols=0):
    """Yield (block, index, offset) for each block that survives chopping.

    Every block the index reports for src between region.start and region.end
    is passed through chop_block_by_region(); blocks for which that call
    returns None are dropped.
    """
    hits = index.get_as_iterator_with_index_and_offset(src, region.start, region.end)
    for raw_block, idx, offset in hits:
        chopped = chop_block_by_region(raw_block, src, region, species, mincols)
        if chopped is None:
            continue
        yield chopped, idx, offset
| 410 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
411 # returns a filled region alignment for specified regions |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
412 |
| 0 | 413 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_region_alignment(index, primary_species, chrom, start, end, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """Return a filled region alignment for the specified region.

    A RegionAlignment of size ``end - start`` is created for ``species`` when
    it is given, otherwise for ``primary_species``, and then populated by
    fill_region_alignment(), whose return value is passed back.
    """
    alignment_species = primary_species if species is None else species
    alignment = RegionAlignment(end - start, alignment_species)
    return fill_region_alignment(
        alignment, index, primary_species, chrom, start, end,
        strand, species, mincols, overwrite_with_gaps
    )
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
420 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
421 # reduces a block to only positions existing in the src provided |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
422 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
423 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def reduce_block_by_primary_genome(block, species, chromosome, region_start):
    """Drop every alignment column that is a gap ('-') in the primary species.

    block: alignment exposing ``components`` (each with ``src`` and ``text``)
        and ``get_component_by_src()``.
    species, chromosome: joined as "species.chromosome" to look up the
        reference component.
    region_start: subtracted from the reference component's ``start``.

    Returns ``(start_offset, species_texts)`` where ``start_offset`` is
    ``ref.start - region_start`` and ``species_texts`` maps the part of each
    component's src before the first '.' to its text with the primary
    species' gap columns removed. If several components share that prefix,
    the last one wins.
    """
    src = "%s.%s" % (species, chromosome)
    ref = block.get_component_by_src(src)
    start_offset = ref.start - region_start
    species_texts = {}
    for c in block.components:
        species_texts[c.src.split('.')[0]] = c.text
    # Record the gap columns once, then filter each text in a single pass.
    # Popping each gap out of every text one at a time is quadratic in the
    # block width.
    gap_columns = set(i for i, char in enumerate(species_texts[species]) if char == '-')
    reduced_texts = {}
    for spec, text in species_texts.items():
        reduced_texts[spec] = ''.join([char for i, char in enumerate(text) if i not in gap_columns])
    return (start_offset, reduced_texts)
| 0 | 441 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
442 # fills a region alignment |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
443 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
444 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def fill_region_alignment(alignment, index, primary_species, chrom, start, end, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """Layer every indexed block overlapping a region onto ``alignment``.

    Blocks that the index reports for "primary_species.chrom" between
    ``start`` and ``end`` are applied in order of increasing score, so text
    from higher-scoring blocks is written last. Each block is split by
    species, chopped and oriented to the region, and reduced to the columns
    present in the primary genome before its texts are written.

    alignment: object exposing set_range() and set_position(); it is
        modified in place and returned.
    index: object exposing get_as_iterator_with_index_and_offset().
    overwrite_with_gaps: when true, each trimmed text is written whole with
        set_range(), interior gap characters included; otherwise only
        non-gap characters are written, one position at a time.
    """
    region = bx.intervals.Interval(start, end)
    region.chrom = chrom
    region.strand = strand
    primary_src = "%s.%s" % (primary_species, chrom)

    # Collect (score, index, offset) for each overlapping block, lowest score
    # first. list.sort() is stable, so equal scores keep their arrival order,
    # which is what the previous manual insertion produced.
    blocks = [
        (float(block.score), idx, offset)
        for block, idx, offset in index.get_as_iterator_with_index_and_offset(primary_src, start, end)
    ]
    blocks.sort(key=lambda entry: entry[0])

    # rstrip()/lstrip() treat this string as a set of single characters
    gap_chars_str = ''.join(GAP_CHARS)
    # Loop through ordered blocks and layer by increasing score
    for _score, idx, offset in blocks:
        # need to handle each occurrence of a sequence in the block separately
        for block in iter_blocks_split_by_species(idx.get_at_offset(offset)):
            if not component_overlaps_region(block.get_component_by_src(primary_src), region):
                continue
            block = chop_block_by_region(block, primary_src, region, species, mincols)  # chop block
            if block is None:
                # chop_block_by_region() can return None (see the guard in
                # get_chopped_blocks_with_index_offset_for_region); there is
                # nothing to layer in that case.
                continue
            block = orient_block_by_region(block, primary_src, region)  # orient block
            start_offset, species_texts = reduce_block_by_primary_genome(block, primary_species, chrom, start)
            for spec, text in species_texts.items():
                # trim gaps from both sides, since these are not positions in
                # this species genome (sequence)
                text = text.rstrip(gap_chars_str)
                trimmed = text.lstrip(gap_chars_str)
                if not trimmed:
                    continue
                # leading gaps shift where the remaining text begins
                position = start_offset + (len(text) - len(trimmed))
                if overwrite_with_gaps:
                    alignment.set_range(position, spec, trimmed)
                else:
                    for i, char in enumerate(trimmed):
                        if char not in GAP_CHARS:
                            alignment.set_position(position + i, spec, char)
    return alignment
| 489 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
490 # returns a filled spliced region alignment for specified region with start and end lists |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
491 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
492 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_spliced_region_alignment(index, primary_species, chrom, starts, ends, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """Return a filled spliced alignment for a region given as start/end lists.

    A SplicedAlignment is built from ``starts`` and ``ends`` for ``species``
    when it is given, otherwise for ``[primary_species]``. Each entry of its
    ``exons`` is then populated by fill_region_alignment() using that exon's
    own ``start`` and ``end``.
    """
    # create spliced alignment object
    exon_species = [primary_species] if species is None else species
    alignment = SplicedAlignment(starts, ends, exon_species)
    for exon in alignment.exons:
        fill_region_alignment(
            exon, index, primary_species, chrom, exon.start, exon.end,
            strand, species, mincols, overwrite_with_gaps
        )
    return alignment
| 502 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
503 # loop through string array, only return non-commented lines |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
504 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
505 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def line_enumerator(lines, comment_start='#'):
    """Yield (count, line) for every line not starting with comment_start.

    The count starts at 1 and advances only for lines that are yielded, so
    commented lines are neither returned nor counted.
    """
    kept = (entry for entry in lines if not entry.startswith(comment_start))
    for numbered in enumerate(kept, 1):
        yield numbered
| 0 | 512 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
513 # read a GeneBed file, return list of starts, ends, raw fields |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
514 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
515 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_starts_ends_fields_from_gene_bed(line):
    """Parse one 12-column BED line into its coding-exon intervals.

    line: a whitespace-separated BED line with at least 12 columns.

    Exon offsets (column 12) are relative to the transcript start (column 2)
    and exon sizes come from column 11. Each exon is clipped to the coding
    region given by columns 7 and 8; exons left empty by clipping are dropped.

    Returns ``(starts, ends, fields)``: two parallel lists of clipped exon
    starts and ends, and the raw split fields of the line.

    Raises Exception when the line has fewer than 12 columns.
    """
    fields = line.split()
    # Requires at least 12 BED columns
    if len(fields) < 12:
        raise Exception("Not a proper 12 column BED line (%s)." % line)
    tx_start = int(fields[1])
    cds_start = int(fields[6])
    cds_end = int(fields[7])

    # Build real lists. On Python 3 map() is lazy, and the chained maps here
    # shared one exon-start iterator with the final zip(), so exons were
    # skipped and paired with the wrong ends.
    exon_offsets = [int(value) for value in fields[11].rstrip(',\n').split(',')]
    exon_sizes = [int(value) for value in fields[10].rstrip(',').split(',')]

    # Calculate and store starts and ends of coding exons
    starts = []
    ends = []
    for offset, size in zip(exon_offsets, exon_sizes):
        exon_start = tx_start + offset
        start = max(exon_start, cds_start)
        end = min(exon_start + size, cds_end)
        if start < end:
            starts.append(start)
            ends.append(end)
    return (starts, ends, fields)
| 0 | 545 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
546 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def iter_components_by_src(block, src):
    """Yield each component of ``block`` whose ``src`` equals ``src``.

    Components are produced in the order they appear in
    ``block.components``; nothing is yielded when none match.
    """
    for component in block.components:
        if component.src != src:
            continue
        yield component
| 551 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
552 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_components_by_src(block, src):
    """Return a list of the components of ``block`` whose ``src`` equals ``src``.

    This is the eager (list) form of ``iter_components_by_src``.
    """
    return list(iter_components_by_src(block, src))
| 0 | 555 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
556 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def iter_components_by_src_start(block, src):
    """Yield each component of ``block`` whose ``src`` starts with ``src``.

    Matching is a plain string-prefix test (``str.startswith``), and
    components are produced in the order they appear in ``block.components``.
    """
    for component in block.components:
        if not component.src.startswith(src):
            continue
        yield component
| 561 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
562 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_components_by_src_start(block, src):
    """Return a list of the components of ``block`` whose ``src`` starts with ``src``.

    This is the eager (list) form of ``iter_components_by_src_start``.
    """
    return list(iter_components_by_src_start(block, src))
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
565 |
| 0 | 566 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def sort_block_components_by_block(block1, block2):
    """Reorder ``block1.components`` in place to follow their order in ``block2``.

    Each component of ``block1`` is ranked by its index in
    ``block2.components``, so the components of ``block1`` must be a subset
    of those of ``block2``; ``list.index`` raises ``ValueError`` for a
    component that is missing from ``block2``.

    Returns None (the result of the in-place ``list.sort``).
    """
    # The ``cmp`` argument of ``list.sort`` exists only on Python 2. Sorting
    # by the index itself gives the same (stable) ordering as comparing index
    # differences, and works on both Python 2 and Python 3.
    return block1.components.sort(key=block2.components.index)
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
572 |
| 0 | 573 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_species_in_maf(maf_filename):
    """Return the species present in a MAF file, in order of first appearance.

    Every block read from ``maf_filename`` is passed to
    ``get_species_in_block``; each species it reports is added to the result
    once, the first time it is seen.
    """
    species = []
    # Open the file in a ``with`` block so the handle is closed once the
    # reader is exhausted (or if reading fails) instead of being leaked.
    with open(maf_filename) as maf_file:
        for block in bx.align.maf.Reader(maf_file):
            for spec in get_species_in_block(block):
                if spec not in species:
                    species.append(spec)
    return species
| 581 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
582 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def parse_species_option(species):
    """Turn a comma-separated species option into a list of names.

    Returns None when ``species`` is empty or None, or when any of the
    comma-separated entries is the literal string 'None'; otherwise returns
    the list produced by splitting on ','.
    """
    if not species:
        # provided species was '' or None
        return None
    names = species.split(',')
    if 'None' in names:
        # the option had 'None' in it
        return None
    return names
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
589 |
| 0 | 590 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def remove_temp_index_file(index_filename):
    """Delete ``index_filename``, ignoring any failure.

    Removal is best-effort: whatever ``os.unlink`` raises (for example
    because the file is already gone or the argument is not a usable path)
    is swallowed, and the function always returns None.
    """
    try:
        os.unlink(index_filename)
    except Exception:
        # Cleanup must never abort the caller; nothing useful can be done here.
        return None
    else:
        return None
| 0 | 596 |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
597 # Below are methods to deal with FASTA files |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
598 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
599 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_fasta_header(component, attributes=None, suffix=None):
    """Build a FASTA header line for an alignment component.

    The header has the form ``>src(strand):start-end|key=value|...|suffix``.

    component: object providing ``src``, ``strand``,
        ``get_forward_strand_start()`` and ``get_forward_strand_end()``.
    attributes: optional mapping; every item is appended as ``key=value|``
        in the mapping's iteration order. None is treated as empty.
    suffix: final field of the header. When it is empty or None, the first
        element returned by ``src_split(component.src)`` is used instead.

    Returns the header string, without a trailing newline.
    """
    header = ">%s(%s):%i-%i|" % (
        component.src,
        component.strand,
        component.get_forward_strand_start(),
        component.get_forward_strand_end(),
    )
    # ``dict.iteritems`` only exists on Python 2; ``items`` works on both
    # Python 2 and 3. ``attributes`` defaults to None rather than a shared
    # mutable ``{}``.
    for key, value in (attributes or {}).items():
        header = "%s%s=%s|" % (header, key, value)
    if not suffix:
        suffix = src_split(component.src)[0]
    return "%s%s" % (header, suffix)
| 609 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
610 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
def get_attributes_from_fasta_header(header):
    """Parse a FASTA header into a dictionary of attributes.

    The expected layout is ``>src(strand):start-end|key=value|...|suffix``,
    with fields separated by '|'.

    The first field is parsed as a region. 'species' and 'chrom' are taken
    from the text before '(' (split on the first '.'; without a '.', both
    get the whole name). 'strand', 'start' and 'end' (ints) follow. If the
    first field is not a full region coordinate, parsing stops at the first
    missing piece and the keys already extracted are kept.

    Fields between the first and the last are stored as ``key -> value``
    when they contain '='; others are ignored. With more than one field,
    the last is stored under '__suffix__'.

    Returns an empty dict for an empty or None header.
    """
    if not header:
        return {}
    attributes = {}
    # Strip surrounding whitespace first so a '>' preceded by blanks is still
    # removed, then strip again for any blanks that followed the '>'.
    header = header.strip().lstrip('>').strip()
    fields = header.split('|')
    try:
        region = fields[0].split('(', 1)
        name_parts = region[0].split('.', 1)
        attributes['species'] = name_parts[0]
        # With no '.', the single name part serves as the chromosome too.
        attributes['chrom'] = name_parts[-1]
        region = region[1].split(')', 1)
        attributes['strand'] = region[0]
        coords = region[1].lstrip(':').split('-')
        attributes['start'] = int(coords[0])
        attributes['end'] = int(coords[1])
    except (IndexError, ValueError):
        # fields 0 is not a region coordinate: a piece is missing
        # (IndexError) or a coordinate is not an integer (ValueError).
        pass
    # Everything between the region and the suffix is a key=value property.
    for field in fields[1:-1]:
        prop = field.split('=', 1)
        if len(prop) == 2:
            attributes[prop[0]] = prop[1]
    if len(fields) > 1:
        attributes['__suffix__'] = fields[-1]
    return attributes
| 643 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
644 |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
645 def iter_fasta_alignment(filename): |
| 0 | 646 class fastaComponent: |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
647 def __init__(self, species, text=""): |
| 0 | 648 self.species = species |
| 649 self.text = text | |
| 650 | |
|
2
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
651 def extend(self, text): |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
652 self.text = self.text + text.replace('\n', '').replace('\r', '').strip() |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
653 # yields a list of fastaComponents for a FASTA file |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
654 with open(filename, 'r') as f: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
655 components = [] |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
656 # cur_component = None |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
657 while True: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
658 line = f.readline() |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
659 if not line: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
660 if components: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
661 yield components |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
662 return |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
663 line = line.strip() |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
664 if not line: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
665 if components: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
666 yield components |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
667 components = [] |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
668 elif line.startswith('>'): |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
669 attributes = get_attributes_from_fasta_header(line) |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
670 components.append(fastaComponent(attributes['species'])) |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
671 elif components: |
|
c5311b7718d1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
1
diff
changeset
|
672 components[-1].extend(line) |
