Mercurial > repos > bgruening > infernal
comparison infernal.py @ 10:0bffd4183326 draft
Uploaded
author | bgruening |
---|---|
date | Sun, 08 Sep 2013 05:08:20 -0400 |
parents | 1aa4c036e41c |
children | 2f8a8962ae22 |
comparison
equal
deleted
inserted
replaced
9:1aa4c036e41c | 10:0bffd4183326 |
---|---|
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 | 2 |
3 from galaxy.datatypes import data | 3 from galaxy.datatypes.data import Text |
4 from galaxy.datatypes.sniff import get_headers, get_test_fname | 4 from galaxy.datatypes.sniff import get_headers, get_test_fname |
5 from galaxy.datatypes.data import get_file_peek | 5 from galaxy.datatypes.data import get_file_peek |
6 import subprocess | 6 import subprocess |
7 import os | 7 import os |
8 | 8 |
39 except: | 39 except: |
40 pass | 40 pass |
41 return 0 | 41 return 0 |
42 | 42 |
43 | 43 |
44 class Stockholm_1_0( data.text ): | 44 class Infernal_CM_1_1( Text ): |
45 file_ext = "stockholm" | |
46 | |
47 MetadataElement( name="number_of_alignments", default=0, desc="Number of multiple alignments", readonly=True, visible=True, optional=True, no_value=0 ) | |
48 | |
49 def set_peek( self, dataset, is_multi_byte=False ): | |
50 if not dataset.dataset.purged: | |
51 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) | |
52 if (dataset.metadata.number_of_models == 1): | |
53 dataset.blurb = "1 alignment" | |
54 else: | |
55 dataset.blurb = "%s alignments" % dataset.metadata.number_of_models | |
56 dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) | |
57 else: | |
58 dataset.peek = 'file does not exist' | |
59 dataset.blurb = 'file purged from disc' | |
60 | |
61 def sniff( self, filename ): | |
62 if count_special_lines('^#[[:space:]+]STOCKHOLM[[:space:]+]1.0', filename) > 0: | |
63 return True | |
64 else: | |
65 return False | |
66 | |
67 def set_meta( self, dataset, **kwd ): | |
68 """ | |
69 | |
70 Set the number of models in dataset. | |
71 """ | |
72 dataset.metadata.number_of_models = count_special_lines('^#[[:space:]+]STOCKHOLM[[:space:]+]1.0', dataset.file_name) | |
73 | |
74 def split( cls, input_datasets, subdir_generator_function, split_params): | |
75 """ | |
76 | |
77 Split the input files by model records. | |
78 """ | |
79 if split_params is None: | |
80 return None | |
81 | |
82 if len(input_datasets) > 1: | |
83 raise Exception("STOCKHOLM-file splitting does not support multiple files") | |
84 input_files = [ds.file_name for ds in input_datasets] | |
85 | |
86 chunk_size = None | |
87 if split_params['split_mode'] == 'number_of_parts': | |
88 raise Exception('Split mode "%s" is currently not implemented for STOCKHOLM-files.' % split_params['split_mode']) | |
89 elif split_params['split_mode'] == 'to_size': | |
90 chunk_size = int(split_params['split_size']) | |
91 else: | |
92 raise Exception('Unsupported split mode %s' % split_params['split_mode']) | |
93 | |
94 def _read_stockholm_records( filename ): | |
95 lines = [] | |
96 with open(filename) as handle: | |
97 for line in handle: | |
98 lines.append( line ) | |
99 if line.strip() == '//': | |
100 yield lines | |
101 lines = [] | |
102 | |
103 def _write_part_stockholm_file( accumulated_lines ): | |
104 part_dir = subdir_generator_function() | |
105 part_path = os.path.join( part_dir, os.path.basename( input_files[0] ) ) | |
106 part_file = open( part_path, 'w' ) | |
107 part_file.writelines( accumulated_lines ) | |
108 part_file.close() | |
109 | |
110 try: | |
111 | |
112 stockholm_records = _read_stockholm_records( input_files[0] ) | |
113 stockholm_lines_accumulated = [] | |
114 for counter, stockholm_record in enumerate( stockholm_records, start = 1): | |
115 stockholm_lines_accumulated.extend( stockholm_record ) | |
116 if counter % chunk_size == 0: | |
117 _write_part_stockholm_file( stockholm_lines_accumulated ) | |
118 stockholm_lines_accumulated = [] | |
119 if stockholm_lines_accumulated: | |
120 _write_part_stockholm_file( stockholm_lines_accumulated ) | |
121 except Exception, e: | |
122 log.error('Unable to split files: %s' % str(e)) | |
123 raise | |
124 split = classmethod(split) | |
125 | |
126 | |
127 class Infernal_CM_1_1( data.text ): | |
128 file_ext = "cm" | 45 file_ext = "cm" |
129 | 46 |
130 MetadataElement( name="number_of_models", default=0, desc="Number of covariance models", readonly=True, visible=True, optional=True, no_value=0 ) | 47 MetadataElement( name="number_of_models", default=0, desc="Number of covariance models", readonly=True, visible=True, optional=True, no_value=0 ) |
131 | 48 |
132 def set_peek( self, dataset, is_multi_byte=False ): | 49 def set_peek( self, dataset, is_multi_byte=False ): |
203 except Exception, e: | 120 except Exception, e: |
204 log.error('Unable to split files: %s' % str(e)) | 121 log.error('Unable to split files: %s' % str(e)) |
205 raise | 122 raise |
206 split = classmethod(split) | 123 split = classmethod(split) |
207 | 124 |
| | 125 if __name__ == '__main__': |
| | 126 Infernal_CM_1_1() |
| | 127 Stockholm_1_0() |
| | 128 |