Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/vdb/built-in.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 /*=========================================================================== | |
2 * | |
3 * PUBLIC DOMAIN NOTICE | |
4 * National Center for Biotechnology Information | |
5 * | |
6 * This software/database is a "United States Government Work" under the | |
7 * terms of the United States Copyright Act. It was written as part of | |
8 * the author's official duties as a United States Government employee and | |
9 * thus cannot be copyrighted. This software/database is freely available | |
10 * to the public for use. The National Library of Medicine and the U.S. | |
11 * Government have not placed any restriction on its use or reproduction. | |
12 * | |
13 * Although all reasonable efforts have been taken to ensure the accuracy | |
14 * and reliability of the software and data, the NLM and the U.S. | |
15 * Government do not and cannot warrant the performance or results that | |
16 * may be obtained by using this software or data. The NLM and the U.S. | |
17 * Government disclaim all warranties, express or implied, including | |
18 * warranties of performance, merchantability or fitness for any particular | |
19 * purpose. | |
20 * | |
21 * Please cite the author in any work or product based on this material. | |
22 * | |
23 * =========================================================================== | |
24 * | |
25 */ | |
26 | |
27 /*========================================================================== | |
28 * VDB built-in functions, formats and types | |
29 */ | |
30 version 1; | |
31 | |
32 | |
33 /*-------------------------------------------------------------------------- | |
34 * types | |
35 */ | |
36 | |
37 // A row id range consists of row_id_start and row_id_stop | |
38 typedef I64 vdb:row_id_range [ 2 ]; | |
39 | |
40 | |
41 /*-------------------------------------------------------------------------- | |
42 * typesets | |
43 */ | |
44 typeset integer_set { I8, U8, I16, U16, I32, U32, I64, U64 }; | |
45 typeset float_set { F32, F64 }; | |
46 typeset numeric_set { integer_set, float_set }; | |
47 typeset text_set { utf8, utf16, utf32, ascii }; | |
48 typeset text8_set { utf8, ascii }; | |
49 typeset transpose_set { B8, B16, B32, B64 }; | |
50 | |
51 | |
52 /*-------------------------------------------------------------------------- | |
53 * formats | |
54 */ | |
55 fmtdef merged_fmt; | |
56 fmtdef transposed_fmt; | |
57 fmtdef delta_averaged_fmt; | |
58 | |
59 | |
60 /*-------------------------------------------------------------------------- | |
61 * constants | |
62 */ | |
63 const U8 ALIGN_LEFT = 0; | |
64 const U8 ALIGN_RIGHT = 1; | |
65 | |
66 /*-------------------------------------------------------------------------- | |
67 * functions | |
68 */ | |
69 | |
70 | |
71 /* cast | |
72 * performs a "C++ reinterpret_cast" style cast | |
73 * rewrites input as required to produce output | |
74 * | |
75 * legal operations include numeric_set -> numeric_set, | |
76 * numeric <-> character, etc. | |
77 */ | |
78 function | |
79 any cast #1.0 ( any in ) | |
80 = vdb:cast; | |
81 | |
82 /* bit_or | |
83 * performs a bitwise 'OR' operation on every byte in A and B | |
84 * A and B do not necessarily have the same size | |
85 * the resulting row will have the size of B, while the OR operation is done for the portion of A overlapping B | |
86 * for different sizes of A and B, the 'align' parameter specifies which edges of A and B are aligned | |
87 * possible values: ALIGN_LEFT, ALIGN_RIGHT | |
88 * | |
89 */ | |
90 function < type T > | |
91 T bit_or #1 < U8 align > ( T A, T B ) | |
92 = vdb:bit_or; | |
93 | |
94 /* trim | |
95 * trims the value "val" from column A | |
96 * "align" selects left- or right-trimming | |
97 */ | |
98 function < type T > | |
99 T trim #1 < U8 align, T val > ( T A ) | |
100 = vdb:trim; | |
101 | |
102 | |
103 /* redimension | |
104 * performs a change of dimension without changing bit pattern | |
105 */ | |
106 function | |
107 any redimension #1.0 ( any in ) | |
108 = vdb:redimension; | |
109 | |
110 | |
111 /* row_id | |
112 * returns the row id of a request | |
113 */ | |
114 function | |
115 I64 row_id #1.0 () | |
116 = vdb:row_id; | |
117 | |
118 | |
119 /* row_len | |
120 * returns the number of elements in a row | |
121 * | |
122 * "in" [ DATA ] - column supplying row. if row does not exist | |
123 * in column, the resultant length is 0. | |
124 */ | |
125 function | |
126 U32 row_len #1.0 ( any in ) | |
127 = vdb:row_len; | |
128 | |
129 | |
130 /* fixed_row_len | |
131 * returns non-zero if the entire page | |
132 * has a uniform row-length, zero otherwise | |
133 * | |
134 * "in" [ DATA ] - column to query | |
135 */ | |
136 function | |
137 U32 fixed_row_len #1.0 ( any in ) | |
138 = vdb:fixed_row_len; | |
139 | |
140 | |
141 /* compare | |
142 * evaluates src [ i ] == cmp [ i ] | |
143 * causes writing exception if unequal. | |
144 * | |
145 * For whole types, equality is bitwise equal | |
146 * for floating point types see below. | |
147 * | |
148 * "T" [ TYPE ] - base element type to be processed | |
149 * | |
150 * "sig_bits" [ OPTIONAL CONST >= 1 ] - for floating point types, ignored | |
151 * otherwise, the number of significant binary digits in the mantissas to | |
152 * compare such that |x - y| <= 1, for corresponding numbers x (in src) and | |
153 * y (in cmp), both scaled according to sig_bits and their common magnitude. | |
154 * "sig_bits" may be an array, if so "sel" is required (see below). | |
155 * | |
156 * "src" [ DATA ] - standard input data derived from source | |
157 * | |
158 * "cmp" [ DATA ] - feedback data after being written and re-read | |
159 * | |
160 */ | |
161 validate function < type T > | |
162 void compare #1.0 < * U32 sig_bits > ( T src, T cmp ) | |
163 = vdb:compare; | |
164 | |
165 validate function < type T > | |
166 void no_compare #1.0 ( T src, T cmp ) | |
167 = vdb:no_compare; | |
168 | |
169 | |
170 /* compare2f | |
171 * evaluates src [ i ] == cmp [ i ] | |
172 * causes writing exception if unequal. | |
173 * | |
174 * "T" [ TYPE ] - base element type to be processed | |
175 * | |
176 * "sig_bits" [ CONST >= 1 ] - for floating point types, ignored otherwise, | |
177 * array containing the number of significant binary digits in the mantissas | |
178 * to compare such that |x - y| <= 1, for corresponding numbers x (in src) and | |
179 * y (in cmp), both scaled according to sig_bits and their common magnitude. | |
180 * | |
181 * "src" [ DATA ] - standard input data derived from source | |
182 * | |
183 * "cmp" [ DATA ] - feedback data after being written and re-read | |
184 * | |
185 * "sel" [ DATA ] - data to select which element of "sig_bits" to | |
186 * use for the comparison. The valid values of "sel" are | |
187 * [0 .. length sig_bits). | |
188 * | |
189 validate function < type T > | |
190 void compare2f #1.0 < U32 sig_bits > ( float_set src, float_set cmp, T sel ) | |
191 = vdb:compare2f; | |
192 */ | |
193 | |
194 /* range_validate | |
195 * passes input through if all values fall between lower and | |
196 * upper bounds, INCLUSIVE | |
197 * | |
198 * "T" [ TYPE ] - type to be validated | |
199 * | |
200 * "lower" [ CONST ] and "upper" [ CONST ] - inclusive | |
201 * bounds on input values | |
202 * | |
203 * "in" [ DATA ] - data to be validated | |
204 */ | |
205 function < type T > | |
206 T range_validate #1.0 < T lower, T upper > ( T in ) | |
207 = vdb:range_validate; | |
208 | |
209 | |
210 /* select | |
211 * return first non-empty input for id | |
212 * inputs are taken from first to last | |
213 * | |
214 * "T" [ TYPE ] - data type of selection | |
215 * | |
216 * "first" [ DATA ] - first of N inputs | |
217 * | |
218 * "second" [ DATA ] - second of N inputs | |
219 * all other inputs are optional and must | |
220 * be compatible with type "T" | |
221 */ | |
222 function < type T > | |
223 T select #1.0 ( T first, T second, ... ) | |
224 = vdb:select; | |
225 | |
226 | |
227 /* transpose | |
228 * transpose a page of unformatted data | |
229 * | |
230 * for example - convert a simple page of values, | |
231 * where vertical scale is row id and horizontal element index: | |
232 * | |
233 * 1 2 3 | |
234 * +---+---+---+ | |
235 * 1 | a | b | c | | |
236 * +---+---+---+ | |
237 * 2 | d | e | f | | |
238 * +---+---+---+ | |
239 * 3 | g | h | i | | |
240 * +---+---+---+ | |
241 * 4 | j | k | l | | |
242 * +---+---+---+ | |
243 * | |
244 * into: | |
245 * | |
246 * 1 2 3 4 | |
247 * +---+---+---+---+ | |
248 * 1 | a | d | g | j | | |
249 * +---+---+---+---+ | |
250 * 2 | b | e | h | k | | |
251 * +---+---+---+---+ | |
252 * 3 | c | f | i | l | | |
253 * +---+---+---+---+ | |
254 * | |
255 * variable row-lengths are supported. The output blob is | |
256 * formatted, meaning that the result can no longer be addressed | |
257 * as a matrix, but the transposition has been applied to the data. | |
258 * | |
259 * "in" [ DATA ] - unformatted data to be transposed | |
260 */ | |
261 function | |
262 transposed_fmt transpose #1 ( transpose_set in ) | |
263 = vdb:transpose; | |
264 | |
265 | |
266 /* detranspose | |
267 * pardoning the awful name, apply a transposition on the result | |
268 * of "transpose" to produce the original blob. "transpose" | |
269 * itself cannot be reused because of its signature. | |
270 */ | |
271 function | |
272 transpose_set detranspose #1 ( transposed_fmt in ) | |
273 = vdb:detranspose; | |
274 | |
275 /* | |
276 * delta_average computes an average representation of the maximum | |
277 * length row and deltas every row against it | |
278 */ | |
279 function | |
280 delta_averaged_fmt delta_average #1 ( any in ) | |
281 = vdb:delta_average; | |
282 | |
283 function | |
284 any undelta_average #1 ( delta_averaged_fmt in ) | |
285 = vdb:undelta_average; | |
286 | |
287 /* merge | |
288 * merges all input blobs of any format/type into a single blob | |
289 */ | |
290 function | |
291 merged_fmt merge #1.0 ( any in, ... ) | |
292 = vdb:merge; | |
293 | |
294 | |
295 /* split | |
296 * extracts a single blob from a merged blob by index | |
297 * | |
298 * "idx" [ CONST ] - blob index | |
299 */ | |
300 | |
301 function | |
302 any split #1.0 < U32 idx > ( merged_fmt in ) | |
303 = vdb:split; | |
304 | |
305 | |
306 /* meta:read | |
307 * reads table metadata node as a row | |
308 * meta:value | |
309 * reads metadata node as single value, | |
310 * performing size conversion if necessary, | |
311 * e.g. I8 TO I64, I32 TO I16 | |
312 * | |
313 * "T" [ TYPE ] - cast data type of metadata node | |
314 * | |
315 * "node" [ CONST ] - path to metadata node | |
316 */ | |
317 function < type T > | |
318 T meta:read #1.0 < ascii node, * bool deterministic > (); | |
319 | |
320 function < type T > | |
321 T meta:value #1.0 < ascii node, * bool deterministic > (); | |
322 | |
323 | |
324 /* meta:write | |
325 * writes row data to table metadata node | |
326 * | |
327 * "T" [ TYPE ] - cast data type of metadata node | |
328 * | |
329 * "node" [ CONST ] - path to metadata node | |
330 * | |
331 * "in" [ DATA ] - source of row data | |
332 */ | |
333 function < type T > | |
334 T meta:write #1.0 < ascii node > ( T in ); | |
335 | |
336 | |
337 /* meta:attr:read | |
338 * reads table metadata attribute as a row | |
339 * | |
340 * "node" [ CONST ] - path to metadata node | |
341 * | |
342 * "attr" [ CONST ] - attribute name on node | |
343 */ | |
344 function | |
345 ascii meta:attr:read #1.0 < ascii node, ascii attr, * bool deterministic > (); | |
346 | |
347 | |
348 /* meta:attr:write | |
349 * write row data as table metadata attribute | |
350 * | |
351 * "node" [ CONST ] - path to metadata node | |
352 * | |
353 * "attr" [ CONST ] - attribute name on node | |
354 */ | |
355 function | |
356 ascii meta:attr:write #1.0 < ascii node, ascii attr > ( ascii in ); | |
357 | |
358 | |
359 /* parameter:read | |
360 * reads named cursor parameter text | |
361 * | |
362 * "name" [ CONST ] - parameter name | |
363 */ | |
364 function | |
365 text8_set parameter:read #1.0 < ascii name, * bool deterministic > (); | |
366 | |
367 | |
368 /* environment:read | |
369 * reads named environment variable text | |
370 * | |
371 * "name" [ CONST ] - environment variable name | |
372 */ | |
373 function | |
374 text8_set environment:read #1.0 < ascii name > (); | |
375 | |
376 // case sensitivity mode | |
377 const U8 CASE_SENSITIVE = 0; | |
378 const U8 CASE_INSENSITIVE_LOWER = 1; | |
379 const U8 CASE_INSENSITIVE_UPPER = 2; | |
380 | |
381 /* idx:text:project | |
382 * perform a reverse lookup in an index | |
383 * if key not found then use substitute | |
384 * | |
385 * "index_name" [ CONST ] - name of text index | |
386 * | |
387 * "substitute" [ DATA, OPTIONAL ] - source | |
388 * of values to substitute for values not | |
389 * found in the index. | |
390 * | |
391 * Version 1.1: look into "substitute" (if available) first and then to the index, | |
392 * added parameter "case_sensitivity" | |
393 */ | |
394 function text8_set idx:text:project #1.1 < ascii index_name, * U8 case_sensitivity > ( * text8_set substitute ); | |
395 | |
396 | |
397 /* idx:text:insert | |
398 * inserts "key" into index. returns key if insertion into index failed or | |
399 * when "case_sensitivity" selects a case-insensitive mode | |
400 * | |
401 * Version 1.1: added parameter "case_sensitivity" | |
402 */ | |
403 function text8_set idx:text:insert #1.1 < ascii index_name, * U8 case_sensitivity > ( text8_set key ); | |
404 | |
405 | |
406 /* idx:text:lookup | |
407 * perform a lookup in an index | |
408 * returns the vdb:row_id_range associated with the key that was looked up | |
409 * | |
410 * Version 1.1: added parameter "case_sensitivity" | |
411 */ | |
412 function vdb:row_id_range idx:text:lookup #1.1 < ascii index_name , ascii query_by_name, * U8 case_sensitivity > (); |