Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/vdb/built-in.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
| author | charles_s_test |
|---|---|
| date | Mon, 27 Nov 2017 11:21:07 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:0d65b71ff8df | 3:38ad1130d077 |
|---|---|
| 1 /*=========================================================================== | |
| 2 * | |
| 3 * PUBLIC DOMAIN NOTICE | |
| 4 * National Center for Biotechnology Information | |
| 5 * | |
| 6 * This software/database is a "United States Government Work" under the | |
| 7 * terms of the United States Copyright Act. It was written as part of | |
| 8 * the author's official duties as a United States Government employee and | |
| 9 * thus cannot be copyrighted. This software/database is freely available | |
| 10 * to the public for use. The National Library of Medicine and the U.S. | |
| 11 * Government have not placed any restriction on its use or reproduction. | |
| 12 * | |
| 13 * Although all reasonable efforts have been taken to ensure the accuracy | |
| 14 * and reliability of the software and data, the NLM and the U.S. | |
| 15 * Government do not and cannot warrant the performance or results that | |
| 16 * may be obtained by using this software or data. The NLM and the U.S. | |
| 17 * Government disclaim all warranties, express or implied, including | |
| 18 * warranties of performance, merchantability or fitness for any particular | |
| 19 * purpose. | |
| 20 * | |
| 21 * Please cite the author in any work or product based on this material. | |
| 22 * | |
| 23 * =========================================================================== | |
| 24 * | |
| 25 */ | |
| 26 | |
| 27 /*========================================================================== | |
| 28 * VDB built-in functions, formats and types | |
| 29 */ | |
| 30 version 1; | |
| 31 | |
| 32 | |
| 33 /*-------------------------------------------------------------------------- | |
| 34 * types | |
| 35 */ | |
| 36 | |
| 37 // A row id range consists of row_id_start and row_id_stop | |
| 38 typedef I64 vdb:row_id_range [ 2 ]; | |
| 39 | |
| 40 | |
| 41 /*-------------------------------------------------------------------------- | |
| 42 * typesets | |
| 43 */ | |
| 44 typeset integer_set { I8, U8, I16, U16, I32, U32, I64, U64 }; | |
| 45 typeset float_set { F32, F64 }; | |
| 46 typeset numeric_set { integer_set, float_set }; | |
| 47 typeset text_set { utf8, utf16, utf32, ascii }; | |
| 48 typeset text8_set { utf8, ascii }; | |
| 49 typeset transpose_set { B8, B16, B32, B64 }; | |
| 50 | |
| 51 | |
| 52 /*-------------------------------------------------------------------------- | |
| 53 * formats | |
| 54 */ | |
| 55 fmtdef merged_fmt; | |
| 56 fmtdef transposed_fmt; | |
| 57 fmtdef delta_averaged_fmt; | |
| 58 | |
| 59 | |
| 60 /*-------------------------------------------------------------------------- | |
| 61 * constants | |
| 62 */ | |
| 63 const U8 ALIGN_LEFT = 0; | |
| 64 const U8 ALIGN_RIGHT = 1; | |
| 65 | |
| 66 /*-------------------------------------------------------------------------- | |
| 67 * functions | |
| 68 */ | |
| 69 | |
| 70 | |
| 71 /* cast | |
| 72 * performs a "C++ reinterpret_cast" style cast | |
| 73 * rewrites input as required to produce output | |
| 74 * | |
| 75 * legal operations include numeric_set -> numeric_set, | |
| 76 * numeric <-> character, etc. | |
| 77 */ | |
| 78 function | |
| 79 any cast #1.0 ( any in ) | |
| 80 = vdb:cast; | |
| 81 | |
| 82 /* bit_or | |
| 83 * performs a bitwise operation 'OR' for every byte in A and B | |
| 84 * A and B do not necessarily have the same size | |
| 85 * the resulting row will have the size of B, while the OR operation is done for the portion of A overlapping B | |
| 86 * For different sizes of A and B, the 'align' parameter specifies which edge of A and B is aligned | |
| 87 * possible values ALIGN_LEFT, ALIGN_RIGHT | |
| 88 * | |
| 89 */ | |
| 90 function < type T > | |
| 91 T bit_or #1 < U8 align > ( T A, T B ) | |
| 92 = vdb:bit_or; | |
| 93 | |
| 94 /* trim | |
| 95 * performs trimming of value val from column A | |
| 96 * align provides left- or right- trimming | |
| 97 */ | |
| 98 function < type T > | |
| 99 T trim #1 < U8 align, T val > ( T A ) | |
| 100 = vdb:trim; | |
| 101 | |
| 102 | |
| 103 /* redimension | |
| 104 * performs a change of dimension without changing bit pattern | |
| 105 */ | |
| 106 function | |
| 107 any redimension #1.0 ( any in ) | |
| 108 = vdb:redimension; | |
| 109 | |
| 110 | |
| 111 /* row_id | |
| 112 * returns the row id of a request | |
| 113 */ | |
| 114 function | |
| 115 I64 row_id #1.0 () | |
| 116 = vdb:row_id; | |
| 117 | |
| 118 | |
| 119 /* row_len | |
| 120 * returns the number of elements in a row | |
| 121 * | |
| 122 * "in" [ DATA ] - column supplying row. if row does not exist | |
| 123 * in column, the resultant length is 0. | |
| 124 */ | |
| 125 function | |
| 126 U32 row_len #1.0 ( any in ) | |
| 127 = vdb:row_len; | |
| 128 | |
| 129 | |
| 130 /* fixed_row_len | |
| 131 * returns non-zero if the entire page | |
| 132 * has a uniform row-length, zero otherwise | |
| 133 * | |
| 134 * "in" [ DATA ] - column to query | |
| 135 */ | |
| 136 function | |
| 137 U32 fixed_row_len #1.0 ( any in ) | |
| 138 = vdb:fixed_row_len; | |
| 139 | |
| 140 | |
| 141 /* compare | |
| 142 * evaluates src [ i ] == cmp [ i ] | |
| 143 * causes writing exception if unequal. | |
| 144 * | |
| 145 * For whole types, equality is bitwise equal | |
| 146 * for floating point types see below. | |
| 147 * | |
| 148 * "T" [ TYPE ] - base element type to be processed | |
| 149 * | |
| 150 * "sig_bits" [ OPTIONAL CONST >= 1 ] - for floating point types, ignored | |
| 151 * otherwise, the number of significant binary digits in the mantissas to | |
| 152 * compare such that |x - y| <= 1, for corresponding numbers x (in src) and | |
| 153 * y (in cmp) both scaled according to sig_bits and their common magnitude. | |
| 154 * "sig_bits" may be an array, if so "sel" is required (see below). | |
| 155 * | |
| 156 * "src" [ DATA ] - standard input data derived from source | |
| 157 * | |
| 158 * "cmp" [ DATA ] - feedback data after being written and re-read | |
| 159 * | |
| 160 */ | |
| 161 validate function < type T > | |
| 162 void compare #1.0 < * U32 sig_bits > ( T src, T cmp ) | |
| 163 = vdb:compare; | |
| 164 | |
| 165 validate function < type T > | |
| 166 void no_compare #1.0 ( T src, T cmp ) | |
| 167 = vdb:no_compare; | |
| 168 | |
| 169 | |
| 170 /* compare2f | |
| 171 * evaluates src [ i ] == cmp [ i ] | |
| 172 * causes writing exception if unequal. | |
| 173 * | |
| 174 * "T" [ TYPE ] - base element type to be processed | |
| 175 * | |
| 176 * "sig_bits" [ CONST >= 1 ] - for floating point types, ignored otherwise, | |
| 177 * array containing the number of significant binary digits in the mantissas | |
| 178 * to compare such that |x - y| <= 1, for corresponding numbers x (in a) and | |
| 179 * y (in b) both scaled according to sig_bits and their common magnitude. | |
| 180 * | |
| 181 * "src" [ DATA ] - standard input data derived from source | |
| 182 * | |
| 183 * "cmp" [ DATA ] - feedback data after being written and re-read | |
| 184 * | |
| 185 * "sel" [ DATA ] - data to select which element of "sig_bits" to | |
| 186 * use for the comparison. The valid values of "sel" are | |
| 187 * [0 .. length sig_bits). | |
| 188 * | |
| 189 validate function < type T > | |
| 190 void compare2f #1.0 < U32 sig_bits > ( float_set src, float_set cmp, T sel ) | |
| 191 = vdb:compare2f; | |
| 192 */ | |
| 193 | |
| 194 /* range_validate | |
| 195 * passes input through if all values fall between lower and | |
| 196 * upper bounds, INCLUSIVE | |
| 197 * | |
| 198 * "T" [ TYPE ] - type to be validated | |
| 199 * | |
| 200 * "lower" [ CONST ] and "upper" [ CONST ] - inclusive | |
| 201 * bounds on input values | |
| 202 * | |
| 203 * "in" [ DATA ] - data to be validated | |
| 204 */ | |
| 205 function < type T > | |
| 206 T range_validate #1.0 < T lower, T upper > ( T in ) | |
| 207 = vdb:range_validate; | |
| 208 | |
| 209 | |
| 210 /* select | |
| 211 * return first non-empty input for id | |
| 212 * inputs are taken from first to last | |
| 213 * | |
| 214 * "T" [ TYPE ] - data type of selection | |
| 215 * | |
| 216 * "first" [ DATA ] - first of N inputs | |
| 217 * | |
| 218 * "second" [ DATA ] - second of N inputs | |
| 219 * all other inputs are optional and must | |
| 220 * be compatible with type "T" | |
| 221 */ | |
| 222 function < type T > | |
| 223 T select #1.0 ( T first, T second, ... ) | |
| 224 = vdb:select; | |
| 225 | |
| 226 | |
| 227 /* transpose | |
| 228 * transpose a page of unformatted data | |
| 229 * | |
| 230 * for example - convert a simple page of values, | |
| 231 * where vertical scale is row id and horizontal element index: | |
| 232 * | |
| 233 * 1 2 3 | |
| 234 * +---+---+---+ | |
| 235 * 1 | a | b | c | | |
| 236 * +---+---+---+ | |
| 237 * 2 | d | e | f | | |
| 238 * +---+---+---+ | |
| 239 * 3 | g | h | i | | |
| 240 * +---+---+---+ | |
| 241 * 4 | j | k | l | | |
| 242 * +---+---+---+ | |
| 243 * | |
| 244 * into: | |
| 245 * | |
| 246 * 1 2 3 4 | |
| 247 * +---+---+---+---+ | |
| 248 * 1 | a | d | g | j | | |
| 249 * +---+---+---+---+ | |
| 250 * 2 | b | e | h | k | | |
| 251 * +---+---+---+---+ | |
| 252 * 3 | c | f | i | l | | |
| 253 * +---+---+---+---+ | |
| 254 * | |
| 255 * variable row-lengths are supported. The output blob is | |
| 256 * formatted, meaning that the result can no longer be addressed | |
| 257 * as a matrix, but the transposition has been applied to the data. | |
| 258 * | |
| 259 * "in" [ DATA ] - unformatted data to be transposed | |
| 260 */ | |
| 261 function | |
| 262 transposed_fmt transpose #1 ( transpose_set in ) | |
| 263 = vdb:transpose; | |
| 264 | |
| 265 | |
| 266 /* detranspose | |
| 267 * pardoning the awful name, apply a transposition on the result | |
| 268 * of "transpose" to produce the original blob. "transpose" | |
| 269 * itself cannot be reused because of its signature. | |
| 270 */ | |
| 271 function | |
| 272 transpose_set detranspose #1 ( transposed_fmt in ) | |
| 273 = vdb:detranspose; | |
| 274 | |
| 275 /* | |
| 276 * delta_average computes an average representation of the maximum | |
| 277 * length row and deltas every row against it | |
| 278 */ | |
| 279 function | |
| 280 delta_averaged_fmt delta_average #1 ( any in ) | |
| 281 = vdb:delta_average; | |
| 282 | |
| 283 function | |
| 284 any undelta_average #1 ( delta_averaged_fmt in ) | |
| 285 = vdb:undelta_average; | |
| 286 | |
| 287 /* merge | |
| 288 * merges all input blobs of any format/type into a single blob | |
| 289 */ | |
| 290 function | |
| 291 merged_fmt merge #1.0 ( any in, ... ) | |
| 292 = vdb:merge; | |
| 293 | |
| 294 | |
| 295 /* split | |
| 296 * extracts a single blob from a merged blob by index | |
| 297 * | |
| 298 * "idx" [ CONST ] - blob index | |
| 299 */ | |
| 300 | |
| 301 function | |
| 302 any split #1.0 < U32 idx > ( merged_fmt in ) | |
| 303 = vdb:split; | |
| 304 | |
| 305 | |
| 306 /* meta:read | |
| 307 * reads table metadata node as a row | |
| 308 * meta:value | |
| 309 * reads metadata node as single value, | |
| 310 * performing size conversion if necessary, | |
| 311 * e.g. I8 TO I64, I32 TO I16 | |
| 312 * | |
| 313 * "T" [ TYPE ] - cast data type of metadata node | |
| 314 * | |
| 315 * "node" [ CONST ] - path to metadata node | |
| 316 */ | |
| 317 function < type T > | |
| 318 T meta:read #1.0 < ascii node, * bool deterministic > (); | |
| 319 | |
| 320 function < type T > | |
| 321 T meta:value #1.0 < ascii node, * bool deterministic > (); | |
| 322 | |
| 323 | |
| 324 /* meta:write | |
| 325 * writes row data to table metadata node | |
| 326 * | |
| 327 * "T" [ TYPE ] - cast data type of metadata node | |
| 328 * | |
| 329 * "node" [ CONST ] - path to metadata node | |
| 330 * | |
| 331 * "in" [ DATA ] - source of row data | |
| 332 */ | |
| 333 function < type T > | |
| 334 T meta:write #1.0 < ascii node > ( T in ); | |
| 335 | |
| 336 | |
| 337 /* meta:attr:read | |
| 338 * reads table metadata attribute as a row | |
| 339 * | |
| 340 * "node" [ CONST ] - path to metadata node | |
| 341 * | |
| 342 * "attr" [ CONST ] - attribute name on node | |
| 343 */ | |
| 344 function | |
| 345 ascii meta:attr:read #1.0 < ascii node, ascii attr, * bool deterministic > (); | |
| 346 | |
| 347 | |
| 348 /* meta:attr:write | |
| 349 * write row data as table metadata attribute | |
| 350 * | |
| 351 * "node" [ CONST ] - path to metadata node | |
| 352 * | |
| 353 * "attr" [ CONST ] - attribute name on node | |
| 354 */ | |
| 355 function | |
| 356 ascii meta:attr:write #1.0 < ascii node, ascii attr > ( ascii in ); | |
| 357 | |
| 358 | |
| 359 /* parameter:read | |
| 360 * reads named cursor parameter text | |
| 361 * | |
| 362 * "name" [ CONST ] - parameter name | |
| 363 */ | |
| 364 function | |
| 365 text8_set parameter:read #1.0 < ascii name, * bool deterministic > (); | |
| 366 | |
| 367 | |
| 368 /* environment:read | |
| 369 * reads named environment variable text | |
| 370 * | |
| 371 * "name" [ CONST ] - environment variable name | |
| 372 */ | |
| 373 function | |
| 374 text8_set environment:read #1.0 < ascii name > (); | |
| 375 | |
| 376 // case sensitivity mode | |
| 377 const U8 CASE_SENSITIVE = 0; | |
| 378 const U8 CASE_INSENSITIVE_LOWER = 1; | |
| 379 const U8 CASE_INSENSITIVE_UPPER = 2; | |
| 380 | |
| 381 /* idx:text:project | |
| 382 * perform a reverse lookup in an index | |
| 383 * if key not found then use substitute | |
| 384 * | |
| 385 * "index_name" [ CONST ] - name of text index | |
| 386 * | |
| 387 * "substitute" [ DATA, OPTIONAL ] - source | |
| 388 * of values to substitute for values not | |
| 389 * found in the index. | |
| 390 * | |
| 391 * Version 1.1: look into "substitute" (if available) first and then to the index, | |
| 392 * added parameter "case_sensitivity" | |
| 393 */ | |
| 394 function text8_set idx:text:project #1.1 < ascii index_name, * U8 case_sensitivity > ( * text8_set substitute ); | |
| 395 | |
| 396 | |
| 397 /* idx:text:insert | |
| 398 * inserts "key" into index. returns key if insertion into index failed or | |
| 399 * when "case_sensitivity" is one of the CASE_INSENSITIVE_* modes | |
| 400 * | |
| 401 * Version 1.1: added parameter "case_sensitivity" | |
| 402 */ | |
| 403 function text8_set idx:text:insert #1.1 < ascii index_name, * U8 case_sensitivity > ( text8_set key ); | |
| 404 | |
| 405 | |
| 406 /* idx:text:lookup | |
| 407 * perform a lookup in an index | |
| 408 * returns the vdb:row_id_range associated with the "query_by_name" query | |
| 409 * | |
| 410 * Version 1.1: added parameter "case_sensitivity" | |
| 411 */ | |
| 412 function vdb:row_id_range idx:text:lookup #1.1 < ascii index_name , ascii query_by_name, * U8 case_sensitivity > (); |
