diff libs/sratoolkit.2.8.0-centos_linux64/schema/vdb/vdb.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/vdb/vdb.vschema	Mon Nov 27 11:21:07 2017 -0500
@@ -0,0 +1,859 @@
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/*==========================================================================
+ * VDB external functions, formats and types
+ */
+version 1;
+
+// built-in functions should be known to all
+include 'vdb/built-in.vschema';
+
+
+/*--------------------------------------------------------------------------
+ * types
+ */
+
+/* text:token
+ *  a 3-element U16 vector describing a token recognized within a text string
+ *  ( this is the type of the "tok" input of "extract_token" )
+ *
+ * COMPONENTS:
+ *  0 - token id
+ *  1 - token starting coordinate
+ *  2 - token length
+ */
+typedef U16 text:token [ 3 ];
+
+
+/*--------------------------------------------------------------------------
+ * typesets
+ */
+// NOTE: integer_set is not declared in this file; presumably it comes from
+// the included 'vdb/built-in.vschema' - confirm there.
+// pack_set: types accepted by "pack" and returned by "unpack"
+typeset pack_set { B8, B16, B32, B64, integer_set };
+// izip_set: types accepted by "izip" and returned by "iunzip"
+typeset izip_set { integer_set };
+// fzip_set: types accepted by "fzip" and returned by "funzip"
+typeset fzip_set { F32 };
+
+
+/*--------------------------------------------------------------------------
+ * formats
+ */
+// izip_fmt: result of "izip", input of "iunzip"
+fmtdef izip_fmt;
+// fzip_fmt: result of "fzip", input of "funzip"
+fmtdef fzip_fmt;
+// rle_fmt: result of "rlencode", input of "rldecode"
+fmtdef rle_fmt;
+// zlib_fmt: result of "zip", input of "unzip"
+fmtdef zlib_fmt;
+// bzip2_fmt: result of "bzip", input of "bunzip"
+fmtdef bzip2_fmt;
+
+
+/*--------------------------------------------------------------------------
+ * functions
+ */
+
+/* echo
+ *  returns single or repeated constant value
+ *
+ *  "T" [ TYPE ] - type of constant data to return
+ *
+ *  "val" [ CONST ] - a data constant
+ *
+ *  "row_len" [ DATA, OPTIONAL ] - if omitted, "val" will be
+ *  issued once and the resultant row-length will be the length
+ *  of "val". otherwise, "val" will be repeated and/or truncated
+ *  as necessary to produce a row-length equal to that of input.
+ *
+ * USAGE:
+ *  to echo a single constant value
+ *    U16 len = row_len ( col ) | < U16 > echo < 0 > ();
+ *
+ *  to create a row of repeated values
+ *    ascii allN = < ascii > echo < 'N' > ( col );
+ */
+// signature map: "T" is the template type, "val" the constant in < >,
+// and "row_len" the data input, optional because it follows the "*" marker
+function < type T >
+T echo #1.0 < T val > ( * any row_len )
+    = vdb:echo;
+
+
+/* exists
+ *  returns constant or dynamic value if predicate input exists
+ *
+ *  "T" [ TYPE ] - type of data to return
+ *
+ *  "cval" [ CONST, OPTIONAL ] - a data constant. when present,
+ *  the function will behave like "echo" ( see USAGE below )
+ *
+ *  "predicate" [ DATA ] - an input whose existence determines
+ *  whether the function will operate or not.
+ *
+ *  "dval" [ DATA, OPTIONAL ] - data value, either passed through
+ *  or used to determine a repeat count of "cval" ( see USAGE below )
+ *
+ * USAGE:
+ *  when "cval" is omitted, "dval" must be present and will be
+ *  passed through depending upon the existence of "predicate"
+ *    U8 count = < U8 > exists ( col, count2 );
+ *
+ *  when "cval" is present, "dval" may be omitted, and "cval" will
+ *  be passed through just like echo depending upon "predicate"
+ *    U8 count = < U8 > exists < 2 > ( col2 ) | < U8 > echo < 1 > ();
+ *
+ *  when "cval" and "dval" are both present, the behavior is
+ *  like echo, but gated with "predicate"
+ *    ascii poly = < ascii > exists < 'a' > ( col, repeat );
+ */
+function < type T >
+T exists #1.0 < * T cval > ( any predicate, * T dval )
+    = vdb:exists;
+
+
+/* map
+ *  translate input elements
+ *  behaves much like the Unix "tr" command
+ *  except that charsets are not [currently] supported
+ *
+ *  "A" [ TYPE ] - input data type, e.g. "ascii"
+ *
+ *  "B" [ TYPE ] - output data type, e.g. "ascii" or "U8"
+ *
+ *  "from" [ CONST ] - set of key values.
+ *
+ *  "to" [ CONST ] - set of mapped values,
+ *  where length ( from ) == length ( to )
+ *
+ *  "in" [ DATA ] - input data to be matched against keys
+ *  in "from". also serves as source data when "src" is omitted
+ *
+ *  "src" [ DATA, OPTIONAL ] - source data to be edited by
+ *  substituting "to" values when corresponding "in" value
+ *  matches key in "from". if omitted, "in" is used.
+ *
+ * USAGE:
+ *  to upper case letters from a given alphabet
+ *    ascii upper = < ascii, ascii > map < 'acgtn', 'ACGTN' > ( in );
+ *
+ *  to translate from ascii to binary
+ *    U8 bin = < ascii, U8 > map < 'ACGTN', [ 0, 1, 2, 3, 0 ] > ( in );
+ *
+ *  to alter certain values of a column based upon values in another
+ *    U8 n_encoded = < ascii, U8 > map < 'N', 0 > ( read, quality );
+ *
+ * CAVEATS:
+ *  the full canonical mode of operation uses separate inputs
+ *  for key matching and output source.
+ *
+ *  when a single input is specified:
+ *   - sizeof ( A ) must equal sizeof ( B )
+ *   - A must be a proper subset of B -OR-
+ *   - "from" keys must match every possible "in" value ( total substitution )
+ */
+// signature map: "A" / "B" are the input / output types, "from" and "to"
+// are the constants in < >, "in" is required and "src" is optional ( after "*" )
+function < type A, type B >
+B map #1.0 < A from, B to > ( A in, * B src )
+    = vdb:map;
+
+
+/* clip
+ *  limit data values to given bounds
+ * vclip
+ *  same, for fixed-dimension vectors of type "T [ dim ]"
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *
+ *  "dim" [ CONST >= 1 ] - fixed dimension on
+ *  input and output vectors ( "vclip" only )
+ *
+ *  "lower" [ CONST ] - lower bound, inclusive
+ *
+ *  "upper" [ CONST ] - upper bound, inclusive
+ *
+ *  "in" [ DATA ] - data to be clipped
+ *
+ *  both declarations are bound to the same name, vdb:clip
+ */
+function < type T >
+T clip #1.0 < T lower, T upper > ( T in )
+    = vdb:clip;
+
+function < type T, U32 dim >
+T [ dim ] vclip #1.0 < T lower, T upper > ( T [ dim ] in )
+    = vdb:clip;
+
+
+/* ceil
+ *  round up to the nearest integer
+ *
+ *  "in" [ DATA ] - data to be processed; declared as float_set
+ *
+ *  the declared return type is numeric_set
+ */
+function
+numeric_set ceil #1.0 ( float_set in )
+    = vdb:ceil;
+
+/* floor
+ *  round down to the nearest integer
+ *
+ *  "in" [ DATA ] - data to be processed; declared as float_set
+ *
+ *  the declared return type is numeric_set
+ */
+function
+numeric_set floor #1.0 ( float_set in )
+    = vdb:floor;
+
+/* round
+ *  round to nearest integer away from zero
+ *
+ *  NOTE: the declaration has no template type "T"; the input is
+ *  typed float_set and the result numeric_set. earlier documentation
+ *  described the data type as "T" [ TYPE = { F32, F64 } ].
+ *
+ *  "in" [ DATA ] - data to be processed
+ */
+function
+numeric_set round #1.0 ( float_set in )
+    = vdb:round;
+
+/* trunc
+ *  round to the nearest integer not larger in absolute value
+ *
+ *  NOTE: the declaration has no template type "T"; the input is
+ *  typed float_set and the result numeric_set. earlier documentation
+ *  described the data type as "T" [ TYPE = { F32, F64 } ].
+ *
+ *  "in" [ DATA ] - data to be processed
+ */
+function
+numeric_set trunc #1.0 ( float_set in )
+    = vdb:trunc;
+
+
+/* min
+ *  return the minimum value of each element
+ * max
+ *  return the maximum value of each element
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *
+ *  "a" [ DATA ] - first operand
+ *
+ *  "b" [ DATA ] - second operand
+ *
+ * SYNOPSIS:
+ *  compares two inputs element by element
+ *  returns min or max element of each
+ *
+ * USAGE:
+ *  intersections
+ *    U32 left = < U32 > max ( left_a, left_b );
+ *    U32 right = < U32 > min ( right_a, right_b );
+ */
+// element-wise minimum of "a" and "b"; operands and result share type "T"
+function < type T >
+T min #1.0 ( T a, T b )
+    = vdb:min;
+
+// element-wise maximum of "a" and "b"; operands and result share type "T"
+function < type T >
+T max #1.0 ( T a, T b )
+    = vdb:max;
+
+
+/* sum
+ *  return the sum of inputs
+ * diff
+ *  return the difference of inputs
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *  must be member of numeric_set
+ *
+ *  "k" [ CONST, DEFAULT 0 ] - optional constant
+ *  to be added or subtracted
+ *
+ *  "a" [ DATA ] - left-most operand
+ *
+ *  "..." [ DATA ] - "sum" only: any further operands
+ *
+ *  "b" [ DATA, OPTIONAL ] - "diff" only: optional subtractand
+ *
+ * SYNOPSIS:
+ *  incorporates "k" into expression for every row
+ *  returns sum or difference of inputs for all rows
+ *
+ * USAGE:
+ *  length of half-closed interval
+ *    U32 len = < U32 > diff ( stop, start );
+ *  convert one-based coordinate to zero based
+ *    U32 zero_based = < U32 > diff < 1 > ( one_based );
+ */
+function < type T >
+T sum #1.0 < * T k > ( T a, ... )
+    = vdb:sum;
+
+function < type T >
+T diff #1.0 < * T k > ( T a * T b )
+    = vdb:diff;
+
+/* deriv
+ *  return the 1st derivative of an input row
+ * integral
+ *  return the "integral" of an input row
+ *   integral   -> starts with 1st value
+ *   integral_0 -> starts with 0
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *  must be signed integer of any size
+ *
+ *  "in" [ DATA ] - input to be modified
+ *
+ * SYNOPSIS:
+ *  derivative function is ( in [ i ] - in [ i - 1 ] )
+ *    for i = 0 .. length ( in ) - 1,
+ *    assuming in [ 0 - 1 ] = 0 ( i.e. leaves in [ 0 ] intact ).
+ *
+ *  integral function is sum ( in [ 0 ] .. in [ i ] )
+ *    for i = 0 .. length ( in ) - 1.
+ *
+ *  integral_0 function is sum ( in [ 0 ] .. in [ i - 1 ] )
+ *    for i = 1 .. length ( in ) - 1,
+ *    setting output [ 0 ] = 0.
+ *
+ * USAGE:
+ *  "deriv" and "integral" are inverse functions of each other.
+ *  the oddity is that "deriv" creates an output series
+ *  with the same length as the input series, causing the
+ *  first element of input to be copied to first element
+ *  of output.
+ *
+ *  "integral_0" always creates an output with the first
+ *  element being 0. the oddity here is again that the output
+ *  series is the same length as the input, dropping the effect
+ *  from the last element of input. its utility is primarily in
+ *  operations such as creating absolute offsets from a series of
+ *  lengths.
+ *
+ * EXAMPLES:
+ *  given an input series ( 15, 17, 12, 315 ):
+ *  "deriv" produces ( 15, 2, -5, 303 ) [ NOTICE first element ]
+ *  integrating ( 15, 2, -5, 303 ):
+ *  "integral" produces ( 15, 17, 12, 315 ), while
+ *  "integral_0" produces ( 0, 15, 17, 12 ).
+ *
+ *  generating starting offsets from a series of lengths ( 15, 17, 12, 315 ):
+ *  "integral_0" produces ( 0, 15, 32, 44 ) which can be used
+ *  to accompany the input series for starts and lengths.
+ */
+
+// each of the three functions takes a single input "in" and returns the same type "T"
+function < type T >
+T deriv #1.0 ( T in )
+    = vdb:deriv;
+function < type T >
+T integral #1.0 ( T in )
+    = vdb:integral;
+// NOTE: "integral_0" is declared at version #1.1; its two siblings are at #1.0
+function < type T >
+T integral_0 #1.1 ( T in )
+    = vdb:integral_0;
+
+/* delta
+ *  return the 1st derivative of a whole blob
+ * undelta
+ *  return the integral of a whole blob
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *  must be signed integer of any size
+ *
+ *  "in" [ DATA ] - input to be modified
+ *
+ * SYNOPSIS:
+ *  similar to deriv/integral but operates on full blob
+ */
+
+// "delta" and "undelta" each take one input "in" and return the same type "T"
+function < type T > T delta #1.0  ( T in ) = vdb:delta;
+function < type T > T undelta #1.0  ( T in ) = vdb:undelta;
+
+
+/* outlier_encode
+ *  removes a given outlier from a data series
+ * outlier_decode
+ *  removes the effect of outlier_encode
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *  must be an integer of any size
+ *
+ *  "outlier" [ CONST ] - the value treated as the outlier;
+ *  taken by both the encode and the decode declaration
+ *
+ *  "in" [ DATA ] - input to be modified
+ *
+ * SYNOPSIS:
+ *  The encode replaces every element that is equal to the
+ *  outlier with (the value of the previous element) * 2 + 1
+ *  and the remaining elements are replaced with their value * 2.
+ */
+
+function < type T > T outlier_encode #1.0 < T outlier > ( T in ) = vdb:outlier_encode;
+function < type T > T outlier_decode #1.0 < T outlier > ( T in ) = vdb:outlier_decode;
+
+/* add_row_id
+ *  return the sum of an input and its row-id
+ * sub_row_id
+ *  return the difference of an input and its row-id
+ *
+ *  "T" [ TYPE ] - input and output data type
+ *  must be member of numeric_set
+ *
+ *  "in" [ DATA ] - input to be modified
+ *
+ * SYNOPSIS:
+ *  adjusts for relationship between input and row-id
+ *  used primarily to reduce serial ids to constants
+ */
+// "in" plus its row-id; input and result share type "T"
+function < type T >
+T add_row_id #1.0 ( T in )
+    = vdb:add_row_id;
+
+// "in" minus its row-id; input and result share type "T"
+function < type T >
+T sub_row_id #1.0 ( T in )
+    = vdb:sub_row_id;
+
+
+/* cut
+ *  extract one or more elements from input vector
+ *  to form an output vector of equal or less dimension
+ *
+ *  "T" [ TYPE ] - base element type to be processed
+ *
+ *  "idx" [ CONST ] - mandatory initial element index
+ *  count of parameters must equal dimension of output type
+ *
+ *  "in" [ DATA ] - source of input vectors where the vector
+ *  element type is known, but any dimension is accepted.
+ *
+ * USAGE:
+ *  extracting a single channel from a 4 channel vector
+ *    F32 [ 4 ] vect ...
+ *    F32 chan = < F32 > cut < 0 > ( vect );
+ *
+ *  extracting multiple channels
+ *    U8 [ 16 ] in ...
+ *    U8 [ 3 ] out = < U8 > cut < 5, 1, 3 > ( in );
+ *
+ *  reversing channels
+ *    I16 [ 2 ] norm ...
+ *    I16 [ 2 ] rev = < I16 > cut < 1, 0 > ( norm );
+ */
+// signature map: "idx, ..." is one mandatory U32 index constant followed by
+// any number of further ones; input and output are vectors "T [ * ]"
+function < type T >
+T [ * ] cut #1.0 < U32 idx, ... > ( T [ * ] in )
+    = vdb:cut;
+
+
+/* paste
+ *  combine all elements of all inputs into a single vector
+ *  output dimension is sum of all input dimensions after type normalization
+ *
+ *  "T" [ TYPE ] - base element type to be processed
+ *
+ *  "in" [ DATA ] - first of an arbitrary number of columns
+ *  the total of input elements produces an output of "T [ total ]"
+ */
+// signature map: "in, ..." is one mandatory vector input followed by any
+// number of further inputs; the result is a vector "T [ * ]"
+function < type T >
+T [ * ] paste #1.0 ( T [ * ] in, ... )
+    = vdb:paste;
+
+
+/* vec_sum
+ *  compute the sum of all the elements of the row
+ *
+ *  "T" [ TYPE ] - base element type to be processed
+ *
+ *  "in" [ DATA ] - the input
+ */
+// takes a vector "T [ * ]" and returns a scalar "T"
+function < type T >
+T vec_sum #1.0 ( T [ * ] in )
+    = vdb:vec_sum;
+
+/* fixed_vec_sum
+ *  compute the sum of all the elements of the input vector
+ *
+ *  "T" [ TYPE ] - base element type to be processed
+ *
+ *  "in" [ DATA ] - the input
+ */
+function < type T >
+T fixed_vec_sum #1.0 ( T [ * ] in )
+    = vdb:fixed_vec_sum;
+
+
+/* checksum
+ *  compute a checksum ( hash ) of all of the input bytes
+ *  to be used in a trigger production
+ *
+ *  "node" [ CONST ] - path to metadata node where checksum
+ *   will be stored.
+ *
+ *  "algorithm" [ CONST ] - type of checksum to perform:
+ *    'crc-32'  # match against POSIX cksum
+ *    'md5'     #  "   "  md5sum
+ *    'sha-1'   #  "   "  sha1sum
+ *    'sha-256' #  "   "  sha256sum
+ *    'sha-384' #  "   "  sha384sum
+ *    'sha-512' #  "   "  sha512sum
+ *
+ *  "in" [ DATA ] - the octet-stream to be checksummed
+ */
+// signature map: "node" and "algorithm" are ascii constants, "in" is B8 data;
+// the declared result type is bool
+function
+bool checksum #1.0 < ascii node, ascii algorithm > ( B8 in )
+    = vdb:checksum;
+
+/* md5sum
+ *  compute an md5 checksum of all of the input bytes
+ *
+ *  "node" [ CONST ] - forwarded to "checksum" as its "node" constant
+ *  ( path to metadata node where checksum will be stored )
+ *
+ *  "in" [ DATA ] - the octet-stream to be checksummed
+ *
+ *  schema-level wrapper: forwards to "checksum" with the
+ *  "algorithm" constant fixed to 'md5'
+ */
+function
+bool md5sum #1.0 < ascii node > ( B8 in )
+{
+    return checksum < node, 'md5' > ( in );
+}
+
+
+/* pack
+ *  packs words into bit-aligned units
+ *  words are expected in architecture native byte-order
+ *  and returned in "big-bit-endian" order
+ *
+ *  the packed size is determined by the dimension of the
+ *  left-hand assignment value.
+ *
+ *  "in" [ DATA ] - B8, B16, B32 or B64 data, or any member
+ *  of integer_set ( i.e. any member of pack_set )
+ *
+ *  the declared result type is B1 [ * ]
+ */
+function
+B1 [ * ] pack #1.0 ( pack_set in )
+    = vdb:pack;
+
+
+/* unpack
+ *  unpacks bit-aligned units into words
+ *  input is expected in "big-bit-endian" order
+ *  and returned in architecture native byte-order
+ *
+ *  the unpacked type is determined from the left-hand
+ *  assignment value, and must be a member of pack_set.
+ *
+ *  "in" [ DATA ] - B[1]..B[64], declared as B1 [ * ]
+ */
+function
+pack_set unpack #1.0 ( B1 [ * ] in )
+    = vdb:unpack;
+
+
+/* izip
+ * iunzip
+ *  integer compression
+ *
+ *  "in" [ DATA ] - for "izip", data whose type is in izip_set;
+ *  for "iunzip", data of format izip_fmt
+ *
+ *  "izip" returns izip_fmt and "iunzip" returns izip_set.
+ *  both are declared at version #2.1
+ */
+function
+izip_fmt izip #2.1 ( izip_set in )
+    = vdb:izip;
+
+function
+izip_set iunzip #2.1 ( izip_fmt in )
+    = vdb:iunzip;
+
+// physical encoding of type "T" that pairs "izip" with "iunzip"
+physical < type T >
+T izip_encoding #1.0
+{
+    // decode: run "iunzip" on "@" and cast its izip_set result to "T"
+    decode { return ( T ) iunzip ( @ ); }
+    // encode: run "izip" on "@"
+    encode { return izip ( @ ); }
+};
+
+
+/* fzip
+ * funzip
+ *  floating point compression
+ *
+ *  "mantissa" [ CONST ] - the number of mantissa bits
+ *  to preserve ( "fzip" only )
+ *
+ *  "in" [ DATA ] - for "fzip", data whose type is in fzip_set;
+ *  for "funzip", data of format fzip_fmt
+ *
+ *  "fzip" returns fzip_fmt and "funzip" returns fzip_set
+ */
+function
+fzip_fmt fzip #1.0 < U32 mantissa > ( fzip_set in )
+    = vdb:fzip;
+
+function
+fzip_set funzip #1.0 ( fzip_fmt in )
+    = vdb:funzip;
+
+// physical encoding of type "T" that pairs "fzip" with "funzip";
+// "mantissa" is handed through to "fzip" on encode
+physical < type T >
+T fzip_encoding #1.0 < U32 mantissa >
+{
+    // decode: run "funzip" on "@" ( no explicit cast, unlike izip_encoding )
+    decode { return funzip ( @ ); }
+    // encode: run "fzip" on "@" with the given mantissa constant
+    encode { return fzip < mantissa > ( @ ); }
+};
+
+
+/* rlencode
+ * rldecode
+ *  run-length encoding
+ *
+ *  "in" [ DATA ] - for "rlencode", data of any type;
+ *  for "rldecode", data of format rle_fmt
+ *
+ *  "rlencode" returns rle_fmt and "rldecode" is declared to return "any"
+ */
+function
+rle_fmt rlencode #1.0 ( any in )
+    = vdb:rlencode;
+
+function
+any rldecode #1.0 ( rle_fmt in )
+    = vdb:rldecode;
+
+
+/* zip
+ * unzip
+ *  run things through zlib
+ *
+ *  "strategy" [ CONST, OPTIONAL ] - set the compression strategy
+ *
+ *  "level" [ CONST, OPTIONAL ] - set the amount of compression
+ *  from 0..9 ( none to best compression ), or use -1 for zlib
+ *  default behavior.
+ */
+
+// zlib strategy
+// values for the "strategy" constant of "zip" / "zip_encoding";
+// names and numeric values follow the constants of the same name in zlib.h
+const I32 Z_FILTERED            =  1;
+const I32 Z_HUFFMAN_ONLY        =  2;
+const I32 Z_RLE                 =  3;
+const I32 Z_DEFAULT_STRATEGY    =  0;
+
+// zlib level
+// values for the "level" constant of "zip" / "zip_encoding";
+// names and numeric values follow the constants of the same name in zlib.h
+const I32 Z_NO_COMPRESSION      =  0;
+const I32 Z_BEST_SPEED          =  1;
+const I32 Z_BEST_COMPRESSION    =  9;
+const I32 Z_DEFAULT_COMPRESSION = -1;
+
+// "strategy" and "level" both follow the "*" marker and are therefore optional;
+// input may be of any type, result is zlib_fmt
+function
+zlib_fmt zip #1.0 < * I32 strategy, I32 level > ( any in )
+    = vdb:zip;
+
+// takes zlib_fmt data; the declared result type is "any"
+function
+any unzip #1.0 ( zlib_fmt in )
+    = vdb:unzip;
+
+// physical encoding of type "T" that pairs "zip" with "unzip";
+// the optional "strategy" and "level" constants are handed through to "zip"
+physical < type T >
+T zip_encoding #1.0 < * I32 strategy, I32 level >
+{
+    // decode: run "unzip" on "@"
+    decode { return unzip ( @ ); }
+    // encode: run "zip" on "@" with the given constants
+    encode { return zip < strategy, level > ( @ ); }
+};
+
+// physical encoding for bool: values are clipped to 0..1, packed into
+// single bits ( B1 ) and zlib-compressed; decoding reverses the steps
+physical
+bool bool_encoding #1.0
+{
+    decode
+    {
+        // decompress into a B1 value, then unpack and cast to bool
+        B1 bit = unzip ( @ );
+        return ( bool ) unpack ( bit );
+    }
+
+    encode
+    {
+        // limit every value to the range 0..1 as U8
+        U8 lim = < U8 > clip < 0, 1 > ( @ );
+        // pack into B1
+        B1 bit = pack ( lim );
+        // compress with strategy Z_RLE and level Z_BEST_SPEED
+        return zip < Z_RLE, Z_BEST_SPEED > ( bit );
+    }
+}
+
+// physical encoding of type "T": "delta" followed by "izip" on encode,
+// "iunzip" followed by "undelta" on decode
+physical < type T >
+T delta_izip_encoding #1.0
+{
+    decode
+    {
+        // decompress into a value typed "T", then undo the delta step
+        T dlt = iunzip ( @ );
+        return < T > undelta ( dlt );
+    }
+
+    encode
+    {
+        // apply the delta step first, then compress the result
+        T dlt = <T> delta ( @ );
+        return izip ( dlt );
+    }
+}
+// physical encoding of type "T": "delta" followed by "zip" on encode,
+// "unzip" followed by "undelta" on decode
+physical < type T >
+T delta_zip_encoding #1.0
+{
+    decode
+    {
+        // decompress into a value typed "T", then undo the delta step
+        T dlt = unzip ( @ );
+        return < T > undelta ( dlt );
+    }
+
+    encode
+    {
+        // apply the delta step first, then compress with Z_RLE / Z_BEST_SPEED
+        T dlt = <T> delta ( @ );
+        return zip < Z_RLE, Z_BEST_SPEED > ( dlt );
+    }
+}
+// physical encoding: "delta_average" followed by "zip" on encode,
+// "unzip" followed by "undelta_average" on decode.
+// NOTE(review): delta_averaged_fmt, delta_average and undelta_average are
+// not declared in this file; presumably they come from the included
+// 'vdb/built-in.vschema' - confirm. the template type "T" is declared
+// but not referenced inside the body.
+physical < type T >
+T delta_average_zip_encoding #1.0
+{
+    decode
+    {
+        // decompress into the intermediate format, then undo the averaging step
+        delta_averaged_fmt t = unzip ( @ );
+        return undelta_average ( t );
+    }
+
+    encode
+    {
+        // convert to the intermediate format, then compress with Z_RLE / Z_BEST_SPEED
+        delta_averaged_fmt t = delta_average ( @ );
+        return zip < Z_RLE, Z_BEST_SPEED > ( t );
+    }
+}
+
+/* bzip
+ * bunzip
+ *  run things through bzip2
+ *
+ *  "blockSize100k" [ CONST, OPTIONAL ] - set the compression workspace size
+ *  from 1..9 inclusive, produces a workspace of blockSize100k * 100000 bytes
+ *  default is 5
+ *
+ *  "workFactor" [ CONST, OPTIONAL ] - set compression level
+ *  from 0..250 inclusive, where 0 means bzip2 default, currently 30
+ *
+ *  "in" [ DATA ] - for "bzip", data of any type;
+ *  for "bunzip", data of format bzip2_fmt
+ */
+
+function
+bzip2_fmt bzip #1.0 < * U32 blockSize100k, U32 workFactor > ( any in )
+    = vdb:bzip;
+
+function
+any bunzip #1.0 ( bzip2_fmt in )
+    = vdb:bunzip;
+
+// physical encoding of type "T" that pairs "bzip" with "bunzip";
+// the optional "blockSize100k" and "workFactor" constants are handed through to "bzip"
+physical < type T >
+T bzip_encoding #1.0 < * U32 blockSize100k, U32 workFactor >
+{
+    // decode: run "bunzip" on "@"
+    decode { return bunzip ( @ ); }
+    // encode: run "bzip" on "@" with the given constants
+    encode { return bzip < blockSize100k, workFactor > ( @ ); }
+};
+
+
+/* simple_sub_select
+ *  project a column from another table within database
+ *
+ *  "T" [ TYPE ] - data type of column
+ *   must be compatible with source column
+ *
+ *  "tbl" [ CONST ] - name of table within parent
+ *
+ *  "col" [ CONST ] - column spec, i.e. simple name or
+ *   typed name spec
+ *
+ *  "row" [ DATA ] - row to select
+ *
+ *  "idx" [ DATA, OPTIONAL ] - one-based index of the element to pick;
+ *   defaults to all elements if not given
+ *
+ *  NOTE: the bound name is vdb:simple_sub_select_1, which differs
+ *  from the declared function name
+ */
+function < type T >
+T simple_sub_select #1.0 < ascii tbl, ascii col > ( I64 row *  I32 idx )
+    = vdb:simple_sub_select_1;
+
+
+/* extract_token
+ *  extract a textual token from an input string
+ *
+ *  "idx" [ CONST ] - a zero-based index of the token
+ *  if value < row_len ( tok ), then the substring of
+ *  indexed token is returned. otherwise, returns empty.
+ *
+ *  "str" [ DATA ] - input text. type must be compatible with
+ *  output production, meaning types must be same, or ascii input
+ *  with utf8 output.
+ *
+ *  "tok" [ DATA ] - results of tokenizing "str"
+ */
+// signature map: "idx" is a U32 constant, "str" is text_set data and
+// "tok" is text:token data; the declared result type is text_set
+function
+text_set extract_token #1.0 < U32 idx > ( text_set str, text:token tok )
+    = vdb:extract_token;
+
+
+/* strtonum
+ *  convert string to number
+ *
+ *  "radix" [ CONST, DEFAULT 10 ]
+ *   if not specified, or if given as 0, the default will be 10
+ *   unless the string begins with "0x" or "0X", in which case radix will be 16
+ *   octal is NOT inferred ( i.e. leading "0" does not imply octal )
+ *
+ *  "str" [ DATA ] - text to be converted
+ */
+// signature map: "radix" is an optional U32 constant ( after "*" ),
+// "str" is text_set data; the declared result type is numeric_set
+function
+numeric_set strtonum #1.0 < * U32 radix > ( text_set str )
+    = vdb:strtonum;
+
+
+/* sprintf
+ *  formatted print to a string
+ *
+ *  formatting rules differ somewhat from C sprintf:
+ *
+ *        '%' [ <flags> ] [ <field-width> ] [ '.' <precision> ] [ ':' <index> ] <storage-class>
+ *
+ *  where:
+ *
+ *    flags
+ *        = ' '           : prepend space to a numeral if it does not have a sign
+ *        | '+'           : always produce a sign on numeric conversion
+ *        | '-'           : left-align parameter within field
+ *        | '0'           : left-pad with zeroes rather than spaces
+ *        | '#'           : use "alternate" representation
+ *        | ','           : produce comma-separated triples
+ *        ;
+ *
+ *    field-width
+ *        = DECIMAL       : a base-10 numeral
+ *        | '*'           : take field width from args
+ *        ;
+ *
+ *    precision
+ *        = DECIMAL       : a base-10 numeral
+ *        | '*'           : take precision from args
+ *        |               : an empty precision means 0
+ *        ;
+ *
+ *    index
+ *        = idx           : a single, zero-based vector element
+ *        | idx '-' idx   : a fully-closed, zero-based interval
+ *        | idx '/' len   : a start index plus length
+ *        ;
+ *
+ *    idx
+ *        = DECIMAL       : an unsigned base-10 numeral
+ *        | '*'           : take index from args
+ *        | '$'           : last element in cell
+ *        |               : an empty index means 0 or $
+ *        ;
+ *
+ *    len
+ *        = DECIMAL       : a base-10 numeral
+ *        | '*'           : take length from args
+ *        | '$'           : row-length of ( cell )
+ *        |               : an empty length means $
+ *        ;
+ *
+ *
+ *    storage-class
+ *        = 'd' | 'i'     : general decimal integer
+ *        | 'u'           : decimal unsigned integer
+ *        | 'x'           : lower-case hex
+ *        | 'X'           : upper-case hex
+ *        | 'o'           : octal
+ *        | 'b'           : binary
+ *        | 'f'           : floating point
+ *        | 'e'           : scientific notation
+ *        | 'g'           : general floating point
+ *        | 'c' | 's'     : character
+ *        ;
+ *
+ *
+ *  "fmt" [ CONST ] - constant format string, adhering to
+ *  the description above
+ *
+ *  "p1" [ DATA ] - first param
+ *  this and any subsequent params must correspond to format
+ *  in type/position/number.
+ */
+// signature map: "fmt" is an ascii constant; "p1, ..." is one mandatory
+// input of any type followed by any number of further inputs;
+// the declared result type is text_set
+function
+text_set sprintf #1.0 < ascii fmt > ( any p1, ... )
+    = vdb:sprintf;