Mercurial > repos > charles_s_test > seqsero2
diff libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/wgs-contig.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/wgs-contig.vschema Mon Nov 27 11:21:07 2017 -0500 @@ -0,0 +1,695 @@ +/*=========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +*/ + +/*========================================================================== + * WGS Contig + */ +version 1; + +include 'vdb/vdb.vschema'; +include 'ncbi/ncbi.vschema'; +include 'ncbi/seq.vschema'; +include 'ncbi/spotname.vschema'; +include 'ncbi/stats.vschema'; + + +/*-------------------------------------------------------------------------- + * types + * constants + */ + +/* component_props + * a signed value describing contig or gap components of scaffolds, or + * gaps in contig sequences. 
+ * Non-negative values refer to contigs ( NCBI:WGS:component:WGS is 0 ) and negative values describe gaps + */ +typedef I16 NCBI:WGS:component_props; + +/* component description + * the sequencing status of the component + * + * These typically correspond to keywords in the INSDC submission. + * Current acceptable values are: + * A Active Finishing ( component:ActiveFinishing = 1 ) + * D Draft HTG ( component:DraftHTG = 2; often phase1 and phase2 are called Draft, + * whether or not they have the draft keyword). + * F Finished HTG (phase3) ( component:FinishedHTG = 3 ) + * G Whole Genome Finishing ( component:WholeGenomeFinishing = 4 ) + * O Other sequence (typically means no HTG keyword) ( component:OtherSequence = 5 ) + * P Pre Draft ( component:PreDraft = 6 ) + * W WGS contig ( component:WGS = 0 ) + */ +const NCBI:WGS:component_props NCBI:WGS:component:WGS = 0; +const NCBI:WGS:component_props NCBI:WGS:component:ActiveFinishing = 1; +const NCBI:WGS:component_props NCBI:WGS:component:DraftHTG = 2; +const NCBI:WGS:component_props NCBI:WGS:component:FinishedHTG = 3; +const NCBI:WGS:component_props NCBI:WGS:component:WholeGenomeFinishing = 4; +const NCBI:WGS:component_props NCBI:WGS:component:OtherSequence = 5; +const NCBI:WGS:component_props NCBI:WGS:component:PreDraft = 6; + +/* strand + * specifies the orientation of the component relative to scaffold + * values given allow strand to be determined as "prop / 16" ( every status constant above is less than 16 ) + * yielding: + * 0 unknown orientation + * 1 plus strand ( strand:plus = 16 ) + * 2 minus strand ( strand:minus = 32 ) + */ +const NCBI:WGS:component_props NCBI:WGS:strand:plus = 16; +const NCBI:WGS:component_props NCBI:WGS:strand:minus = 32; + + +/* gap description + * These typically correspond to keywords in the INSDC submission. + * Current acceptable values are: + * N gap with specified size ( gap:known = -1 ) + * U gap of unknown size, defaulting to 100 bases ( gap:unknown = -2 ). 
+ */ +const NCBI:WGS:component_props NCBI:WGS:gap:known = -1; +const NCBI:WGS:component_props NCBI:WGS:gap:unknown = -2; + +/* gap type - each value below is a negative multiple of 4 ( -4 .. -28 ) + * scaffold a gap between two sequence contigs in a scaffold + * contig an unspanned gap between two sequence contigs + * centromere a gap inserted for the centromere + * short_arm a gap inserted at the start of an acrocentric chromosome + * heterochromatin a gap inserted for an especially large region of heterochromatic sequence + * telomere a gap inserted for the telomere + * repeat an unresolvable repeat + */ +const NCBI:WGS:component_props NCBI:WGS:gap:scaffold = -4; +const NCBI:WGS:component_props NCBI:WGS:gap:contig = -8; +const NCBI:WGS:component_props NCBI:WGS:gap:centromere = -12; +const NCBI:WGS:component_props NCBI:WGS:gap:short_arm = -16; +const NCBI:WGS:component_props NCBI:WGS:gap:heterochromatin = -20; +const NCBI:WGS:component_props NCBI:WGS:gap:telomere = -24; +const NCBI:WGS:component_props NCBI:WGS:gap:repeat = -28; + +/* gap_linkage - type of the linkage and linkage-evidence constants declared below + */ +typedef I32 NCBI:WGS:gap_linkage; + +/* gap linkage and linkage evidence + * There can be multiple linkage evidences or linkage with no evidence ( each constant below is a distinct power of two ) + * + * paired-ends paired sequences from the two ends of a DNA fragment + * align_genus alignment to a reference genome within the same genus + * align_xgenus alignment to a reference genome within another genus + * align_trnscpt alignment to a transcript from the same species + * within_clone sequence on both sides of the gap is derived from + * the same clone, but the gap is not spanned by paired-ends + * clone_contig linkage is provided by a clone contig in the tiling path + * map linkage asserted using a non-sequence based map + * such as RH, linkage, fingerprint or optical + * strobe strobe sequencing (PacBio) + * unspecified + * pcr PCR + */ +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage:linked = 1; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:paired_ends = 2; +const NCBI:WGS:gap_linkage 
NCBI:WGS:gap:linkage_evidence:align_genus = 4; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:align_xgenus = 8; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:align_trnscpt = 16; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:within_clone = 32; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:clone_contig = 64; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:map = 128; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:strobe = 256; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:unspecified = 512; +const NCBI:WGS:gap_linkage NCBI:WGS:gap:linkage_evidence:pcr = 1024; + +/*-------------------------------------------------------------------------- + * functions + */ + +/* tokenize_nuc_accession + * tokenize_prot_accession + * scans the accession text ( "acc" ) on input + * tokenizes into parts, returned as text:token + */ +extern function text:token + NCBI:WGS:tokenize_nuc_accession #1 ( ascii acc ); +extern function text:token + NCBI:WGS:tokenize_prot_accession #1 ( ascii acc ); + +const U16 NCBI:WGS:acc_token:unrecognized = 1; +const U16 NCBI:WGS:acc_token:prefix = 2; +const U16 NCBI:WGS:acc_token:contig = 3; /* NOTE(review): presumably the token ids emitted by the tokenizers above - confirm */ + + +/* build_scaffold_read + * assembles contigs and gaps into a single row + * transcribes + strand contigs as they are, + * performs reverse complement of - strand contigs, + * fills gaps with stated number of N + * + * build_scaffold_qual + * assembles contig and gap qualities into a single row + * contig qualities are taken as they are, + * gap qualities are assigned a constant + * + * "component_start" [ DATA ] - starting locations on each + * component or 0 for gaps. normal starting point is 0, + * but offsets are supported. + * NB - ONE-BASED COORDINATES ( NOTE(review): with one-based coordinates the normal start is presumably 1 - confirm ) + * + * "component_len" [ DATA ] - length of contig sequence + * from component_start, or length of gap, projected onto + * scaffold at scaffold_start. 
+ * + * "component_props" [ DATA ] - see discussion of NCBI:WGS:component_props type + * distinguish between contigs and gaps, indicate strand + * + * "component_id" [ DATA ] - foreign keys into SEQUENCE table + * row_len ( component_id ) == count-of-contigs ( component_props ) + */ +extern function INSDC:4na:bin NCBI:WGS:build_scaffold_read #1 + ( INSDC:coord:one component_start, INSDC:coord:len component_len, + NCBI:WGS:component_props component_props, I64 component_id ); + +extern function INSDC:quality:phred NCBI:WGS:build_scaffold_qual #1 + ( INSDC:coord:one component_start, INSDC:coord:len component_len, + NCBI:WGS:component_props component_props, I64 component_id ); + + +/* build_read_type + * generate standard SRA read type from component properties + * contigs are biological, gaps are technical + * + * "component_props" [ DATA ] - see discussion of NCBI:WGS:component_props type + * distinguish between contigs and gaps, indicate strand + */ +extern function INSDC:SRA:xread_type + NCBI:WGS:build_read_type #1 ( NCBI:WGS:component_props component_props ); + + +/*-------------------------------------------------------------------------- + * nucleotide + */ +table NCBI:WGS:tbl:nucleotide #1.1 + = NCBI:tbl:base_space #2.0.3 + , NCBI:tbl:phred_quality #2.0.4 + , NCBI:SRA:tbl:stats #1.2.0 +{ + /* ACCESSION - stored as .ACC_PREFIX / .ACC_CONTIG_LEN / .ACC_CONTIG and rebuilt on read as out_accession + * [<opt-prefix>]<4-letter-prefix><2-digit-version><6-or-7-digit-contig> + */ + extern column ascii ACCESSION = out_accession; + extern column U32 ACC_VERSION = .ACC_VERSION | <U32> echo <1> (); + + // input + ascii in_accession = ACCESSION; + + // parsed input + text:token in_acc_token + = NCBI:WGS:tokenize_nuc_accession ( in_accession ); + // [<opt-prefix>]<4-letter-prefix><2-digit-version> + ascii in_acc_prefix + = extract_token < 0 > ( in_accession, in_acc_token ); + // <6-or-7-digit-contig> + ascii in_contig_text + = extract_token < 1 > ( in_accession, in_acc_token ); + U32 in_contig_len + = row_len ( in_contig_text ); + U64 in_contig_bin + = strtonum ( in_contig_text ); + + // physical storage + physical 
column < ascii > zip_encoding .ACC_PREFIX = in_acc_prefix; + physical column < U32 > izip_encoding .ACC_CONTIG_LEN = in_contig_len; + physical column < U64 > izip_encoding .ACC_CONTIG = in_contig_bin; + physical column < U32 > izip_encoding .ACC_VERSION = ACC_VERSION; //needed to back-fill WGS data from ID where version may be > 1 + + // output + ascii out_acc_prefix + = .ACC_PREFIX + | < ascii > meta:read < 'ACC_PREFIX', true > () + ; + U32 out_acc_contig_len + = .ACC_CONTIG_LEN + | < U32 > meta:value < 'ACC_CONTIG_LEN', true > () + ; + U64 out_acc_contig + = .ACC_CONTIG + | ( U64 ) row_id () + ; + ascii out_accession + = sprintf < "%s%0*u" > ( out_acc_prefix, out_acc_contig_len, out_acc_contig ); + + readonly column ascii ACC_PREFIX + = .ACC_PREFIX + | < ascii > meta:read < 'ACC_PREFIX', true > () + ; + readonly column U32 ACC_CONTIG_LEN + = .ACC_CONTIG_LEN + | < U32 > meta:value < 'ACC_CONTIG_LEN', true > () + ; + + /* CONTIG_NAME + * principal name, written and read through the 'contig_name' text index + */ + extern column utf8 CONTIG_NAME + = idx:text:project #1.0 < 'contig_name' > ( .CONTIG_NAME ); + + physical column < utf8 > zip_encoding .CONTIG_NAME + = idx:text:insert #1.0 < 'contig_name' > ( CONTIG_NAME ); + + ascii out_contig_name = cast ( CONTIG_NAME ); + + // NB - this is only useful if CONTIG_NAME is unique + // or if clustered by CONTIG_NAME + readonly column vdb:row_id_range CONTIG_NAME_ROW_RANGE + = idx:text:lookup #1.0 < 'contig_name', 'NAME_QUERY' > (); + + /* EXTRA_SEQIDS + * pipe-separated list of additional names + */ + extern column < ascii > zip_encoding EXTRA_SEQIDS; + + /* TITLE + */ + extern column < ascii > zip_encoding TITLE; + + /* GI + * gi is indexed in a parallel table ( see NCBI:WGS:tbl:gi_idx ) + */ + extern column < NCBI:gi > izip_encoding GI; + + /* TAXID + * taxonomy id + */ + extern column < NCBI:taxid > izip_encoding TAXID; + + /* GB_STATE + * genbank state + */ + extern column < NCBI:gb_state > izip_encoding GB_STATE; + + /* DESCR + * ASN.1 description + */ + extern column < NCBI:asn:binary > zip_encoding 
DESCR; + + /* ANNOT + * ASN.1 annotation + */ + extern column < NCBI:asn:binary > zip_encoding ANNOT; + + /* GAP_START + * Zero-based starting position of a gap + */ + extern column < INSDC:coord:zero > izip_encoding GAP_START; + + /* GAP_LEN + * Length of a gap + */ + extern column < INSDC:coord:len > izip_encoding GAP_LEN; + + /* GAP_PROPS + * See description of NCBI:WGS:component_props + */ + extern column < NCBI:WGS:component_props > zip_encoding GAP_PROPS; + + /* GAP_LINKAGE + * See description of NCBI:WGS:gap_linkage + */ + extern column < NCBI:WGS:gap_linkage> zip_encoding GAP_LINKAGE; + + + ascii out_seqid_gi = sprintf < "gi|%u" > ( .GI ); + ascii out_seqid_gb = sprintf < "gb|%s.%u|" > ( out_accession, ACC_VERSION ) + | sprintf < "gb|%s.1|" > ( out_accession ); + ascii out_seqid_gnl = sprintf < "gnl|WGS:%s|%s" > (.ACC_PREFIX, out_contig_name ) | <ascii> echo < '' > (); + + + /* outputs to spotname: the gi seqid joined to the gb seqid with "|", with the gb seqid alone as the alternative */ + ascii out_seqid_name + = sprintf < "%s|%s" > ( out_seqid_gi , out_seqid_gb ) + | sprintf < "%s" > ( out_seqid_gb ); + + ascii out_spot_name = sprintf < "%s %s" > ( out_seqid_name, .TITLE ); + + readonly column ascii SEQ_ID = out_seqid_name; + readonly column ascii SEQ_ID_GNL = out_seqid_gnl; + + /* outputs to spotdesc */ + // INSDC:coord:len in_read_len = (INSDC:coord:len) row_len ( in_2na_bin ); + INSDC:coord:len out_read_len = (INSDC:coord:len) row_len ( out_2na_bin ); + INSDC:coord:len trim_len = (INSDC:coord:len) row_len ( out_2na_bin ); + INSDC:coord:zero out_read_start = <INSDC:coord:zero> echo < 0 > (); + INSDC:coord:zero trim_start = <INSDC:coord:zero> echo < 0 > (); + INSDC:SRA:read_filter out_rd_filter = < INSDC:SRA:read_filter > echo < SRA_READ_FILTER_PASS > (); + INSDC:SRA:xread_type out_read_type = < INSDC:SRA:xread_type > echo < SRA_READ_TYPE_BIOLOGICAL > (); + // help trigger statistics + INSDC:SRA:xread_type _alt_in_read_type = < INSDC:SRA:xread_type > echo < SRA_READ_TYPE_BIOLOGICAL > (); + INSDC:coord:len _alt_in_read_len = (INSDC:coord:len) row_len ( in_2na_bin ); + + ascii out_label = 
< ascii > echo < "contig" > (); + INSDC:coord:len out_label_len = < INSDC:coord:len > echo < 6 > (); + INSDC:coord:zero out_label_start = < INSDC:coord:zero > echo < 0 > (); + + INSDC:SRA:platform_id out_platform = < INSDC:SRA:platform_id > echo < SRA_PLATFORM_UNDEFINED > (); +}; + + +/*-------------------------------------------------------------------------- + * protein + * contig + */ +table NCBI:WGS:tbl:protein #1 + = NCBI:tbl:protein #1.0.0 +{ + /* ACCESSION - stored as .ACC_PREFIX / .ACC_CONTIG_LEN / .ACC_CONTIG and rebuilt on read as out_accession + * [<opt-prefix>]<4-letter-prefix><2-digit-version><6-or-7-digit-contig> + */ + extern column ascii ACCESSION = out_accession; + + // input + ascii in_accession = ACCESSION; + + // parsed input + text:token in_acc_token + = NCBI:WGS:tokenize_prot_accession ( in_accession ); + // [<opt-prefix>]<4-letter-prefix><2-digit-version> + ascii in_acc_prefix + = extract_token < 0 > ( in_accession, in_acc_token ); + // <6-or-7-digit-contig> + ascii in_contig_text + = extract_token < 1 > ( in_accession, in_acc_token ); + U32 in_contig_len + = row_len ( in_contig_text ); + I64 in_contig_bin /* NOTE(review): written to the U64 column .ACC_CONTIG below, and the nucleotide table declares this production as U64 - confirm I64 is intended */ + = strtonum ( in_contig_text ); + + // physical storage + physical column < ascii > zip_encoding .ACC_PREFIX = in_acc_prefix; + physical column < U32 > izip_encoding .ACC_CONTIG_LEN = in_contig_len; + physical column < U64 > izip_encoding .ACC_CONTIG = in_contig_bin; + + // output + ascii out_acc_prefix + = .ACC_PREFIX + | < ascii > meta:read < 'ACC_PREFIX', true > () + ; + U32 out_acc_contig_len + = .ACC_CONTIG_LEN + | < U32 > meta:value < 'ACC_CONTIG_LEN', true > () + ; + U64 out_acc_contig + = .ACC_CONTIG + | ( U64 ) row_id () + ; + ascii out_accession + = sprintf < "%s%0*u" > ( out_acc_prefix, out_acc_contig_len, out_acc_contig ); + + /* TITLE + */ + extern column < ascii > zip_encoding TITLE; + + /* GI + * gi is indexed in a parallel table ( see NCBI:WGS:tbl:gi_idx ) + */ + extern column < NCBI:gi > izip_encoding GI; + + /* GB_STATE + * genbank state + */ + extern column < NCBI:gb_state > izip_encoding GB_STATE; + + /* DESCR + * ASN.1 description + */ + 
extern column < NCBI:asn:binary > zip_encoding DESCR; + + /* ANNOT + * ASN.1 annotation + */ + extern column < NCBI:asn:binary > zip_encoding ANNOT; + + /* outputs to spotname - NB: the seqid format string is still the literal placeholder "TBD" */ + ascii out_seqid_name = sprintf < "TBD" > ( .ACC_PREFIX, out_accession ); + ascii out_spot_name = sprintf < "%s %s" > ( out_seqid_name, .TITLE ); + + /* TBD + * need to create an extension to NCBI:tbl:protein + * that satisfies fastq-dump requirements for READ and QUALITY + */ +}; + + +/*-------------------------------------------------------------------------- + * gi_idx + * gi is the row-id of this table + */ +table NCBI:WGS:tbl:gi_idx #1 +{ + /* NUC_ROW_ID + * row-id in nucleotide table + */ + extern column < I64 > izip_encoding NUC_ROW_ID; + + /* PROT_ROW_ID + * row-id in protein table + */ + extern column < I64 > izip_encoding PROT_ROW_ID; +}; + + +/*-------------------------------------------------------------------------- + * scaffold + * records AGP data + */ +table NCBI:WGS:tbl:scaffold #1 +{ + /* SCAFFOLD_NAME + * This is the identifier for the object being assembled. + * This can be a chromosome, scaffold or contig. + * If an accession.version identifier is not used to describe + * the object the naming convention is to precede chromosome numbers with "chr" + * (e.g. chr1) and linkage group numbers with "LG" (e.g. LG3). 
+ * Contigs or scaffolds may have any identifier that is unique + * within the assembly + */ + extern column utf8 SCAFFOLD_NAME + = out_scaffold_name; + extern column ascii SCAFFOLD_NAME = cast (out_scaffold_name); + utf8 out_scaffold_name + = idx:text:project #1.0 < 'scaffold_name' > ( .SCAFFOLD_NAME ); + physical column < utf8 > zip_encoding .SCAFFOLD_NAME + = idx:text:insert #1.0 < 'scaffold_name' > ( SCAFFOLD_NAME ); + + /* COMPONENT_START + * one-based starting position within the component sequence + */ + extern column < INSDC:coord:one > izip_encoding COMPONENT_START; + + /* COMPONENT_LEN + * length of the component/gap projected onto the scaffold + */ + extern column < INSDC:coord:len > izip_encoding COMPONENT_LEN; + + /* COMPONENT_PROPS + * see description of NCBI:WGS:component_props + */ + extern column < NCBI:WGS:component_props > zip_encoding COMPONENT_PROPS; + + /* COMPONENT_ID + * one row-id for each non-gap component + */ + extern column < I64 > izip_encoding COMPONENT_ID; + + /* COMPONENT_LINKAGE + * see description of NCBI:WGS:gap_linkage + * one linkage value for each gap component + */ + extern column < NCBI:WGS:gap_linkage > zip_encoding COMPONENT_LINKAGE; +} + +table NCBI:WGS:view:scaffold #1 = NCBI:WGS:tbl:scaffold #1 +{ + /* ACCESSION + * scaffold accession: prefix, the letter "S", then the zero-padded scaffold row-id; prefix and pad width are read from row 1 of SEQUENCE + */ + readonly column ascii ACCESSION + = out_accession; + I64 scaffold_row_id + = row_id (); + I64 acc_row_id + = < I64 > echo < 1 > (); + ascii acc_prefix + = < ascii > simple_sub_select < 'SEQUENCE', 'ACC_PREFIX' > ( acc_row_id ); + U32 acc_contig_len + = < U32 > simple_sub_select < 'SEQUENCE', 'ACC_CONTIG_LEN' > ( acc_row_id ); + ascii out_accession + = sprintf < "%sS%0*d" > ( acc_prefix, acc_contig_len, scaffold_row_id ); + + /* READ + * base space construction of entire scaffold + */ + + // construct the read from contigs and gaps + INSDC:4na:bin out_4na_bin = NCBI:WGS:build_scaffold_read + ( .COMPONENT_START, .COMPONENT_LEN, .COMPONENT_PROPS, .COMPONENT_ID ); + + // various READ columns + default readonly column INSDC:dna:text READ + 
= < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_4na_bin ); + readonly column INSDC:4na:bin READ + = out_4na_bin; + readonly column INSDC:4na:packed READ + = pack ( out_4na_bin ); + readonly column INSDC:x2na:bin READ + = out_x2na_bin; + INSDC:x2na:bin out_x2na_bin + = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_4na_bin ); + readonly column INSDC:2na:bin READ + = out_2na_bin; + INSDC:2na:bin out_2na_bin + = < INSDC:x2na:bin, INSDC:2na:bin > map < INSDC:x2na:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( out_x2na_bin ); + readonly column INSDC:2na:packed READ + = pack ( out_2na_bin ); + + + /* CSREAD + * base space converted to color space + */ + default readonly column INSDC:color:text CSREAD + = < INSDC:x2cs:bin, INSDC:color:text > map < INSDC:x2cs:map:BINSET, INSDC:x2cs:map:CHARSET > ( out_x2cs_bin ); + readonly column INSDC:x2cs:bin CSREAD + = out_x2cs_bin; + INSDC:x2cs:bin out_x2cs_bin + = NCBI:color_from_dna ( out_x2na_bin, out_read_start, .COMPONENT_LEN, out_cs_key, out_color_matrix ); + readonly column INSDC:2cs:bin CSREAD + = out_2cs_bin; + INSDC:2cs:bin out_2cs_bin + = < INSDC:x2cs:bin, INSDC:2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( out_x2cs_bin ); + readonly column INSDC:2cs:packed CSREAD + = pack ( out_2cs_bin ); + + /* CS_NATIVE + * is color-space the native sequence space ( always false in this view ) + */ + readonly column bool CS_NATIVE + = < bool > echo < false > (); + + /* CS_KEY + * leading call given in base-space ( the constant 'T' in this view ) + */ + readonly column INSDC:dna:text CS_KEY + = out_cs_key; + INSDC:dna:text out_cs_key + = < INSDC:dna:text > echo < 'T' > ( .COMPONENT_LEN ); + + /* COLOR_MATRIX + * matrix used for color-space conversions + */ + readonly column U8 COLOR_MATRIX + = out_color_matrix; + U8 out_color_matrix + = < U8 > echo < INSDC:color:default_matrix > (); + + + /* QUALITY + * base or color call qualities + */ + INSDC:quality:phred out_qual_phred = 
NCBI:WGS:build_scaffold_qual + ( .COMPONENT_START, .COMPONENT_LEN, .COMPONENT_PROPS, .COMPONENT_ID ); + + // PHRED is default + default readonly column INSDC:quality:phred QUALITY + = out_qual_phred; + + // textual encodings + readonly column INSDC:quality:text:phred_33 QUALITY + = ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred ); + readonly column INSDC:quality:text:phred_64 QUALITY + = ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred ); + + + /* PLATFORM + * sequencing platform, if known ( always SRA_PLATFORM_UNDEFINED in this view ) + */ + INSDC:SRA:platform_id out_platform + = < INSDC:SRA:platform_id > echo < SRA_PLATFORM_UNDEFINED > (); + readonly column INSDC:SRA:platform_id PLATFORM + = out_platform; + + /* SPOT_ID + * support for libsra ( the scaffold row-id cast to a spot id ) + */ + INSDC:SRA:spotid_t out_spot_id + = cast ( scaffold_row_id ); + readonly column INSDC:SRA:spotid_t SPOT_ID + = out_spot_id; + + /* NAME + * spot name ( the scaffold name ) + */ + readonly column ascii NAME + = out_scaffold_name; + + /* SPOT_LEN + * TRIM_START + * TRIM_LEN + * spot descriptor: SPOT_LEN is the sum of COMPONENT_LEN, TRIM_START is 0 and TRIM_LEN equals SPOT_LEN + */ + readonly column INSDC:coord:len SPOT_LEN + = out_spot_len; + INSDC:coord:len out_spot_len + = < INSDC:coord:len > vec_sum ( .COMPONENT_LEN ); + readonly column INSDC:coord:zero TRIM_START + = < INSDC:coord:zero > echo < 0 > (); + readonly column INSDC:coord:len TRIM_LEN + = out_spot_len; + + + /* READ_START + * READ_LEN + * READ_TYPE + * read descriptor portion: one read per component, with READ_LEN taken directly from COMPONENT_LEN + */ + readonly column INSDC:coord:zero READ_START + = out_read_start; + INSDC:coord:zero out_read_start + = ( INSDC:coord:zero ) < U32 > integral ( .COMPONENT_LEN ); + readonly column INSDC:coord:len READ_LEN + = .COMPONENT_LEN; + readonly column INSDC:SRA:xread_type READ_TYPE + = out_read_type; + INSDC:SRA:xread_type out_read_type + = NCBI:WGS:build_read_type ( .COMPONENT_PROPS ); +} + + +/*-------------------------------------------------------------------------- + * contig database: the SEQUENCE, PROTEIN, GI_IDX and SCAFFOLD tables + */ +database NCBI:WGS:db:contig #1.1 +{ + table NCBI:WGS:tbl:nucleotide SEQUENCE; + table NCBI:WGS:tbl:protein PROTEIN; + 
table NCBI:WGS:tbl:gi_idx GI_IDX; + table NCBI:WGS:view:scaffold SCAFFOLD; +};