comparison libs/sratoolkit.2.8.0-centos_linux64/schema/align/refseq.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /*==========================================================================
28 * VDB Reference Sequence types, functions and tables
29 */
30 version 1;
31
32 include 'vdb/vdb.vschema';
33 include 'ncbi/seq.vschema';
34 include 'ncbi/sra.vschema';
35 include 'ncbi/stats.vschema';
36
37 // Statistics function, version 2: takes a 4na-binary sequence and its length, returns U8; bound to the external name NCBI:refSeq:stats (note the camel-case 'refSeq' spelling).
38 extern function U8 NCBI:refseq:stats #2 ( INSDC:4na:bin seq, INSDC:coord:len len )
39 = NCBI:refSeq:stats;
40 // Reference sequence table, version 1.0.2; inherits from the four parent tables listed after '='.
41 table NCBI:refseq:tbl:reference #1.0.2 =
42 NCBI:tbl:base_space #2.0.2,
43 NCBI:tbl:phred_quality #2.0.3,
44 NCBI:tbl:seqloc #1.0,
45 NCBI:SRA:tbl:stats #1.1.2
46 {
47 // default column limit: 131072 = 128 * 1024 (128K)
48 column default limit = 131072;
49
50 extern column U32 MAX_SEQ_LEN; /* must be static */
51 extern column < ascii > izip_encoding DEF_LINE; /* remainder of defline after SEQ_ID */
52
53 // table_stats trigger: passes in_4na_bin and _alt_in_read_len to NCBI:refseq:stats; upconverts to INSDC:dna:text to get MD5
54 trigger table_stats
55 = NCBI:refseq:stats(in_4na_bin, _alt_in_read_len);
56 // total sequence length, read from the 'STATS/TOTAL_SEQ_LEN' metadata node
57 readonly column U64 TOTAL_SEQ_LEN
58 = < U64 > meta:value < 'STATS/TOTAL_SEQ_LEN', true >();
59 // 16-byte MD5 value, read from the 'STATS/MD5' metadata node
60 readonly column U8[16] MD5
61 = < U8[16] > meta:read < 'STATS/MD5', true >();
62
63 // indicates if sequence has circular structure
64 // should be static
65 extern column bool_encoding CIRCULAR;
66
67 /* columns (NOTE(review): MD5 and CIRCULAR are also declared in this table but are not listed here - confirm whether the list is meant to be complete):
68 * READ
69 * QUALITY (optional)
70 * SEQ_ID
71 * SEQ_START
72 * SEQ_LEN
73 * MAX_SEQ_LEN
74 * TOTAL_SEQ_LEN
75 * DEF_LINE
76 */
77
78 // make CS_KEY writable: map lowercase 'acgtn' to uppercase 'ACGTN' on input and store the result in physical column .CS_KEY
79 INSDC:dna:text in_cs_key
80 = < INSDC:dna:text, INSDC:dna:text > map < 'acgtn', 'ACGTN' > ( CS_KEY );
81 physical column < INSDC:dna:text > zip_encoding .CS_KEY = in_cs_key;
82 // extra columns needed for CS conversion: out_read_start is a constant 0, out_read_len is taken from physical .SEQ_LEN
83 INSDC:coord:zero out_read_start = < INSDC:coord:zero> echo < 0 > ();
84 INSDC:coord:len out_read_len = .SEQ_LEN;
85 // input read length: READ_LEN, with SEQ_LEN as the alternative production
86 INSDC:coord:len _alt_in_read_len
87 = READ_LEN
88 | SEQ_LEN;
89 // input read type: READ_TYPE, with the constant SRA_READ_TYPE_BIOLOGICAL as the alternative production
90 INSDC:SRA:xread_type _alt_in_read_type
91 = READ_TYPE
92 | < INSDC:SRA:xread_type > echo < SRA_READ_TYPE_BIOLOGICAL > ();
93 // output read type: physical .READ_TYPE, with the constant SRA_READ_TYPE_BIOLOGICAL as the alternative production
94 INSDC:SRA:xread_type out_read_type
95 = .READ_TYPE
96 | < INSDC:SRA:xread_type > echo < SRA_READ_TYPE_BIOLOGICAL > ();
97 };
98
99 // older spelling: keep the camel-case 'refSeq' table name available as an alias of NCBI:refseq:tbl:reference
100 alias NCBI:refseq:tbl:reference NCBI:refSeq:tbl:reference;