annotate libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/pnbrdb.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
1 /*===========================================================================
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
2 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
3 * PUBLIC DOMAIN NOTICE
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
4 * National Center for Biotechnology Information
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
5 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
6 * This software/database is a "United States Government Work" under the
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
7 * terms of the United States Copyright Act. It was written as part of
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
8 * the author's official duties as a United States Government employee and
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
9 * thus cannot be copyrighted. This software/database is freely available
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
10 * to the public for use. The National Library of Medicine and the U.S.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
11 * Government have not placed any restriction on its use or reproduction.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
12 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
13 * Although all reasonable efforts have been taken to ensure the accuracy
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
14 * and reliability of the software and data, the NLM and the U.S.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
15 * Government do not and cannot warrant the performance or results that
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
16 * may be obtained by using this software or data. The NLM and the U.S.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
17 * Government disclaim all warranties, express or implied, including
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
18 * warranties of performance, merchantability or fitness for any particular
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
19 * purpose.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
20 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
21 * Please cite the author in any work or product based on this material.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
22 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
23 * ===========================================================================
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
24 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
25 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
26
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
27 version 1;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
28 include 'vdb/vdb.vschema';
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
29
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
30 /* PNBRDB
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
31 * the original flat-file pnbrdb structure was divided into two forks:
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
32 * 1 - "hsp" containing full blastp hsps
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
33 * 2 - "nbr" containing only pig->pig relationships with max score
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
34 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
35 * each fork was organized into bin directories by "query" ( left-hand ) pig
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
36 * each bin contained entries for up to 1M query pigs with a numeric
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
37 * 4 digit 1-based name generated as "( ( qpig - 1 ) / 1024 ) / 1024 + 1".
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
38 * this bin approach served as a primitive index.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
39 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
40 * within each bin directory, there are 1024 data files, where each data file
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
41 * represented 1024 query pigs. the file name incorporated a 4 digit 1-based
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
42 * file id generated as "( ( qpig - 1 ) / 1024 ) % 1024 + 1" making it possible
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
43 * to locate any entry by query pig within a 1024 entry neighborhood by using
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
44 * filesystem path alone.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
45 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
46 * within each data file, a fixed-size 1024-entry header gave the location of
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
47 * entries ordered according to the most common queries.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
48 *
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
49 * all basic data are contained within the "hsp" fork. the "nbr" fork served
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
50 * as a pre-calculated result of the query selecting all unique pig->pig pairs
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
51 * with their maximum score value.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
52 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
53
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
54 /* The vdb representation of the pnbrdb has two tables
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
55 * 1. table with one row per qpig (i.e. qpig = row_id) and two columns:
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
56 * offset and count. Offset indicates row_id in the second table where the
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
57 * hsps for the qpig are stored and count indicates the number of these rows.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
58 * 2. table with these coulmns: spig, max_score and blob. The blob contains all
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
59 * segments for given (qpig, spig) pair.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
60 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
61
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
62 table NCBI:pnbr:table:qpig #1
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
63 {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
64 /* OFFSET
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
65 * start position of hsps for qpig = row_id in the hsp table.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
66 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
67 extern column <U64> izip_encoding OFFSET;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
68
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
69 /* COUNT
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
70 * number of hsps for qpig = row_id in the hsp table.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
71 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
72 extern column <U64> izip_encoding COUNT;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
73 };
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
74
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
75 table NCBI:pnbr:table:hsp #1
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
76 {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
77 /* SPIG
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
78 * the subject pig column
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
79 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
80 extern column <U32> izip_encoding SPIG;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
81
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
82 /* MAX_SCORE
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
83 * max score between given query pig and subject pig
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
84 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
85 extern column <I32> izip_encoding MAX_SCORE;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
86
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
87 /* SEGMENTS
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
88 * blob for storing hsps for given qpig and spig.
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
89 */
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
90 extern column <B8> zip_encoding SEGMENTS;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
91 };
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
92
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
93 database NCBI:pnbr:db:pnbr #1
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
94 {
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
95 table NCBI:pnbr:table:qpig #1 QPIG_REFERENCE;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
96 table NCBI:pnbr:table:hsp #1 HSP;
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
97 };
38ad1130d077 planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff changeset
98