comparison libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/pnbrdb.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 version 1;
28 include 'vdb/vdb.vschema';
29
30 /* PNBRDB
31 * the original flat-file pnbrdb structure was divided into two forks:
32 * 1 - "hsp" containing full blastp hsps
33 * 2 - "nbr" containing only pig->pig relationships with max score
34 *
35 * each fork was organized into bin directories by "query" ( left-hand ) pig
36 * each bin contained entries for up to 1M query pigs with a numeric
37 * 4 digit 1-based name generated as "( ( qpig - 1 ) / 1024 ) / 1024 + 1".
38 * this bin approach served as a primitive index.
39 *
40 * within each bin directory, there are 1024 data files, where each data file
41 * represented 1024 query pigs. the file name incorporated a 4 digit 1-based
42 * file id generated as "( ( qpig - 1 ) / 1024 ) % 1024 + 1" making it possible
43 * to locate any entry by query pig within a 1024 entry neighborhood by using
44 * filesystem path alone.
45 *
46 * within each data file, a fixed-size 1024-entry header gave the location of
47 * entries ordered according to the most common queries.
48 *
49 * all basic data are contained within the "hsp" fork. the "nbr" fork served
50 * as a pre-calculated result of the query selecting all unique pig->pig pairs
51 * with their maximum score value.
52 */
53
54 /* The vdb representation of the pnbrdb has two tables:
55 * 1. a table with one row per qpig (i.e. qpig = row_id) and two columns:
56 * offset and count. Offset is the row_id in the second table where the
57 * hsps for the qpig start, and count is the number of those rows.
58 * 2. a table with these columns: spig, max_score and blob (SEGMENTS). The blob
59 * contains all segments for a given (qpig, spig) pair.
60 */
61
62 table NCBI:pnbr:table:qpig #1
63 { /* per-query-pig index into the hsp table: the row_id of this table is the qpig */
64 /* OFFSET
65 * row_id in the hsp table at which the hsps for qpig = row_id start.
66 * NOTE(review): izip_encoding is not declared in this file - presumably it comes from the included vdb/vdb.vschema; confirm. */
67 extern column <U64> izip_encoding OFFSET;
68
69 /* COUNT
70 * number of hsp-table rows holding hsps for qpig = row_id, counted from OFFSET.
71 */
72 extern column <U64> izip_encoding COUNT;
73 };
74
75 table NCBI:pnbr:table:hsp #1
76 { /* hsp data; no qpig column is declared here - rows are located via OFFSET/COUNT of the qpig table */
77 /* SPIG
78 * the subject pig (right-hand side of the qpig -> spig relationship) for this row
79 */
80 extern column <U32> izip_encoding SPIG;
81
82 /* MAX_SCORE
83 * maximum score between the given query pig and this row's subject pig
84 */
85 extern column <I32> izip_encoding MAX_SCORE;
86
87 /* SEGMENTS
88 * blob storing the hsps (all segments) for the given qpig and this row's spig.
89 * NOTE(review): the internal layout of the blob is not described in this file. */
90 extern column <B8> zip_encoding SEGMENTS;
91 };
92
93 database NCBI:pnbr:db:pnbr #1
94 { /* pnbr database: binds the two table schemas above to named members */
95 table NCBI:pnbr:table:qpig #1 QPIG_REFERENCE; /* per-qpig OFFSET/COUNT index into HSP */
96 table NCBI:pnbr:table:hsp #1 HSP; /* SPIG / MAX_SCORE / SEGMENTS rows */
97 };
98