Mercurial > repos > charles_s_test > seqsero2
annotate libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/pnbrdb.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
rev | line source |
---|---|
3
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
1 /*=========================================================================== |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
2 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
3 * PUBLIC DOMAIN NOTICE |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
4 * National Center for Biotechnology Information |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
5 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
6 * This software/database is a "United States Government Work" under the |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
7 * terms of the United States Copyright Act. It was written as part of |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
8 * the author's official duties as a United States Government employee and |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
9 * thus cannot be copyrighted. This software/database is freely available |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
10 * to the public for use. The National Library of Medicine and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
11 * Government have not placed any restriction on its use or reproduction. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
12 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
13 * Although all reasonable efforts have been taken to ensure the accuracy |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
14 * and reliability of the software and data, the NLM and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
15 * Government do not and cannot warrant the performance or results that |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
16 * may be obtained by using this software or data. The NLM and the U.S. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
17 * Government disclaim all warranties, express or implied, including |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
18 * warranties of performance, merchantability or fitness for any particular |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
19 * purpose. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
20 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
21 * Please cite the author in any work or product based on this material. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
22 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
23 * =========================================================================== |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
24 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
25 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
26 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
27 version 1; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
28 include 'vdb/vdb.vschema'; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
29 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
30 /* PNBRDB |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
31 * the original flat-file pnbrdb structure was divided into two forks: |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
32 * 1 - "hsp" containing full blastp hsps |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
33 * 2 - "nbr" containing only pig->pig relationships with max score |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
34 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
35 * each fork was organized into bin directories by "query" ( left-hand ) pig |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
36 * each bin contained entries for up to 1M query pigs with a numeric |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
37 * 4 digit 1-based name generated as "( ( qpig - 1 ) / 1024 ) / 1024 + 1". |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
38 * this bin approach served as a primitive index. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
39 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
40 * within each bin directory, there were 1024 data files, where each data file |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
41 * represented 1024 query pigs. the file name incorporated a 4 digit 1-based |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
42 * file id generated as "( ( qpig - 1 ) / 1024 ) % 1024 + 1" making it possible |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
43 * to locate any entry by query pig within a 1024 entry neighborhood by using |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
44 * filesystem path alone. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
45 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
46 * within each data file, a fixed-size 1024-entry header gave the location of |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
47 * entries ordered according to the most common queries. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
48 * |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
49 * all basic data are contained within the "hsp" fork. the "nbr" fork served |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
50 * as a pre-calculated result of the query selecting all unique pig->pig pairs |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
51 * with their maximum score value. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
52 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
53 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
54 /* The vdb representation of the pnbrdb has two tables |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
55 * 1. table with one row per qpig (i.e. qpig = row_id) and two columns: |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
56 * offset and count. Offset indicates row_id in the second table where the |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
57 * hsps for the qpig are stored and count indicates the number of these rows. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
58 * 2. table with these columns: spig, max_score and blob. The blob contains all |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
59 * segments for given (qpig, spig) pair. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
60 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
61 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
62 table NCBI:pnbr:table:qpig #1 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
63 { |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
64 /* OFFSET |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
65 * start position of hsps for qpig = row_id in the hsp table. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
66 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
67 extern column <U64> izip_encoding OFFSET; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
68 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
69 /* COUNT |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
70 * number of hsps for qpig = row_id in the hsp table. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
71 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
72 extern column <U64> izip_encoding COUNT; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
73 }; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
74 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
75 table NCBI:pnbr:table:hsp #1 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
76 { |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
77 /* SPIG |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
78 * the subject pig column |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
79 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
80 extern column <U32> izip_encoding SPIG; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
81 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
82 /* MAX_SCORE |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
83 * max score between given query pig and subject pig |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
84 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
85 extern column <I32> izip_encoding MAX_SCORE; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
86 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
87 /* SEGMENTS |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
88 * blob for storing hsps for given qpig and spig. |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
89 */ |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
90 extern column <B8> zip_encoding SEGMENTS; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
91 }; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
92 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
93 database NCBI:pnbr:db:pnbr #1 |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
94 { |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
95 table NCBI:pnbr:table:qpig #1 QPIG_REFERENCE; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
96 table NCBI:pnbr:table:hsp #1 HSP; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
97 }; |
38ad1130d077
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
charles_s_test
parents:
diff
changeset
|
98 |