diff libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/pnbrdb.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/pnbrdb.vschema	Mon Nov 27 11:21:07 2017 -0500
@@ -0,0 +1,98 @@
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+version 1;
+include 'vdb/vdb.vschema';
+
+/* PNBRDB
+ *  the original flat-file pnbrdb structure was divided into two forks:
+ *   1 - "hsp" containing full blastp hsps
+ *   2 - "nbr" containing only pig->pig relationships with max score
+ *
+ *  each fork was organized into bin directories by "query" ( left-hand ) pig
+ *  each bin contained entries for up to 1M query pigs with a numeric
+ *  4 digit 1-based name generated as "( ( qpig - 1 ) / 1024 ) / 1024 + 1".
+ *  this bin approach served as a primitive index.
+ *
+ *  within each bin directory, there are 1024 data files, where each data file
+ *  represented 1024 query pigs. the file name incorporated a 4 digit 1-based
+ *  file id generated as "( ( qpig - 1 ) / 1024 ) % 1024 + 1" making it possible
+ *  to locate any entry by query pig within a 1024 entry neighborhood by using
+ *  filesystem path alone.
+ *
+ *  within each data file, a fixed-size 1024-entry header gave the location of
+ *  entries ordered according to the most common queries.
+ *
+ *  all basic data are contained within the "hsp" fork. the "nbr" fork served
+ *  as a pre-calculated result of the query selecting all unique pig->pig pairs
+ *  with their maximum score value.
+ */
+
+/* The vdb representation of the pnbrdb has two tables
+ *  1. table with one row per qpig (i.e. qpig = row_id) and two columns:
+ *     offset and count. Offset indicates row_id in the second table where the
+ *     hsps for the qpig are stored and count indicates the number of these rows.
+ *  2. table with these coulmns: spig, max_score and blob. The blob contains all
+ *     segments for given (qpig, spig) pair.
+ */
+
+table NCBI:pnbr:table:qpig #1
+{
+    /* OFFSET
+     *  start position of hsps for qpig = row_id in the hsp table.
+     */
+    extern column <U64> izip_encoding OFFSET;
+
+    /* COUNT
+     *  number of hsps for qpig = row_id in the hsp table.
+     */
+    extern column <U64> izip_encoding COUNT;
+};
+
+table NCBI:pnbr:table:hsp #1
+{
+    /* SPIG
+     *  the subject pig column
+     */
+    extern column <U32> izip_encoding SPIG;
+
+    /* MAX_SCORE
+     *  max score between given query pig and subject pig
+     */
+    extern column <I32> izip_encoding MAX_SCORE;
+
+    /* SEGMENTS
+     *  blob for storing hsps for given qpig and spig.
+     */
+    extern column <B8> zip_encoding SEGMENTS;
+};
+
+database NCBI:pnbr:db:pnbr #1
+{
+    table NCBI:pnbr:table:qpig #1 QPIG_REFERENCE;
+    table NCBI:pnbr:table:hsp #1 HSP;
+};
+