Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/pnbrdb.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
| author | charles_s_test |
|---|---|
| date | Mon, 27 Nov 2017 11:21:07 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:0d65b71ff8df | 3:38ad1130d077 |
|---|---|
| 1 /*=========================================================================== | |
| 2 * | |
| 3 * PUBLIC DOMAIN NOTICE | |
| 4 * National Center for Biotechnology Information | |
| 5 * | |
| 6 * This software/database is a "United States Government Work" under the | |
| 7 * terms of the United States Copyright Act. It was written as part of | |
| 8 * the author's official duties as a United States Government employee and | |
| 9 * thus cannot be copyrighted. This software/database is freely available | |
| 10 * to the public for use. The National Library of Medicine and the U.S. | |
| 11 * Government have not placed any restriction on its use or reproduction. | |
| 12 * | |
| 13 * Although all reasonable efforts have been taken to ensure the accuracy | |
| 14 * and reliability of the software and data, the NLM and the U.S. | |
| 15 * Government do not and cannot warrant the performance or results that | |
| 16 * may be obtained by using this software or data. The NLM and the U.S. | |
| 17 * Government disclaim all warranties, express or implied, including | |
| 18 * warranties of performance, merchantability or fitness for any particular | |
| 19 * purpose. | |
| 20 * | |
| 21 * Please cite the author in any work or product based on this material. | |
| 22 * | |
| 23 * =========================================================================== | |
| 24 * | |
| 25 */ | |
| 26 | |
| 27 version 1; | |
| 28 include 'vdb/vdb.vschema'; | |
| 29 | |
| 30 /* PNBRDB | |
| 31 * the original flat-file pnbrdb structure was divided into two forks: | |
| 32 * 1 - "hsp" containing full blastp hsps | |
| 33 * 2 - "nbr" containing only pig->pig relationships with max score | |
| 34 * | |
| 35 * each fork was organized into bin directories by "query" ( left-hand ) pig | |
| 36 * each bin contained entries for up to 1M query pigs with a numeric | |
| 37 * 4 digit 1-based name generated as "( ( qpig - 1 ) / 1024 ) / 1024 + 1". | |
| 38 * this bin approach served as a primitive index. | |
| 39 * | |
| 40 * within each bin directory, there are 1024 data files, where each data file | |
| 41 * represented 1024 query pigs. the file name incorporated a 4 digit 1-based | |
| 42 * file id generated as "( ( qpig - 1 ) / 1024 ) % 1024 + 1" making it possible | |
| 43 * to locate any entry by query pig within a 1024 entry neighborhood by using | |
| 44 * filesystem path alone. | |
| 45 * | |
| 46 * within each data file, a fixed-size 1024-entry header gave the location of | |
| 47 * entries ordered according to the most common queries. | |
| 48 * | |
| 49 * all basic data are contained within the "hsp" fork. the "nbr" fork served | |
| 50 * as a pre-calculated result of the query selecting all unique pig->pig pairs | |
| 51 * with their maximum score value. | |
| 52 */ | |
| 53 | |
| 54 /* The vdb representation of the pnbrdb has two tables | |
| 55 * 1. table with one row per qpig (i.e. qpig = row_id) and two columns: | |
| 56 * offset and count. Offset indicates row_id in the second table where the | |
| 57 * hsps for the qpig are stored and count indicates the number of these rows. | |
| 58 * 2. table with these columns: spig, max_score and blob. The blob contains all | |
| 59 * segments for given (qpig, spig) pair. | |
| 60 */ | |
| 61 | |
| 62 table NCBI:pnbr:table:qpig #1 | |
| 63 { | |
| 64 /* OFFSET | |
| 65 * row_id in the hsp table where the hsps for qpig = row_id of this table are stored. | |
| 66 */ | |
| 67 extern column <U64> izip_encoding OFFSET; | |
| 68 | |
| 69 /* COUNT | |
| 70 * number of rows in the hsp table that hold the hsps for qpig = row_id of this table. | |
| 71 */ | |
| 72 extern column <U64> izip_encoding COUNT; | |
| 73 }; | |
| 74 | |
| 75 table NCBI:pnbr:table:hsp #1 | |
| 76 { | |
| 77 /* SPIG | |
| 78 * the subject pig of the (qpig, spig) pair described by this row | |
| 79 */ | |
| 80 extern column <U32> izip_encoding SPIG; | |
| 81 | |
| 82 /* MAX_SCORE | |
| 83 * maximum score between the given query pig and this row's subject pig | |
| 84 */ | |
| 85 extern column <I32> izip_encoding MAX_SCORE; | |
| 86 | |
| 87 /* SEGMENTS | |
| 88 * blob containing all hsp segments for the given (qpig, spig) pair. | |
| 89 */ | |
| 90 extern column <B8> zip_encoding SEGMENTS; | |
| 91 }; | |
| 92 | |
| 93 database NCBI:pnbr:db:pnbr #1 | |
| 94 { | |
| 95 table NCBI:pnbr:table:qpig #1 QPIG_REFERENCE; /* per-qpig OFFSET/COUNT table */ | |
| 96 table NCBI:pnbr:table:hsp #1 HSP; /* SPIG / MAX_SCORE / SEGMENTS table */ | |
| 97 }; | |
| 98 |
