annotate spp/src/BGZF.cpp @ 6:ce08b0efa3fd draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:11:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BGZF.cpp (c) 2009 Derek Barnett, Michael Str�mberg
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 19 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // BGZF routines were adapted from the bgzf.c code developed at the Broad
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // Institute.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 // Provides the basic functionality for reading & writing BGZF files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 #include <BGZF.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 using namespace BamTools;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17 #include <algorithm>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 using namespace std;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 BgzfData::BgzfData(void)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 , CompressedBlockSize(MAX_BLOCK_SIZE)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23 , BlockLength(0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24 , BlockOffset(0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 , BlockAddress(0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 , IsOpen(false)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 , IsWriteOnly(false)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 , IsWriteUncompressed(false)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29 , Stream(NULL)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 , UncompressedBlock(NULL)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 , CompressedBlock(NULL)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33 try {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34 CompressedBlock = new char[CompressedBlockSize];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35 UncompressedBlock = new char[UncompressedBlockSize];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36 } catch( std::bad_alloc& ba ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 fprintf(stderr, "BGZF ERROR: unable to allocate memory for our BGZF object.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42 // destructor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43 BgzfData::~BgzfData(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 if( CompressedBlock ) delete[] CompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45 if( UncompressedBlock ) delete[] UncompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 // closes BGZF file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49 void BgzfData::Close(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 // skip if file not open, otherwise set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 if ( !IsOpen ) return;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54 // if writing to file, flush the current BGZF block,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 // then write an empty block (as EOF marker)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 if ( IsWriteOnly ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 FlushBlock();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58 int blockLength = DeflateBlock();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59 fwrite(CompressedBlock, 1, blockLength, Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 // flush and close
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63 fflush(Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64 fclose(Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 IsWriteUncompressed = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66 IsOpen = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 // compresses the current block
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70 int BgzfData::DeflateBlock(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 // initialize the gzip header
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73 char* buffer = CompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 memset(buffer, 0, 18);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 buffer[0] = GZIP_ID1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 buffer[1] = (char)GZIP_ID2;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77 buffer[2] = CM_DEFLATE;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 buffer[3] = FLG_FEXTRA;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 buffer[9] = (char)OS_UNKNOWN;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 buffer[10] = BGZF_XLEN;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 buffer[12] = BGZF_ID1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 buffer[13] = BGZF_ID2;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 buffer[14] = BGZF_LEN;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 // set compression level
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 const int compressionLevel = ( IsWriteUncompressed ? 0 : Z_DEFAULT_COMPRESSION );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 // loop to retry for blocks that do not compress enough
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 int inputLength = BlockOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90 int compressedLength = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 unsigned int bufferSize = CompressedBlockSize;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 while ( true ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95 // initialize zstream values
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 z_stream zs;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 zs.zalloc = NULL;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 zs.zfree = NULL;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 zs.next_in = (Bytef*)UncompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100 zs.avail_in = inputLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101 zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 // initialize the zlib compression algorithm
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 if ( deflateInit2(&zs, compressionLevel, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 fprintf(stderr, "BGZF ERROR: zlib deflate initialization failed.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 // compress the data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 int status = deflate(&zs, Z_FINISH);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112 if ( status != Z_STREAM_END ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 deflateEnd(&zs);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 // reduce the input length and try again
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 if ( status == Z_OK ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 inputLength -= 1024;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 if( inputLength < 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 fprintf(stderr, "BGZF ERROR: input reduction failed.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123 continue;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 fprintf(stderr, "BGZF ERROR: zlib::deflateEnd() failed.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130 // finalize the compression routine
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 if ( deflateEnd(&zs) != Z_OK ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 fprintf(stderr, "BGZF ERROR: zlib::deflateEnd() failed.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136 compressedLength = zs.total_out;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137 compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
138 if ( compressedLength > MAX_BLOCK_SIZE ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
139 fprintf(stderr, "BGZF ERROR: deflate overflow.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
140 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
141 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
142
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
143 break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
144 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
145
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
146 // store the compressed length
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
147 BgzfData::PackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
148
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
149 // store the CRC32 checksum
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
150 unsigned int crc = crc32(0, NULL, 0);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
151 crc = crc32(crc, (Bytef*)UncompressedBlock, inputLength);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
152 BgzfData::PackUnsignedInt(&buffer[compressedLength - 8], crc);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
153 BgzfData::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
154
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
155 // ensure that we have less than a block of data left
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
156 int remaining = BlockOffset - inputLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
157 if ( remaining > 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
158 if ( remaining > inputLength ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
159 fprintf(stderr, "BGZF ERROR: after deflate, remainder too large.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
160 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
161 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
162 memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
163 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
164
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
165 BlockOffset = remaining;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
166 return compressedLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
167 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
168
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
169 // flushes the data in the BGZF block
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
170 void BgzfData::FlushBlock(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
171
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
172 // flush all of the remaining blocks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
173 while ( BlockOffset > 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
174
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
175 // compress the data block
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
176 int blockLength = DeflateBlock();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
177
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
178 // flush the data to our output stream
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
179 int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
180
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
181 if ( numBytesWritten != blockLength ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
182 fprintf(stderr, "BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
183 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
184 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
185
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
186 BlockAddress += blockLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
187 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
188 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
189
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
190 // de-compresses the current block
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
191 int BgzfData::InflateBlock(const int& blockLength) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
192
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
193 // Inflate the block in m_BGZF.CompressedBlock into m_BGZF.UncompressedBlock
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
194 z_stream zs;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
195 zs.zalloc = NULL;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
196 zs.zfree = NULL;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
197 zs.next_in = (Bytef*)CompressedBlock + 18;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
198 zs.avail_in = blockLength - 16;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
199 zs.next_out = (Bytef*)UncompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
200 zs.avail_out = UncompressedBlockSize;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
201
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
202 int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
203 if ( status != Z_OK ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
204 fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
205 return -1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
206 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
207
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
208 status = inflate(&zs, Z_FINISH);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
209 if ( status != Z_STREAM_END ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
210 inflateEnd(&zs);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
211 fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflate() failed\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
212 return -1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
213 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
214
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
215 status = inflateEnd(&zs);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
216 if ( status != Z_OK ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
217 fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
218 return -1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
219 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
220
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
221 return zs.total_out;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
222 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
223
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
224 // opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
225 bool BgzfData::Open(const string& filename, const char* mode, bool isWriteUncompressed ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
226
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
227 // determine open mode
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
228 if ( strcmp(mode, "rb") == 0 )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
229 IsWriteOnly = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
230 else if ( strcmp(mode, "wb") == 0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
231 IsWriteOnly = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
232 else {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
233 fprintf(stderr, "BGZF ERROR: unknown file mode: %s\n", mode);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
234 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
235 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
236
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
237 // ----------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
238 // open Stream to read to/write from file, stdin, or stdout
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
239 // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
240
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
241 // read/write BGZF data to/from a file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
242 if ( (filename != "stdin") && (filename != "stdout") )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
243 Stream = fopen(filename.c_str(), mode);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
244
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
245 // read BGZF data from stdin
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
246 else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
247 Stream = freopen(NULL, mode, stdin);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
248
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
249 // write BGZF data to stdout
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
250 else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
251 Stream = freopen(NULL, mode, stdout);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
252
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
253 if ( !Stream ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
254 fprintf(stderr, "BGZF ERROR: unable to open file %s\n", filename.c_str() );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
255 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
256 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
257
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
258 // set flags, return success
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
259 IsOpen = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
260 IsWriteUncompressed = isWriteUncompressed;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
261 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
262 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
263
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
264 // reads BGZF data into a byte buffer
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
265 int BgzfData::Read(char* data, const unsigned int dataLength) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
266
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
267 if ( !IsOpen || IsWriteOnly || dataLength == 0 ) return 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
268
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
269 char* output = data;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
270 unsigned int numBytesRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
271 while ( numBytesRead < dataLength ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
272
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
273 int bytesAvailable = BlockLength - BlockOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
274 if ( bytesAvailable <= 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
275 if ( !ReadBlock() ) return -1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
276 bytesAvailable = BlockLength - BlockOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
277 if ( bytesAvailable <= 0 ) break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
278 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
279
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
280 char* buffer = UncompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
281 int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
282 memcpy(output, buffer + BlockOffset, copyLength);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
283
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
284 BlockOffset += copyLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
285 output += copyLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
286 numBytesRead += copyLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
287 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
288
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
289 if ( BlockOffset == BlockLength ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
290 BlockAddress = ftell64(Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
291 BlockOffset = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
292 BlockLength = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
293 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
294
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
295 return numBytesRead;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
296 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
297
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
298 // reads a BGZF block
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
299 bool BgzfData::ReadBlock(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
300
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
301 char header[BLOCK_HEADER_LENGTH];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
302 int64_t blockAddress = ftell64(Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
303
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
304 int count = fread(header, 1, sizeof(header), Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
305 if ( count == 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
306 BlockLength = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
307 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
308 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
309
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
310 if ( count != sizeof(header) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
311 fprintf(stderr, "BGZF ERROR: read block failed - could not read block header\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
312 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
313 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
314
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
315 if ( !BgzfData::CheckBlockHeader(header) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
316 fprintf(stderr, "BGZF ERROR: read block failed - invalid block header\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
317 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
318 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
319
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
320 int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
321 char* compressedBlock = CompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
322 memcpy(compressedBlock, header, BLOCK_HEADER_LENGTH);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
323 int remaining = blockLength - BLOCK_HEADER_LENGTH;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
324
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
325 count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
326 if ( count != remaining ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
327 fprintf(stderr, "BGZF ERROR: read block failed - could not read data from block\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
328 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
329 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
330
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
331 count = InflateBlock(blockLength);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
332 if ( count < 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
333 fprintf(stderr, "BGZF ERROR: read block failed - could not decompress block data\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
334 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
335 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
336
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
337 if ( BlockLength != 0 )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
338 BlockOffset = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
339
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
340 BlockAddress = blockAddress;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
341 BlockLength = count;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
342 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
343 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
344
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
345 // seek to position in BGZF file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
346 bool BgzfData::Seek(int64_t position) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
347
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
348 if ( !IsOpen ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
349
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
350 int blockOffset = (position & 0xFFFF);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
351 int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
352
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
353 if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
354 fprintf(stderr, "BGZF ERROR: unable to seek in file\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
355 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
356 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
357
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
358 BlockLength = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
359 BlockAddress = blockAddress;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
360 BlockOffset = blockOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
361 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
362 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
363
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
364 // get file position in BGZF file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
365 int64_t BgzfData::Tell(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
366 if ( !IsOpen )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
367 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
368 else
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
369 return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
370 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
371
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
372 // writes the supplied data into the BGZF buffer
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
373 unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
374
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
375 if ( !IsOpen || !IsWriteOnly ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
376
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
377 // initialize
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
378 unsigned int numBytesWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
379 const char* input = data;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
380 unsigned int blockLength = UncompressedBlockSize;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
381
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
382 // copy the data to the buffer
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
383 while ( numBytesWritten < dataLen ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
384
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
385 unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
386 char* buffer = UncompressedBlock;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
387 memcpy(buffer + BlockOffset, input, copyLength);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
388
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
389 BlockOffset += copyLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
390 input += copyLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
391 numBytesWritten += copyLength;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
392
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
393 if ( BlockOffset == blockLength )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
394 FlushBlock();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
395 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
396
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
397 return numBytesWritten;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
398 }