NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
output_gzip.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
29 #include <nvbio/basic/numbers.h>
30 #include <nvbio/basic/types.h>
31 
32 #include <stdio.h>
33 #include <stdarg.h>
34 
35 namespace nvbio {
36 namespace io {
37 
39 {
40  // initialize the gzip header
41  // note that we don't actually care about most of these fields
    // (gzh is passed to deflateSetHeader() elsewhere in this file, so these
    // values are what zlib writes into the gzip member header)
42  gzh.text = 0;
43  gzh.time = 0;
44  gzh.xflags = 0;
45  gzh.extra = Z_NULL; // no extra field attached here
46  gzh.extra_len = 0;
47  gzh.os = 255; // meaning unknown OS
48  gzh.name = Z_NULL; // no file name stored in the header
49  gzh.comment = Z_NULL; // no comment stored in the header
50  gzh.hcrc = 0; // do not request a header CRC
51 }
52 
54 {
    // Sets up `stream` for a new gzip-format deflate run whose output cursor
    // starts at output.get_base_ptr(), then registers `gzh` as the gzip header.
55  NVBIO_VAR_UNUSED int ret; // only read by the assertions below
56 
57  // initialize the zlib stream
    // NOTE(review): zlib expects zalloc/zfree/opaque to be set before
    // deflateInit2; those assignments are not visible here (the listing's own
    // numbering jumps from 57 to 61) - confirm against the full source.
61 
62  stream.next_in = NULL; // no input is supplied at this point
63  stream.avail_in = 0;
64 
65  stream.next_out = (Bytef *) output.get_base_ptr();
    // NOTE(review): stream.avail_out is not assigned in the visible lines (the
    // listing's numbering skips 66) - confirm against the full source.
67 
68  ret = deflateInit2(&stream, // stream object
69  Z_DEFAULT_COMPRESSION, // compression level (0-9, default = 6)
70  Z_DEFLATED, // compression method (no other choice...)
71  15 + 16, // log2 of compression window size + 16 to switch zlib to gzip format
72  9, // memlevel (1..9, default 8: 1 uses less memory but is slower, 9 uses more memory and is faster)
73  Z_DEFAULT_STRATEGY); // compression strategy, may affect compression ratio and/or performance
74  // xxxnsubtil: we might want to shmoo the compression strategy
75  NVBIO_CUDA_ASSERT(ret == Z_OK);
76 
77  // set our custom gzip header
    // (must happen after deflateInit2 and before the first deflate() call for
    // zlib to use it)
78  ret = deflateSetHeader(&stream, &gzh);
79  NVBIO_CUDA_ASSERT(ret == Z_OK);
80 
    // mirror zlib's running output byte count into the buffer position
    // (presumably 0 right after initialization - TODO confirm)
81  output.pos = stream.total_out;
82 }
83 
84 // compress all of input into the current position of the output buffer
85 // note that this assumes output has enough space!
    // the input range handed to zlib is input.get_base_ptr() for input.get_pos()
    // bytes; on return output.pos is set to zlib's total output byte count
87 {
88  NVBIO_VAR_UNUSED int status; // only read by the assertion below
89 
90  stream.next_in = (Bytef *) input.get_base_ptr();
91  stream.avail_in = input.get_pos();
92 
93  stream.next_out = (Bytef *) output.get_cur_ptr();
    // NOTE(review): stream.avail_out is not assigned in the visible lines (the
    // listing's numbering skips 94) - confirm against the full source.
95 
    // Z_NO_FLUSH lets zlib decide how much data to accumulate before emitting
    // output, so some compressed bytes may only appear on a later call
96  status = deflate(&stream, Z_NO_FLUSH);
97  NVBIO_CUDA_ASSERT(status == Z_OK);
99 
    // total_out counts bytes produced since the stream was initialized
100  output.pos = stream.total_out;
101 }
102 
103 // finish writing this block
    // flushes whatever zlib still holds into output at its current position,
    // releases the deflate state, and leaves output.pos at zlib's total output
    // byte count
105 {
106  NVBIO_VAR_UNUSED int ret; // only read by the assertions below
107 
108  stream.next_in = NULL; // no further input is supplied
    // NOTE(review): stream.avail_in is not assigned in the visible lines (the
    // listing's numbering skips 109) - confirm against the full source.
110 
111  stream.next_out = (Bytef *) output.get_cur_ptr();
    // NOTE(review): stream.avail_out is not assigned in the visible lines (the
    // listing's numbering skips 112-113) - confirm against the full source.
114 
    // with Z_FINISH, deflate returns Z_STREAM_END only when all pending output
    // fit into the space provided
115  ret = deflate(&stream, Z_FINISH);
    // check the value actually returned above: this block declares `ret`, and
    // no `status` variable exists in this scope
116  NVBIO_CUDA_ASSERT(ret == Z_STREAM_END);
117 
118  output.pos = stream.total_out;
119 
120  ret = deflateEnd(&stream);
121  NVBIO_CUDA_ASSERT(ret == Z_OK);
122 
123  output.pos = stream.total_out;
124 }
125 
126 
128  : GzipCompressor()
129 {
130  // set up our gzip extra data field
131  // these values are defined in the samtools spec (http://samtools.sourceforge.net/SAMv1.pdf)
132  extra_data.SI1 = 66; // ASCII 'B'
133  extra_data.SI2 = 67; // ASCII 'C'
134  extra_data.SLEN = 2; // the subfield payload is the 2-byte BSIZE value
135 
136  // we set the actual BAM-specific BSIZE field to 0, since we don't know
137  // ahead of time how big the output block will be; this will be updated
138  // after the entire block has been compressed
139  extra_data.BSIZE = 0;
140 
    // attach the subfield to the gzip header as its extra field; gzh stores only
    // the pointer, so extra_data must stay valid while gzh is in use
    // (extra_data is presumably a member of this object - TODO confirm)
141  gzh.extra = (Bytef *) &extra_data;
142  gzh.extra_len = sizeof(extra_data);
143 }
144 
146 {
147  // finish up the gzip block
    // NOTE(review): no statement follows this comment in the visible lines (the
    // listing's numbering skips 148); the call that finishes the block is
    // presumably there - confirm against the full source.
149 
    // BSIZE is a 16-bit field, so the value written below must fit in it
150  NVBIO_CUDA_ASSERT((output.get_pos() - 1) < 0xffff);
151  // poke the BAM-specific BSIZE value in the header
    // byte offset 16 = 10-byte fixed gzip header + 2-byte XLEN + SI1 + SI2 +
    // 2-byte SLEN; the value written is the output position minus one
    // NOTE(review): the cast binds to output.get_pos() alone, so the position is
    // narrowed to uint16 before the subtraction; (uint16)(output.get_pos() - 1)
    // would state the intent more directly - confirm poke_uint16's parameter type.
152  output.poke_uint16(16, (uint16)output.get_pos() - 1);
153 }
154 
155 } // namespace io
156 } // namespace nvbio