NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BGZF.h
Go to the documentation of this file.
1 // ***************************************************************************
2 // BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 8 December 2009 (DB)
7 // ---------------------------------------------------------------------------
8 // BGZF routines were adapted from the bgzf.c code developed at the Broad
9 // Institute.
10 // ---------------------------------------------------------------------------
11 // Provides the basic functionality for reading & writing BGZF files
12 // ***************************************************************************
13 
14 #ifndef BGZF_H
15 #define BGZF_H
16 
17 #include <nvbio/basic/atomics.h>
18 
19 // 'C' includes
20 #include <cstdio>
21 #include <cstdlib>
22 #include <cstring>
23 
24 // C++ includes
25 #include <string>
26 
27 // zlib includes
28 #include <zlib/zlib.h>
29 
// Platform-specific type definitions.
// MSVC builds spell out the fixed-width integer aliases by hand (presumably
// for toolchains that predate <stdint.h>); every other compiler takes them
// from the standard header.
#ifdef _MSC_VER
    // int8_t has to be 'signed char': plain 'char' is a distinct type whose
    // signedness depends on compiler flags, and a 'char' typedef clashes with
    // the <stdint.h> definition whenever that header is pulled in as well.
    typedef signed char        int8_t;
    typedef unsigned char      uint8_t;
    typedef short              int16_t;
    typedef unsigned short     uint16_t;
    typedef int                int32_t;
    typedef unsigned int       uint32_t;
    typedef long long          int64_t;
    typedef unsigned long long uint64_t;
#else
    #include <stdint.h>
#endif
43 
44 namespace BamTools {
45 
// zlib / gzip constants
const int GZIP_ID1            = 0x1f;      // 31,  compared with header byte 0
const int GZIP_ID2            = 0x8b;      // 139, compared with header byte 1
const int CM_DEFLATE          = 8;
const int FLG_FEXTRA          = 0x04;      // 4,   bit tested in header byte 3
const int OS_UNKNOWN          = 0xff;      // 255
const int BGZF_XLEN           = 6;
const int BGZF_ID1            = 66;        // ASCII 'B', compared with header byte 12
const int BGZF_ID2            = 67;        // ASCII 'C', compared with header byte 13
const int BGZF_LEN            = 2;
const int GZIP_WINDOW_BITS    = -15;
const int Z_DEFAULT_MEM_LEVEL = 8;

// BGZF constants
const int BLOCK_HEADER_LENGTH = 18;
const int BLOCK_FOOTER_LENGTH = 8;
const int MAX_BLOCK_SIZE      = 64 * 1024; // 65536
const int DEFAULT_BLOCK_SIZE  = 64 * 1024; // 65536
64 
struct BgzfThread;

// Holds the state of one BGZF file and declares the routines that read,
// write, compress and decompress its blocks.
struct BgzfData {

    // ---- data members ----
    unsigned int UncompressedBlockSize;
    unsigned int CompressedBlockSize;
    unsigned int BlockLength;
    unsigned int BlockOffset;
    bool         IsOpen;
    FILE*        Stream;

    // NOTE(review): the volatile qualifiers suggest these two are touched
    // from more than one thread - confirm against the implementation file.
    volatile unsigned int CurrentBlockSize;
    volatile int*         BlockLengths;

    // ---- construction / destruction ----
    BgzfData(const uint32_t threads = uint32_t(-1));
    ~BgzfData();

    // ---- file-level operations ----

    // closes the BGZF file
    void Close();
    // opens the BGZF file; mode is "rb" for reading or "wb" for writing
    void Open(const std::string& filename, const char* mode);
    // reads BGZF data into a byte buffer
    int Read(char* data, const unsigned int dataLength);
    // reads a BGZF block
    int ReadBlock();
    // seeks to a position in the BAM file
    bool Seek(int64_t position);
    // returns the current position in the BAM file
    int64_t Tell();
    // writes the supplied data into the BGZF buffer
    unsigned int Write(const char* data, const unsigned int dataLen);

    // ---- byte-level helpers ----

    // checks a BGZF block header
    static inline bool CheckBlockHeader(char* header);
    // packs an unsigned integer into the specified buffer
    static inline void PackUnsignedInt(char* buffer, unsigned int value);
    // packs an unsigned short into the specified buffer
    static inline void PackUnsignedShort(char* buffer, unsigned short value);
    // unpacks a buffer into a signed int
    static inline int UnpackSignedInt(char* buffer);
    // unpacks a buffer into an unsigned int
    static inline unsigned int UnpackUnsignedInt(char* buffer);
    // unpacks a buffer into an unsigned short
    static inline unsigned short UnpackUnsignedShort(char* buffer);

    // ---- compression ----

    // compresses the given block
    int DeflateBlock(int32_t id, const unsigned int blockSize);
    // compresses the current block
    int DeflateBlocks();
    // flushes the data in the BGZF block
    void FlushBlocks();
    // de-compresses the current block
    int InflateBlock(const int& blockLength);
};
130 
131 // -------------------------------------------------------------
132 
133 inline
135 
136  return (header[0] == GZIP_ID1 &&
137  header[1] == (char)GZIP_ID2 &&
138  header[2] == Z_DEFLATED &&
139  (header[3] & FLG_FEXTRA) != 0 &&
140  BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
141  header[12] == BGZF_ID1 &&
142  header[13] == BGZF_ID2 &&
143  BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
144 }
145 
146 // packs an unsigned integer into the specified buffer
147 inline
148 void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
149  buffer[0] = (char)value;
150  buffer[1] = (char)(value >> 8);
151  buffer[2] = (char)(value >> 16);
152  buffer[3] = (char)(value >> 24);
153 }
154 
155 // packs an unsigned short into the specified buffer
156 inline
157 void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
158  buffer[0] = (char)value;
159  buffer[1] = (char)(value >> 8);
160 }
161 
162 // unpacks a buffer into a signed int
163 inline
164 signed int BgzfData::UnpackSignedInt(char* buffer) {
165  union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
166  un.value = 0;
167  un.valueBuffer[0] = buffer[0];
168  un.valueBuffer[1] = buffer[1];
169  un.valueBuffer[2] = buffer[2];
170  un.valueBuffer[3] = buffer[3];
171  return un.value;
172 }
173 
174 // unpacks a buffer into an unsigned int
175 inline
176 unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
177  union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
178  un.value = 0;
179  un.valueBuffer[0] = buffer[0];
180  un.valueBuffer[1] = buffer[1];
181  un.valueBuffer[2] = buffer[2];
182  un.valueBuffer[3] = buffer[3];
183  return un.value;
184 }
185 
186 // unpacks a buffer into an unsigned short
187 inline
188 unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
189  union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)];} un;
190  un.value = 0;
191  un.valueBuffer[0] = buffer[0];
192  un.valueBuffer[1] = buffer[1];
193  return un.value;
194 }
195 
196 } // namespace BamTools
197 
198 #endif // BGZF_H