NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cram_io.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2012-2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
42 #ifndef _CRAM_IO_H_
43 #define _CRAM_IO_H_
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
49 #define ITF8_MACROS
50 
51 #include <stdint.h>
52 #include <cram/misc.h>
53 
/*
 * Packs two values into a single integer key: 'a' is shifted left by 8 bits
 * and OR-ed with 'b'. No masking is applied, so the result is only a clean
 * two-byte key when 'b' fits in 8 bits.
 * NOTE(review): presumably both arguments are single characters - confirm at
 * the call sites.
 */
61 #define CRAM_KEY(a,b) (((a)<<8)|((b)))
62 
/*
 * Declaration only; the implementation is not part of this header.
 * NOTE(review): the doc comment for this function is missing from this
 * listing. Presumably it reads one ITF8-encoded integer from 'fd' and stores
 * it in *val - confirm this, and the meaning of the int return value, against
 * the implementation.
 */
70 int itf8_decode(cram_fd *fd, int32_t *val);
71 
72 #ifndef ITF8_MACROS
73 
/*
 * Function form of itf8_get. Only declared when ITF8_MACROS is not defined;
 * otherwise the macro of the same name in the #else branch is used instead.
 * NOTE(review): presumably returns the number of bytes consumed from 'cp',
 * like the macro version - confirm against the implementation.
 */
80 int itf8_get(char *cp, int32_t *val_p);
81
/*
 * Function form of itf8_put; same ITF8_MACROS condition as itf8_get above.
 * NOTE(review): presumably returns the number of bytes written to 'cp',
 * like the macro version - confirm against the implementation.
 */
88 int itf8_put(char *cp, int32_t val);
89 
90 #else
91 
92 /*
93  * Macro implementations of the above
94  */
/*
 * itf8_get(c, v): decode one ITF8 integer from the byte buffer 'c' into *v.
 *
 * The value of the first byte selects the encoded length:
 *   < 0x80  1 byte,  7 bits of payload
 *   < 0xc0  2 bytes, 14 bits
 *   < 0xe0  3 bytes, 21 bits
 *   < 0xf0  4 bytes, 28 bits
 *   else    5 bytes, 32 bits (only the low 4 bits of the fifth byte are used)
 *
 * The expression evaluates to the number of bytes consumed (1..5).
 *
 * The first byte is widened to uint32_t before being shifted by 24 or 28
 * bits: shifting the promoted int would move set bits into or past the sign
 * bit, which is undefined behaviour. The smaller shifts cannot overflow an
 * int and are left unchanged.
 *
 * Both 'c' and 'v' are evaluated several times, so neither may have side
 * effects. No bounds checking is done: 'c' must have as many readable bytes
 * as the first byte calls for. 'uc' is a typedef that this header does not
 * define itself (NOTE(review): presumably unsigned char, supplied by
 * cram/misc.h - confirm).
 */
#define itf8_get(c,v)                                                      \
    (((uc)(c)[0] < 0x80)                                                   \
     ? (*(v) = (uc)(c)[0], 1)                                              \
     : (((uc)(c)[0] < 0xc0)                                                \
        ? (*(v) = (((uc)(c)[0] << 8) |                                     \
                    (uc)(c)[1]) & 0x3fff, 2)                               \
        : (((uc)(c)[0] < 0xe0)                                             \
           ? (*(v) = (((uc)(c)[0] << 16) |                                 \
                      ((uc)(c)[1] << 8) |                                  \
                       (uc)(c)[2]) & 0x1fffff, 3)                          \
           : (((uc)(c)[0] < 0xf0)                                          \
              ? (*(v) = (((uint32_t)(uc)(c)[0] << 24) |                    \
                         ((uc)(c)[1] << 16) |                              \
                         ((uc)(c)[2] << 8) |                               \
                          (uc)(c)[3]) & 0x0fffffff, 4)                     \
              : (*(v) = (((uint32_t)(uc)(c)[0] & 0x0f) << 28) |            \
                        ((uc)(c)[1] << 20) |                               \
                        ((uc)(c)[2] << 12) |                               \
                        ((uc)(c)[3] << 4) |                                \
                        ((uc)(c)[4] & 0x0f), 5)))))
96 
/*
 * itf8_put(c, v): encode the integer 'v' into the byte buffer 'c' in ITF8
 * form and evaluate to the number of bytes written (1..5).
 *
 * The shortest encoding whose payload can hold 'v' is chosen: 7, 14, 21 or
 * 28 bits for 1-4 bytes, otherwise 5 bytes. Any value with bits set above
 * bit 27 (including every negative value) takes the 5-byte form, whose last
 * byte carries only the low 4 bits of 'v'.
 *
 * Both arguments are evaluated several times, so neither may have side
 * effects, and 'c' must have room for up to 5 bytes.
 */
#define itf8_put(c,v)                                                   \
    ((!((v) & ~0x7f))                                                   \
     ? ((c)[0] = (v),                                                   \
        1)                                                              \
     : (!((v) & ~0x3fff))                                               \
     ? ((c)[0] = ((v) >> 8) | 0x80,                                     \
        (c)[1] = (v) & 0xff,                                            \
        2)                                                              \
     : (!((v) & ~0x1fffff))                                             \
     ? ((c)[0] = ((v) >> 16) | 0xc0,                                    \
        (c)[1] = ((v) >> 8) & 0xff,                                     \
        (c)[2] = (v) & 0xff,                                            \
        3)                                                              \
     : (!((v) & ~0xfffffff))                                            \
     ? ((c)[0] = ((v) >> 24) | 0xe0,                                    \
        (c)[1] = ((v) >> 16) & 0xff,                                    \
        (c)[2] = ((v) >> 8) & 0xff,                                     \
        (c)[3] = (v) & 0xff,                                            \
        4)                                                              \
     : ((c)[0] = 0xf0 | (((v) >> 28) & 0xff),                           \
        (c)[1] = ((v) >> 20) & 0xff,                                    \
        (c)[2] = ((v) >> 12) & 0xff,                                    \
        (c)[3] = ((v) >> 4) & 0xff,                                     \
        (c)[4] = (v) & 0xf,                                             \
        5))
98 
/*
 * itf8_size(v): number of bytes (1..5) that the ITF8 encoding of 'v'
 * occupies, using the same thresholds as itf8_put. 'v' is evaluated up to
 * four times, so it must not have side effects.
 */
#define itf8_size(v)                        \
    ((!((v) & ~0x7f))      ? 1 :            \
     (!((v) & ~0x3fff))    ? 2 :            \
     (!((v) & ~0x1fffff))  ? 3 :            \
     (!((v) & ~0xfffffff)) ? 4 : 5)
100 
101 #endif
102 
/*
 * Block-level API. These are declarations only; the doc comments that
 * accompany them in the real header are missing from this listing, so the
 * notes below are assumptions to be confirmed against the implementation.
 */

/* NOTE(review): presumably appends 'val' to 'blk' in ITF8 form - confirm,
 * including the meaning of the int return value. */
111 int itf8_put_blk(cram_block *blk, int val);
112
/* NOTE(review): presumably allocates a new block with the given content type
 * and id; the release function is likely cram_free_block - confirm ownership
 * and the failure return value. */
129 cram_block *cram_new_block(enum cram_content_type content_type,
130  int content_id);
131
139
/* NOTE(review): presumably writes block 'b' to 'fd' - confirm return codes. */
146 int cram_write_block(cram_fd *fd, cram_block *b);
147
/* NOTE(review): presumably releases a block and its data - confirm whether
 * a NULL argument is accepted. */
150 void cram_free_block(cram_block *b);
151
/* NOTE(review): presumably inflates 'csize' bytes at 'cdata' with zlib and
 * reports the output length via *size - confirm who frees the returned
 * buffer and what is returned on failure. */
158 char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size);
159
167
/* NOTE(review): takes two (level, strategy) pairs; presumably two candidate
 * compression settings are compared, with 'metrics' recording the outcome -
 * confirm against the implementation. */
181 int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
182  int level, int strat,
183  int level2, int strat2);
184 
188 
189 /* --- Accessor macros for manipulating blocks on a byte by byte basis --- */
190 
191 /* Block size and data pointer.
 *
 * Both expand to a plain member access ((b)->byte and (b)->data), so they
 * are lvalues: other macros in this file assign through them, for example
 * BLOCK_SIZE((b)) += (l) in BLOCK_APPEND. BLOCK_SIZE is the number of bytes
 * currently in use, which is distinct from the allocated capacity held in
 * (b)->alloc. */
192 #define BLOCK_SIZE(b) ((b)->byte)
193 #define BLOCK_DATA(b) ((b)->data)
194 
195 /* Returns the address one past the end of the block's used bytes, i.e.
 * &data[byte]. This is the position where the next appended byte goes; it is
 * not the end of the allocation, and it is only writable after the block
 * has been grown (see BLOCK_GROW). */
196 #define BLOCK_END(b) (&(b)->data[(b)->byte])
197 
/*
 * Request block to be more than 'l' bytes long.
 *
 * Grows (b)->alloc until it is strictly greater than 'l', reallocating
 * (b)->data each time. An empty block starts at 1024 bytes; otherwise the
 * capacity grows by roughly 1.5x per step. Nothing happens when the block is
 * already large enough.
 *
 * Growth uses integer arithmetic with a "+ 1" so that every step makes
 * progress. The previous floating point form (alloc * 1.5) truncated back to
 * 1 when alloc was 1, so the loop never terminated.
 *
 * 'b' and 'l' are evaluated more than once and must not have side effects.
 *
 * NOTE(review): the realloc result is stored without a NULL check, as in the
 * original. This statement macro has no way to report failure, so on
 * out-of-memory (b)->data becomes NULL and the old buffer is leaked.
 */
#define BLOCK_RESIZE(b,l)                                               \
    do {                                                                \
        while ((b)->alloc <= (l)) {                                     \
            (b)->alloc = (b)->alloc                                     \
                ? (b)->alloc + (b)->alloc / 2 + 1                       \
                : 1024;                                                 \
            (b)->data = realloc((b)->data, (b)->alloc);                 \
        }                                                               \
    } while (0)
206 
207 /* Ensure the block can hold at least another 'l' bytes beyond the
 * BLOCK_SIZE bytes already in use. Delegates to BLOCK_RESIZE with a target
 * of byte + l; BLOCK_RESIZE loops while alloc <= target, so on completion
 * the capacity is strictly greater than byte + l. */
208 #define BLOCK_GROW(b,l) BLOCK_RESIZE((b), BLOCK_SIZE((b)) + (l))
209 
/*
 * Append 'l' bytes starting at 's' to the end of block 'b'.
 *
 * The block is first grown so the extra bytes fit, the bytes are copied in
 * after the current contents, and the used size is advanced by 'l'. No
 * terminator is added. 'b', 's' and 'l' are each evaluated more than once,
 * so none of them may have side effects.
 */
#define BLOCK_APPEND(b,s,l)                                             \
    do {                                                                \
        BLOCK_GROW((b), (l));                                           \
        memcpy(BLOCK_DATA((b)) + BLOCK_SIZE((b)), (s), (l));            \
        BLOCK_SIZE((b)) += (l);                                         \
    } while (0)
217 
/*
 * Append the single character 'c' to block 'b': make room for one more
 * byte, store 'c' at the current end of the data and advance the used size
 * by one. 'b' is evaluated more than once and must not have side effects.
 */
#define BLOCK_APPEND_CHAR(b,c)                                          \
    do {                                                                \
        BLOCK_GROW((b), 1);                                             \
        *BLOCK_END((b)) = (c);                                          \
        ++BLOCK_SIZE((b));                                              \
    } while (0)
224 
/*
 * Append via sprintf with 1 arg.
 *
 * Formats 'a1' with 'fmt' into the caller-supplied scratch buffer 'buf',
 * then appends the formatted bytes (without the trailing NUL) to block 'b'.
 *
 * The length is held in a deliberately unusual local name so that it cannot
 * capture a caller argument. The previous local was called 'l', so passing
 * an expression that mentions a variable named 'l' made the macro read its
 * own uninitialised local. A negative sprintf return (encoding error) is now
 * ignored instead of being used as a copy length.
 *
 * NOTE(review): sprintf is unbounded; 'buf' must be large enough for the
 * formatted text, since the macro has no buffer size to check against.
 */
#define BLOCK_APPENDF_1(b,buf,fmt, a1)                                  \
    do {                                                                \
        int block_appendf_len_ = sprintf((buf), (fmt), (a1));           \
        if (block_appendf_len_ >= 0) {                                  \
            BLOCK_APPEND((b), (buf), block_appendf_len_);               \
        }                                                               \
    } while (0)
231 
/*
 * Append via sprintf with 2 args.
 *
 * Formats 'a1' and 'a2' with 'fmt' into the caller-supplied scratch buffer
 * 'buf', then appends the formatted bytes (without the trailing NUL) to
 * block 'b'.
 *
 * The length is held in a deliberately unusual local name so that it cannot
 * capture a caller argument. The previous local was called 'l', so passing
 * an expression that mentions a variable named 'l' made the macro read its
 * own uninitialised local. A negative sprintf return (encoding error) is now
 * ignored instead of being used as a copy length.
 *
 * NOTE(review): sprintf is unbounded; 'buf' must be large enough for the
 * formatted text, since the macro has no buffer size to check against.
 */
#define BLOCK_APPENDF_2(b,buf,fmt, a1,a2)                               \
    do {                                                                \
        int block_appendf_len_ = sprintf((buf), (fmt), (a1), (a2));     \
        if (block_appendf_len_ >= 0) {                                  \
            BLOCK_APPEND((b), (buf), block_appendf_len_);               \
        }                                                               \
    } while (0)
238 
/*
 * Set both the compressed and uncompressed size fields of block 'b' to the
 * number of bytes currently in use (BLOCK_SIZE).
 *
 * The expansion is wrapped in parentheses so that it behaves as a single
 * expression wherever it is used. Without them, surrounding operators bound
 * to the right-hand side of the assignment instead of to its result.
 * 'b' is evaluated three times and must not have side effects.
 */
#define BLOCK_UPLEN(b) \
    ((b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)))
241 
/*
 * Reference-sequence and container API. These are declarations only; their
 * doc comments are missing from this listing, so the notes below are
 * assumptions to be confirmed against the implementation.
 */

/* NOTE(review): presumably loads the reference file named 'fn' for use with
 * 'fd' - confirm return codes. */
253 int cram_load_reference(cram_fd *fd, char *fn);
254
/* NOTE(review): presumably maps the references held in 'r' to ids taken
 * from the SAM header 'bfd' - confirm. */
264 int refs2id(refs_t *r, SAM_hdr *bfd);
265
/* NOTE(review): presumably releases 'r' and everything it owns - confirm. */
266 void refs_free(refs_t *r);
267
/* NOTE(review): presumably returns the sequence for reference 'id' over
 * start..end; whether the coordinates are 0- or 1-based, whether 'end' is
 * inclusive, and who owns the returned pointer all need confirming. */
281 char *cram_get_ref(cram_fd *fd, int id, int start, int end);
/* NOTE(review): the names suggest a per-reference use count that is raised
 * and lowered by this pair - confirm the semantics and what happens when the
 * count reaches zero. */
282 void cram_ref_incr(refs_t *r, int id);
283 void cram_ref_decr(refs_t *r, int id);
/* NOTE(review): presumably allocates a container sized for 'nrec' records
 * and 'nslice' slices - confirm ownership and the failure return value. */
296 cram_container *cram_new_container(int nrec, int nslice);
298 
306 
314 
327 
328 
341 
344 
345 
353 
/* Slice allocation API; declarations only. NOTE(review): doc comments are
 * missing from this listing - presumably cram_free_slice releases a slice
 * obtained from cram_new_slice; confirm whether NULL is accepted. */
355 void cram_free_slice(cram_slice *s);
356
/* NOTE(review): presumably allocates a slice of the given content type with
 * room for 'nrecs' records - confirm the failure return value. */
364 cram_slice *cram_new_slice(enum cram_content_type type, int nrecs);
365 
379 
380 
381 
394 
402 
405 
406 
422 
/* NOTE(review): presumably writes the SAM header 'hdr' to 'fd'; the doc
 * comment is missing from this listing - confirm return codes. */
429 int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr);
430
431
/* NOTE(review): presumably opens 'filename' as a CRAM file; the accepted
 * 'mode' strings and the failure return value need confirming against the
 * implementation. */
445 cram_fd *cram_open(const char *filename, const char *mode);
446
/* Same as cram_open in signature, but additionally takes an existing
 * cram_FILE handle. NOTE(review): presumably wraps the already-open 'fp'
 * rather than opening 'filename' itself - confirm, including whether 'fp'
 * is closed by cram_close. */
456 cram_fd *cram_dopen(cram_FILE *fp, const char *filename, const char *mode);
457
/* NOTE(review): presumably closes 'fd' and releases it - confirm return
 * codes and whether pending output is flushed first. */
464 int cram_close(cram_fd *fd);
465 
466 /*
467  * Seek within a CRAM file.
468  *
469  * Returns 0 on success
470  * -1 on failure
471  */
/* NOTE(review): 'offset' and 'whence' presumably follow the fseek/lseek
 * conventions (SEEK_SET, SEEK_CUR, SEEK_END) - confirm. off_t is not
 * provided by <stdint.h>; this header relies on cram/misc.h or on the
 * including file to have pulled in <sys/types.h>. */
472 int cram_seek(cram_fd *fd, off_t offset, int whence);
473
474 /*
475  * Flushes a CRAM file.
476  * Useful for when writing to stdout without wishing to close the stream.
477  *
478  * Returns 0 on success
479  * -1 on failure
480  */
481 int cram_flush(cram_fd *fd);
482 
/* NOTE(review): presumably reports whether the end of the CRAM stream has
 * been reached; the doc comment is missing from this listing, so the exact
 * return values need confirming. */
490 int cram_eof(cram_fd *fd);
491
/* Variadic option setter: the arguments after 'opt' depend on which option
 * is being set. NOTE(review): the expected argument type for each
 * enum cram_option value is not visible here - confirm against the enum's
 * documentation. */
501 int cram_set_option(cram_fd *fd, enum cram_option opt, ...);
502
/* va_list counterpart of cram_set_option, for callers that already hold a
 * va_list. va_list comes from <stdarg.h>, which this header does not include
 * directly; it must arrive via cram/misc.h or the including file. */
512 int cram_set_voption(cram_fd *fd, enum cram_option opt, va_list args);
513
/* NOTE(review): presumably attaches the SAM header 'hdr' to 'fd' - confirm
 * whether 'fd' takes ownership of, or copies, the header. */
525 int cram_set_header(cram_fd *fd, SAM_hdr *hdr);
526 
527 
528 #ifdef __cplusplus
529 }
530 #endif
531 
532 #endif /* _CRAM_IO_H_ */