NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sam.h
Go to the documentation of this file.
1 #ifndef BAM_H
2 #define BAM_H
3 
4 #include <stdint.h>
5 #include "hts.h"
6 
7 /**********************
8  *** SAM/BAM header ***
9  **********************/
10 
21 typedef struct {
22  int32_t n_targets, ignore_sam_err;
26  char **target_name;
27  char *text;
28  void *sdict;
29 } bam_hdr_t;
30 
31 /****************************
32  *** CIGAR related macros ***
33  ****************************/
34 
35 #define BAM_CMATCH 0
36 #define BAM_CINS 1
37 #define BAM_CDEL 2
38 #define BAM_CREF_SKIP 3
39 #define BAM_CSOFT_CLIP 4
40 #define BAM_CHARD_CLIP 5
41 #define BAM_CPAD 6
42 #define BAM_CEQUAL 7
43 #define BAM_CDIFF 8
44 #define BAM_CBACK 9
45 
46 #define BAM_CIGAR_STR "MIDNSHP=XB"
47 #define BAM_CIGAR_SHIFT 4
48 #define BAM_CIGAR_MASK 0xf
49 #define BAM_CIGAR_TYPE 0x3C1A7
50 
51 #define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
52 #define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
53 #define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
54 #define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
55 
56 /* bam_cigar_type returns a bit flag with:
57  * bit 1 set if the cigar operation consumes the query
58  * bit 2 set if the cigar operation consumes the reference
59  *
60  * For reference, the unobfuscated truth table for this function is:
61  * BAM_CIGAR_TYPE QUERY REFERENCE
62  * --------------------------------
63  * BAM_CMATCH 1 1
64  * BAM_CINS 1 0
65  * BAM_CDEL 0 1
66  * BAM_CREF_SKIP 0 1
67  * BAM_CSOFT_CLIP 1 0
68  * BAM_CHARD_CLIP 0 0
69  * BAM_CPAD 0 0
70  * BAM_CEQUAL 1 1
71  * BAM_CDIFF 1 1
72  * BAM_CBACK 0 0
73  * --------------------------------
74  */
75 #define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference
76 
78 #define BAM_FPAIRED 1
79 
80 #define BAM_FPROPER_PAIR 2
81 
82 #define BAM_FUNMAP 4
83 
84 #define BAM_FMUNMAP 8
85 
86 #define BAM_FREVERSE 16
87 
88 #define BAM_FMREVERSE 32
89 
90 #define BAM_FREAD1 64
91 
92 #define BAM_FREAD2 128
93 
94 #define BAM_FSECONDARY 256
95 
96 #define BAM_FQCFAIL 512
97 
98 #define BAM_FDUP 1024
99 
100 #define BAM_FSUPPLEMENTARY 2048
101 
102 /*************************
103  *** Alignment records ***
104  *************************/
105 
119 typedef struct {
122  uint32_t bin:16, qual:8, l_qname:8;
123  uint32_t flag:16, n_cigar:16;
128 } bam1_core_t;
129 
145 typedef struct {
147  int l_data, m_data;
149 #ifndef BAM_NO_ID
151 #endif
152 } bam1_t;
153 
159 #define bam_is_rev(b) (((b)->core.flag&BAM_FREVERSE) != 0)
160 
165 #define bam_is_mrev(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
166 
171 #define bam_get_qname(b) ((char*)(b)->data)
172 
181 #define bam_get_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
182 
192 #define bam_get_seq(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname)
193 
198 #define bam_get_qual(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
199 
204 #define bam_get_aux(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1) + (b)->core.l_qseq)
205 
210 #define bam_get_l_aux(b) ((b)->l_data - ((b)->core.n_cigar<<2) - (b)->core.l_qname - (b)->core.l_qseq - (((b)->core.l_qseq + 1)>>1))
211 
217 #define bam_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)
218 
219 /**************************
220  *** Exported functions ***
221  **************************/
222 
223 #ifdef __cplusplus
224 extern "C" {
225 #endif
226 
227  /***************
228  *** BAM I/O ***
229  ***************/
230 
231  bam_hdr_t *bam_hdr_init(void);
233  int bam_hdr_write(BGZF *fp, const bam_hdr_t *h);
234  void bam_hdr_destroy(bam_hdr_t *h);
235  int bam_name2id(bam_hdr_t *h, const char *ref);
236  bam_hdr_t* bam_hdr_dup(const bam_hdr_t *h0);
237 
238  bam1_t *bam_init1(void);
239  void bam_destroy1(bam1_t *b);
240  int bam_read1(BGZF *fp, bam1_t *b);
241  int bam_write1(BGZF *fp, const bam1_t *b);
242  bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc);
243  bam1_t *bam_dup1(const bam1_t *bsrc);
244 
245  int bam_cigar2qlen(int n_cigar, const uint32_t *cigar);
246  int bam_cigar2rlen(int n_cigar, const uint32_t *cigar);
247 
259  int32_t bam_endpos(const bam1_t *b);
260 
261  int bam_str2flag(const char *str);
262  char *bam_flag2str(int flag);
264  /*************************
265  *** BAM/CRAM indexing ***
266  *************************/
267 
268  // These BAM iterator functions work only on BAM files. To work with either
269  // BAM or CRAM files use the sam_index_load() & sam_itr_*() functions.
270  #define bam_itr_destroy(iter) hts_itr_destroy(iter)
271  #define bam_itr_queryi(idx, tid, beg, end) sam_itr_queryi(idx, tid, beg, end)
272  #define bam_itr_querys(idx, hdr, region) sam_itr_querys(idx, hdr, region)
273  #define bam_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), 0)
274 
275  // Load .csi or .bai BAM index file.
276  #define bam_index_load(fn) hts_idx_load((fn), HTS_FMT_BAI)
277 
278  int bam_index_build(const char *fn, int min_shift);
279 
280  // Load BAM (.csi or .bai) or CRAM (.crai) index file.
281  hts_idx_t *sam_index_load(htsFile *fp, const char *fn);
282 
283  #define sam_itr_destroy(iter) hts_itr_destroy(iter)
284  hts_itr_t *sam_itr_queryi(const hts_idx_t *idx, int tid, int beg, int end);
285  hts_itr_t *sam_itr_querys(const hts_idx_t *idx, bam_hdr_t *hdr, const char *region);
286  #define sam_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), (htsfp))
287 
288  /***************
289  *** SAM I/O ***
290  ***************/
291 
292  #define sam_open(fn, mode) (hts_open((fn), (mode)))
293  #define sam_close(fp) hts_close(fp)
294 
295  int sam_open_mode(char *mode, const char *fn, const char *format);
296 
297  typedef htsFile samFile;
298  bam_hdr_t *sam_hdr_parse(int l_text, const char *text);
299  bam_hdr_t *sam_hdr_read(samFile *fp);
300  int sam_hdr_write(samFile *fp, const bam_hdr_t *h);
301 
302  int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b);
303  int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str);
304  int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);
305  int sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b);
306 
307  /*************************************
308  *** Manipulating auxiliary fields ***
309  *************************************/
310 
311  uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
312  int32_t bam_aux2i(const uint8_t *s);
313  double bam_aux2f(const uint8_t *s);
314  char bam_aux2A(const uint8_t *s);
315  char *bam_aux2Z(const uint8_t *s);
316 
317  void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
318  int bam_aux_del(bam1_t *b, uint8_t *s);
319 
320 #ifdef __cplusplus
321 }
322 #endif
323 
324 /**************************
325  *** Pileup and Mpileup ***
326  **************************/
327 
328 #if !defined(BAM_NO_PILEUP)
329 
348 typedef struct {
351  int indel, level;
352  uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
353 } bam_pileup1_t;
354 
355 typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
356 
357 struct __bam_plp_t;
358 typedef struct __bam_plp_t *bam_plp_t;
359 
360 struct __bam_mplp_t;
361 typedef struct __bam_mplp_t *bam_mplp_t;
362 
363 #ifdef __cplusplus
364 extern "C" {
365 #endif
366 
373  bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
375  int bam_plp_push(bam_plp_t iter, const bam1_t *b);
376  const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
377  const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
378  void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
379  void bam_plp_reset(bam_plp_t iter);
380 
381  bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
390  void bam_mplp_init_overlaps(bam_mplp_t iter);
391  void bam_mplp_destroy(bam_mplp_t iter);
392  void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
393  int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
394 
395 #ifdef __cplusplus
396 }
397 #endif
398 
399 #endif // ~!defined(BAM_NO_PILEUP)
400 
401 #endif