NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sam_header.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2013-2014 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
43 /*
44  * TODO.
45  *
46  * - Sort order (parse to struct, enum type, updating funcs)
47  * - Removal of lines.
48  * - Updating of lines
49  */
50 
51 #ifndef _SAM_HDR_H_
52 #define _SAM_HDR_H_
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58 #ifdef HAVE_CONFIG_H
59 #include "io_lib_config.h"
60 #endif
61 
62 #include <stdarg.h>
63 
64 #include "cram/string_alloc.h"
65 #include "cram/pooled_alloc.h"
66 
67 #include "htslib/khash.h"
68 #include "htslib/kstring.h"
69 
/*
 * Small helpers for kstring_t-style objects, i.e. any struct exposing the
 * members l, m and s that the macros below touch.
 */

// Static initialiser for structure assignment, e.g.
//     kstring_t s = KS_INITIALIZER;
#define KS_INITIALIZER {0,0,0}

// In-place reset for an object initialised elsewhere, e.g. KS_INIT(&x->str);
// Takes a POINTER to the string object. Sets l and m to 0 and s to NULL; it
// does not free any buffer s may already own. The argument is evaluated
// three times, so do not pass an expression with side effects.
#define KS_INIT(ks) ((ks)->l = 0, (ks)->m = 0, (ks)->s = NULL)

// Frees the string subfield only; the object 'ks' points at is left alone
// (assumed static or owned elsewhere) and its s member is NOT reset, so
// follow with KS_INIT() before reuse. free(NULL) is a no-op, so no guard is
// needed and the argument is evaluated exactly once.
#define KS_FREE(ks) do { free((ks)->s); } while(0)
78 
79 /*
80  * Proposed new SAM header parsing
81 
82 1 @SQ ID:foo LN:100
83 2 @SQ ID:bar LN:200
84 3 @SQ ID:ram LN:300 UR:xyz
85 4 @RG ID:r ...
86 5 @RG ID:s ...
87 
88 Hash table keyed on the 2-char @type codes, with one entry per distinct code.
89 If a code occurs on several lines, those lines form a circular linked list. I.e. hash keys = {RG, SQ}.
90 
91 HASH("SQ")--\
92             |
93     (3) <-> 1 <-> 2 <-> 3 <-> (1)
94 
95 HASH("RG")--\
96             |
97     (5) <-> 4 <-> 5 <-> (4)
98 
99 Items stored in the hash values also form their own linked lists:
100 Ie SQ->ID(foo)->LN(100)
101  SQ->ID(bar)->LN(200)
102  SQ->ID(ram)->LN(300)->UR(xyz)
103  RG->ID(r)
104  */
105 
/*
 * One tag of a parsed header line. Per the overview comment in this file,
 * the tags of a line (e.g. ID(foo) -> LN(100)) are chained into a list.
 *
 * NOTE(review): the listing jumps from original line 113 to 115, so a member
 * (presumably the list link) appears to be missing from this view; confirm
 * against the real header before relying on this layout.
 */
113 typedef struct SAM_hdr_tag_s {
115  char *str; // tag text; presumably "KEY:value" -- TODO confirm
116  int len; // length that goes with str; presumably in bytes -- TODO confirm
117 } SAM_hdr_tag;
118 
/*
 * One header line of a given 2-character type (e.g. one @SQ line). Lines
 * sharing a type are linked into a circular list, and each line points at
 * the first entry of its own tag list (see the overview comment above).
 *
 * Note that the typedef name (SAM_hdr_type) differs from the struct tag
 * (SAM_hdr_item_s).
 *
 * NOTE(review): the listing skips original line 136, so a member (possibly a
 * back link, given the "<->" arrows in the overview) may be missing here.
 */
134 typedef struct SAM_hdr_item_s {
135  struct SAM_hdr_item_s *next; // circular
137  SAM_hdr_tag *tag; // first tag
138  int order; // 0 upwards
139 } SAM_hdr_type;
140 
/*
 * Per-reference record; presumably one per @SQ line (SAM_hdr groups its
 * reference bookkeeping under "@SQ lines / references").
 *
 * NOTE(review): the listing skips original lines 144-146, so further members
 * are likely missing from this view.
 */
142 typedef struct {
143  char *name; // presumably the reference name -- TODO confirm
147 } SAM_SQ;
148 
/*
 * Per-read-group record; presumably one per @RG line (SAM_hdr groups its
 * read-group bookkeeping under "@RG lines / read-groups").
 *
 * NOTE(review): the listing skips original lines 152-153, so members may be
 * missing from this view.
 */
150 typedef struct {
151  char *name; // presumably the read-group ID string -- TODO confirm
154  int name_len; // length of name; presumably excluding any terminator -- TODO confirm
155  int id; // numerical ID
156 } SAM_RG;
157 
/*
 * Per-program record; presumably one per @PG line (SAM_hdr groups its
 * program bookkeeping under "@PG lines / programs").
 *
 * NOTE(review): the listing skips original lines 161-162, so members may be
 * missing from this view.
 */
159 typedef struct {
160  char *name; // presumably the program ID string -- TODO confirm
163  int name_len; // length of name; presumably excluding any terminator -- TODO confirm
164  int id; // numerical ID
165  int prev_id; // -1 if none
166 } SAM_PG;
167 
/*
 * Instantiates the khash map type "m_s2i": C-string keys mapped to int
 * values. SAM_hdr uses it for ref_hash, rg_hash and pg_hash.
 *
 * NOTE(review): SAM_hdr also uses khash_t(sam_hdr), but no instantiation of
 * that map is visible here (the listing skips original line 168).
 */
169 KHASH_MAP_INIT_STR(m_s2i, int)
170 
/*
 * Top-level parsed SAM header: a hash of header lines plus per-category
 * counters and name->int lookup maps for @SQ, @RG and @PG lines.
 *
 * NOTE(review): the listing skips original lines 184, 186-188, 192, 197 and
 * 203-204, so several members (presumably the header text, allocation pools
 * and the SAM_SQ/SAM_RG/SAM_PG arrays) are missing from this view. Do not
 * treat this as the complete layout.
 */
183 typedef struct {
185  khash_t(sam_hdr) *h; // header lines; presumably keyed by 2-char type (see overview) -- TODO confirm
189 
190  // @SQ lines / references
191  int nref; // presumably the number of references -- TODO confirm
193  khash_t(m_s2i) *ref_hash; // string -> int map; presumably reference name -> index
194 
195  // @RG lines / read-groups
196  int nrg; // presumably the number of read-groups -- TODO confirm
198  khash_t(m_s2i) *rg_hash; // string -> int map; presumably read-group ID -> index
199 
200  // @PG lines / programs
201  int npg; // presumably the number of programs -- TODO confirm
202  int npg_end; // presumably the number of entries in pg_end -- TODO confirm
205  khash_t(m_s2i) *pg_hash; // string -> int map; presumably program ID -> index
206  int *pg_end; // NOTE(review): looks like the ends of the @PG chains; confirm
207 
208  // @cond internal
209  char ID_buf[1024]; // temporary buffer
210  int ID_cnt;
211  int ref_count; // number of uses of this SAM_hdr
212  // @endcond
213 } SAM_hdr;
214 
/*
 * Creates a new SAM_hdr and returns a pointer to it.
 * Presumably returns NULL on failure and the result is released with
 * sam_hdr_free() -- TODO confirm against the implementation.
 */
221 SAM_hdr *sam_hdr_new(void);
222 
/*
 * Builds a SAM_hdr from header text 'hdr' of length 'len'.
 * The function is declared as sam_hdr_parse_ when SAMTOOLS is defined and
 * as sam_hdr_parse otherwise; the rename presumably avoids a symbol clash
 * with samtools -- TODO confirm.
 * Presumably returns NULL on failure -- TODO confirm.
 */
231 #ifdef SAMTOOLS
232 SAM_hdr *sam_hdr_parse_(const char *hdr, int len);
233 #else
234 SAM_hdr *sam_hdr_parse(const char *hdr, int len);
235 #endif
236 
237 
243 
244 
/*
 * Registers one more user of 'hdr'; presumably increments hdr->ref_count
 * ("number of uses of this SAM_hdr") -- TODO confirm.
 */
250 void sam_hdr_incr_ref(SAM_hdr *hdr);
251 
252 
/*
 * Drops one user of 'hdr'; presumably decrements hdr->ref_count.
 * NOTE(review): whether the header is freed when the count reaches zero is
 * not visible from this declaration -- confirm.
 */
261 void sam_hdr_decr_ref(SAM_hdr *hdr);
262 
263 
/*
 * Releases 'hdr'.
 * NOTE(review): how this interacts with ref_count (immediate free versus
 * free on last use) is not visible here -- confirm.
 */
272 void sam_hdr_free(SAM_hdr *hdr);
273 
/*
 * Returns a length for 'hdr'; presumably the length of its text form as
 * returned by sam_hdr_str() -- TODO confirm.
 */
278 int sam_hdr_length(SAM_hdr *hdr);
279 
/*
 * Returns a string for 'hdr'; presumably the header in text form.
 * NOTE(review): ownership of the returned pointer is not visible here;
 * do not free it without checking the implementation.
 */
284 char *sam_hdr_str(SAM_hdr *hdr);
285 
/*
 * Adds header text 'lines' of length 'len' to 'sh'.
 * Presumably 'lines' may hold several newline-separated header lines and
 * the return is 0 on success, -1 on failure -- TODO confirm.
 */
299 int sam_hdr_add_lines(SAM_hdr *sh, const char *lines, int len);
300 
/*
 * Adds one header line of the given 'type' to 'sh', with the line's
 * content supplied through the variadic arguments.
 * NOTE(review): the expected argument layout (presumably key/value string
 * pairs ended by NULL) and the return convention are not visible here --
 * confirm before calling.
 */
310 int sam_hdr_add(SAM_hdr *sh, const char *type, ...);
311 
/*
 * va_list flavour of sam_hdr_add().
 * NOTE(review): the signature takes BOTH a va_list and trailing variadic
 * arguments; how the two are combined is not visible here -- confirm
 * against the implementation before calling.
 */
328 int sam_hdr_vadd(SAM_hdr *sh, const char *type, va_list ap, ...);
329 
/*
 * Looks up a header line of the given 'type' in 'hdr', selected by an
 * 'ID_key' / 'ID_value' pair, and returns it as a SAM_hdr_type.
 * Presumably returns NULL when nothing matches, and NULL key/value may act
 * as a wildcard -- TODO confirm.
 */
337 SAM_hdr_type *sam_hdr_find(SAM_hdr *hdr, char *type,
338  char *ID_key, char *ID_value);
339 
/*
 * Same lookup parameters as sam_hdr_find(), but returns a string;
 * presumably the text of the matching line.
 * NOTE(review): ownership of the returned pointer (caller-freed or
 * internal) is not visible here -- confirm.
 */
352 char *sam_hdr_find_line(SAM_hdr *hdr, char *type,
353  char *ID_key, char *ID_value);
354 
367  SAM_hdr_type *type,
368  char *key,
369  SAM_hdr_tag **prev);
370 
/*
 * Updates the header line 'type' within 'hdr', with the changes supplied
 * through the variadic arguments.
 * NOTE(review): the expected argument layout (presumably key/value string
 * pairs ended by NULL) and the return convention are not visible here --
 * confirm.
 */
383 int sam_hdr_update(SAM_hdr *hdr, SAM_hdr_type *type, ...);
384 
/*
 * Rebuilds 'hdr'; presumably regenerates its text form from the parsed
 * structures -- TODO confirm. Return convention not visible here.
 */
390 int sam_hdr_rebuild(SAM_hdr *hdr);
391 
/*
 * Maps the reference name 'ref' to an int; presumably its index via
 * hdr->ref_hash, with a negative value when unknown -- TODO confirm.
 */
396 int sam_hdr_name2ref(SAM_hdr *hdr, const char *ref);
397 
/*
 * Looks up the read-group named 'rg' in 'hdr' and returns its SAM_RG.
 * Presumably returns NULL when not found, and the result points into
 * storage owned by 'hdr' -- TODO confirm.
 */
404 SAM_RG *sam_hdr_find_rg(SAM_hdr *hdr, const char *rg);
405 
/*
 * Links the @PG records of 'hdr'; presumably resolves each record's
 * predecessor into SAM_PG.prev_id and refreshes pg_end / npg_end --
 * TODO confirm. Return convention not visible here.
 */
416 int sam_hdr_link_pg(SAM_hdr *hdr);
417 
418 
/*
 * Adds a @PG line for program 'name' to 'sh', with further fields supplied
 * through the variadic arguments.
 * NOTE(review): the expected argument layout (presumably key/value string
 * pairs ended by NULL) and the return convention are not visible here --
 * confirm.
 */
435 int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...);
436 
/*
 * Turns an argc/argv pair into a single string.
 * Presumably the arguments are space-joined into a newly allocated buffer
 * that the caller must free -- TODO confirm against the implementation.
 */
446 char *stringify_argv(int argc, char *argv[]);
447 
448 #ifdef __cplusplus
449 }
450 #endif
451 
452 #endif /* _SAM_HDR_H_ */