32 #include "io_lib_config.h"
42 #define sam_hdr_parse sam_hdr_parse_
45 static void sam_hdr_error(
char *msg,
char *line,
int len,
int lno) {
48 for (j = 0; j < len && line[j] !=
'\n'; j++)
50 fprintf(stderr,
"%s at line %d: \"%.*s\"\n", msg, lno, j, line);
57 printf(
"===DUMP===\n");
65 t1 = t2 =
kh_val(hdr->h, k);
66 c[0] =
kh_key(hdr->h, k)>>8;
67 c[1] =
kh_key(hdr->h, k)&0xff;
68 printf(
"Type %.2s, count %d\n", c, t1->
prev->
order+1);
72 printf(
">>>%d ", t1->
order);
73 for (tag = t1->
tag; tag; tag=tag->
next) {
74 printf(
"\"%.2s\":\"%.*s\"\t",
83 printf(
"\n@PG chains:\n");
84 for (i = 0; i < hdr->
npg_end; i++) {
89 j == hdr->
pg_end[i] ?
" " :
"->",
95 puts(
"===END DUMP===");
103 static int sam_hdr_update_hashes(
SAM_hdr *sh,
107 if ((type>>8) ==
'S' && (type&0xff) ==
'Q') {
118 sh->
ref[nref].
ty = h_type;
122 if (tag->
str[0] ==
'S' && tag->
str[1] ==
'N') {
127 }
else if (tag->
str[0] ==
'L' && tag->
str[1] ==
'N') {
137 if (-1 == r)
return -1;
138 kh_val(sh->ref_hash, k) = nref;
145 if ((type>>8) ==
'R' && (type&0xff) ==
'G') {
149 sh->
rg = realloc(sh->
rg, (sh->
nrg+1)*
sizeof(*sh->
rg));
156 sh->
rg[nrg].
ty = h_type;
157 sh->
rg[nrg].
tag = tag;
158 sh->
rg[nrg].
id = nrg;
161 if (tag->
str[0] ==
'I' && tag->
str[1] ==
'D') {
162 if (!(sh->
rg[nrg].
name = malloc(tag->
len)))
175 if (-1 == r)
return -1;
176 kh_val(sh->rg_hash, k) = nrg;
183 if ((type>>8) ==
'P' && (type&0xff) ==
'G') {
187 sh->
pg = realloc(sh->
pg, (sh->
npg+1)*
sizeof(*sh->
pg));
194 sh->
pg[npg].
ty = h_type;
195 sh->
pg[npg].
tag = tag;
196 sh->
pg[npg].
id = npg;
200 if (tag->
str[0] ==
'I' && tag->
str[1] ==
'D') {
201 if (!(sh->
pg[npg].
name = malloc(tag->
len)))
206 }
else if (tag->
str[0] ==
'P' && tag->
str[1] ==
'P') {
210 k =
kh_get(m_s2i, sh->pg_hash, tag->
str+3);
213 if (k !=
kh_end(sh->pg_hash)) {
214 int p_id =
kh_val(sh->pg_hash, k);
223 for (i = 0; i < sh->
npg_end; i++) {
224 if (sh->
pg_end[i] == p_id) {
242 if (-1 == r)
return -1;
243 kh_val(sh->pg_hash, k) = npg;
277 int i, lno = 1, text_offset;
283 text_offset = ks_len(&sh->
text);
284 if (EOF == kputsn(lines, len, &sh->
text))
286 hdr = ks_str(&sh->
text) + text_offset;
288 for (i = 0; i < len; i++) {
292 int l_start = i,
new;
298 for (j = i; j < len && hdr[j] !=
'\n'; j++)
300 sam_hdr_error(
"Header line does not start with '@'",
301 &hdr[l_start], len - l_start, lno);
305 type = (hdr[i+1]<<8) | hdr[i+2];
306 if (hdr[i+1] <
'A' || hdr[i+1] >
'z' ||
307 hdr[i+2] <
'A' || hdr[i+2] >
'z') {
308 sam_hdr_error(
"Header line does not have a two character key",
309 &hdr[l_start], len - l_start, lno);
320 if (-1 == (k =
kh_put(sam_hdr, sh->h, type, &
new)))
334 h_type->
order = p->order+1;
336 kh_val(sh->h, k) = h_type;
337 h_type->
prev = h_type->
next = h_type;
343 if ((type>>8) ==
'C' && (type&0xff) ==
'O') {
345 if (hdr[i] !=
'\t') {
346 sam_hdr_error(
"Missing tab",
347 &hdr[l_start], len - l_start, lno);
351 for (j = ++i; j < len && hdr[j] !=
'\n'; j++)
367 if (hdr[i] !=
'\t') {
368 sam_hdr_error(
"Missing tab",
369 &hdr[l_start], len - l_start, lno);
373 for (j = ++i; j < len && hdr[j] !=
'\n' && hdr[j] !=
'\t'; j++)
384 if (h_tag->
len < 3 || h_tag->
str[2] !=
':') {
385 sam_hdr_error(
"Malformed key:value pair",
386 &hdr[l_start], len - l_start, lno);
397 }
while (i < len && hdr[i] !=
'\n');
401 if (-1 == sam_hdr_update_hashes(sh, type, h_type))
418 va_start(args, type);
427 khint32_t type_i = (type[0]<<8) | type[1], k;
429 #if defined(HAVE_VA_COPY)
433 if (EOF == kputc_(
'@', &sh->
text))
435 if (EOF == kputsn(type, 2, &sh->
text))
440 if (-1 == (k =
kh_put(sam_hdr, sh->h, type_i, &
new)))
442 kh_val(sh->h, k) = h_type;
455 h_type->
order = p->order + 1;
457 h_type->
prev = h_type->
next = h_type;
469 if (!(k = (
char *)va_arg(args,
char *)))
471 v = va_arg(args,
char *);
473 if (EOF == kputc_(
'\t', &sh->
text))
478 idx = ks_len(&sh->
text);
480 if (EOF == kputs(k, &sh->
text))
482 if (EOF == kputc_(
':', &sh->
text))
484 if (EOF == kputs(v, &sh->
text))
487 h_tag->
len = ks_len(&sh->
text) - idx;
489 ks_str(&sh->
text) + idx,
504 #if defined(HAVE_VA_COPY)
505 va_copy(ap_local, ap);
514 if (!(k = (
char *)va_arg(ap,
char *)))
516 v = va_arg(ap,
char *);
518 if (EOF == kputc_(
'\t', &sh->
text))
523 idx = ks_len(&sh->
text);
525 if (EOF == kputs(k, &sh->
text))
527 if (EOF == kputc_(
':', &sh->
text))
529 if (EOF == kputs(v, &sh->
text))
532 h_tag->
len = ks_len(&sh->
text) - idx;
534 ks_str(&sh->
text) + idx,
549 if (EOF == kputc(
'\n', &sh->
text))
552 int itype = (type[0]<<8) | type[1];
553 if (-1 == sam_hdr_update_hashes(sh, itype, h_type))
556 return h_type->
order;
566 char *ID_key,
char *ID_value) {
568 int itype = (type[0]<<8)|(type[1]);
573 if (type[0] ==
'S' && type[1] ==
'Q' &&
574 ID_key[0] ==
'S' && ID_key[1] ==
'N') {
575 k =
kh_get(m_s2i, hdr->ref_hash, ID_value);
576 return k !=
kh_end(hdr->ref_hash)
581 if (type[0] ==
'R' && type[1] ==
'G' &&
582 ID_key[0] ==
'I' && ID_key[1] ==
'D') {
583 k =
kh_get(m_s2i, hdr->rg_hash, ID_value);
584 return k !=
kh_end(hdr->rg_hash)
589 if (type[0] ==
'P' && type[1] ==
'G' &&
590 ID_key[0] ==
'I' && ID_key[1] ==
'D') {
591 k =
kh_get(m_s2i, hdr->pg_hash, ID_value);
592 return k !=
kh_end(hdr->pg_hash)
598 k =
kh_get(sam_hdr, hdr->h, itype);
605 t1 = t2 =
kh_val(hdr->h, k);
608 for (tag = t1->
tag; tag; tag = tag->
next) {
609 if (tag->
str[0] == ID_key[0] && tag->
str[1] == ID_key[1]) {
610 char *cp1 = tag->
str+3;
611 char *cp2 = ID_value;
612 while (*cp1 && *cp1 == *cp2)
636 char *ID_key,
char *ID_value) {
646 r |= (kputc_(
'@', &ks) == EOF);
647 r |= (kputs(type, &ks) == EOF);
648 for (tag = ty->
tag; tag; tag = tag->
next) {
649 r |= (kputc_(
'\t', &ks) == EOF);
650 r |= (kputsn(tag->
str, tag->
len, &ks) == EOF);
678 for (tag = type->
tag; tag; p = tag, tag = tag->
next) {
679 if (tag->
str[0] == key[0] && tag->
str[1] == key[1]) {
714 if (!(k = (
char *)va_arg(ap,
char *)))
716 v = va_arg(ap,
char *);
730 idx = ks_len(&hdr->
text);
733 tag->
len = ks_len(&hdr->
text) - idx;
735 ks_str(&hdr->
text) + idx,
746 #define K(a) (((a)[0]<<8)|((a)[1]))
759 k =
kh_get(sam_hdr, hdr->h,
K(
"HD"));
760 if (k !=
kh_end(hdr->h)) {
763 if (EOF == kputs(
"@HD", &ks))
765 for (tag = ty->
tag; tag; tag = tag->
next) {
766 if (EOF == kputc_(
'\t', &ks))
768 if (EOF == kputsn_(tag->
str, tag->
len, &ks))
771 if (EOF == kputc(
'\n', &ks))
781 if (
kh_key(hdr->h, k) ==
K(
"HD"))
784 t1 = t2 =
kh_val(hdr->h, k);
789 if (EOF == kputc_(
'@', &ks))
791 c[0] =
kh_key(hdr->h, k)>>8;
792 c[1] =
kh_key(hdr->h, k)&0xff;
793 if (EOF == kputsn_(c, 2, &ks))
795 for (tag = t1->
tag; tag; tag=tag->
next) {
796 if (EOF == kputc_(
'\t', &ks))
798 if (EOF == kputsn_(tag->
str, tag->
len, &ks))
801 if (EOF == kputc(
'\n', &ks))
807 if (ks_str(&hdr->
text))
823 SAM_hdr *sh = calloc(1,
sizeof(*sh));
837 if (!(sh->ref_hash =
kh_init(m_s2i)))
842 if (!(sh->rg_hash =
kh_init(m_s2i)))
849 if (!(sh->pg_hash =
kh_init(m_s2i)))
896 if (NULL == sh)
return NULL;
898 if (NULL == hdr)
return sh;
964 if (--hdr->ref_count > 0)
967 if (ks_str(&hdr->
text))
978 for (i = 0; i < hdr->
nref; i++)
989 for (i = 0; i < hdr->
nrg; i++)
1000 for (i = 0; i < hdr->
npg; i++)
1022 return ks_len(&hdr->
text);
1026 return ks_str(&hdr->
text);
1035 return k ==
kh_end(hdr->ref_hash) ? -1 :
kh_val(hdr->ref_hash, k);
1046 return k ==
kh_end(hdr->rg_hash)
1048 : &hdr->
rg[
kh_val(hdr->rg_hash, k)];
1073 for (i = 0; i < hdr->
npg; i++)
1076 for (i = 0; i < hdr->
npg; i++) {
1081 for (tag = hdr->
pg[i].
tag; tag; tag = tag->
next) {
1082 if (tag->
str[0] ==
'P' && tag->
str[1] ==
'P')
1091 k =
kh_get(m_s2i, hdr->pg_hash, tag->
str+3);
1092 tag->
str[tag->
len] = tmp;
1094 if (k ==
kh_end(hdr->pg_hash)) {
1103 for (i = j = 0; i < hdr->
npg; i++) {
1104 if (hdr->
pg_end[i] != -1)
1120 if (k ==
kh_end(sh->pg_hash))
1124 sprintf(sh->ID_buf,
"%.1000s.%d", name, sh->ID_cnt++);
1125 k =
kh_get(m_s2i, sh->pg_hash, sh->ID_buf);
1126 }
while (k ==
kh_end(sh->pg_hash));
1149 va_start(args, name);
1153 int *end = malloc(sh->
npg_end *
sizeof(
int));
1159 memcpy(end, sh->
pg_end, nends *
sizeof(*end));
1161 for (i = 0; i < nends; i++) {
1165 "PP", sh->
pg[end[i]].
name,
1200 for (i = 0; i < argc; i++) {
1201 nbytes += strlen(argv[i]) + 1;
1203 if (!(str = malloc(nbytes)))
1208 for (i = 0; i < argc; i++) {
1210 while (argv[i][j]) {
1211 if (argv[i][j] ==
'\t')