32 #include "io_lib_config.h"
41 #include <sys/types.h>
51 # define bam_copy(dst, src) bam_copy1(*(dst), (src))
56 if (bf->alloc > (*bt)->alloc) {
57 a = ((int)((bf->alloc+15)/16))*16;
58 *bt = realloc(*bt, a);
59 memcpy(*bt, bf, bf->alloc);
62 memcpy(*bt, bf, bf->alloc);
69 #define Z_CRAM_STRAT Z_FILTERED
82 static int sub_idx(
char *key,
char val) {
85 for (i = 0; *key && *key++ != val; i++);
131 if (!(h->preservation_map =
kh_init(map)))
134 k =
kh_put(map, h->preservation_map,
"RN", &r);
135 if (-1 == r)
return NULL;
136 kh_val(h->preservation_map, k).i = 1;
139 k =
kh_put(map, h->preservation_map,
"PI", &r);
140 if (-1 == r)
return NULL;
141 kh_val(h->preservation_map, k).i = 0;
143 k =
kh_put(map, h->preservation_map,
"UI", &r);
144 if (-1 == r)
return NULL;
145 kh_val(h->preservation_map, k).i = 1;
147 k =
kh_put(map, h->preservation_map,
"MI", &r);
148 if (-1 == r)
return NULL;
149 kh_val(h->preservation_map, k).i = 1;
153 k =
kh_put(map, h->preservation_map,
"SM", &r);
154 if (-1 == r)
return NULL;
155 kh_val(h->preservation_map, k).i = 0;
157 k =
kh_put(map, h->preservation_map,
"TD", &r);
158 if (-1 == r)
return NULL;
159 kh_val(h->preservation_map, k).i = 0;
161 k =
kh_put(map, h->preservation_map,
"AP", &r);
162 if (-1 == r)
return NULL;
167 k =
kh_put(map, h->preservation_map,
"RR", &r);
168 if (-1 == r)
return NULL;
169 kh_val(h->preservation_map, k).i = 0;
177 if (h->preservation_map) {
180 for (k =
kh_begin(h->preservation_map);
181 k !=
kh_end(h->preservation_map);
184 khash_t(map) *pmap = h->preservation_map;
219 char smat[5], *mp = smat;
258 fprintf(stderr,
"Unknown preservation key '%.2s'\n", key);
438 HashIter *iter = HashTableIterCreate();
446 mp +=
itf8_put(mp, (hi->key[0]<<16)|(hi->key[1]<<8)|hi->key[2]);
453 HashTableIterDestroy(iter);
461 for (k =
kh_begin(c->tags_used); k !=
kh_end(c->tags_used); k++) {
469 switch(
kh_key(c->tags_used, k) & 0xff) {
489 case 'A':
case 'c':
case 'C':
523 case 'i':
case 'I':
case 'f':
559 fprintf(stderr,
"Unsupported SAM aux type '%c'\n",
560 kh_key(c->tags_used, k) & 0xff);
571 fprintf(stderr,
"Wrote compression block header in %d bytes\n",
597 if (NULL == (cp = buf = malloc(16+5*(8+s->
hdr->
num_blocks)))) {
617 memcpy(cp, s->
hdr->
md5, 16); cp += 16;
622 b->
data = (
unsigned char *)buf;
638 int rec, r = 0, last_pos;
734 s->
block[s->ba_id]->
data = calloc(1, s->BA_len);
766 (
char *)&cr->
len, 1);
769 i32 = cr->
apos - last_pos;
806 (
char *)&cr->
tlen, 1);
827 for (j = 0; j < cr->
ntags; j++) {
828 uint32_t i32 = s->TN[cr->TN_idx + j];
847 for (j = 0; j < cr->
nfeature; j++) {
853 i32 = f->
X.pos - prev_pos;
941 fprintf(stderr,
"unhandled feature code %c\n",
948 (
char *)&cr->
mqual, 1);
1005 # define STRAT2 Z_RLE
1017 if (fd->
level == 0) {
1019 }
else if (fd->
level == 1) {
1026 }
else if (fd->
level < 3) {
1087 int i, j, slice_offset;
1091 int r1, r2, sn, nref;
1097 pthread_mutex_unlock(&fd->
ref_lock);
1100 for (i = 0; i < nref; i++) {
1113 if (!ref &&
bam_ref(b) >= 0) {
1114 fprintf(stderr,
"Failed to load reference #%d\n",
bam_ref(b));
1130 for (r1 = sn = 0; r1 < c->
curr_c_rec; sn++) {
1132 int first_base = INT_MAX, last_base = INT_MIN;
1134 assert(sn < c->curr_slice);
1152 fprintf(stderr,
"Failed to load reference #%d\n",
1165 process_one_read(fd, c, s, cr, b, r2);
1167 if (first_base > cr->
apos)
1168 first_base = cr->
apos;
1170 if (last_base < cr->aend)
1171 last_base = cr->
aend;
1187 spares = malloc(
sizeof(*spares));
1201 fprintf(stderr,
"Multi-ref container\n");
1221 memset(s->
hdr->
md5, 0, 16);
1267 (
void *)CRAM_EXT_NS,
1432 fprintf(stderr,
"Encode slice %d\n", i);
1433 if (cram_encode_slice(fd, c, h, c->
slices[i]) != 0)
1465 slice_offset = c_hdr->method ==
RAW
1504 slice_offset += s->
block[j]->method ==
RAW
1509 c->
length += slice_offset;
1519 for (i = 0; i < fd->
refs->
nref; i++) {
1563 int pos,
char base,
char qual,
char ref) {
1567 if (fd->
L2[(uc)base]<4 || (fd->
L2[(uc)base]<5 && fd->
L2[(uc)ref]<4)) {
1581 return cram_add_feature(c, s, r, &f);
1586 int pos,
char base,
char qual) {
1599 return cram_add_feature(c, s, r, &f);
1604 int pos,
char qual) {
1611 return cram_add_feature(c, s, r, &f);
1615 int pos,
int len,
char *base) {
1621 return cram_add_feature(c, s, r, &f);
1625 int pos,
int len,
char *base,
int version) {
1640 for (i = 0; i < len; i++)
1645 return cram_add_feature(c, s, r, &f);
1649 int pos,
int len,
char *base) {
1655 return cram_add_feature(c, s, r, &f);
1659 int pos,
int len,
char *base) {
1665 return cram_add_feature(c, s, r, &f);
1669 int pos,
int len,
char *base) {
1675 return cram_add_feature(c, s, r, &f);
1679 int pos,
int len,
char *base) {
1683 char b = base ? *base :
'N';
1699 for (i = 0; i < len; i++)
1704 return cram_add_feature(c, s, r, &f);
1714 char *aux, *tmp, *rg = NULL, *tmp_tn;
1729 cr->TN_idx = s->nTN;
1731 while (aux[0] != 0) {
1735 if (aux[0] ==
'R' && aux[1] ==
'G' && aux[2] ==
'Z') {
1740 if (aux[0] ==
'M' && aux[1] ==
'D' && aux[2] ==
'Z') {
1744 if (aux[0] ==
'N' && aux[1] ==
'M') {
1746 case 'A':
case 'C':
case 'c': aux+=4;
break;
1747 case 'I':
case 'i':
case 'f': aux+=7;
break;
1749 fprintf(stderr,
"Unhandled type code for NM tag\n");
1757 i32 = (aux[0]<<16) | (aux[1]<<8) | aux[2];
1758 kh_put(s_i2i, c->tags_used, i32, &r);
1763 if (s->nTN >= s->aTN) {
1764 s->aTN = s->aTN ? s->aTN*2 : 1024;
1765 if (!(s->TN = realloc(s->TN, s->aTN *
sizeof(*s->TN))))
1768 s->TN[s->nTN++] = i32;
1775 case 'A':
case 'C':
case 'c':
1782 *tmp++=*aux++; *tmp++=*aux++;
1785 case 'I':
case 'i':
case 'f':
1787 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1792 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1793 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1798 while ((*tmp++=*aux++));
1803 int type = aux[3], blen;
1805 (((
unsigned char *)aux)[5]<< 8) +
1806 (((
unsigned char *)aux)[6]<<16) +
1807 (((
unsigned char *)aux)[7]<<24));
1819 case 'i':
case 'I':
case 'f':
1823 fprintf(stderr,
"Unknown sub-type '%c' for aux type 'B'\n",
1832 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1835 memcpy(tmp, aux, blen); tmp += blen; aux += blen;
1841 fprintf(stderr,
"Unknown aux type '%c'\n", aux[2]);
1870 char *aux, *orig, *tmp, *rg = NULL;
1888 orig = aux = (
char *)
bam_aux(b);
1891 while (aux - orig < aux_size && aux[0] != 0) {
1895 if (aux[0] ==
'R' && aux[1] ==
'G' && aux[2] ==
'Z') {
1900 if (aux[0] ==
'M' && aux[1] ==
'D' && aux[2] ==
'Z') {
1904 if (aux[0] ==
'N' && aux[1] ==
'M') {
1906 case 'A':
case 'C':
case 'c': aux+=4;
break;
1907 case 'S':
case 's': aux+=5;
break;
1908 case 'I':
case 'i':
case 'f': aux+=7;
break;
1910 fprintf(stderr,
"Unhandled type code for NM tag\n");
1918 i32 = (aux[0]<<16) | (aux[1]<<8) | aux[2];
1919 kh_put(s_i2i, c->tags_used, i32, &r);
1924 case 'A':
case 'C':
case 'c':
1931 *tmp++=*aux++; *tmp++=*aux++;
1934 case 'I':
case 'i':
case 'f':
1936 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1941 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1942 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1946 while ((*tmp++=*aux++));
1951 int type = aux[3], blen;
1953 (((
unsigned char *)aux)[5]<< 8) +
1954 (((
unsigned char *)aux)[6]<<16) +
1955 (((
unsigned char *)aux)[7]<<24));
1967 case 'i':
case 'I':
case 'f':
1971 fprintf(stderr,
"Unknown sub-type '%c' for aux type 'B'\n",
1980 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
1983 memcpy(tmp, aux, blen); tmp += blen; aux += blen;
1989 fprintf(stderr,
"Unknown aux type '%c'\n", aux[2]);
2006 }
else if (
new == 0) {
2070 fprintf(stderr,
"Flush container %d/%d..%d\n",
2138 int i, fake_qual = 0;
2140 char *ref, *seq, *qual;
2154 rg = cram_encode_aux_1_0(fd, b, c, s, cr);
2156 rg = cram_encode_aux(fd, b, c, s, cr);
2165 cr->
rg = brg ? brg->
id : -1;
2221 for (i = 0; i < cr->
len; i++) {
2236 int apos = cr->
apos-1, spos = 0;
2252 for (i = 0; i < cr->
ncigar; i++) {
2255 cig_to[i] = cig_from[i];
2270 int end = cig_len+apos < c->
ref_end
2271 ? cig_len : c->
ref_end - apos;
2272 for (l = 0; l < end && seq[spos]; l++, apos++, spos++) {
2273 if (ref[apos] != seq[spos]) {
2276 if (cram_add_substitution(fd, c, s, cr, spos,
2277 seq[spos], qual[spos],
2284 if (l < cig_len && cr->len) {
2286 for (; l < cig_len && seq[spos]; l++, spos++) {
2287 if (cram_add_base(fd, c, s, cr, spos,
2288 seq[spos], qual[spos]))
2292 }
else if (!cr->
len) {
2300 if (cram_add_deletion(c, s, cr, spos, cig_len, &seq[spos]))
2306 if (cram_add_skip(c, s, cr, spos, cig_len, &seq[spos]))
2312 if (cram_add_insertion(c, s, cr, spos, cig_len,
2313 cr->
len ? &seq[spos] : NULL))
2316 for (l = 0; l < cig_len; l++, spos++) {
2317 cram_add_quality(fd, c, s, cr, spos, qual[spos]);
2325 if (cram_add_softclip(c, s, cr, spos, cig_len,
2326 cr->
len ? &seq[spos] : NULL,
2331 for (l = 0; l < cig_len; l++, spos++) {
2332 cram_add_quality(fd, c, s, cr, spos, qual[spos]);
2335 for (l = 0; l < cig_len; l++, spos++) {
2336 cram_add_quality(fd, c, s, cr, spos, -1);
2345 if (cram_add_hardclip(c, s, cr, spos, cig_len, &seq[spos]))
2350 if (cram_add_pad(c, s, cr, spos, cig_len, &seq[spos]))
2366 s->BA_len += cr->
len;
2368 for (i = 0; i < cr->
len; i++)
2384 memset(cp, 255, cr->
len);
2388 char *from = (
char *)&
bam_qual(b)[0];
2390 memcpy(to, from, cr->
len);
2414 k =
kh_put(m_s2i, s->pair, key, &
new);
2418 kh_val(s->pair, k) = rnum;
2442 p->mate_pos = cr->
apos;
2452 p->tlen = p->apos - cr->
aend;
2466 p->mate_line = rnum - (
kh_val(s->pair, k) + 1);
2469 kh_val(s->pair, k) = rnum;
2531 int slice_rec, curr_rec, multi_seq = fd->
multi_seq == 1;
2545 fprintf(stderr,
"Multi-ref enabled for this container\n");
2554 if (NULL == (c = cram_next_container(fd, b)))
2571 pthread_mutex_unlock(&fd->
ref_lock);
2587 pthread_mutex_unlock(&fd->
ref_lock);
2591 fprintf(stderr,
"Unsorted mode enabled\n");
2594 pthread_mutex_unlock(&fd->
ref_lock);