37 #include "io_lib_config.h"
46 #include <sys/types.h>
109 if (!(h->
TL = calloc(h->
nTL,
sizeof(
unsigned char *))))
112 h->
TL[nTL++] = &dat[i];
135 if (b->method !=
RAW) {
140 cp = (
char *)b->
data;
164 if (!hdr->preservation_map) {
179 cp +=
itf8_get(cp, &map_size); cp_copy = cp;
181 for (i = 0; i < map_count; i++) {
190 k =
kh_put(
map, hdr->preservation_map,
"MI", &r);
196 kh_val(hdr->preservation_map, k) = hd;
202 k =
kh_put(
map, hdr->preservation_map,
"UI", &r);
208 kh_val(hdr->preservation_map, k) = hd;
214 k =
kh_put(
map, hdr->preservation_map,
"PI", &r);
220 kh_val(hdr->preservation_map, k) = hd;
226 k =
kh_put(
map, hdr->preservation_map,
"RN", &r);
232 kh_val(hdr->preservation_map, k) = hd;
238 k =
kh_put(
map, hdr->preservation_map,
"AP", &r);
244 kh_val(hdr->preservation_map, k) = hd;
250 k =
kh_put(
map, hdr->preservation_map,
"RR", &r);
256 kh_val(hdr->preservation_map, k) = hd;
289 k =
kh_put(
map, hdr->preservation_map,
"SM", &r);
294 kh_val(hdr->preservation_map, k) = hd;
307 k =
kh_put(
map, hdr->preservation_map,
"TD", &r);
312 kh_val(hdr->preservation_map, k) = hd;
317 fprintf(stderr,
"Unrecognised preservation map key %c%c\n",
324 if (cp - cp_copy != map_size) {
330 cp +=
itf8_get(cp, &map_size); cp_copy = cp;
332 for (i = 0; i < map_count; i++) {
348 m->
key = (key[0]<<8)|key[1];
369 if (key[0] ==
'B' && key[1] ==
'F') {
375 }
else if (key[0] ==
'C' && key[1] ==
'F') {
381 }
else if (key[0] ==
'R' && key[1] ==
'I') {
387 }
else if (key[0] ==
'R' && key[1] ==
'L') {
393 }
else if (key[0] ==
'A' && key[1] ==
'P') {
399 }
else if (key[0] ==
'R' && key[1] ==
'G') {
405 }
else if (key[0] ==
'M' && key[1] ==
'F') {
411 }
else if (key[0] ==
'N' && key[1] ==
'S') {
417 }
else if (key[0] ==
'N' && key[1] ==
'P') {
423 }
else if (key[0] ==
'T' && key[1] ==
'S') {
429 }
else if (key[0] ==
'N' && key[1] ==
'F') {
435 }
else if (key[0] ==
'T' && key[1] ==
'C') {
441 }
else if (key[0] ==
'T' && key[1] ==
'N') {
447 }
else if (key[0] ==
'F' && key[1] ==
'N') {
453 }
else if (key[0] ==
'F' && key[1] ==
'C') {
459 }
else if (key[0] ==
'F' && key[1] ==
'P') {
465 }
else if (key[0] ==
'B' && key[1] ==
'S') {
471 }
else if (key[0] ==
'I' && key[1] ==
'N') {
478 }
else if (key[0] ==
'S' && key[1] ==
'C') {
485 }
else if (key[0] ==
'D' && key[1] ==
'L') {
491 }
else if (key[0] ==
'B' && key[1] ==
'A') {
497 }
else if (key[0] ==
'R' && key[1] ==
'S') {
503 }
else if (key[0] ==
'P' && key[1] ==
'D') {
509 }
else if (key[0] ==
'H' && key[1] ==
'C') {
515 }
else if (key[0] ==
'M' && key[1] ==
'Q') {
521 }
else if (key[0] ==
'R' && key[1] ==
'N') {
528 }
else if (key[0] ==
'Q' && key[1] ==
'S') {
540 }
else if (key[0] ==
'T' && key[1] ==
'L') {
546 }
else if (key[0] ==
'T' && key[1] ==
'M') {
547 }
else if (key[0] ==
'T' && key[1] ==
'V') {
549 fprintf(stderr,
"Unrecognised key: %.2s\n", key);
556 if (cp - cp_copy != map_size) {
562 cp +=
itf8_get(cp, &map_size); cp_copy = cp;
564 for (i = 0; i < map_count; i++) {
575 m->
key = (key[0]<<16)|(key[1]<<8)|key[2];
596 if (cp - cp_copy != map_size) {
615 char *cp = (
char *)b->
data;
622 if (!(hdr = calloc(1,
sizeof(*hdr))))
653 memcpy(hdr->
md5, cp, 16);
655 memset(hdr->
md5, 0, 16);
664 static int nbits(
int v) {
665 static const int MultiplyDeBruijnBitPosition[32] = {
666 1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31,
667 9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32
677 return MultiplyDeBruijnBitPosition[(
uint32_t)(v * 0x07C4ACDDU) >> 27];
682 static int sort_freqs(
const void *vp1,
const void *vp2) {
683 const int i1 = *(
const int *)vp1;
684 const int i2 = *(
const int *)vp2;
699 int cf,
char *seq,
char *qual) {
700 int prev_pos = 0, f, r = 0, out_sz = 1;
702 int cig_len = 0, ref_pos = cr->
apos;
714 memset(qual, 30, cr->
len);
724 (
char *)&fn, &out_sz);
728 for (f = 0; f < fn; f++) {
732 if (ncigar+2 >= cigar_alloc) {
733 cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
735 if (!(cigar = realloc(cigar, cigar_alloc *
sizeof(*cigar))))
744 (
char *)&pos, &out_sz);
752 if (ref_pos + pos - seq_pos > bfd->
ref[cr->
ref_id].
len) {
753 static int whinged = 0;
755 fprintf(stderr,
"Ref pos outside of ref "
756 "sequence boundary\n");
759 memcpy(&seq[seq_pos-1], &s->
ref[ref_pos - s->
ref_start +1],
765 cigar[ncigar++] = (cig_len<<4) + cig_op;
771 cigar[ncigar++] = (cig_len<<4) + cig_op;
776 cig_len += pos - seq_pos;
777 ref_pos += pos - seq_pos;
778 md_dist += pos - seq_pos;
789 cigar[ncigar++] = (cig_len<<4) + cig_op;
795 blk, &seq[pos-1], &out_sz2)
796 : (seq[pos-1] =
'N', out_sz2 = 1, 0);
800 blk, &seq[pos-1], &out_sz2)
801 : (seq[pos-1] =
'N', out_sz2 = 1, 0);
813 cigar[ncigar++] = (cig_len<<4) + cig_op;
818 (
char *)&base, &out_sz);
824 cigar[ncigar++] = (cig_len<<4) + cig_op;
829 (
char *)&base, &out_sz);
851 if (cig_len && cig_op !=
BAM_CDEL) {
852 cigar[ncigar++] = (cig_len<<4) + cig_op;
857 (
char *)&i32, &out_sz);
875 if (cig_len && cig_op !=
BAM_CINS) {
876 cigar[ncigar++] = (cig_len<<4) + cig_op;
882 &seq[pos-1], &out_sz2);
892 if (cig_len && cig_op !=
BAM_CINS) {
893 cigar[ncigar++] = (cig_len<<4) + cig_op;
898 (
char *)&seq[pos-1], &out_sz);
910 cigar[ncigar++] = (cig_len<<4) + cig_op;
915 cigar[ncigar++] = (cig_len<<4) + cig_op;
921 (
char *)&seq[pos-1], &out_sz);
924 (
char *)&qual[pos-1], &out_sz);
940 (
char *)&qual[pos-1], &out_sz);
947 cigar[ncigar++] = (cig_len<<4) + cig_op;
952 (
char *)&i32, &out_sz);
960 if (cig_len && cig_op !=
BAM_CPAD) {
961 cigar[ncigar++] = (cig_len<<4) + cig_op;
966 (
char *)&i32, &out_sz);
975 cigar[ncigar++] = (cig_len<<4) + cig_op;
980 (
char *)&i32, &out_sz);
994 if (cr->
len >= seq_pos) {
997 static int whinged = 0;
999 fprintf(stderr,
"Ref pos outside of ref sequence boundary\n");
1002 memcpy(&seq[seq_pos-1], &s->
ref[ref_pos - s->
ref_start +1],
1003 cr->
len - seq_pos + 1);
1004 ref_pos += cr->
len - seq_pos + 1;
1005 md_dist += cr->
len - seq_pos + 1;
1009 if (ncigar+1 >= cigar_alloc) {
1010 cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
1012 if (!(cigar = realloc(cigar, cigar_alloc *
sizeof(*cigar))))
1017 cigar[ncigar++] = (cig_len<<4) + cig_op;
1023 cigar[ncigar++] = (cig_len<<4) + cig_op;
1028 cig_len += cr->
len - seq_pos+1;
1035 if (ncigar >= cigar_alloc) {
1036 cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
1038 if (!(cigar = realloc(cigar, cigar_alloc *
sizeof(*cigar))))
1042 cigar[ncigar++] = (cig_len<<4) + cig_op;
1052 (
char *)&cr->
mqual, &out_sz);
1054 if (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) {
1070 buf[0] =
'N'; buf[1] =
'M'; buf[2] =
'I';
1071 buf[3] = (nm>> 0) & 0xff;
1072 buf[4] = (nm>> 8) & 0xff;
1073 buf[5] = (nm>>16) & 0xff;
1074 buf[6] = (nm>>24) & 0xff;
1089 while (m && m->
key !=
id)
1100 int i, r = 0, out_sz = 1;
1101 unsigned char ntags;
1105 (
char *)&ntags, &out_sz);
1112 for (i = 0; i < cr->
ntags; i++) {
1114 unsigned char tag_data[3];
1120 blk, (
char *)&
id, &out_sz);
1122 tag_data[0] = ((
char *)&
id)[0];
1123 tag_data[1] = ((
char *)&
id)[1];
1124 tag_data[2] = ((
char *)&
id)[2];
1126 tag_data[0] = (
id>>16) & 0xff;
1127 tag_data[1] = (
id>>8) & 0xff;
1128 tag_data[2] =
id & 0xff;
1136 if (!m->
codec)
return -1;
1147 int i, r = 0, out_sz = 1;
1153 (
char *)&TL, &out_sz);
1158 cr->
ntags = strlen((
char *)TN)/3;
1164 for (i = 0; i < cr->
ntags; i++) {
1166 unsigned char tag_data[3];
1170 tag_data[0] = *TN++;
1171 tag_data[1] = *TN++;
1172 tag_data[2] = *TN++;
1173 id = (tag_data[0]<<16) | (tag_data[1]<<8) | tag_data[2];
1180 if (!m->
codec)
return -1;
1189 static void cram_decode_slice_xref(
cram_slice *s) {
1210 if (cr->
tlen == INT_MIN) {
1211 int id1 = rec, id2 = rec;
1212 int aleft = cr->
apos, aright = cr->
aend;
1219 if (aright < s->crecs[id2].aend)
1230 }
while (id2 != id1);
1233 tlen = aright - aleft + 1;
1253 while (id2 != id1) {
1271 while (id2 != id1) {
1297 fprintf(stderr,
"Mate line out of bounds: %d vs [0, %d]\n",
1314 if (cr->
tlen == INT_MIN)
1319 static char *md5_print(
unsigned char *md5,
char *out) {
1321 for (i = 0; i < 16; i++) {
1322 out[i*2+0] =
"0123456789abcdef"[md5[i]>>4];
1323 out[i*2+1] =
"0123456789abcdef"[md5[i]&15];
1343 int unknown_rg = -1;
1356 !strcmp(bfd->
rg[bfd->
nrg-1].
name,
"UNKNOWN"))
1357 unknown_rg = bfd->
nrg-1;
1374 fprintf(stderr,
"No reference specified and "
1375 "no embedded reference is available.\n");
1384 }
else if (!fd->
no_ref) {
1398 fprintf(stderr,
"Slice starts before base 1.\n");
1402 pthread_mutex_lock(&fd->
refs->
lock);
1404 fprintf(stderr,
"Slice ends beyond reference end.\n");
1407 pthread_mutex_unlock(&fd->
refs->
lock);
1408 pthread_mutex_unlock(&fd->
ref_lock);
1413 fprintf(stderr,
"Unable to fetch reference #%d %d..%d\n",
1421 && memcmp(s->
hdr->
md5,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
1423 unsigned char digest[16];
1431 fprintf(stderr,
"Slice starts before base 1.\n");
1438 fprintf(stderr,
"Slice ends beyond reference end.\n");
1458 || memcmp(digest, s->
hdr->
md5, 16) != 0) {
1460 fprintf(stderr,
"ERROR: md5sum reference mismatch for ref "
1462 fprintf(stderr,
"CRAM: %s\n", md5_print(s->
hdr->
md5, M));
1463 fprintf(stderr,
"Ref : %s\n", md5_print(digest, M));
1470 pthread_mutex_lock(&fd->
refs->
lock);
1471 refs = calloc(fd->
refs->
nref,
sizeof(
char *));
1472 pthread_mutex_unlock(&fd->
refs->
lock);
1473 pthread_mutex_unlock(&fd->
ref_lock);
1488 (
char *)&bf, &out_sz);
1499 (
char *)&cf, &out_sz);
1512 (
char *)&cr->
ref_id, &out_sz);
1521 pthread_mutex_lock(&fd->
refs->
lock);
1523 pthread_mutex_unlock(&fd->
refs->
lock);
1524 pthread_mutex_unlock(&fd->
ref_lock);
1533 (
char *)&cr->
len, &out_sz);
1537 (
char *)&cr->
apos, &out_sz);
1544 (
char *)&cr->
rg, &out_sz);
1545 if (cr->
rg == unknown_rg)
1569 blk, (
char *)&mf, &out_sz);
1606 (
char *)&cr->
tlen, &out_sz);
1639 r |= cram_decode_aux_1_0(c, s, blk, cr);
1641 r |= cram_decode_aux(c, s, blk, cr);
1658 memset(seq,
'=', cr->
len);
1662 r |= cram_decode_seq(fd, c, s, blk, cr, bfd, cf, seq, qual);
1664 int out_sz2 = cr->
len;
1674 (
char *)seq, &out_sz2);
1676 if (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) {
1680 blk, qual, &out_sz2);
1682 memset(qual, 30, cr->
len);
1690 for (i = 0; i < fd->
refs->
nref; i++) {
1695 }
else if (ref_id >= 0 && s->
ref != fd->
ref_free) {
1698 pthread_mutex_unlock(&fd->
ref_lock);
1701 cram_decode_slice_xref(s);
1733 if (!(j = malloc(
sizeof(*j))))
1775 int bam_idx, rg_len;
1776 char name_a[1024], *name;
1778 char *aux, *aux_orig;
1789 name_len = sprintf(name_a,
"%s:%"PRId64":%d",
1792 name_len = sprintf(name_a,
"%s:%"PRId64":%d",
1797 if (cr->
rg < -1 || cr->
rg >= bfd->
nrg)
1823 aux = aux_orig = (
char *)
bam_aux(*bam);
1834 *aux++ =
'R'; *aux++ =
'G'; *aux++ =
'Z';
1835 memcpy(aux, bfd->
rg[cr->
rg].
name, len);
1846 return bam_idx + (aux - aux_orig);
1858 if (!(c = fd->
ctr)) {
1863 }
while (c->
length == 0);
1880 }
while (c->
length == 0);
1898 pthread_mutex_unlock(&fd->
ref_lock);
1918 }
else if (!fd->
ooc) {
1931 }
while (c->
length == 0);
1971 pthread_mutex_unlock(&fd->
ref_lock);
1977 goto empty_container;
2022 fprintf(stderr,
"Failure to decode slice\n");
2048 if (!res || !res->
data) {
2049 fprintf(stderr,
"t_pool_next_result failure\n");
2082 if (!(s = cram_next_slice(fd, &c)))