48 #include "io_lib_config.h"
60 #include <sys/types.h>
73 #include <sys/syscall.h>
74 #define gettid() (int)syscall(SYS_gettid)
76 #define RP(...) fprintf (stderr, __VA_ARGS__)
83 #define paranoid_hclose(fp) (hclose(fp))
85 #define hclose_abruptly(fp) (fclose(fp))
86 #define hflush(fp) (fflush(fp))
87 #define hgetc(fp) (getc(fp))
88 #define hputc(c, fp) (putc((c), (fp)))
89 #define hread(fp, buffer, nbytes) (fread((buffer), 1, (nbytes), (fp)))
90 #define hseek(fp, offset, whence) (fseeko((fp), (offset), (whence)))
91 #define hwrite(fp, buffer, nbytes) (fwrite((buffer), 1, (nbytes), (fp)))
92 #define paranoid_hclose(fp) (paranoid_fclose(fp))
109 static int nbytes[16] = {
117 static int nbits[16] = {
118 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
119 0x3f, 0x3f, 0x3f, 0x3f,
129 int i = nbytes[val>>4];
130 val &= nbits[val>>4];
138 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
143 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
144 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
149 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
150 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
151 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
156 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
157 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
158 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
159 val = (val<<4) | (((
unsigned char)
hgetc(fd->
fp)) & 0x0f);
174 return hwrite(fd->
fp, buf, len) == len ? 0 : -1;
182 unsigned char *up = (
unsigned char *)cp;
187 }
else if (up[0] < 0xc0) {
188 *val_p = ((up[0] <<8) | up[1]) & 0x3fff;
190 }
else if (up[0] < 0xe0) {
191 *val_p = ((up[0]<<16) | (up[1]<< 8) | up[2]) & 0x1fffff;
193 }
else if (up[0] < 0xf0) {
194 *val_p = ((up[0]<<24) | (up[1]<<16) | (up[2]<<8) | up[3]) & 0x0fffffff;
197 *val_p = ((up[0] & 0x0f)<<28) | (up[1]<<20) | (up[2]<<12) | (up[3]<<4) | (up[4] & 0x0f);
209 if (!(val & ~0x00000007f)) {
212 }
else if (!(val & ~0x00003fff)) {
213 *cp++ = (val >> 8 ) | 0x80;
216 }
else if (!(val & ~0x01fffff)) {
217 *cp++ = (val >> 16) | 0xc0;
218 *cp++ = (val >> 8 ) & 0xff;
221 }
else if (!(val & ~0x0fffffff)) {
222 *cp++ = (val >> 24) | 0xe0;
223 *cp++ = (val >> 16) & 0xff;
224 *cp++ = (val >> 8 ) & 0xff;
228 *cp++ = 0xf0 | ((val>>28) & 0xff);
229 *cp++ = (val >> 20) & 0xff;
230 *cp++ = (val >> 12) & 0xff;
231 *cp++ = (val >> 4 ) & 0xff;
240 if (!(val & ~((1LL<<7)-1))) {
243 }
else if (!(val & ~((1LL<<(6+8))-1))) {
244 *cp++ = (val >> 8 ) | 0x80;
247 }
else if (!(val & ~((1LL<<(5+2*8))-1))) {
248 *cp++ = (val >> 16) | 0xc0;
249 *cp++ = (val >> 8 ) & 0xff;
252 }
else if (!(val & ~((1LL<<(4+3*8))-1))) {
253 *cp++ = (val >> 24) | 0xe0;
254 *cp++ = (val >> 16) & 0xff;
255 *cp++ = (val >> 8 ) & 0xff;
258 }
else if (!(val & ~((1LL<<(3+4*8))-1))) {
259 *cp++ = (val >> 32) | 0xf0;
260 *cp++ = (val >> 24) & 0xff;
261 *cp++ = (val >> 16) & 0xff;
262 *cp++ = (val >> 8 ) & 0xff;
265 }
else if (!(val & ~((1LL<<(2+5*8))-1))) {
266 *cp++ = (val >> 40) | 0xf8;
267 *cp++ = (val >> 32) & 0xff;
268 *cp++ = (val >> 24) & 0xff;
269 *cp++ = (val >> 16) & 0xff;
270 *cp++ = (val >> 8 ) & 0xff;
273 }
else if (!(val & ~((1LL<<(1+6*8))-1))) {
274 *cp++ = (val >> 48) | 0xfc;
275 *cp++ = (val >> 40) & 0xff;
276 *cp++ = (val >> 32) & 0xff;
277 *cp++ = (val >> 24) & 0xff;
278 *cp++ = (val >> 16) & 0xff;
279 *cp++ = (val >> 8 ) & 0xff;
282 }
else if (!(val & ~((1LL<<(7*8))-1))) {
283 *cp++ = (val >> 56) | 0xfe;
284 *cp++ = (val >> 48) & 0xff;
285 *cp++ = (val >> 40) & 0xff;
286 *cp++ = (val >> 32) & 0xff;
287 *cp++ = (val >> 24) & 0xff;
288 *cp++ = (val >> 16) & 0xff;
289 *cp++ = (val >> 8 ) & 0xff;
294 *cp++ = (val >> 56) & 0xff;
295 *cp++ = (val >> 48) & 0xff;
296 *cp++ = (val >> 40) & 0xff;
297 *cp++ = (val >> 32) & 0xff;
298 *cp++ = (val >> 24) & 0xff;
299 *cp++ = (val >> 16) & 0xff;
300 *cp++ = (val >> 8 ) & 0xff;
307 unsigned char *up = (
unsigned char *)cp;
312 }
else if (up[0] < 0xc0) {
314 (
uint64_t)up[1]) & (((1LL<<(6+8)))-1);
316 }
else if (up[0] < 0xe0) {
319 (
uint64_t)up[2]) & ((1LL<<(5+2*8))-1);
321 }
else if (up[0] < 0xf0) {
325 (
uint64_t)up[3]) & ((1LL<<(4+3*8))-1);
327 }
else if (up[0] < 0xf8) {
332 (
uint64_t)up[4]) & ((1LL<<(3+4*8))-1);
334 }
else if (up[0] < 0xfc) {
340 (
uint64_t)up[5]) & ((1LL<<(2+5*8))-1);
342 }
else if (up[0] < 0xfe) {
349 (
uint64_t)up[6]) & ((1LL<<(1+6*8))-1);
351 }
else if (up[0] < 0xff) {
358 (
uint64_t)up[7]) & ((1LL<<(7*8))-1);
375 int64_t val = (
unsigned char)c;
383 }
else if (val < 0xc0) {
384 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
385 *val_p = val & (((1LL<<(6+8)))-1);
388 }
else if (val < 0xe0) {
389 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
390 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
391 *val_p = val & ((1LL<<(5+2*8))-1);
394 }
else if (val < 0xf0) {
395 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
396 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
397 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
398 *val_p = val & ((1LL<<(4+3*8))-1);
401 }
else if (val < 0xf8) {
402 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
403 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
404 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
405 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
406 *val_p = val & ((1LL<<(3+4*8))-1);
409 }
else if (val < 0xfc) {
410 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
411 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
412 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
413 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
414 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
415 *val_p = val & ((1LL<<(2+5*8))-1);
418 }
else if (val < 0xfe) {
419 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
420 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
421 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
422 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
423 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
424 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
425 *val_p = val & ((1LL<<(1+6*8))-1);
428 }
else if (val < 0xff) {
429 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
430 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
431 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
432 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
433 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
434 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
435 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
436 *val_p = val & ((1LL<<(7*8))-1);
440 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
441 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
442 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
443 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
444 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
445 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
446 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
447 val = (val<<8) | (
unsigned char)
hgetc(fd->
fp);
516 cp[0] = ( val & 0xff);
517 cp[1] = ((val>>8) & 0xff);
518 cp[2] = ((val>>16) & 0xff);
519 cp[3] = ((val>>24) & 0xff);
522 return b->
data ? 0 : -1;
532 unsigned char *data = NULL;
537 data = malloc(data_alloc = csize*1.2+100);
545 s.
next_in = (
unsigned char *)cdata;
555 fprintf(stderr,
"zlib inflateInit error: %s\n", s.
msg);
562 unsigned char *data_tmp;
571 fprintf(stderr,
"zlib inflate error: %s\n", s.
msg);
577 data = realloc((data_tmp = data), data_alloc += alloc_inc);
590 static char *zlib_mem_deflate(
char *data,
size_t size,
size_t *cdata_size,
591 int level,
int strat) {
593 unsigned char *cdata = NULL;
598 cdata = malloc(cdata_alloc = size*1.05+100);
607 s.
next_in = (
unsigned char *)data;
617 fprintf(stderr,
"zlib deflateInit2 error: %s\n", s.
msg);
625 if (cdata_alloc - cdata_pos <= 0) {
626 fprintf(stderr,
"Deflate produced larger output than expected. Abort\n");
632 fprintf(stderr,
"zlib deflate error: %s\n", s.
msg);
637 fprintf(stderr,
"zlib deflate error: %s\n", s.
msg);
642 fprintf(stderr,
"zlib deflate error: %s\n", s.
msg);
644 return (
char *)cdata;
692 if (-1 == (b->method =
hgetc(fd->
fp))) { free(b);
return NULL; }
701 if (b->method ==
RAW) {
711 if (!(b->
data = malloc(b->
comp_size))) { free(b);
return NULL; }
735 if (
hputc(b->method, fd->
fp) == EOF)
return -1;
741 if (b->method ==
RAW) {
768 size_t uncomp_size = 0;
788 b->
data = (
unsigned char *)uncomp;
789 b->
alloc = uncomp_size;
796 if (!(uncomp = malloc(usize)))
798 if (BZ_OK != BZ2_bzBuffToBuffDecompress(uncomp, &usize,
804 b->
data = (
unsigned char *)uncomp;
812 fprintf(stderr,
"Bzip2 compression is not compiled into this "
813 "version.\nPlease rebuild and try again.\n");
828 unsigned int comp_size = b->
uncomp_size*1.01 + 600;
829 char *
comp = malloc(comp_size);
830 char *data = (
char *)b->
data;
838 if (BZ_OK != BZ2_bzBuffToBuffCompress(comp, &comp_size,
846 b->
data = (
unsigned char *)comp;
851 fprintf(stderr,
"Compressed block ID %d from %d to %d\n",
867 int level,
int strat,
868 int level2,
int strat2) {
870 size_t comp_size = 0;
878 if (b->method !=
RAW) {
879 fprintf(stderr,
"Attempt to compress an already compressed block.\n");
886 return cram_compress_block_bzip2(fd, b, metrics, level);
892 fprintf(stderr,
"metrics trial %d, next_trial %d, m1 %d, m2 %d\n",
894 metrics->
m1, metrics->
m2);
896 if (strat2 >= 0 && (metrics->
trial > 0 || --metrics->
next_trial <= 0)) {
903 metrics->
m1 = metrics->
m2 = 0;
910 &s2, level2, strat2);
917 if (s1 < 0.98 * s2) {
919 fprintf(stderr,
"M1 wins %d vs %d\n", (
int)s1, (
int)s2);
920 comp = c1; comp_size = s1;
925 fprintf(stderr,
"M2 wins %d vs %d\n", (
int)s1, (
int)s2);
926 comp = c2; comp_size = s2;
932 }
else if (strat2 >= 0) {
933 int xlevel = metrics->
m1 > metrics->
m2 ? level : level2;
934 int xstrat = metrics->
m1 > metrics->
m2 ? strat : strat2;
948 b->
data = (
unsigned char *)comp;
953 fprintf(stderr,
"Compressed block ID %d from %d to %d\n",
971 case RAW:
return "RAW";
972 case GZIP:
return "GZIP";
973 case BZIP2:
return "BZIP2";
986 case CORE:
return "CORE";
1000 if (-1 == fflush(fp) && errno != EBADF) {
1006 if (-1 == fsync(fileno(fp))) {
1007 if (errno != EINVAL) {
1039 RP(
"refs_free()\n");
1058 if (!(e =
kh_val(r->h_meta, k)))
1074 pthread_mutex_destroy(&r->
lock);
1079 static refs_t *refs_create(
void) {
1080 refs_t *r = calloc(1,
sizeof(*r));
1082 RP(
"refs_create()\n");
1095 if (!(r->h_meta =
kh_init(refs)))
1098 pthread_mutex_init(&r->
lock, NULL);
1116 static refs_t *refs_load_fai(
refs_t *r_orig,
char *fn,
int is_err) {
1122 size_t fn_l = strlen(fn);
1124 RP(
"refs_load_fai %s\n", fn);
1127 if (!(r = refs_create()))
1131 if (stat(fn, &sb) != 0) {
1144 if (fn_l > 4 && strcmp(&fn[fn_l-4],
".fai") == 0)
1147 if (!(r->
fp = fopen(r->
fn,
"r"))) {
1154 sprintf(fai_fn,
"%.*s.fai",
PATH_MAX-5, r->
fn);
1156 if (stat(fai_fn, &sb) != 0) {
1161 if (!(fp = fopen(fai_fn,
"r"))) {
1166 while (fgets(line, 8192, fp) != NULL) {
1176 for (cp = line; *cp && !isspace(*cp); cp++)
1182 while (*cp && isspace(*cp))
1184 e->
length = strtoll(cp, &cp, 10);
1187 while (*cp && isspace(*cp))
1189 e->
offset = strtoll(cp, &cp, 10);
1192 while (*cp && isspace(*cp))
1197 while (*cp && isspace(*cp))
1214 kh_val(r->h_meta, k) = e;
1224 kh_val(r->h_meta, k) = e;
1261 for (i = 0; i < h->
nref; i++) {
1263 if (k !=
kh_end(r->h_meta)) {
1266 fprintf(stderr,
"Unable to find ref name '%s'\n",
1282 if (!h || h->
nref == 0)
1295 for (; i < r->
nref; i++)
1299 for (i = 0; i < h->
nref; i++) {
1343 return refs_from_header(fd->
refs, fd, hdr);
1354 while ((cp = strchr(dir,
'%'))) {
1355 strncpy(path, dir, cp-dir);
1363 }
else if (*cp >=
'0' && *cp <=
'9') {
1367 l = strtol(cp, &endp, 10);
1368 l =
MIN(l, strlen(fn));
1370 strncpy(path, fn, l);
1386 path += strlen(dir);
1387 if (*fn && path[-1] !=
'/')
1396 char *cp = strrchr(path,
'/');
1406 if (mkdir(path, mode) == 0) {
1426 char *ref_path = getenv(
"REF_PATH");
1430 char *local_cache = getenv(
"REF_CACHE");
1434 fprintf(stderr,
"cram_populate_ref on fd %p, id %d\n", fd,
id);
1449 fprintf(stderr,
"Querying ref %s\n", tag->
str+3);
1452 if (local_cache && *local_cache) {
1458 if (0 == stat(path, &sb) && (fp = fopen(path,
"r"))) {
1489 fn = (strncmp(tag->
str+3,
"file:", 5) == 0)
1497 if (!(refs = refs_load_fai(fd->
refs, fn, 0)))
1518 if (local_cache && *local_cache) {
1524 fprintf(stderr,
"Path='%s'\n", path);
1529 sprintf(path_tmp,
"%s.tmp_%d", path, i);
1531 fp = fopen(path_tmp,
"wx");
1532 }
while (fp == NULL && errno == EEXIST);
1546 if (0 == chmod(path_tmp, 0444))
1547 rename(path_tmp, path);
1556 static void cram_ref_incr_locked(
refs_t *r,
int id) {
1557 RP(
"%d INC REF %d, %d %p\n", gettid(),
id, (
int)(
id>=0?r->
ref_id[
id]->
count+1:-999),
id>=0?r->
ref_id[
id]->
seq:(
char *)1);
1559 if (id < 0 || !r->ref_id[
id]->seq)
1569 pthread_mutex_lock(&r->
lock);
1570 cram_ref_incr_locked(r,
id);
1571 pthread_mutex_unlock(&r->
lock);
1574 static void cram_ref_decr_locked(
refs_t *r,
int id) {
1575 RP(
"%d DEC REF %d, %d %p\n", gettid(),
id, (
int)(
id>=0?r->
ref_id[
id]->
count-1:-999),
id>=0?r->
ref_id[
id]->
seq:(
char *)1);
1577 if (id < 0 || !r->ref_id[
id]->seq) {
1587 RP(
"%d FREE REF %d (%p)\n", gettid(),
1601 pthread_mutex_lock(&r->
lock);
1602 cram_ref_decr_locked(r,
id);
1603 pthread_mutex_unlock(&r->
lock);
1615 static char *load_ref_portion(FILE *fp,
ref_entry *e,
int start,
int end) {
1634 : end-1) - offset + 1;
1636 if (0 != fseeko(fp, offset,
SEEK_SET)) {
1637 perror(
"fseeko() on reference file");
1641 if (len == 0 || !(seq = malloc(len))) {
1645 if (len != fread(seq, 1, len, fp)) {
1646 perror(
"fread() on reference file");
1652 if (len != end-start+1) {
1657 for (i = j = 0; i < len; i++) {
1658 if (cp[i] >=
'!' && cp[i] <=
'~')
1659 cp[j++] = cp[i] & ~0x20;
1663 if (cp_to - seq != end-start+1) {
1664 fprintf(stderr,
"Malformed reference file?\n");
1670 for (i = 0; i < len; i++) {
1671 seq[i] = seq[i] & ~0x20;
1687 int start = 1, end = e->
length;
1694 assert(e->
count == 0);
1699 for (idx = 0; idx < r->
nref; idx++)
1702 RP(
"%d cram_ref_load DECR %d\n", gettid(), idx);
1706 RP(
"%d FREE REF %d (%p)\n", gettid(),
id, r->
ref_id[
id]->
seq);
1715 if (strcmp(r->
fn, e->
fn) || r->
fp == NULL) {
1719 if (!(r->
fp = fopen(r->
fn,
"r"))) {
1725 RP(
"%d Loading ref %d (%d..%d)\n", gettid(),
id, start, end);
1727 if (!(seq = load_ref_portion(r->
fp, e, start, end))) {
1731 RP(
"%d Loaded ref %d (%d..%d) = %p\n", gettid(),
id, start, end, seq);
1733 RP(
"%d INC REF %d, %d\n", gettid(),
id, (
int)(e->
count+1));
1741 RP(
"%d cram_ref_load INCR %d => %d\n", gettid(),
id, e->
count+1);
1786 RP(
"%d cram_get_ref on fd %p, id %d, range %d..%d\n", gettid(), fd,
id, start, end);
1799 fprintf(stderr,
"No reference found for id %d\n",
id);
1800 pthread_mutex_unlock(&fd->
ref_lock);
1805 fprintf(stderr,
"No reference found for id %d\n",
id);
1806 pthread_mutex_unlock(&fd->
ref_lock);
1811 fprintf(stderr,
"No reference found for id %d\n",
id);
1812 pthread_mutex_unlock(&fd->
ref_lock);
1828 pthread_mutex_lock(&fd->
refs->
lock);
1830 if (cram_populate_ref(fd,
id, r) == -1) {
1831 fprintf(stderr,
"Failed to populate reference for id %d\n",
id);
1832 pthread_mutex_unlock(&fd->
refs->
lock);
1833 pthread_mutex_unlock(&fd->
ref_lock);
1869 cram_ref_incr_locked(fd->
refs,
id);
1873 pthread_mutex_unlock(&fd->
refs->
lock);
1874 pthread_mutex_unlock(&fd->
ref_lock);
1882 cram_ref_incr_locked(fd->
refs,
id);
1896 RP(
"%d cram_get_ref returning for id %d, count %d\n", gettid(),
id, (
int)r->
count);
1898 pthread_mutex_unlock(&fd->
refs->
lock);
1899 pthread_mutex_unlock(&fd->
ref_lock);
1919 pthread_mutex_unlock(&fd->
refs->
lock);
1920 pthread_mutex_unlock(&fd->
ref_lock);
1931 pthread_mutex_unlock(&fd->
refs->
lock);
1932 pthread_mutex_unlock(&fd->
ref_lock);
1937 if (!(fd->
ref = load_ref_portion(fd->
refs->
fp, r, start, end))) {
1938 pthread_mutex_unlock(&fd->
refs->
lock);
1939 pthread_mutex_unlock(&fd->
ref_lock);
1952 pthread_mutex_unlock(&fd->
refs->
lock);
1953 pthread_mutex_unlock(&fd->
ref_lock);
1955 return seq + ostart - start;
1965 fd->
refs = refs_load_fai(fd->
refs, fn,
1972 if (!(fd->
refs = refs_create()))
1974 if (-1 == refs_from_header(fd->
refs, fd, fd->
header))
2055 if (!(c->tags_used =
kh_init(s_i2i)))
2126 if (c->tags_used)
kh_destroy(s_i2i, c->tags_used);
2144 memset(&c2, 0,
sizeof(c2));
2182 if (!(c = calloc(1,
sizeof(*c))))
2230 char buf_a[1024], *buf = buf_a, *cp;
2261 if (cp-buf !=
hwrite(fd->
fp, buf, cp-buf)) {
2300 return hflush(fd->
fp) == 0 ? 0 : -1;
2316 return cram_flush_container2(fd, c);
2329 fprintf(stderr,
"cram_encode_container failed\n");
2336 static int cram_flush_result(
cram_fd *fd) {
2352 if (0 != cram_flush_container2(fd, c))
2366 ret |=
hflush(fd->
fp) == 0 ? 0 : -1;
2380 if (!(j = malloc(
sizeof(*j))))
2387 return cram_flush_result(fd);
2410 if (!(hdr->TD_hash =
kh_init(m_s2i))) {
2432 if (hdr->preservation_map)
2625 s->nTN = s->aTN = 0;
2630 if (!(s->pair =
kh_init(m_s2i)))
goto err;
2659 int i, n, max_id, min_id;
2673 fprintf(stderr,
"Unexpected block of type %s\n",
2682 for (max_id = i = 0, min_id = INT_MAX; i < n; i++) {
2687 if (max_id < s->block[i]->content_id)
2693 if (min_id >= 0 && max_id < 1024) {
2697 for (i = 0; i < n; i++) {
2758 if (memcmp(def->
magic,
"CRAM", 4) != 0) {
2764 fprintf(stderr,
"CRAM version number mismatch\n"
2765 "Expected 1.x or 2.x, got %d.%d\n",
2815 if (NULL == (header = malloc(header_len+1)))
2819 if (header_len !=
hread(fd->
fp, header, header_len))
2853 if (NULL == (header = malloc(header_len))) {
2858 memcpy(header,
BLOCK_END(b), header_len);
2876 char *pads = malloc(c->
length - len);
2894 hdr = sam_hdr_parse_(header, header_len);
2907 static void full_path(
char *out,
char *in) {
2916 (len = strlen(out))+1+strlen(in) >=
PATH_MAX) {
2922 sprintf(out+len,
"/%.*s",
PATH_MAX - len, in);
2935 #define PADDED_BLOCK
2943 "ID",
"UNKNOWN",
"SM",
"UNKNOWN", NULL))
2950 for (i = 0; i < hdr->
nref; i++) {
2958 char unsigned buf[16], buf2[33];
2970 if (NULL == ref)
return -1;
2976 for (j = 0; j < 16; j++) {
2977 buf2[j*2+0] =
"0123456789abcdef"[buf[j]>>4];
2978 buf2[j*2+1] =
"0123456789abcdef"[buf[j]&15];
2987 full_path(ref_fn, fd->
ref_fn);
3055 c->
length += padded_length;
3056 if (NULL == (pads = calloc(1, padded_length))) {
3066 #ifdef PADDED_CONTAINER
3068 c->
length += padded_length;
3095 #ifdef PADDED_CONTAINER
3097 if (NULL == (pads = calloc(1, padded_length)))
3099 if (padded_length !=
hwrite(fd->
fp, pads, padded_length))
3105 if (-1 == refs_from_header(fd->
refs, fd, fd->
header))
3113 RP(
"=== Finishing saving header ===\n");
3127 static void cram_init_tables(
cram_fd *fd) {
3130 memset(fd->
L1, 4, 256);
3131 fd->
L1[
'A'] = 0; fd->
L1[
'a'] = 0;
3132 fd->
L1[
'C'] = 1; fd->
L1[
'c'] = 1;
3133 fd->
L1[
'G'] = 2; fd->
L1[
'g'] = 2;
3134 fd->
L1[
'T'] = 3; fd->
L1[
't'] = 3;
3136 memset(fd->
L2, 5, 256);
3137 fd->
L2[
'A'] = 0; fd->
L2[
'a'] = 0;
3138 fd->
L2[
'C'] = 1; fd->
L2[
'c'] = 1;
3139 fd->
L2[
'G'] = 2; fd->
L2[
'g'] = 2;
3140 fd->
L2[
'T'] = 3; fd->
L2[
't'] = 3;
3141 fd->
L2[
'N'] = 4; fd->
L2[
'n'] = 4;
3144 for (i = 0; i < 0x200; i++) {
3160 for (i = 0; i < 0x1000; i++) {
3177 for (i = 0; i < 0x1000; i++)
3179 for (i = 0; i < 0x1000; i++)
3184 for (i = 0; i < 32; i++) {
3191 for (i = 0; i < 20; i+=4) {
3193 for (j = 0; j < 20; j++) {
3207 static int major_version = 2;
3208 static int minor_version = 1;
3220 char fmode[3]= { mode[0],
'\0',
'\0' };
3222 if (strlen(mode) > 1 && (mode[1] ==
'b' || mode[1] ==
'c')) {
3227 fp =
hopen(filename, fmode);
3229 if (strcmp(filename,
"-") == 0) {
3230 fp = (*fmode ==
'r') ? stdin : stdout;
3232 fp = fopen(filename, fmode);
3256 cram_fd *fd = calloc(1,
sizeof(*fd));
3261 if (strlen(mode) > 2 && mode[2] >=
'0' && mode[2] <=
'9')
3262 fd->
level = mode[2] -
'0';
3268 if (fd->
mode ==
'r') {
3291 strncpy(def.
file_id, filename, 20);
3300 cram_init_tables(fd);
3302 fd->
prefix = strdup((cp = strrchr(filename,
'/')) ? cp+1 : filename);
3310 fd->
refs = refs_create();
3335 for (i = 0; i < 7; i++)
3345 if (-1 == refs_from_header(fd->
refs, fd, fd->
header))
3366 if (
hseek(fd->
fp, offset, whence) >= 0)
3369 if (!(whence ==
SEEK_CUR && offset >= 0))
3373 while (offset > 0) {
3374 int len =
MIN(65536, offset);
3375 if (len !=
hread(fd->
fp, buf, len))
3394 if (fd->
mode ==
'w' && fd->
ctr) {
3416 if (fd->
mode ==
'w' && fd->
ctr) {
3426 if (0 != cram_flush_result(fd))
3430 pthread_mutex_destroy(&fd->
ref_lock);
3440 if (fd->
mode ==
'w') {
3442 if (30 !=
hwrite(fd->
fp,
"\x0b\x00\x00\x00\xff\xff\xff\xff"
3443 "\xff\xe0\x45\x4f\x46\x00\x00\x00"
3444 "\x00\x01\x00\x00\x01\x00\x06\x06"
3445 "\x01\x00\x01\x00\x01\x00", 30))
3454 for (bl = fd->
bl; bl; bl = next) {
3458 for (i = 0; i < max_rec; i++) {
3485 for (i = 0; i < 7; i++)
3518 va_start(args, opt);
3543 if (!(fd->
prefix = strdup(va_arg(args,
char *))))
3548 fd->
verbose = va_arg(args,
int);
3564 fd->
no_ref = va_arg(args,
int);
3572 fd->
use_bz2 = va_arg(args,
int);
3577 refs = va_arg(args,
refs_t *);
3578 if (refs != fd->
refs) {
3595 char *s = va_arg(args,
char *);
3596 if (2 != sscanf(s,
"%d.%d", &major, &minor)) {
3597 fprintf(stderr,
"Malformed version string %s\n", s);
3600 if (!((major == 1 && minor == 0) ||
3601 (major == 2 && (minor == 0 || minor == 1)) ||
3602 (major == 3 && minor == 0))) {
3603 fprintf(stderr,
"Unknown version string; "
3604 "use 1.0, 2.0, 2.1 or 3.0\n");
3615 int nthreads = va_arg(args,
int);
3622 pthread_mutex_init(&fd->
ref_lock, NULL);
3635 pthread_mutex_init(&fd->
ref_lock, NULL);
3647 fprintf(stderr,
"Unknown CRAM option code %d\n", opt);