34 #include <sys/types.h>
/* Number of bytes in the fixed block header; this many bytes of g_magic are
 * copied to the start of every compressed block. */
41 #define BLOCK_HEADER_LENGTH 18
/* Number of bytes in the block trailer; corresponds to the two 32-bit values
 * (CRC, then uncompressed length) stored in the last 8 bytes of a block. */
42 #define BLOCK_FOOTER_LENGTH 8
/*
 * Template for the 18-byte block header.  The array is [19] because the
 * string literal contributes a trailing NUL that is never written out.
 *
 * Byte layout, matching what check_header() tests on input:
 *   0-1    31, 139   (also the two bytes peeked at to set is_compressed)
 *   2      8
 *   3      4         (bit 2 set; check_header() requires it)
 *   4-9    0,0,0,0,0,255
 *   10-11  6, stored low byte first
 *   12-13  'B', 'C'
 *   14-15  2, stored low byte first
 *   16-17  0 here; overwritten per block with (block length - 1)
 */
59 static const uint8_t g_magic[19] =
"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0";
91 buffer[1] = value >> 8;
/*
 * Decode the 16-bit value stored low byte first in buffer[0] and buffer[1].
 * The result is returned as a non-negative int in the range 0..65535.
 */
94 static inline int unpackInt16(
const uint8_t *buffer)
96 return buffer[0] | buffer[1] << 8;
102 buffer[1] = value >> 8;
103 buffer[2] = value >> 16;
104 buffer[3] = value >> 24;
111 ssize_t n =
hpeek(hfpr, magic, 2);
112 if (n < 0)
return NULL;
114 fp = (
BGZF*)calloc(1,
sizeof(
BGZF));
115 if (fp == NULL)
return NULL;
118 fp->
is_compressed = (n==2 && magic[0]==0x1f && magic[1]==0x8b);
127 static BGZF *bgzf_write_init(
int compress_level)
130 fp = (
BGZF*)calloc(1,
sizeof(
BGZF));
132 if ( compress_level==-2 )
/*
 * Derive a compression level from an fopen-style mode string.
 *
 * Returns:
 *   0..9  the value of the first decimal digit found in mode;
 *   -1    if mode contains no digit;
 *   -2    if mode contains 'u' anywhere (this overrides any digit);
 *         bgzf_write_init() tests for this value explicitly.
 */
145 static int mode2level(
const char *__restrict mode)
147 int i, compress_level = -1;
/* Stop at the first digit, or at the terminating NUL if there is none. */
148 for (i = 0; mode[i]; ++i)
149 if (mode[i] >=
'0' && mode[i] <=
'9')
break;
/* mode[i] is non-NUL only when the loop stopped on a digit. */
150 if (mode[i]) compress_level = (int)mode[i] -
'0';
/* 'u' is checked last so that it wins over a digit. */
151 if (strchr(mode,
'u')) compress_level = -2;
152 return compress_level;
159 if (strchr(mode,
'r')) {
161 if ((fpr =
hopen(path, mode)) == 0)
return 0;
162 fp = bgzf_read_init(fpr);
165 }
else if (strchr(mode,
'w') || strchr(mode,
'a')) {
167 if ((fpw =
hopen(path, mode)) == 0)
return 0;
168 fp = bgzf_write_init(mode2level(mode));
171 else { errno = EINVAL;
return 0; }
173 fp->
is_be = ed_is_big();
181 if (strchr(mode,
'r')) {
183 if ((fpr =
hdopen(fd, mode)) == 0)
return 0;
184 fp = bgzf_read_init(fpr);
187 }
else if (strchr(mode,
'w') || strchr(mode,
'a')) {
189 if ((fpw =
hdopen(fd, mode)) == 0)
return 0;
190 fp = bgzf_write_init(mode2level(mode));
193 else { errno = EINVAL;
return 0; }
195 fp->
is_be = ed_is_big();
203 if (strchr(mode,
'r')) {
204 fp = bgzf_read_init(hfp);
205 if (fp == NULL)
return NULL;
206 }
else if (strchr(mode,
'w') || strchr(mode,
'a')) {
207 fp = bgzf_write_init(mode2level(mode));
209 else { errno = EINVAL;
return 0; }
212 fp->
is_be = ed_is_big();
216 static int bgzf_compress(
void *_dst,
int *dlen,
void *src,
int slen,
int level)
233 memcpy(dst, g_magic, BLOCK_HEADER_LENGTH);
234 packInt16(&dst[16], *dlen - 1);
237 packInt32((
uint8_t*)&dst[*dlen - 8], crc);
238 packInt32((
uint8_t*)&dst[*dlen - 4], slen);
243 static int deflate_block(
BGZF *fp,
int block_length)
255 static int inflate_block(
BGZF* fp,
int block_length)
281 static int inflate_gzip_block(
BGZF *fp,
int cached)
300 if ( ret<0 )
return -1;
302 if ( have )
return have;
313 if ( header[0] != 31 || header[1] != 139 || header[2] != 8 )
return -2;
314 return ((header[3] & 4) != 0
315 && unpackInt16((
uint8_t*)&header[10]) == 6
316 && header[12] ==
'B' && header[13] ==
'C'
317 && unpackInt16((
uint8_t*)&header[14]) == 2) ? 0 : -1;
321 static void free_cache(
BGZF *fp)
331 static int load_block_from_cache(
BGZF *fp,
int64_t block_address)
336 k =
kh_get(cache, h, block_address);
337 if (k ==
kh_end(h))
return 0;
352 static void cache_block(
BGZF *fp,
int size)
371 if (ret == 0)
return;
/*
 * No-op variant of free_cache(): accepts fp and does nothing.
 * NOTE(review): another definition with the same name appears earlier in the
 * file, so this is presumably the branch compiled when the block cache is
 * disabled - confirm against the enclosing preprocessor conditional.
 */
379 static void free_cache(
BGZF *fp) {}
/*
 * Stub variant of load_block_from_cache(): ignores both arguments and always
 * returns 0.  The caller that tests
 * `fp->cache_size && load_block_from_cache(...)` therefore never takes its
 * early-return path and goes on to read the block from the file.
 * NOTE(review): presumably the cache-disabled build branch - confirm against
 * the enclosing preprocessor conditional.
 */
380 static int load_block_from_cache(
BGZF *fp,
int64_t block_address) {
return 0;}
/*
 * No-op variant of cache_block(): accepts fp and size and stores nothing.
 * NOTE(review): presumably the cache-disabled build branch - confirm against
 * the enclosing preprocessor conditional.
 */
381 static void cache_block(
BGZF *fp,
int size) {}
387 int count, size = 0, block_length, remaining;
406 block_address = htell(fp->
fp);
409 count = inflate_gzip_block(fp, 0);
419 if (fp->
cache_size && load_block_from_cache(fp, block_address))
return 0;
420 count =
hread(fp->
fp, header,
sizeof(header));
426 if ( count !=
sizeof(header) || (ret=check_header(header))==-2 )
435 memcpy(cblock, header,
sizeof(header));
441 if ( header[3] & 0x4 )
443 nskip += unpackInt16(&cblock[nskip]) + 2;
445 if ( header[3] & 0x8 )
455 if ( header[3] & 0x10 )
465 if ( header[3] & 0x2 ) nskip += 2;
477 count = inflate_gzip_block(fp, 1);
488 block_length = unpackInt16((
uint8_t*)&header[16]) + 1;
490 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
492 count =
hread(fp->
fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
493 if (count != remaining) {
498 if ((count = inflate_block(fp, block_length)) < 0)
return -1;
507 cache_block(fp, size);
513 ssize_t bytes_read = 0;
515 if (length <= 0)
return 0;
517 while (bytes_read < length) {
520 if (available <= 0) {
523 if (available <= 0)
break;
525 copy_length = length - bytes_read < available? length - bytes_read : available;
529 output += copy_length;
530 bytes_read += copy_length;
542 return hread(fp->
fp, data, length);
568 pthread_mutex_lock(&w->
mt->
lock);
571 if (w->
mt->
done) stop = 1;
573 pthread_mutex_unlock(&w->
mt->
lock);
576 for (i = w->
i; i < w->mt->curr; i += w->
mt->
n_threads) {
580 memcpy(w->
mt->
blk[i], w->
buf, clen);
581 w->
mt->
len[i] = clen;
587 static void *mt_worker(
void *data)
589 while (worker_aux((
worker_t*)data) == 0);
598 if (!fp->
is_write || fp->
mt || n_threads <= 1)
return -1;
601 mt->
n_blks = n_threads * n_sub_blks;
602 mt->
len = (
int*)calloc(mt->
n_blks,
sizeof(
int));
603 mt->
blk = (
void**)calloc(mt->
n_blks,
sizeof(
void*));
604 for (i = 0; i < mt->
n_blks; ++i)
606 mt->
tid = (pthread_t*)calloc(mt->
n_threads,
sizeof(pthread_t));
614 pthread_attr_init(&attr);
615 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
616 pthread_mutex_init(&mt->
lock, 0);
617 pthread_cond_init(&mt->
cv, 0);
619 pthread_create(&mt->
tid[i], &attr, mt_worker, &mt->
w[i]);
624 static void mt_destroy(
mtaux_t *mt)
628 pthread_mutex_lock(&mt->
lock);
630 pthread_cond_broadcast(&mt->
cv);
631 pthread_mutex_unlock(&mt->
lock);
632 for (i = 1; i < mt->
n_threads; ++i) pthread_join(mt->
tid[i], 0);
634 for (i = 0; i < mt->
n_blks; ++i) free(mt->
blk[i]);
636 free(mt->
blk); free(mt->
len); free(mt->
w); free(mt->
tid);
637 pthread_cond_destroy(&mt->
cv);
638 pthread_mutex_destroy(&mt->
lock);
642 static void mt_queue(
BGZF *fp)
652 static int mt_flush_queue(
BGZF *fp)
657 pthread_mutex_lock(&mt->
lock);
660 pthread_cond_broadcast(&mt->
cv);
661 pthread_mutex_unlock(&mt->
lock);
663 worker_aux(&mt->
w[0]);
674 return (fp->
errcode == 0)? 0 : -1;
677 static int lazy_flush(
BGZF *fp)
686 #else // ~ #ifdef BGZF_MT
688 int bgzf_mt(
BGZF *fp,
int n_threads,
int n_sub_blks)
693 static inline int lazy_flush(
BGZF *fp)
698 #endif // ~ #ifdef BGZF_MT
706 return mt_flush_queue(fp);
716 if (block_length < 0)
return -1;
735 return hwrite(fp->
fp, data, length);
738 ssize_t remaining =
length;
740 while (remaining > 0) {
743 if (copy_length > remaining) copy_length = remaining;
746 input += copy_length;
747 remaining -= copy_length;
749 if (lazy_flush(fp) != 0)
return -1;
752 return length - remaining;
757 return hwrite(fp->
fp, data, length);
762 int ret, block_length;
763 if (fp == 0)
return -1;
767 block_length = deflate_block(fp, 0);
774 if (fp->
mt) mt_destroy(fp->
mt);
783 if (ret != 0)
return -1;
800 off_t offset = htell(fp->
fp);
802 if (errno == ESPIPE) { hclearerr(fp->
fp);
return 2; }
805 if (
hread(fp->
fp, buf, 28) != 28 )
return -1;
807 return (memcmp(
"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", buf, 28) == 0)? 1 : 0;
819 block_offset = pos & 0xFFFF;
820 block_address = pos >> 16;
836 if ((fp =
hopen(fn,
"r")) == 0)
return 0;
837 n =
hread(fp, buf, 16);
838 if (
hclose(fp) < 0 )
return -1;
839 if (n != 16)
return 0;
840 return memcmp(g_magic, buf, 16) == 0? 1 : 0;
/*
 * Round x up, in place, to the nearest power of two (a value that already is
 * a power of two is left unchanged).  Works by decrementing, smearing the
 * highest set bit into every lower bit position with shifts of 1, 2, 4, 8
 * and 16 (enough for a 32-bit value), then incrementing.
 * x is evaluated many times and assigned to, so it must be a modifiable
 * lvalue with no side effects.
 */
861 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
874 for (l = fp->
block_offset; l < fp->block_length && buf[l] != delim; ++l);
875 if (l < fp->block_length) state = 1;
877 if (str->
l + l + 1 >= str->
m) {
878 str->
m = str->
l + l + 2;
880 str->
s = (
char*)realloc(str->
s, str->
m);
890 }
while (state == 0);
891 if (str->l == 0 && state < 0)
return state;
899 if ( !fp->
idx )
return;
910 if ( !fp->
idx )
return -1;
923 if ( !fp->
idx->
offs )
return -1;
938 int blen = strlen(bname);
939 int slen = strlen(suffix);
940 tmp = (
char*) malloc(blen + slen + 1);
941 if ( !tmp )
return -1;
942 memcpy(tmp,bname,blen);
943 memcpy(tmp+blen,suffix,slen+1);
946 FILE *idx = fopen(tmp?tmp:bname,
"wb");
947 if ( tmp ) free(tmp);
948 if ( !idx )
return -1;
958 fwrite(ed_swap_8p(&x), 1,
sizeof(x), idx);
961 x = fp->
idx->
offs[i].
caddr; fwrite(ed_swap_8p(&x), 1,
sizeof(x), idx);
962 x = fp->
idx->
offs[i].
uaddr; fwrite(ed_swap_8p(&x), 1,
sizeof(x), idx);
968 fwrite(&x, 1,
sizeof(x), idx);
985 int blen = strlen(bname);
986 int slen = strlen(suffix);
987 tmp = (
char*) malloc(blen + slen + 1);
988 if ( !tmp )
return -1;
989 memcpy(tmp,bname,blen);
990 memcpy(tmp+blen,suffix,slen+1);
993 FILE *idx = fopen(tmp?tmp:bname,
"rb");
994 if ( tmp ) free(tmp);
995 if ( !idx )
return -1;
999 if ( fread(&x, 1,
sizeof(x), idx) !=
sizeof(x) )
return -1;
1011 ret += fread(&x, 1,
sizeof(x), idx); fp->
idx->
offs[i].
caddr = ed_swap_8(x);
1012 ret += fread(&x, 1,
sizeof(x), idx); fp->
idx->
offs[i].
uaddr = ed_swap_8(x);
1014 if ( ret !=
sizeof(x)*2*(fp->
idx->
noffs-1) )
return -1;
1021 ret += fread(&x, 1,
sizeof(x), idx); fp->
idx->
offs[i].
caddr = x;
1022 ret += fread(&x, 1,
sizeof(x), idx); fp->
idx->
offs[i].
uaddr = x;
1024 if ( ret !=
sizeof(x)*2*(fp->
idx->
noffs-1) )
return -1;
1055 int ilo = 0, ihi = fp->
idx->
noffs - 1;
1058 int i = (ilo+ihi)*0.5;
1059 if ( uoffset < fp->idx->offs[i].uaddr ) ihi = i - 1;
1060 else if ( uoffset >= fp->
idx->
offs[i].
uaddr ) ilo = i + 1;