NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cram_codecs.c
Go to the documentation of this file.
1 /*
2 Copyright (c) 2012-2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8  1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 
11  2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15  3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 
31 /*
32  * FIXME: add checking of cram_external_type to return NULL on unsupported
33  * {codec,type} tuples.
34  */
35 
36 #ifdef HAVE_CONFIG_H
37 #include "io_lib_config.h"
38 #endif
39 
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <limits.h>
44 
45 #include "cram/cram.h"
46 
47 static char *codec2str(enum cram_encoding codec) {
48  switch (codec) {
49  case E_NULL: return "NULL";
50  case E_EXTERNAL: return "EXTERNAL";
51  case E_GOLOMB: return "GOLOMB";
52  case E_HUFFMAN: return "HUFFMAN";
53  case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN";
54  case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";
55  case E_BETA: return "BETA";
56  case E_SUBEXP: return "SUBEXP";
57  case E_GOLOMB_RICE: return "GOLOMB_RICE";
58  case E_GAMMA: return "GAMMA";
59  }
60 
61  return "(unknown)";
62 }
63 
64 /*
65  * ---------------------------------------------------------------------------
66  * Block bit-level I/O functions.
67  * All defined static here to promote easy inlining by the compiler.
68  */
69 
#if 0
/*
 * Disabled code: fetches a single bit from the block, most significant bit
 * first.
 *
 * Returns the bit (0 or 1), or -1 once block->byte exceeds block->alloc.
 */
static signed int get_bit_MSB(cram_block *block) {
    unsigned int shifted;

    if (block->byte > block->alloc)
        return -1;

    shifted = block->data[block->byte] >> block->bit;

    /* Step to the next bit, wrapping to the top bit of the following byte */
    if (--block->bit == -1) {
        block->bit = 7;
        block->byte++;
    }

    return shifted & 1;
}
#endif
90 
91 /*
92  * Count number of successive 0 and 1 bits
93  */
94 static int get_one_bits_MSB(cram_block *block) {
95  int n = 0, b;
96  do {
97  b = block->data[block->byte] >> block->bit;
98  if (--block->bit == -1) {
99  block->bit = 7;
100  block->byte++;
101  }
102  n++;
103  } while (b&1);
104 
105  return n-1;
106 }
107 
108 static int get_zero_bits_MSB(cram_block *block) {
109  int n = 0, b;
110  do {
111  b = block->data[block->byte] >> block->bit;
112  if (--block->bit == -1) {
113  block->bit = 7;
114  block->byte++;
115  }
116  n++;
117  } while (!(b&1));
118 
119  return n-1;
120 }
121 
#if 0
/*
 * Disabled code: writes a single bit at the current position, most
 * significant bit first.  The buffer is doubled (starting from 1024 bytes)
 * when block->byte reaches block->alloc; the realloc result is not checked.
 */
static void store_bit_MSB(cram_block *block, unsigned int bit) {
    if (block->byte >= block->alloc) {
        if (block->alloc)
            block->alloc = block->alloc*2;
        else
            block->alloc = 1024;
        block->data = realloc(block->data, block->alloc);
    }

    if (bit)
        block->data[block->byte] |= (1 << block->bit);

    /* Move on one bit; a freshly entered byte starts out cleared */
    if (--block->bit == -1) {
        block->bit = 7;
        block->byte++;
        block->data[block->byte] = 0;
    }
}
#endif
140 
#if 0
/*
 * Disabled code: appends len raw bytes to the block.  A partially written
 * byte is skipped first so the copy begins on a whole byte boundary.  The
 * realloc result is not checked.
 */
static void store_bytes_MSB(cram_block *block, char *bytes, int len) {
    /* Abandon the remainder of a partly used byte */
    if (block->bit != 7) {
        block->bit = 7;
        block->byte++;
    }

    /* Keep doubling (from an initial 1024) until the new bytes fit */
    while (block->byte + len >= block->alloc) {
        if (block->alloc)
            block->alloc = block->alloc*2;
        else
            block->alloc = 1024;
        block->data = realloc(block->data, block->alloc);
    }

    memcpy(&block->data[block->byte], bytes, len);
    block->byte += len;
}
#endif
158 
159 /* Local optimised copy for inlining */
160 static inline unsigned int get_bits_MSB(cram_block *block, int nbits) {
161  unsigned int val = 0;
162  int i;
163 
164 #if 0
165  // Fits within the current byte */
166  if (nbits <= block->bit+1) {
167  val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
168  if ((block->bit -= nbits) == -1) {
169  block->bit = 7;
170  block->byte++;
171  }
172  return val;
173  }
174 
175  // partial first byte
176  val = block->data[block->byte] & ((1<<(block->bit+1))-1);
177  nbits -= block->bit+1;
178  block->bit = 7;
179  block->byte++;
180 
181  // whole middle bytes
182  while (nbits >= 8) {
183  val = (val << 8) | block->data[block->byte++];
184  nbits -= 8;
185  }
186 
187  val <<= nbits;
188  val |= (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
189  block->bit -= nbits;
190  return val;
191 #endif
192 
193 #if 0
194  /* Inefficient implementation! */
195  //printf("{");
196  for (i = 0; i < nbits; i++)
197  //val = (val << 1) | get_bit_MSB(block);
198  GET_BIT_MSB(block, val);
199 #endif
200 
201 #if 1
202  /* Combination of 1st two methods */
203  if (nbits <= block->bit+1) {
204  val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
205  if ((block->bit -= nbits) == -1) {
206  block->bit = 7;
207  block->byte++;
208  }
209  return val;
210  }
211 
212  switch(nbits) {
213 // case 15: GET_BIT_MSB(block, val);
214 // case 14: GET_BIT_MSB(block, val);
215 // case 13: GET_BIT_MSB(block, val);
216 // case 12: GET_BIT_MSB(block, val);
217 // case 11: GET_BIT_MSB(block, val);
218 // case 10: GET_BIT_MSB(block, val);
219 // case 9: GET_BIT_MSB(block, val);
220  case 8: GET_BIT_MSB(block, val);
221  case 7: GET_BIT_MSB(block, val);
222  case 6: GET_BIT_MSB(block, val);
223  case 5: GET_BIT_MSB(block, val);
224  case 4: GET_BIT_MSB(block, val);
225  case 3: GET_BIT_MSB(block, val);
226  case 2: GET_BIT_MSB(block, val);
227  case 1: GET_BIT_MSB(block, val);
228  break;
229 
230  default:
231  for (i = 0; i < nbits; i++)
232  //val = (val << 1) | get_bit_MSB(block);
233  GET_BIT_MSB(block, val);
234  }
235 #endif
236 
237  //printf("=0x%x}", val);
238 
239  return val;
240 }
241 
242 /*
243  * Can store up to 24-bits worth of data encoded in an integer value
244  * Possibly we'd want to have a less optimal store_bits function when dealing
245  * with nbits > 24, but for now we assume the codes generated are never
246  * that big. (Given this is only possible with 121392 or more
247  * characters with exactly the correct frequency distribution we check
248  * for it elsewhere.)
249  */
250 static int store_bits_MSB(cram_block *block, unsigned int val, int nbits) {
251  /* fprintf(stderr, " store_bits: %02x %d\n", val, nbits); */
252 
253  /*
254  * Use slow mode until we tweak the huffman generator to never generate
255  * codes longer than 24-bits.
256  */
257  unsigned int mask;
258 
259  if (block->byte+4 >= block->alloc) {
260  if (block->byte) {
261  block->alloc *= 2;
262  block->data = realloc(block->data, block->alloc + 4);
263  if (!block->data)
264  return -1;
265  } else {
266  block->alloc = 1024;
267  block->data = realloc(block->data, block->alloc + 4);
268  if (!block->data)
269  return -1;
270  block->data[0] = 0; // initialise first byte of buffer
271  }
272  }
273 
274 
275 
276  if (nbits <= block->bit+1) {
277  block->data[block->byte] |= (val << (block->bit+1-nbits));
278  if ((block->bit-=nbits) == -1) {
279  block->bit = 7;
280  block->byte++;
281  block->data[block->byte] = 0;
282  }
283  return 0;
284  }
285 
286  block->data[block->byte] |= (val >> (nbits -= block->bit+1));
287  block->bit = 7;
288  block->byte++;
289  block->data[block->byte] = 0;
290 
291  mask = 1<<(nbits-1);
292  do {
293  if (val & mask)
294  block->data[block->byte] |= (1 << block->bit);
295  if (--block->bit == -1) {
296  block->bit = 7;
297  block->byte++;
298  block->data[block->byte] = 0;
299  }
300  mask >>= 1;
301  } while(--nbits);
302 
303  return 0;
304 }
305 
306 /*
307  * Returns the next 'size' bytes from a block, or NULL if insufficient
308  * data left.This is just a pointer into the block data and not an
309  * allocated object, so do not free the result.
310  */
311 static char *cram_extract_block(cram_block *b, int size) {
312  char *cp = (char *)b->data + b->idx;
313  b->idx += size;
314  if (b->idx > b->uncomp_size)
315  return NULL;
316 
317  return cp;
318 }
319 
320 /*
321  * ---------------------------------------------------------------------------
322  * EXTERNAL
323  */
325  cram_block *in, char *out, int *out_size) {
326  int i;
327  char *cp;
328  cram_block *b = NULL;
329 
330  /* Find the external block */
331  if (slice->block_by_id) {
332  if (!(b = slice->block_by_id[c->external.content_id]))
333  return -1;
334  } else {
335  for (i = 0; i < slice->hdr->num_blocks; i++) {
336  b = slice->block[i];
337  if (b->content_type == EXTERNAL &&
338  b->content_id == c->external.content_id) {
339  break;
340  }
341  }
342  if (i == slice->hdr->num_blocks || !b)
343  return -1;
344  }
345 
346  cp = (char *)b->data + b->idx;
347  // E_INT and E_LONG are guaranteed single item queries
348  b->idx += itf8_get(cp, (int32_t *)out);
349  *out_size = 1;
350 
351  return 0;
352 }
353 
355  cram_block *in, char *out,
356  int *out_size) {
357  int i;
358  char *cp;
359  cram_block *b = NULL;
360 
361  /* Find the external block */
362  if (slice->block_by_id) {
363  if (!(b = slice->block_by_id[c->external.content_id]))
364  return -1;
365  } else {
366  for (i = 0; i < slice->hdr->num_blocks; i++) {
367  b = slice->block[i];
368  if (b->content_type == EXTERNAL &&
369  b->content_id == c->external.content_id) {
370  break;
371  }
372  }
373  if (i == slice->hdr->num_blocks || !b)
374  return -1;
375  }
376 
377  cp = cram_extract_block(b, *out_size);
378  if (!cp)
379  return -1;
380 
381  memcpy(out, cp, *out_size);
382  return 0;
383 }
384 
386  cram_block *in, char *out_,
387  int *out_size) {
388  int i;
389  char *cp;
390  cram_block *b = NULL;
391  cram_block *out = (cram_block *)out_;
392 
393  /* Find the external block */
394  if (slice->block_by_id) {
395  if (!(b = slice->block_by_id[c->external.content_id]))
396  return -1;
397  } else {
398  for (i = 0; i < slice->hdr->num_blocks; i++) {
399  b = slice->block[i];
400  if (b->content_type == EXTERNAL &&
401  b->content_id == c->external.content_id) {
402  break;
403  }
404  }
405  if (i == slice->hdr->num_blocks || !b)
406  return -1;
407  }
408 
409  cp = cram_extract_block(b, *out_size);
410  if (!cp)
411  return -1;
412 
413  BLOCK_APPEND(out, cp, *out_size);
414  return 0;
415 }
416 
418  if (c)
419  free(c);
420 }
421 
422 cram_codec *cram_external_decode_init(char *data, int size,
423  enum cram_external_type option,
424  int version) {
425  cram_codec *c;
426  char *cp = data;
427 
428  if (!(c = malloc(sizeof(*c))))
429  return NULL;
430 
431  c->codec = E_EXTERNAL;
432  if (option == E_INT || option == E_LONG)
434  else if (option == E_BYTE_ARRAY || option == E_BYTE)
436  else
439 
440  cp += itf8_get(cp, &c->external.content_id);
441 
442  if (cp - data != size) {
443  fprintf(stderr, "Malformed external header stream\n");
444  free(c);
445  return NULL;
446  }
447 
448  c->external.type = option;
449 
450  return c;
451 }
452 
454  cram_block *out, char *in, int in_size) {
455  uint32_t *i32 = (uint32_t *)in;
456 
457  itf8_put_blk(out, *i32);
458  return 0;
459 }
460 
462  if (!c)
463  return;
464  free(c);
465 }
466 
468  int version) {
469  char tmp[99], *tp = tmp;
470  int len = 0;
471 
472  if (prefix) {
473  size_t l = strlen(prefix);
474  BLOCK_APPEND(b, prefix, l);
475  len += l;
476  }
477 
478  tp += itf8_put(tp, c->e_external.content_id);
479  len += itf8_put_blk(b, c->codec);
480  len += itf8_put_blk(b, tp-tmp);
481  BLOCK_APPEND(b, tmp, tp-tmp);
482  len += tp-tmp;
483 
484  return len;
485 }
486 
488  enum cram_external_type option,
489  void *dat,
490  int version) {
491  cram_codec *c;
492 
493  c = malloc(sizeof(*c));
494  if (!c)
495  return NULL;
496  c->codec = E_EXTERNAL;
500 
501  c->e_external.content_id = (size_t)dat;
502 
503  return c;
504 }
505 
506 /*
507  * ---------------------------------------------------------------------------
508  * BETA
509  */
510 int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
511  int32_t *out_i = (int32_t *)out;
512  int i, n;
513 
514  if (c->beta.nbits) {
515  for (i = 0, n = *out_size; i < n; i++)
516  out_i[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset;
517  } else {
518  for (i = 0, n = *out_size; i < n; i++)
519  out_i[i] = 0;
520  }
521 
522  return 0;
523 }
524 
525 int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
526  int i, n;
527 
528  if (c->beta.nbits) {
529  for (i = 0, n = *out_size; i < n; i++)
530  out[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset;
531  } else {
532  for (i = 0, n = *out_size; i < n; i++)
533  out[i] = 0;
534  }
535 
536  return 0;
537 }
538 
540  if (c)
541  free(c);
542 }
543 
544 cram_codec *cram_beta_decode_init(char *data, int size,
545  enum cram_external_type option,
546  int version) {
547  cram_codec *c;
548  char *cp = data;
549 
550  if (!(c = malloc(sizeof(*c))))
551  return NULL;
552 
553  c->codec = E_BETA;
554  if (option == E_INT || option == E_LONG)
556  else if (option == E_BYTE_ARRAY || option == E_BYTE)
558  else
559  abort();
561 
562  cp += itf8_get(cp, &c->beta.offset);
563  cp += itf8_get(cp, &c->beta.nbits);
564 
565  if (cp - data != size) {
566  fprintf(stderr, "Malformed beta header stream\n");
567  free(c);
568  return NULL;
569  }
570 
571  return c;
572 }
573 
575  char *prefix, int version) {
576  int len = 0;
577 
578  if (prefix) {
579  size_t l = strlen(prefix);
580  BLOCK_APPEND(b, prefix, l);
581  len += l;
582  }
583 
584  len += itf8_put_blk(b, c->codec);
585  len += itf8_put_blk(b, itf8_size(c->e_beta.offset)
586  + itf8_size(c->e_beta.nbits)); // codec length
587  len += itf8_put_blk(b, c->e_beta.offset);
588  len += itf8_put_blk(b, c->e_beta.nbits);
589 
590  return len;
591 }
592 
594  cram_block *out, char *in, int in_size) {
595  int *syms = (int *)in;
596  int i, r = 0;
597 
598  for (i = 0; i < in_size; i++)
599  r |= store_bits_MSB(out, syms[i] + c->e_beta.offset, c->e_beta.nbits);
600 
601  return r;
602 }
603 
605  cram_block *out, char *in, int in_size) {
606  unsigned char *syms = (unsigned char *)in;
607  int i, r = 0;
608 
609  for (i = 0; i < in_size; i++)
610  r |= store_bits_MSB(out, syms[i] + c->e_beta.offset, c->e_beta.nbits);
611 
612  return r;
613 }
614 
616  if (c) free(c);
617 }
618 
620  enum cram_external_type option,
621  void *dat,
622  int version) {
623  cram_codec *c;
624  int min_val, max_val, len = 0;
625 
626  c = malloc(sizeof(*c));
627  if (!c)
628  return NULL;
629  c->codec = E_BETA;
631  if (option == E_INT)
633  else
636 
637  if (dat) {
638  min_val = ((int *)dat)[0];
639  max_val = ((int *)dat)[1];
640  } else {
641  min_val = INT_MAX;
642  max_val = INT_MIN;
643  int i;
644  for (i = 0; i < MAX_STAT_VAL; i++) {
645  if (!st->freqs[i])
646  continue;
647  if (min_val > i)
648  min_val = i;
649  max_val = i;
650  }
651  if (st->h) {
652  khint_t k;
653 
654  for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
655  if (!kh_exist(st->h, k))
656  continue;
657 
658  i = kh_key(st->h, k);
659  if (min_val > i)
660  min_val = i;
661  if (max_val < i)
662  max_val = i;
663  }
664  }
665  }
666 
667  assert(max_val >= min_val);
668  c->e_beta.offset = -min_val;
669  max_val -= min_val;
670  while (max_val) {
671  len++;
672  max_val >>= 1;
673  }
674  c->e_beta.nbits = len;
675 
676  return c;
677 }
678 
679 /*
680  * ---------------------------------------------------------------------------
681  * SUBEXP
682  */
683 int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
684  int32_t *out_i = (int32_t *)out;
685  int n, count;
686  int k = c->subexp.k;
687 
688  for (count = 0, n = *out_size; count < n; count++) {
689  int i = 0, tail;
690  int val;
691 
692  /* Get number of 1s */
693  //while (get_bit_MSB(in) == 1) i++;
694  i = get_one_bits_MSB(in);
695 
696  /*
697  * Val is
698  * i > 0: 2^(k+i-1) + k+i-1 bits
699  * i = 0: k bits
700  */
701  if (i) {
702  tail = i + k-1;
703  val = 0;
704  while (tail) {
705  //val = val<<1; val |= get_bit_MSB(in);
706  GET_BIT_MSB(in, val);
707  tail--;
708  }
709  val += 1 << (i + k-1);
710  } else {
711  tail = k;
712  val = 0;
713  while (tail) {
714  //val = val<<1; val |= get_bit_MSB(in);
715  GET_BIT_MSB(in, val);
716  tail--;
717  }
718  }
719 
720  out_i[count] = val - c->subexp.offset;
721  }
722 
723  return 0;
724 }
725 
727  if (c)
728  free(c);
729 }
730 
731 cram_codec *cram_subexp_decode_init(char *data, int size,
732  enum cram_external_type option,
733  int version) {
734  cram_codec *c;
735  char *cp = data;
736 
737  if (!(c = malloc(sizeof(*c))))
738  return NULL;
739 
740  c->codec = E_SUBEXP;
743 
744  cp += itf8_get(cp, &c->subexp.offset);
745  cp += itf8_get(cp, &c->subexp.k);
746 
747  if (cp - data != size) {
748  fprintf(stderr, "Malformed subexp header stream\n");
749  free(c);
750  return NULL;
751  }
752 
753  return c;
754 }
755 
756 /*
757  * ---------------------------------------------------------------------------
758  * GAMMA
759  */
760 int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
761  int32_t *out_i = (int32_t *)out;
762  int i, n;
763 
764  for (i = 0, n = *out_size; i < n; i++) {
765  int nz = 0;
766  int val;
767  //while (get_bit_MSB(in) == 0) nz++;
768  nz = get_zero_bits_MSB(in);
769  val = 1;
770  while (nz > 0) {
771  //val <<= 1; val |= get_bit_MSB(in);
772  GET_BIT_MSB(in, val);
773  nz--;
774  }
775 
776  out_i[i] = val - c->gamma.offset;
777  }
778 
779  return 0;
780 }
781 
783  if (c)
784  free(c);
785 }
786 
787 cram_codec *cram_gamma_decode_init(char *data, int size,
788  enum cram_external_type option,
789  int version) {
790  cram_codec *c;
791  char *cp = data;
792 
793  if (!(c = malloc(sizeof(*c))))
794  return NULL;
795 
796  c->codec = E_GAMMA;
799 
800  cp += itf8_get(cp, &c->gamma.offset);
801 
802  if (cp - data != size) {
803  fprintf(stderr, "Malformed gamma header stream\n");
804  free(c);
805  return NULL;
806  }
807 
808  return c;
809 }
810 
811 /*
812  * ---------------------------------------------------------------------------
813  * HUFFMAN
814  */
815 
816 static int code_sort(const void *vp1, const void *vp2) {
817  const cram_huffman_code *c1 = (const cram_huffman_code *)vp1;
818  const cram_huffman_code *c2 = (const cram_huffman_code *)vp2;
819 
820  if (c1->len != c2->len)
821  return c1->len - c2->len;
822  else
823  return c1->symbol - c2->symbol;
824 }
825 
827  if (!c)
828  return;
829 
830  if (c->huffman.codes)
831  free(c->huffman.codes);
832  free(c);
833 }
834 
836  cram_block *in, char *out, int *out_size) {
837  int i, n;
838 
839  /* Special case of 0 length codes */
840  for (i = 0, n = *out_size; i < n; i++) {
841  out[i] = c->huffman.codes[0].symbol;
842  }
843  return 0;
844 }
845 
847  cram_block *in, char *out, int *out_size) {
848  int i, n, ncodes = c->huffman.ncodes;
849  const cram_huffman_code * const codes = c->huffman.codes;
850 
851  for (i = 0, n = *out_size; i < n; i++) {
852  int idx = 0;
853  int val = 0, len = 0, last_len = 0;
854 
855  for (;;) {
856  int dlen = codes[idx].len - last_len;
857  if (dlen <= 0 || (in->alloc - in->byte)*8 + in->bit + 7 < dlen)
858  return -1;
859 
860  //val <<= dlen;
861  //val |= get_bits_MSB(in, dlen);
862  //last_len = (len += dlen);
863 
864  last_len = (len += dlen);
865  for (; dlen; dlen--) GET_BIT_MSB(in, val);
866 
867  idx = val - codes[idx].p;
868  if (idx >= ncodes || idx < 0)
869  return -1;
870 
871  if (codes[idx].code == val && codes[idx].len == len) {
872  out[i] = codes[idx].symbol;
873  break;
874  }
875  }
876  }
877 
878  return 0;
879 }
880 
882  cram_block *in, char *out, int *out_size) {
883  int32_t *out_i = (int32_t *)out;
884  int i, n;
885  const cram_huffman_code * const codes = c->huffman.codes;
886 
887  /* Special case of 0 length codes */
888  for (i = 0, n = *out_size; i < n; i++) {
889  out_i[i] = codes[0].symbol;
890  }
891  return 0;
892 }
893 
895  cram_block *in, char *out, int *out_size) {
896  int32_t *out_i = (int32_t *)out;
897  int i, n, ncodes = c->huffman.ncodes;
898  const cram_huffman_code * const codes = c->huffman.codes;
899 
900  for (i = 0, n = *out_size; i < n; i++) {
901  int idx = 0;
902  int val = 0, len = 0, last_len = 0;
903 
904  // Now one bit at a time for remaining checks
905  for (;;) {
906  int dlen = codes[idx].len - last_len;
907  if (dlen <= 0 || (in->alloc - in->byte)*8 + in->bit + 7 < dlen)
908  return -1;
909 
910  //val <<= dlen;
911  //val |= get_bits_MSB(in, dlen);
912  //last_len = (len += dlen);
913 
914  last_len = (len += dlen);
915  for (; dlen; dlen--) GET_BIT_MSB(in, val);
916 
917  idx = val - codes[idx].p;
918  if (idx >= ncodes || idx < 0)
919  return -1;
920 
921  if (codes[idx].code == val && codes[idx].len == len) {
922  out_i[i] = codes[idx].symbol;
923  break;
924  }
925  }
926  }
927 
928  return 0;
929 }
930 
931 /*
932  * Initialises a huffman decoder from an encoding data stream.
933  */
934 cram_codec *cram_huffman_decode_init(char *data, int size,
935  enum cram_external_type option,
936  int version) {
937  int32_t ncodes, i, j;
938  char *cp = data, *data_end = &data[size];
939  cram_codec *h;
940  cram_huffman_code *codes;
941  int32_t val, last_len, max_len = 0;
942 
943  cp += itf8_get(cp, &ncodes);
944  h = calloc(1, sizeof(*h));
945  if (!h)
946  return NULL;
947 
949 
950  h->huffman.ncodes = ncodes;
951  codes = h->huffman.codes = malloc(ncodes * sizeof(*codes));
952  if (!codes) {
953  free(h);
954  return NULL;
955  }
956 
957  /* Read symbols and bit-lengths */
958  for (i = 0; i < ncodes && cp < data_end; i++) {
959  cp += itf8_get(cp, &codes[i].symbol);
960  }
961 
962  if (cp >= data_end) {
963  fprintf(stderr, "Malformed huffman header stream\n");
964  free(h);
965  return NULL;
966  }
967  cp += itf8_get(cp, &i);
968  if (i != ncodes) {
969  fprintf(stderr, "Malformed huffman header stream\n");
970  free(h);
971  return NULL;
972  }
973 
974  if (ncodes == 0) {
975  /* NULL huffman stream */
976  return h;
977  }
978 
979  for (i = 0; i < ncodes && cp < data_end; i++) {
980  cp += itf8_get(cp, &codes[i].len);
981  if (max_len < codes[i].len)
982  max_len = codes[i].len;
983  }
984  if (cp - data != size || max_len >= ncodes) {
985  fprintf(stderr, "Malformed huffman header stream\n");
986  free(h);
987  return NULL;
988  }
989 
990  /* Sort by bit length and then by symbol value */
991  qsort(codes, ncodes, sizeof(*codes), code_sort);
992 
993  /* Assign canonical codes */
994  val = -1, last_len = 0;
995  for (i = 0; i < ncodes; i++) {
996  val++;
997  if (codes[i].len > last_len) {
998  while (codes[i].len > last_len) {
999  val <<= 1;
1000  last_len++;
1001  }
1002  }
1003  codes[i].code = val;
1004  }
1005 
1006  /*
1007  * Compute the next starting point, offset by the i'th value.
1008  * For example if codes 10, 11, 12, 13 are 30, 31, 32, 33 then
1009  * codes[10..13].p = 30 - 10.
1010  */
1011  last_len = 0;
1012  for (i = j = 0; i < ncodes; i++) {
1013  if (codes[i].len > last_len) {
1014  j = codes[i].code - i;
1015  last_len = codes[i].len;
1016  }
1017  codes[i].p = j;
1018  }
1019 
1020 // puts("==HUFF LEN==");
1021 // for (i = 0; i <= last_len+1; i++) {
1022 // printf("len %d=%d prefix %d\n", i, h->huffman.lengths[i], h->huffman.prefix[i]);
1023 // }
1024 // puts("===HUFFMAN CODES===");
1025 // for (i = 0; i < ncodes; i++) {
1026 // int j;
1027 // printf("%d: %d %d %d ", i, codes[i].symbol, codes[i].len, codes[i].code);
1028 // j = codes[i].len;
1029 // while (j) {
1030 // putchar(codes[i].code & (1 << --j) ? '1' : '0');
1031 // }
1032 // printf(" %d\n", codes[i].code);
1033 // }
1034 
1035  h->codec = E_HUFFMAN;
1036  if (option == E_BYTE || option == E_BYTE_ARRAY) {
1037  if (h->huffman.codes[0].len == 0)
1039  else
1041  } else if (option == E_BYTE_ARRAY_BLOCK) {
1042  abort();
1043  } else {
1044  if (h->huffman.codes[0].len == 0)
1046  else
1048  }
1049 
1050  return (cram_codec *)h;
1051 }
1052 
1054  cram_block *out, char *in, int in_size) {
1055  return 0;
1056 }
1057 
1059  cram_block *out, char *in, int in_size) {
1060  int i, code, len, r = 0;
1061  unsigned char *syms = (unsigned char *)in;
1062 
1063  do {
1064  int sym = *syms++;
1065  if (sym >= -1 && sym < MAX_HUFF) {
1066  i = c->e_huffman.val2code[sym+1];
1067  assert(c->e_huffman.codes[i].symbol == sym);
1068  code = c->e_huffman.codes[i].code;
1069  len = c->e_huffman.codes[i].len;
1070  } else {
1071  /* Slow - use a lookup table for when sym < MAX_HUFF? */
1072  for (i = 0; i < c->e_huffman.nvals; i++) {
1073  if (c->e_huffman.codes[i].symbol == sym)
1074  break;
1075  }
1076  if (i == c->e_huffman.nvals)
1077  return -1;
1078 
1079  code = c->e_huffman.codes[i].code;
1080  len = c->e_huffman.codes[i].len;
1081  }
1082 
1083  r |= store_bits_MSB(out, code, len);
1084  } while (--in_size);
1085 
1086  return r;
1087 }
1088 
1090  cram_block *out, char *in, int in_size) {
1091  return 0;
1092 }
1093 
1095  cram_block *out, char *in, int in_size) {
1096  int i, code, len, r = 0;
1097  int *syms = (int *)in;
1098 
1099  do {
1100  int sym = *syms++;
1101 
1102  if (sym >= -1 && sym < MAX_HUFF) {
1103  i = c->e_huffman.val2code[sym+1];
1104  assert(c->e_huffman.codes[i].symbol == sym);
1105  code = c->e_huffman.codes[i].code;
1106  len = c->e_huffman.codes[i].len;
1107  } else {
1108  /* Slow - use a lookup table for when sym < MAX_HUFFMAN_SYM? */
1109  for (i = 0; i < c->e_huffman.nvals; i++) {
1110  if (c->e_huffman.codes[i].symbol == sym)
1111  break;
1112  }
1113  if (i == c->e_huffman.nvals)
1114  return -1;
1115 
1116  code = c->e_huffman.codes[i].code;
1117  len = c->e_huffman.codes[i].len;
1118  }
1119 
1120  r |= store_bits_MSB(out, code, len);
1121  } while (--in_size);
1122 
1123  return r;
1124 }
1125 
1127  if (!c)
1128  return;
1129 
1130  if (c->e_huffman.codes)
1131  free(c->e_huffman.codes);
1132  free(c);
1133 }
1134 
1135 /*
1136  * Encodes a huffman tree.
1137  * Returns number of bytes written.
1138  */
1140  int version) {
1141  int i, len = 0;
1142  cram_huffman_code *codes = c->e_huffman.codes;
1143  /*
1144  * Up to code length 127 means 2.5e+26 bytes of data required (worst
1145  * case huffman tree needs symbols with freqs matching the Fibonacci
1146  * series). So guaranteed 1 byte per code.
1147  *
1148  * Symbols themselves could be 5 bytes (eg -1 is 5 bytes in itf8).
1149  *
1150  * Therefore 6*ncodes + 5 + 5 + 1 + 5 is max memory
1151  */
1152  char *tmp = malloc(6*c->e_huffman.nvals+16);
1153  char *tp = tmp;
1154 
1155  if (!tmp)
1156  return -1;
1157 
1158  if (prefix) {
1159  size_t l = strlen(prefix);
1160  BLOCK_APPEND(b, prefix, l);
1161  len += l;
1162  }
1163 
1164  tp += itf8_put(tp, c->e_huffman.nvals);
1165  for (i = 0; i < c->e_huffman.nvals; i++) {
1166  tp += itf8_put(tp, codes[i].symbol);
1167  }
1168 
1169  tp += itf8_put(tp, c->e_huffman.nvals);
1170  for (i = 0; i < c->e_huffman.nvals; i++) {
1171  tp += itf8_put(tp, codes[i].len);
1172  }
1173 
1174  len += itf8_put_blk(b, c->codec);
1175  len += itf8_put_blk(b, tp-tmp);
1176  BLOCK_APPEND(b, tmp, tp-tmp);
1177  len += tp-tmp;
1178 
1179  free(tmp);
1180 
1181  return len;
1182 }
1183 
1185  enum cram_external_type option,
1186  void *dat,
1187  int version) {
1188  int *vals = NULL, *freqs = NULL, vals_alloc = 0, *lens, code, len;
1189  int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k;
1190  cram_codec *c;
1191  cram_huffman_code *codes;
1192 
1193  c = malloc(sizeof(*c));
1194  if (!c)
1195  return NULL;
1196  c->codec = E_HUFFMAN;
1197 
1198  /* Count number of unique symbols */
1199  for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
1200  if (!st->freqs[i])
1201  continue;
1202  if (nvals >= vals_alloc) {
1203  vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
1204  vals = realloc(vals, vals_alloc * sizeof(int));
1205  freqs = realloc(freqs, vals_alloc * sizeof(int));
1206  if (!vals || !freqs) {
1207  if (vals) free(vals);
1208  if (freqs) free(freqs);
1209  free(c);
1210  return NULL;
1211  }
1212  }
1213  vals[nvals] = i;
1214  freqs[nvals] = st->freqs[i];
1215  assert(st->freqs[i] > 0);
1216  ntot += freqs[nvals];
1217  if (max_val < i) max_val = i;
1218  if (min_val > i) min_val = i;
1219  nvals++;
1220  }
1221  if (st->h) {
1222  khint_t k;
1223 
1224  for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
1225  if (!kh_exist(st->h, k))
1226  continue;
1227  if (nvals >= vals_alloc) {
1228  vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
1229  vals = realloc(vals, vals_alloc * sizeof(int));
1230  freqs = realloc(freqs, vals_alloc * sizeof(int));
1231  if (!vals || !freqs)
1232  return NULL;
1233  }
1234  vals[nvals]= kh_key(st->h, k);
1235  freqs[nvals] = kh_val(st->h, k);
1236  assert(freqs[nvals] > 0);
1237  ntot += freqs[nvals];
1238  if (max_val < i) max_val = i;
1239  if (min_val > i) min_val = i;
1240  nvals++;
1241  }
1242  }
1243 
1244  assert(nvals > 0);
1245 
1246  freqs = realloc(freqs, 2*nvals*sizeof(*freqs));
1247  lens = calloc(2*nvals, sizeof(*lens));
1248  if (!lens || !freqs)
1249  return NULL;
1250 
1251  /* Inefficient, use pointers to form chain so we can insert and maintain
1252  * a sorted list? This is currently O(nvals^2) complexity.
1253  */
1254  for (;;) {
1255  int low1 = INT_MAX, low2 = INT_MAX;
1256  int ind1 = 0, ind2 = 0;
1257  for (i = 0; i < nvals; i++) {
1258  if (freqs[i] < 0)
1259  continue;
1260  if (low1 > freqs[i])
1261  low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i;
1262  else if (low2 > freqs[i])
1263  low2 = freqs[i], ind2 = i;
1264  }
1265  if (low2 == INT_MAX)
1266  break;
1267 
1268  freqs[nvals] = low1 + low2;
1269  lens[ind1] = nvals;
1270  lens[ind2] = nvals;
1271  freqs[ind1] *= -1;
1272  freqs[ind2] *= -1;
1273  nvals++;
1274  }
1275  nvals = nvals/2+1;
1276 
1277  /* Assign lengths */
1278  for (i = 0; i < nvals; i++) {
1279  int code_len = 0;
1280  for (k = lens[i]; k; k = lens[k])
1281  code_len++;
1282  lens[i] = code_len;
1283  freqs[i] *= -1;
1284  //fprintf(stderr, "%d / %d => %d\n", vals[i], freqs[i], lens[i]);
1285  }
1286 
1287 
1288  /* Sort, need in a struct */
1289  if (!(codes = malloc(nvals * sizeof(*codes))))
1290  return NULL;
1291  for (i = 0; i < nvals; i++) {
1292  codes[i].symbol = vals[i];
1293  codes[i].len = lens[i];
1294  }
1295  qsort(codes, nvals, sizeof(*codes), code_sort);
1296 
1297  /*
1298  * Generate canonical codes from lengths.
1299  * Sort by length.
1300  * Start with 0.
1301  * Every new code of same length is +1.
1302  * Every new code of new length is +1 then <<1 per extra length.
1303  *
1304  * /\
1305  * a/\
1306  * /\/\
1307  * bcd/\
1308  * ef
1309  *
1310  * a 1 0
1311  * b 3 4 (0+1)<<2
1312  * c 3 5
1313  * d 3 6
1314  * e 4 14 (6+1)<<1
1315  * f 5 15
1316  */
1317  code = 0; len = codes[0].len;
1318  for (i = 0; i < nvals; i++) {
1319  while (len != codes[i].len) {
1320  code<<=1;
1321  len++;
1322  }
1323  codes[i].code = code++;
1324 
1325  if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF)
1326  c->e_huffman.val2code[codes[i].symbol+1] = i;
1327 
1328  //fprintf(stderr, "sym %d, code %d, len %d\n",
1329  // codes[i].symbol, codes[i].code, codes[i].len);
1330  }
1331 
1332  free(lens);
1333  free(vals);
1334  free(freqs);
1335 
1336  c->e_huffman.codes = codes;
1337  c->e_huffman.nvals = nvals;
1338 
1340  if (option == E_BYTE || option == E_BYTE_ARRAY) {
1341  if (c->e_huffman.codes[0].len == 0)
1343  else
1345  } else {
1346  if (c->e_huffman.codes[0].len == 0)
1348  else
1350  }
1352 
1353  return c;
1354 }
1355 
1356 /*
1357  * ---------------------------------------------------------------------------
1358  * BYTE_ARRAY_LEN
1359  */
/*
 * BYTE_ARRAY_LEN decode step.
 * NOTE(review): the first line of this signature (return type, function
 * name and the leading parameters) is missing from this listing; `slice`
 * and `c` used below are presumably declared there - confirm against the
 * original file.
 *
 * Reads one length through c->byte_array_len.len_codec, then passes that
 * length to c->byte_array_len.value_codec to fill `out`. On that path the
 * length is stored in *out_size and 0 is returned; -1 is returned when no
 * value codec is set.
 */
1361  cram_block *in, char *out,
1362  int *out_size) {
1363  /* Fetch length */
1364  int32_t len, one = 1;
1365 
/* NOTE(review): the result of this decode call is not checked, and `len`
 * has no initialiser, so a failed call may leave it unset - verify. */
1366  c->byte_array_len.len_codec->decode(slice, c->byte_array_len.len_codec, in, (char *)&len, &one);
1367  //printf("ByteArray Len=%d\n", len);
1368 
1369  if (c->byte_array_len.value_codec) {
/* NOTE(review): one argument line of this call is missing from the
 * listing, and the call's return value is not inspected. */
1370  c->byte_array_len.value_codec->decode(slice,
1372  in, out, &len);
1373  } else {
1374  return -1;
1375  }
1376 
1377  *out_size = len;
1378 
1379  return 0;
1380 }
1381 
/*
 * Releases a BYTE_ARRAY_LEN decoder codec; a NULL `c` is a no-op.
 * NOTE(review): the signature line and the statements guarded by the two
 * `if`s below are missing from this listing. Presumably each guard releases
 * the corresponding sub-codec before `c` itself is freed - confirm against
 * the original file.
 */
1383  if (!c) return;
1384 
1385  if (c->byte_array_len.len_codec)
1387 
1388  if (c->byte_array_len.value_codec)
1390 
1391  free(c);
1392 }
1393 
/*
 * Builds a BYTE_ARRAY_LEN decoder from a serialised header.
 * NOTE(review): the first signature line is missing from this listing;
 * `data` and `size` used below are presumably parameters declared there.
 *
 * The header is read as two consecutive (encoding, sub_size, payload)
 * records: the first initialises the length codec with E_INT, the second
 * initialises the value codec with the caller's `option`.
 * Returns the new codec, or NULL on allocation failure or when the number
 * of bytes consumed differs from `size`.
 */
1395  enum cram_external_type option,
1396  int version) {
1397  cram_codec *c;
1398  char *cp = data;
1399  int32_t encoding;
1400  int32_t sub_size;
1401 
1402  if (!(c = malloc(sizeof(*c))))
1403  return NULL;
1404 
/* NOTE(review): two lines following this assignment are missing from the
 * listing. */
1405  c->codec = E_BYTE_ARRAY_LEN;
1408 
/* Length sub-codec: encoding id, payload size, then the payload itself. */
1409  cp += itf8_get(cp, &encoding);
1410  cp += itf8_get(cp, &sub_size);
/* NOTE(review): the result of cram_decoder_init is not checked for NULL. */
1411  c->byte_array_len.len_codec = cram_decoder_init(encoding, cp, sub_size,
1412  E_INT, version);
1413  cp += sub_size;
1414 
/* Value sub-codec: same record layout, decoded with the caller's option. */
1415  cp += itf8_get(cp, &encoding);
1416  cp += itf8_get(cp, &sub_size);
1417  c->byte_array_len.value_codec = cram_decoder_init(encoding, cp, sub_size,
1418  option, version);
1419  cp += sub_size;
1420 
/* The header is only validated after it has been fully consumed.
 * NOTE(review): this path frees `c` but not the two sub-codecs created
 * above - looks like a leak, confirm. */
1421  if (cp - data != size) {
1422  fprintf(stderr, "Malformed byte_array_len header stream\n");
1423  free(c);
1424  return NULL;
1425  }
1426 
1427  return c;
1428 }
1429 
/*
 * BYTE_ARRAY_LEN encode entry point: not implemented, always returns -1.
 * NOTE(review): the first signature line is missing from this listing.
 */
1431  cram_block *out, char *in, int in_size) {
1432  return -1; // not imp.
1433 }
1434 
/*
 * Frees a BYTE_ARRAY_LEN encoder codec; a NULL `c` is a no-op.
 * Only `c` itself is freed here.
 * NOTE(review): the signature line is missing from this listing.
 */
1436  if (!c)
1437  return;
1438  free(c);
1439 }
1440 
/*
 * Serialises a BYTE_ARRAY_LEN encoder description into block `b`.
 * NOTE(review): the first signature line and three body lines are missing
 * from this listing; `c` and `b` are presumably parameters declared on the
 * missing signature line.
 *
 * Writes the optional `prefix` string, the codec id and a length field,
 * and returns the running byte count `len`.
 */
1442  char *prefix, int version) {
1443  int len = 0;
1444 
/* Optional caller-supplied prefix is copied verbatim (no terminator). */
1445  if (prefix) {
1446  size_t l = strlen(prefix);
1447  BLOCK_APPEND(b, prefix, l);
1448  len += l;
1449  }
1450 
1451  len += itf8_put_blk(b, c->codec);
/* NOTE(review): the rest of this expression and the statement after it are
 * missing from the listing. */
1452  len += itf8_put_blk(b, c->e_byte_array_len.len_len +
1455  len += c->e_byte_array_len.len_len;
1456 
/* NOTE(review): a statement is missing from the listing just before this
 * line. */
1458  len += c->e_byte_array_len.val_len;
1459 
1460  return len;
1461 }
1462 
/*
 * Allocates a BYTE_ARRAY_LEN encoder codec.
 * NOTE(review): the first signature line and most of the body are missing
 * from this listing. Only the allocation, the codec tag assignment and the
 * return are visible, so how `dat` and `option` are consumed cannot be
 * determined from here.
 * Returns the new codec, or NULL when malloc fails.
 */
1464  enum cram_external_type option,
1465  void *dat,
1466  int version) {
1467  cram_codec *c;
1469 
1470  c = malloc(sizeof(*c));
1471  if (!c)
1472  return NULL;
1473  c->codec = E_BYTE_ARRAY_LEN;
1477 
1482 
1483  return c;
1484 }
1485 
1486 /*
1487  * ---------------------------------------------------------------------------
1488  * BYTE_ARRAY_STOP
1489  */
/*
 * BYTE_ARRAY_STOP decode into a plain char buffer.
 * NOTE(review): the first signature line is missing from this listing;
 * `slice` and `c` are presumably parameters declared there.
 *
 * Finds the block selected by c->byte_array_stop.content_id, copies bytes
 * from the block's current read position into `out` until the stop byte is
 * met, stores the number of copied bytes in *out_size and moves the block's
 * read index past the stop byte.
 * Returns 0 on success, -1 when the block is not found or is exhausted.
 * The size of `out` is not known here, so the caller has to supply a
 * buffer that is large enough.
 */
1491  cram_block *in, char *out,
1492  int *out_size) {
1493  int i;
1494  cram_block *b = NULL;
1495  char *cp, ch;
1496 
/* Direct lookup when the slice has a by-id table, else a linear scan. */
1497  if (slice->block_by_id) {
1498  if (!(b = slice->block_by_id[c->byte_array_stop.content_id]))
1499  return -1;
1500  } else {
1501  for (i = 0; i < slice->hdr->num_blocks; i++) {
1502  b = slice->block[i];
/* NOTE(review): the second half of this condition is missing from the
 * listing. */
1503  if (b->content_type == EXTERNAL &&
1505  break;
1506  }
1507  }
1508  if (i == slice->hdr->num_blocks || !b)
1509  return -1;
1510  }
1511 
1512  if (b->idx >= b->uncomp_size)
1513  return -1;
1514 
1515  cp = (char *)b->data + b->idx;
/* NOTE(review): the loop condition reads *cp before the body compares the
 * offset with uncomp_size, so a block lacking the stop byte is read one
 * byte beyond uncomp_size before -1 is returned - confirm. */
1516  while ((ch = *cp) != (char)c->byte_array_stop.stop) {
1517  if (cp - (char *)b->data >= b->uncomp_size)
1518  return -1;
1519  *out++ = ch;
1520  cp++;
1521  }
1522 
/* b->idx still holds the start position here, so this is the copied length. */
1523  *out_size = cp - (char *)(b->data + b->idx);
/* +1 skips the stop byte itself. */
1524  b->idx = cp - (char *)b->data + 1;
1525 
1526  return 0;
1527 }
1528 
/*
 * BYTE_ARRAY_STOP decode into a growable cram_block.
 * NOTE(review): the first signature line is missing from this listing;
 * `slice` and `c` are presumably parameters declared there.
 *
 * `out_` is reinterpreted as a cram_block pointer. Bytes are copied from
 * the source block selected by c->byte_array_stop.content_id until the stop
 * byte is met; the output block is grown in doubling steps as it fills.
 * On success *out_size receives the copied length, the source block's read
 * index is moved past the stop byte and 0 is returned; -1 is returned when
 * the source block is not found or is exhausted.
 */
1530  cram_block *in, char *out_,
1531  int *out_size) {
1532  int space = 256;
1533  cram_block *b = NULL;
1534  cram_block *out = (cram_block *)out_;
1535  char *cp, ch, *out_cp, *cp_end, *out_end;
1536  char stop;
1537 
/* Direct lookup when the slice has a by-id table, else a linear scan. */
1538  if (slice->block_by_id) {
1539  if (!(b = slice->block_by_id[c->byte_array_stop.content_id]))
1540  return -1;
1541  } else {
1542  int i;
1543  for (i = 0; i < slice->hdr->num_blocks; i++) {
1544  b = slice->block[i];
/* NOTE(review): the second half of this condition is missing from the
 * listing. */
1545  if (b->content_type == EXTERNAL &&
1547  break;
1548  }
1549  }
1550  if (i == slice->hdr->num_blocks || !b)
1551  return -1;
1552  }
1553 
1554  if (b->idx >= b->uncomp_size)
1555  return -1;
1556  cp = (char *)b->data + b->idx;
1557  cp_end = (char *)b->data + b->uncomp_size;
/* Reserve an initial window of `space` bytes at the end of the output. */
1558  BLOCK_GROW(out, space);
1559  out_cp = (char *)BLOCK_END(out);
1560  out_end = out_cp + space;
1561 
1562  stop = c->byte_array_stop.stop;
/* NOTE(review): *cp is read in the loop condition before the cp_end test
 * in the body, so the byte at cp_end can be read when the stop byte is
 * absent - confirm. */
1563  while ((ch = *cp) != stop) {
1564  if (cp++ == cp_end)
1565  return -1;
1566  *out_cp++ = ch;
1567 
/* Window exhausted: record the size written so far, then grow again with a
 * doubled window. */
1568  if (out_cp == out_end) {
1569  BLOCK_SIZE(out) = out_cp - (char *)BLOCK_DATA(out);
1570  space *= 2;
1571  BLOCK_GROW(out, space);
1572  out_cp = (char *)BLOCK_END(out);
1573  out_end = out_cp + space;
1574  }
1575  }
1576  BLOCK_SIZE(out) = out_cp - (char *)BLOCK_DATA(out);
1577 
/* b->idx still holds the start position here, so this is the copied length. */
1578  *out_size = cp - (char *)(b->data + b->idx);
/* +1 skips the stop byte itself. */
1579  b->idx = cp - (char *)b->data + 1;
1580 
1581  return 0;
1582 }
1583 
/*
 * Frees a BYTE_ARRAY_STOP decoder codec; a NULL `c` is a no-op.
 * NOTE(review): the signature line is missing from this listing.
 */
1585  if (!c) return;
1586 
1587  free(c);
1588 }
1589 
/*
 * Builds a BYTE_ARRAY_STOP decoder from a serialised header.
 * NOTE(review): the first signature line is missing from this listing;
 * `data` and `size` are presumably parameters declared there.
 *
 * Header layout as read below: one stop byte, then the content id stored
 * either as four bytes, least significant first (CRAM_1_VERS), or as an
 * ITF8 value (any other version).
 * Returns the new codec, or NULL on allocation failure or when the number
 * of bytes consumed differs from `size`.
 */
1591  enum cram_external_type option,
1592  int version) {
1593  cram_codec *c;
1594  unsigned char *cp = (unsigned char *)data;
1595 
1596  if (!(c = malloc(sizeof(*c))))
1597  return NULL;
1598 
1599  c->codec = E_BYTE_ARRAY_STOP;
/* NOTE(review): the remainder of this assignment is missing from the
 * listing; presumably it picks between two decode routines depending on
 * whether the output is a block - confirm. */
1600  c->decode = (option == E_BYTE_ARRAY_BLOCK)
1604 
/* NOTE(review): the header bytes are read before `size` is compared, so a
 * short buffer is only detected after it has been read. */
1605  c->byte_array_stop.stop = *cp++;
1606  if (version == CRAM_1_VERS) {
1607  c->byte_array_stop.content_id = cp[0] + (cp[1]<<8) + (cp[2]<<16)
1608  + (cp[3]<<24);
1609  cp += 4;
1610  } else {
1611  cp += itf8_get(cp, &c->byte_array_stop.content_id);
1612  }
1613 
1614  if ((char *)cp - data != size) {
1615  fprintf(stderr, "Malformed byte_array_stop header stream\n");
1616  free(c);
1617  return NULL;
1618  }
1619 
1620  return c;
1621 }
1622 
/*
 * BYTE_ARRAY_STOP encode entry point: not implemented, always returns -1.
 * NOTE(review): the first signature line is missing from this listing.
 */
1624  cram_block *out, char *in, int in_size) {
1625  return -1; // not imp.
1626 }
1627 
/*
 * Frees a BYTE_ARRAY_STOP encoder codec; a NULL `c` is a no-op.
 * NOTE(review): the signature line is missing from this listing.
 */
1629  if (!c)
1630  return;
1631  free(c);
1632 }
1633 
/*
 * Serialises a BYTE_ARRAY_STOP encoder description into block `b`.
 * NOTE(review): the first signature line is missing from this listing;
 * `c` and `b` are presumably parameters declared there.
 *
 * Output: the optional `prefix` string, then the codec id, a payload
 * length, the stop byte and the content id. For CRAM_1_VERS the content id
 * is four bytes, least significant first, and the length is 5; otherwise
 * the content id is ITF8 and the length is 1 plus its ITF8 size.
 * Returns the total number of bytes appended.
 */
1635  char *prefix, int version) {
1636  int len = 0;
/* The record is assembled in this 20-byte stack buffer before being
 * appended to the block in one go. */
1637  char buf[20], *cp = buf;
1638 
1639  if (prefix) {
1640  size_t l = strlen(prefix);
1641  BLOCK_APPEND(b, prefix, l);
1642  len += l;
1643  }
1644 
1645  cp += itf8_put(cp, c->codec);
1646 
1647  if (version == CRAM_1_VERS) {
/* Payload is 1 stop byte + 4 content-id bytes. */
1648  cp += itf8_put(cp, 5);
1649  *cp++ = c->e_byte_array_stop.stop;
1650  *cp++ = (c->e_byte_array_stop.content_id >> 0) & 0xff;
1651  *cp++ = (c->e_byte_array_stop.content_id >> 8) & 0xff;
1652  *cp++ = (c->e_byte_array_stop.content_id >> 16) & 0xff;
1653  *cp++ = (c->e_byte_array_stop.content_id >> 24) & 0xff;
1654  } else {
/* Payload is 1 stop byte + the ITF8-encoded content id. */
1655  cp += itf8_put(cp, 1 + itf8_size(c->e_byte_array_stop.content_id));
1656  *cp++ = c->e_byte_array_stop.stop;
1657  cp += itf8_put(cp, c->e_byte_array_stop.content_id);
1658  }
1659 
1660  BLOCK_APPEND(b, buf, cp-buf);
1661  len += cp-buf;
1662 
1663  return len;
1664 }
1665 
/*
 * Allocates a BYTE_ARRAY_STOP encoder codec.
 * NOTE(review): the first signature line and three body lines are missing
 * from this listing.
 *
 * `dat` is read as an array of two ints: element 0 is the stop value and
 * element 1 is the content id.
 * Returns the new codec, or NULL when malloc fails.
 */
1667  enum cram_external_type option,
1668  void *dat,
1669  int version) {
1670  cram_codec *c;
1671 
1672  c = malloc(sizeof(*c));
1673  if (!c)
1674  return NULL;
1675  c->codec = E_BYTE_ARRAY_STOP;
1679 
1680  c->e_byte_array_stop.stop = ((int *)dat)[0];
1681  c->e_byte_array_stop.content_id = ((int *)dat)[1];
1682 
1683  return c;
1684 }
1685 
1686 /*
1687  * ---------------------------------------------------------------------------
1688  */
1689 
/*
 * Maps an encoding enumerator `t` to a printable name; any value without a
 * case below yields "?".
 * NOTE(review): the signature line is missing from this listing.
 * Presumably this is the codec2str() used in the "Unimplemented codec"
 * messages - confirm.
 */
1691  switch (t) {
1692  case E_NULL: return "NULL";
1693  case E_EXTERNAL: return "EXTERNAL";
1694  case E_GOLOMB: return "GOLOMB";
1695  case E_HUFFMAN: return "HUFFMAN";
1696  case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN";
1697  case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";
1698  case E_BETA: return "BETA";
1699  case E_SUBEXP: return "SUBEXP";
1700  case E_GOLOMB_RICE: return "GOLOMB_RICE";
1701  case E_GAMMA: return "GAMMA";
1702  }
1703  return "?";
1704 }
1705 
/*
 * Table of decoder constructors, indexed by codec value. A NULL slot means
 * no decoder is available for that codec.
 * NOTE(review): several initialiser lines are missing from this listing,
 * so the visible NULL entries do not show the real slot order or count.
 */
1706 static cram_codec *(*decode_init[])(char *data,
1707  int size,
1708  enum cram_external_type option,
1709  int version) = {
1710  NULL,
1712  NULL,
1718  NULL,
1720 };
1721 
/*
 * Creates a decoder for `codec` by calling the matching entry of the
 * decode_init table with the serialised header (`data`, `size`).
 * Returns whatever the constructor returns, or NULL after printing a
 * message to stderr when the table slot is NULL.
 * NOTE(review): the first signature line is missing from this listing, and
 * no range check on `codec` is visible before it is used as a table index.
 */
1723  char *data, int size,
1724  enum cram_external_type option,
1725  int version) {
1726  if (decode_init[codec]) {
1727  return decode_init[codec](data, size, option, version);
1728  } else {
1729  fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec));
1730  return NULL;
1731  }
1732 }
1733 
/*
 * Table of encoder constructors, indexed by codec value. A NULL slot means
 * no encoder is available for that codec.
 * NOTE(review): several initialiser lines are missing from this listing,
 * so the visible entries do not show the real slot order or count.
 */
1734 static cram_codec *(*encode_init[])(cram_stats *stx,
1735  enum cram_external_type option,
1736  void *opt,
1737  int version) = {
1738  NULL,
1740  NULL,
1745  NULL, //cram_subexp_encode_init,
1746  NULL,
1747  NULL, //cram_gamma_encode_init,
1748 };
1749 
/*
 * Creates an encoder for `codec` by calling the matching entry of the
 * encode_init table.
 * Returns NULL without building anything when `st` is given but holds no
 * values (st->nvals == 0); otherwise returns the constructor's result.
 * Unlike the decoder path, a NULL table slot is fatal here: a message is
 * printed to stderr and abort() is called.
 * NOTE(review): the first signature line is missing from this listing, and
 * no range check on `codec` is visible before it is used as a table index.
 */
1751  cram_stats *st,
1752  enum cram_external_type option,
1753  void *dat,
1754  int version) {
1755  if (st && !st->nvals)
1756  return NULL;
1757 
1758  if (encode_init[codec]) {
1759  return encode_init[codec](st, option, dat, version);
1760  } else {
1761  fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec));
1762  abort();
1763  }
1764 }