NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Classes | Macros | Functions
cram_io.c File Reference
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <math.h>
#include <ctype.h>
#include "cram/cram.h"
#include "cram/os.h"
#include "cram/md5.h"
#include "cram/open_trace_file.h"

Go to the source code of this file.

Classes

struct  cram_job
 

Macros

#define RP(...)
 
#define hclose_abruptly(fp)   (fclose(fp))
 
#define hflush(fp)   (fflush(fp))
 
#define hgetc(fp)   (getc(fp))
 
#define hputc(c, fp)   (putc((c), (fp)))
 
#define hread(fp, buffer, nbytes)   (fread((buffer), 1, (nbytes), (fp)))
 
#define hseek(fp, offset, whence)   (fseeko((fp), (offset), (whence)))
 
#define hwrite(fp, buffer, nbytes)   (fwrite((buffer), 1, (nbytes), (fp)))
 
#define paranoid_hclose(fp)   (paranoid_fclose(fp))
 
#define PADDED_BLOCK
 

Functions

int itf8_decode (cram_fd *fd, int32_t *val_p)
 
int itf8_encode (cram_fd *fd, int32_t val)
 
int itf8_get (char *cp, int32_t *val_p)
 
int itf8_put (char *cp, int32_t val)
 
int ltf8_put (char *cp, int64_t val)
 
int ltf8_get (char *cp, int64_t *val_p)
 
int ltf8_decode (cram_fd *fd, int64_t *val_p)
 
int itf8_put_blk (cram_block *blk, int val)
 
int int32_decode (cram_fd *fd, int32_t *val)
 
int int32_encode (cram_fd *fd, int32_t val)
 
int int32_get (cram_block *b, int32_t *val)
 
int int32_put (cram_block *b, int32_t val)
 
char * zlib_mem_inflate (char *cdata, size_t csize, size_t *size)
 
cram_block * cram_new_block (enum cram_content_type content_type, int content_id)
 
cram_block * cram_read_block (cram_fd *fd)
 
int cram_write_block (cram_fd *fd, cram_block *b)
 
void cram_free_block (cram_block *b)
 
int cram_uncompress_block (cram_block *b)
 
int cram_compress_block (cram_fd *fd, cram_block *b, cram_metrics *metrics, int level, int strat, int level2, int strat2)
 
cram_metrics * cram_new_metrics (void)
 
char * cram_block_method2str (enum cram_block_method m)
 
char * cram_content_type2str (enum cram_content_type t)
 
int paranoid_fclose (FILE *fp)
 
void refs_free (refs_t *r)
 
int refs2id (refs_t *r, SAM_hdr *h)
 
int cram_set_header (cram_fd *fd, SAM_hdr *hdr)
 
void expand_cache_path (char *path, char *dir, char *fn)
 
void mkdir_prefix (char *path, int mode)
 
void cram_ref_incr (refs_t *r, int id)
 
void cram_ref_decr (refs_t *r, int id)
 
ref_entry * cram_ref_load (refs_t *r, int id)
 
char * cram_get_ref (cram_fd *fd, int id, int start, int end)
 
int cram_load_reference (cram_fd *fd, char *fn)
 
cram_container * cram_new_container (int nrec, int nslice)
 
void cram_free_container (cram_container *c)
 
cram_container * cram_read_container (cram_fd *fd)
 
int cram_write_container (cram_fd *fd, cram_container *c)
 
int cram_flush_container (cram_fd *fd, cram_container *c)
 
void * cram_flush_thread (void *arg)
 
int cram_flush_container_mt (cram_fd *fd, cram_container *c)
 
cram_block_compression_hdr * cram_new_compression_header (void)
 
void cram_free_compression_header (cram_block_compression_hdr *hdr)
 
void cram_free_slice_header (cram_block_slice_hdr *hdr)
 
void cram_free_slice (cram_slice *s)
 
cram_slice * cram_new_slice (enum cram_content_type type, int nrecs)
 
cram_slice * cram_read_slice (cram_fd *fd)
 
cram_file_def * cram_read_file_def (cram_fd *fd)
 
int cram_write_file_def (cram_fd *fd, cram_file_def *def)
 
void cram_free_file_def (cram_file_def *def)
 
SAM_hdr * cram_read_SAM_hdr (cram_fd *fd)
 
int cram_write_SAM_hdr (cram_fd *fd, SAM_hdr *hdr)
 
cram_fd * cram_open (const char *filename, const char *mode)
 
cram_fd * cram_dopen (cram_FILE *fp, const char *filename, const char *mode)
 
int cram_seek (cram_fd *fd, off_t offset, int whence)
 
int cram_flush (cram_fd *fd)
 
int cram_close (cram_fd *fd)
 
int cram_eof (cram_fd *fd)
 
int cram_set_option (cram_fd *fd, enum cram_option opt,...)
 
int cram_set_voption (cram_fd *fd, enum cram_option opt, va_list args)
 

Macro Definition Documentation

#define hclose_abruptly (   fp)    (fclose(fp))

Definition at line 85 of file cram_io.c.

#define hflush (   fp)    (fflush(fp))

Definition at line 86 of file cram_io.c.

#define hgetc (   fp)    (getc(fp))

Definition at line 87 of file cram_io.c.

#define hputc (   c,
  fp 
)    (putc((c), (fp)))

Definition at line 88 of file cram_io.c.

#define hread (   fp,
  buffer,
  nbytes 
)    (fread((buffer), 1, (nbytes), (fp)))

Definition at line 89 of file cram_io.c.

#define hseek (   fp,
  offset,
  whence 
)    (fseeko((fp), (offset), (whence)))

Definition at line 90 of file cram_io.c.

#define hwrite (   fp,
  buffer,
  nbytes 
)    (fwrite((buffer), 1, (nbytes), (fp)))

Definition at line 91 of file cram_io.c.

#define PADDED_BLOCK

Definition at line 2935 of file cram_io.c.

#define paranoid_hclose (   fp)    (paranoid_fclose(fp))

Definition at line 92 of file cram_io.c.

#define RP (   ...)

Definition at line 78 of file cram_io.c.

Function Documentation

char* cram_block_method2str ( enum cram_block_method  m)

Definition at line 969 of file cram_io.c.

int cram_close ( cram_fd *  fd)

Closes a CRAM file.

Returns
Returns 0 on success; -1 on failure

Definition at line 3409 of file cram_io.c.

int cram_compress_block ( cram_fd *  fd,
cram_block *  b,
cram_metrics *  metrics,
int  level,
int  strat,
int  level2,
int  strat2 
)

Compresses a block.

Compresses a block using one of two different zlib strategies. If only one strategy is wanted, set strat2 to -1.

The logic here is that sometimes Z_RLE does a better job than Z_FILTERED or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is significantly faster.

Returns
Returns 0 on success; -1 on failure

Definition at line 866 of file cram_io.c.

char* cram_content_type2str ( enum cram_content_type  t)

Definition at line 979 of file cram_io.c.

cram_fd* cram_dopen ( cram_FILE *  fp,
const char *  filename,
const char *  mode 
)

Opens an existing stream for reading or writing.

Returns
Returns file handle on success; NULL on failure.

cram_FILE is either htslib's hFILE or stdio's FILE, depending on how cram_structs.h has been configured.

Definition at line 3253 of file cram_io.c.

int cram_eof ( cram_fd *  fd)

Checks for end of file on a cram_fd stream.

Returns
Returns 0 if not at end of file; 1 if we hit an expected EOF (end of range or EOF block); 2 for any other EOF (end of stream without an EOF block).

Definition at line 3502 of file cram_io.c.

int cram_flush ( cram_fd *  fd)

Definition at line 3390 of file cram_io.c.

int cram_flush_container ( cram_fd *  fd,
cram_container *  c 
)

Flushes a container to disk.

Flushes a completely or partially full container to disk, writing container structure, header and blocks. This also calls the encoder functions.

Returns
Returns 0 on success; -1 on failure

Definition at line 2311 of file cram_io.c.

int cram_flush_container_mt ( cram_fd *  fd,
cram_container *  c 
)

Definition at line 2374 of file cram_io.c.

void* cram_flush_thread ( void *  arg)

Definition at line 2324 of file cram_io.c.

void cram_free_block ( cram_block *  b)

Frees a CRAM block, deallocating internal data too.

Definition at line 755 of file cram_io.c.

void cram_free_compression_header ( cram_block_compression_hdr *  hdr)

Frees a cram_block_compression_hdr

Definition at line 2426 of file cram_io.c.

void cram_free_container ( cram_container *  c)

Definition at line 2070 of file cram_io.c.

void cram_free_file_def ( cram_file_def *  def)

Frees a cram_file_def structure.

Definition at line 2786 of file cram_io.c.

void cram_free_slice ( cram_slice *  s)

Frees a slice

Definition at line 2514 of file cram_io.c.

void cram_free_slice_header ( cram_block_slice_hdr *  hdr)

Slices and slice headers

Frees a slice header

Definition at line 2502 of file cram_io.c.

char* cram_get_ref ( cram_fd *  fd,
int  id,
int  start,
int  end 
)

Returns a portion of a reference sequence from start to end inclusive.

The returned pointer is owned by the cram_file fd and should not be freed by the caller. It is valid only until the next cram_get_ref is called with the same fd parameter (so is thread-safe if given multiple files).

To return the entire reference sequence, specify start as 1 and end as 0.

Returns
Returns reference on success; NULL on failure

Definition at line 1770 of file cram_io.c.

int cram_load_reference ( cram_fd *  fd,
char *  fn 
)

Reference sequence handling

Loads a reference set from fn and stores in the cram_fd.

Returns
Returns 0 on success; -1 on failure

Definition at line 1963 of file cram_io.c.

cram_block* cram_new_block ( enum cram_content_type  content_type,
int  content_id 
)

CRAM blocks - the dynamically growable data block. We have code to create, update, (un)compress and read/write.

These are derived from the deflate_interlaced.c blocks, but with the CRAM extension of content types and IDs.

Allocates a new cram_block structure with a specified content_type and id.

Returns
Returns block pointer on success; NULL on failure

Definition at line 662 of file cram_io.c.

cram_block_compression_hdr* cram_new_compression_header ( void  )

Compression headers; the first part of the container

Creates a new blank container compression header

Returns
Returns header ptr on success; NULL on failure

Definition at line 2400 of file cram_io.c.

cram_container* cram_new_container ( int  nrec,
int  nslice 
)

Containers

Creates a new container, specifying the maximum number of slices and records permitted.

Returns
Returns cram_container ptr on success; NULL on failure

Definition at line 1995 of file cram_io.c.

cram_metrics* cram_new_metrics ( void  )

Definition at line 959 of file cram_io.c.

cram_slice* cram_new_slice ( enum cram_content_type  type,
int  nrecs 
)

Creates a new empty slice in memory, for subsequent writing to disk.

Returns
Returns cram_slice ptr on success; NULL on failure

Definition at line 2591 of file cram_io.c.

cram_fd* cram_open ( const char *  filename,
const char *  mode 
)

The top-level cram opening, closing and option handling

Opens a CRAM file for read (mode "rb") or write ("wb").

The filename may be "-" to indicate stdin or stdout.

Returns
Returns file handle on success; NULL on failure.

Definition at line 3217 of file cram_io.c.

cram_block* cram_read_block ( cram_fd *  fd)

Reads a block from a cram file.

Returns
Returns cram_block pointer on success; NULL on failure

Definition at line 685 of file cram_io.c.

cram_container* cram_read_container ( cram_fd *  fd)

Reads a container header.

Returns
Returns cram_container on success; NULL on failure or no container left (fd->err == 0).

Definition at line 2137 of file cram_io.c.

cram_file_def* cram_read_file_def ( cram_fd *  fd)

CRAM file definition (header)

Reads a CRAM file definition structure.

Returns
Returns file_def ptr on success; NULL on failure

Definition at line 2748 of file cram_io.c.

SAM_hdr* cram_read_SAM_hdr ( cram_fd *  fd)

SAM header I/O

Reads the SAM header from the first CRAM data block.

Also performs minimal parsing to extract read-group and sample information.

Returns
Returns SAM hdr ptr on success; NULL on failure

Definition at line 2803 of file cram_io.c.

cram_slice* cram_read_slice ( cram_fd *  fd)

Loads an entire slice.

FIXME: In 1.0 the native unit of slices within CRAM is broken, as slices contain references to objects in other slices. To work around this while keeping the slice-oriented outer loop, we read all slices and stitch them together into a fake large slice instead.

Returns
Returns cram_slice ptr on success; NULL on failure

Definition at line 2656 of file cram_io.c.

void cram_ref_decr ( refs_t *  r,
int  id 
)

Definition at line 1600 of file cram_io.c.

void cram_ref_incr ( refs_t *  r,
int  id 
)

Definition at line 1568 of file cram_io.c.

ref_entry* cram_ref_load ( refs_t *  r,
int  id 
)

Definition at line 1685 of file cram_io.c.

int cram_seek ( cram_fd *  fd,
off_t  offset,
int  whence 
)

Definition at line 3363 of file cram_io.c.

int cram_set_header ( cram_fd *  fd,
SAM_hdr *  hdr 
)

Attaches a header to a cram_fd.

This should be used when creating a new cram_fd for writing where we have a SAM_hdr already constructed (e.g. from a file we've read in).

Returns
Returns 0 on success; -1 on failure

Definition at line 1341 of file cram_io.c.

int cram_set_option ( cram_fd *  fd,
enum cram_option  opt,
  ... 
)

Sets options on the cram_fd.

See CRAM_OPT_* definitions in cram_structs.h. Use this immediately after opening.

Returns
Returns 0 on success; -1 on failure

Definition at line 3514 of file cram_io.c.

int cram_set_voption ( cram_fd *  fd,
enum cram_option  opt,
va_list  args 
)

Sets options on the cram_fd.

See CRAM_OPT_* definitions in cram_structs.h. Use this immediately after opening.

Returns
Returns 0 on success; -1 on failure

Definition at line 3532 of file cram_io.c.

int cram_uncompress_block ( cram_block *  b)

Uncompresses a CRAM block, if compressed.

Returns
Returns 0 on success; -1 on failure

Definition at line 766 of file cram_io.c.

int cram_write_block ( cram_fd *  fd,
cram_block *  b 
)

Writes a CRAM block.

Returns
Returns 0 on success; -1 on failure

Definition at line 732 of file cram_io.c.

int cram_write_container ( cram_fd *  fd,
cram_container *  h 
)

Writes a container structure.

Returns
Returns 0 on success; -1 on failure

Definition at line 2229 of file cram_io.c.

int cram_write_file_def ( cram_fd *  fd,
cram_file_def *  def 
)

Writes a cram_file_def structure to cram_fd.

Returns
Returns 0 on success; -1 on failure

Definition at line 2782 of file cram_io.c.

int cram_write_SAM_hdr ( cram_fd *  fd,
SAM_hdr *  hdr 
)

Writes a CRAM SAM header.

Returns
Returns 0 on success; -1 on failure

Definition at line 2936 of file cram_io.c.

void expand_cache_path ( char *  path,
char *  dir,
char *  fn 
)

Definition at line 1351 of file cram_io.c.

int int32_decode ( cram_fd *  fd,
int32_t *  val 
)

Definition at line 476 of file cram_io.c.

int int32_encode ( cram_fd *  fd,
int32_t  val 
)

Definition at line 491 of file cram_io.c.

int int32_get ( cram_block *  b,
int32_t *  val 
)

Definition at line 500 of file cram_io.c.

int int32_put ( cram_block *  b,
int32_t  val 
)

Definition at line 514 of file cram_io.c.

int itf8_decode ( cram_fd *  fd,
int32_t *  val 
)

Reads an integer in ITF-8 encoding from 'fd' and stores it in *val.

Returns
Returns the number of bytes read on success; -1 on failure

Definition at line 108 of file cram_io.c.

int itf8_encode ( cram_fd *  fd,
int32_t  val 
)

Definition at line 171 of file cram_io.c.

int itf8_get ( char *  cp,
int32_t *  val_p 
)

Definition at line 181 of file cram_io.c.

int itf8_put ( char *  cp,
int32_t  val 
)

Definition at line 208 of file cram_io.c.

int itf8_put_blk ( cram_block *  blk,
int  val 
)

Pushes a value in ITF8 format onto the end of a block.

This shouldn't be used for high-volume data as it is not the fastest method.

Returns
Returns the number of bytes written

Definition at line 461 of file cram_io.c.

int ltf8_decode ( cram_fd *  fd,
int64_t *  val_p 
)

Definition at line 373 of file cram_io.c.

int ltf8_get ( char *  cp,
int64_t *  val_p 
)

Definition at line 306 of file cram_io.c.

int ltf8_put ( char *  cp,
int64_t  val 
)

Definition at line 239 of file cram_io.c.

void mkdir_prefix ( char *  path,
int  mode 
)

Definition at line 1395 of file cram_io.c.

int paranoid_fclose ( FILE *  fp)

Definition at line 999 of file cram_io.c.

int refs2id ( refs_t *  r,
SAM_hdr *  bfd 
)

Generates a lookup table in refs based on the SQ headers in SAM_hdr.

Indexes references by the order they appear in a BAM file. This may not necessarily be the same order they appear in the fasta reference file.

Returns
Returns 0 on success; -1 on failure

Definition at line 1248 of file cram_io.c.

void refs_free ( refs_t *  r)

Definition at line 1038 of file cram_io.c.

char* zlib_mem_inflate ( char *  cdata,
size_t  csize,
size_t *  size 
)

Uncompress a memory block using Zlib.

Returns
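Returns a pointer to the uncompressed data on success (presumably NULL on failure; the original text, "0 on success; -1 on failure", does not match the char * return type, so confirm against the source).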

Definition at line 530 of file cram_io.c.