NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
contrib
htslib
cram
cram_structs.h
Go to the documentation of this file.
1
/*
2
Copyright (c) 2012-2013 Genome Research Ltd.
3
Author: James Bonfield <jkb@sanger.ac.uk>
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
1. Redistributions of source code must retain the above copyright notice,
9
this list of conditions and the following disclaimer.
10
11
2. Redistributions in binary form must reproduce the above copyright notice,
12
this list of conditions and the following disclaimer in the documentation
13
and/or other materials provided with the distribution.
14
15
3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16
Institute nor the names of its contributors may be used to endorse or promote
17
products derived from this software without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
#ifndef _CRAM_STRUCTS_H_
32
#define _CRAM_STRUCTS_H_
33
34
#ifdef __cplusplus
35
extern
"C"
{
36
#endif
37
38
/*
39
* Defines in-memory structs for the basic file-format objects in the
40
* CRAM format.
41
*
42
* The basic file format is:
43
* File-def SAM-hdr Container Container ...
44
*
45
* Container:
46
* Service-block data-block data-block ...
47
*
48
* Multiple blocks in a container are grouped together as slices,
49
* also sometimes referred to as landmarks in the spec.
50
*/
51
52
53
#include <stdint.h>
54
55
#include "
cram/thread_pool.h
"
56
57
#ifdef SAMTOOLS
58
// From within samtools/HTSlib
59
# include "
cram/string_alloc.h
"
60
# include "
htslib/khash.h
"
61
62
// Generic hash-map integer -> integer
63
KHASH_MAP_INIT_INT
(m_i2i,
int
)
64
65
// Generic hash-set integer -> (existance)
66
KHASH_SET_INIT_INT
(s_i2i)
67
68
// For brevity
69
typedef
unsigned
char
uc;
70
71
/*
72
* A union for the preservation map. Required for khash.
73
*/
74
typedef union {
75
int
i;
76
char
*p;
77
} pmap_t;
78
79
// Generates static functions here which isn't ideal, but we have no way
80
// currently to declare the kh_map_t structure here without also declaring a
81
// duplicate in the .c files due to the nature of the KHASH macros.
82
KHASH_MAP_INIT_STR
(
map
, pmap_t)
83
84
struct
hFILE
;
85
typedef struct hFILE
cram_FILE
;
86
87
#else
88
// From within io_lib
89
# include "cram/bam.h"
// For BAM header parsing
90
typedef
FILE
cram_FILE
;
91
#endif
92
93
#define SEQS_PER_SLICE 10000
94
#define SLICE_PER_CNT 1
95
96
#define CRAM_SUBST_MATRIX "CGTNAGTNACTNACGNACGT"
97
98
#define TN_external
99
//#define NS_external
100
#define TS_external
101
//#define BA_external
102
103
#define MAX_STAT_VAL 1024
104
//#define MAX_STAT_VAL 16
105
typedef
struct
{
106
int
freqs[
MAX_STAT_VAL
];
107
khash_t
(m_i2i) *h;
108
int
nsamp
;
// total number of values added
109
int
nvals
;
// total number of unique values added
110
}
cram_stats
;
111
112
/* NB: matches java impl, not the spec */
113
enum
cram_encoding
{
114
E_NULL
= 0,
115
E_EXTERNAL
= 1,
116
E_GOLOMB
= 2,
117
E_HUFFMAN
= 3,
118
E_BYTE_ARRAY_LEN
= 4,
119
E_BYTE_ARRAY_STOP
= 5,
120
E_BETA
= 6,
121
E_SUBEXP
= 7,
122
E_GOLOMB_RICE
= 8,
123
E_GAMMA
= 9
124
};
125
126
enum
cram_external_type
{
127
E_INT
= 1,
128
E_LONG
= 2,
129
E_BYTE
= 3,
130
E_BYTE_ARRAY
= 4,
131
E_BYTE_ARRAY_BLOCK
= 5,
132
};
133
134
/* "File Definition Structure" */
135
typedef
struct
{
136
char
magic[4];
137
uint8_t
major_version
;
138
uint8_t
minor_version
;
139
char
file_id[20];
// Filename or SHA1 checksum
140
}
cram_file_def
;
141
142
#define CRAM_1_VERS 100 // 1.0
143
#define CRAM_2_VERS 200 // 1.1, or 2.0?
144
145
struct
cram_slice
;
146
147
enum
cram_block_method
{
148
BM_ERROR
= -1,
149
RAW
= 0,
150
GZIP
= 1,
151
BZIP2
= 2,
152
};
153
154
enum
cram_content_type
{
155
CT_ERROR
= -1,
156
FILE_HEADER
= 0,
157
COMPRESSION_HEADER
= 1,
158
MAPPED_SLICE
= 2,
159
UNMAPPED_SLICE
= 3,
// CRAM_1_VERS only
160
EXTERNAL
= 4,
161
CORE
= 5,
162
};
163
164
/* Compression metrics */
165
typedef
struct
{
166
int
m1
;
167
int
m2
;
168
int
trial
;
169
int
next_trial
;
170
}
cram_metrics
;
171
172
/* Block */
173
typedef
struct
{
174
enum
cram_block_method
method, orig_method;
175
enum
cram_content_type
content_type;
176
int32_t
content_id
;
177
int32_t
comp_size
;
178
int32_t
uncomp_size
;
179
int32_t
idx
;
/* offset into data */
180
unsigned
char
*
data
;
181
182
// For bit I/O
183
size_t
alloc
;
184
size_t
byte
;
185
int
bit
;
186
}
cram_block
;
187
188
struct
cram_codec
;
/* defined in cram_codecs.h */
189
struct
cram_map
;
190
191
#define CRAM_MAP_HASH 32
192
#define CRAM_MAP(a,b) (((a)*3+(b))&(CRAM_MAP_HASH-1))
193
194
/* Compression header block */
195
typedef
struct
{
196
int32_t
ref_seq_id
;
197
int32_t
ref_seq_start
;
198
int32_t
ref_seq_span
;
199
int32_t
num_records
;
200
int32_t
num_landmarks
;
201
int32_t
*
landmark
;
202
203
/* Flags from preservation map */
204
int
mapped_qs_included
;
205
int
unmapped_qs_included
;
206
int
unmapped_placed
;
207
int
qs_included
;
208
int
read_names_included
;
209
int
AP_delta
;
210
// indexed by ref-base and subst. code
211
char
substitution_matrix[5][4];
212
213
// TD Dictionary as a concatenated block
214
cram_block
*
TD_blk
;
// Tag Dictionary
215
int
nTL
;
// number of TL entries in TD
216
unsigned
char
**
TL
;
// array of size nTL, pointer into TD_blk.
217
khash_t
(m_s2i) *TD_hash;
// Keyed on TD strings, map to TL[] indices
218
string_alloc_t
*
TD_keys
;
// Pooled keys for TD hash.
219
220
khash_t
(
map
) *preservation_map;
221
struct
cram_map
*rec_encoding_map[
CRAM_MAP_HASH
];
222
struct
cram_map
*tag_encoding_map[
CRAM_MAP_HASH
];
223
224
struct
cram_codec
*
BF_codec
;
// bam bit flags
225
struct
cram_codec
*
CF_codec
;
// compression flags
226
struct
cram_codec
*
RL_codec
;
// read length
227
struct
cram_codec
*
AP_codec
;
// alignment pos
228
struct
cram_codec
*
RG_codec
;
// read group
229
struct
cram_codec
*
MF_codec
;
// mate flags
230
struct
cram_codec
*
NS_codec
;
// next frag ref ID
231
struct
cram_codec
*
NP_codec
;
// next frag pos
232
struct
cram_codec
*
TS_codec
;
// template size
233
struct
cram_codec
*
NF_codec
;
// next frag distance
234
struct
cram_codec
*
TC_codec
;
// tag count CRAM_1_VERS
235
struct
cram_codec
*
TN_codec
;
// tag name/type CRAM_1_VERS
236
struct
cram_codec
*
TL_codec
;
// tag line CRAM_2_VERS
237
struct
cram_codec
*
FN_codec
;
// no. features
238
struct
cram_codec
*
FC_codec
;
// feature code
239
struct
cram_codec
*
FP_codec
;
// feature pos
240
struct
cram_codec
*
BS_codec
;
// base subst feature
241
struct
cram_codec
*
IN_codec
;
// insertion feature
242
struct
cram_codec
*
SC_codec
;
// soft-clip feature
243
struct
cram_codec
*
DL_codec
;
// deletion len feature
244
struct
cram_codec
*
BA_codec
;
// base feature
245
struct
cram_codec
*
RS_codec
;
// ref skip length feature
246
struct
cram_codec
*
PD_codec
;
// padding length feature
247
struct
cram_codec
*
HC_codec
;
// hard clip length feature
248
struct
cram_codec
*
MQ_codec
;
// mapping quality
249
struct
cram_codec
*
RN_codec
;
// read names
250
struct
cram_codec
*
QS_codec
;
// quality value (single)
251
struct
cram_codec
*
Qs_codec
;
// quality values (string)
252
struct
cram_codec
*
RI_codec
;
// ref ID
253
struct
cram_codec
*
TM_codec
;
// ?
254
struct
cram_codec
*
TV_codec
;
// ?
255
256
char
*
uncomp
;
// A single block of uncompressed data
257
size_t
uncomp_size
, uncomp_alloc;
258
}
cram_block_compression_hdr
;
259
260
typedef
struct
cram_map
{
261
int
key
;
/* 0xe0 + 3 bytes */
262
enum
cram_encoding
encoding
;
263
int
offset
;
/* Offset into a single block of memory */
264
int
size
;
/* Size */
265
struct
cram_codec
*
codec
;
266
struct
cram_map
*
next
;
// for noddy internal hash
267
}
cram_map
;
268
269
/* Mapped or unmapped slice header block */
270
typedef
struct
{
271
enum
cram_content_type
content_type;
272
int32_t
ref_seq_id
;
/* if content_type == MAPPED_SLICE */
273
int32_t
ref_seq_start
;
/* if content_type == MAPPED_SLICE */
274
int32_t
ref_seq_span
;
/* if content_type == MAPPED_SLICE */
275
int32_t
num_records
;
276
int32_t
record_counter
;
277
int32_t
num_blocks
;
278
int32_t
num_content_ids
;
279
int32_t
*
block_content_ids
;
280
int32_t
ref_base_id
;
/* if content_type == MAPPED_SLICE */
281
unsigned
char
md5[16];
282
}
cram_block_slice_hdr
;
283
284
struct
ref_entry
;
285
286
/*
287
* Container.
288
*
289
* Conceptually a container is split into slices, and slices into blocks.
290
* However on disk it's just a list of blocks and we need to query the
291
* block types to identify the start/end points of the slices.
292
*
293
* OR... are landmarks the start/end points of slices?
294
*/
295
typedef
struct
{
296
int32_t
length
;
297
int32_t
ref_seq_id
;
298
int32_t
ref_seq_start
;
299
int32_t
ref_seq_span
;
300
int32_t
record_counter
;
301
int64_t
num_bases
;
302
int32_t
num_records
;
303
int32_t
num_blocks
;
304
int32_t
num_landmarks
;
305
int32_t
*
landmark
;
306
307
/* Size of container header above */
308
size_t
offset
;
309
310
/* Compression header is always the first block? */
311
cram_block_compression_hdr
*
comp_hdr
;
312
cram_block
*
comp_hdr_block
;
313
314
/* For construction purposes */
315
int
max_slice
, curr_slice;
// maximum number of slices
316
int
max_rec
, curr_rec;
// current and max recs per slice
317
int
max_c_rec
, curr_c_rec;
// current and max recs per container
318
int
slice_rec
;
// rec no. for start of this slice
319
int
curr_ref
;
// current ref ID. -2 for no previous
320
int
last_pos
;
// last record position
321
struct
cram_slice
**
slices
, *slice;
322
int
pos_sorted
;
// boolean, 1=>position sorted data
323
int
max_apos
;
// maximum position, used if pos_sorted==0
324
int
last_slice
;
// number of reads in last slice (0 for 1st)
325
int
multi_seq
;
// true if packing multi seqs per cont/slice
326
int
unsorted
;
// true is AP_delta is 0.
327
328
/* Copied from fd before encoding, to allow multi-threading */
329
int
ref_start
, first_base, last_base,
ref_id
,
ref_end
;
330
char
*
ref
;
331
//struct ref_entry *ref;
332
333
/* For multi-threading */
334
bam_seq_t
**
bams
;
335
336
/* Statistics for encoding */
337
cram_stats
*
TS_stats
;
338
cram_stats
*
RG_stats
;
339
cram_stats
*
FP_stats
;
340
cram_stats
*
NS_stats
;
341
cram_stats
*
RN_stats
;
342
cram_stats
*
CF_stats
;
343
cram_stats
*
TN_stats
;
344
cram_stats
*
BA_stats
;
345
cram_stats
*
TV_stats
;
346
cram_stats
*
BS_stats
;
347
cram_stats
*
FC_stats
;
348
cram_stats
*
BF_stats
;
349
cram_stats
*
AP_stats
;
350
cram_stats
*
NF_stats
;
351
cram_stats
*
MF_stats
;
352
cram_stats
*
FN_stats
;
353
cram_stats
*
RL_stats
;
354
cram_stats
*
DL_stats
;
355
cram_stats
*
TC_stats
;
356
cram_stats
*
TL_stats
;
357
cram_stats
*
MQ_stats
;
358
cram_stats
*
TM_stats
;
359
cram_stats
*
QS_stats
;
360
cram_stats
*
NP_stats
;
361
cram_stats
*
RI_stats
;
362
cram_stats
*
RS_stats
;
363
cram_stats
*
PD_stats
;
364
cram_stats
*
HC_stats
;
365
366
khash_t
(s_i2i) *tags_used;
// set of tag types in use, for tag encoding map
367
int
*
refs_used
;
// array of frequency of ref seq IDs
368
}
cram_container
;
369
370
/*
371
* A single cram record
372
*/
373
typedef
struct
{
374
struct
cram_slice
*
s
;
// Filled out by cram_decode only
375
376
int32_t
ref_id
;
// fixed for all recs in slice?
377
int32_t
flags
;
// BF
378
int32_t
cram_flags
;
// CF
379
int32_t
len
;
// RL
380
int32_t
apos
;
// AP
381
int32_t
rg
;
// RG
382
int32_t
name
;
// RN; idx to s->names_blk
383
int32_t
name_len
;
384
int32_t
mate_line
;
// index to another cram_record
385
int32_t
mate_ref_id
;
386
int32_t
mate_pos
;
// NP
387
int32_t
tlen
;
// TS
388
389
// Auxiliary data
390
int32_t
ntags
;
// TC
391
int32_t
aux
;
// idx to s->aux_blk
392
int32_t
aux_size
;
// total size of packed ntags in aux_blk
393
#ifndef TN_external
394
int32_t
TN_idx;
// TN; idx to s->TN;
395
#else
396
int32_t
tn
;
// idx to s->tn_blk
397
#endif
398
int
TL
;
399
400
int32_t
seq
;
// idx to s->seqs_blk
401
int32_t
qual
;
// idx to s->qual_blk
402
int32_t
cigar
;
// idx to s->cigar
403
int32_t
ncigar
;
404
int32_t
aend
;
// alignment end
405
int32_t
mqual
;
// MQ
406
407
int32_t
feature
;
// idx to s->feature
408
int32_t
nfeature
;
// number of features
409
int32_t
mate_flags
;
// MF
410
}
cram_record
;
411
412
// Accessor macros as an analogue of the bam ones
413
#define cram_qname(c) (&(c)->s->name_blk->data[(c)->name])
414
#define cram_seq(c) (&(c)->s->seqs_blk->data[(c)->seq])
415
#define cram_qual(c) (&(c)->s->qual_blk->data[(c)->qual])
416
#define cram_aux(c) (&(c)->s->aux_blk->data[(c)->aux])
417
#define cram_seqi(c,i) (cram_seq((c))[(i)])
418
#define cram_name_len(c) ((c)->name_len)
419
#define cram_strand(c) (((c)->flags & BAM_FREVERSE) != 0)
420
#define cram_mstrand(c) (((c)->flags & BAM_FMREVERSE) != 0)
421
#define cram_cigar(c) (&((cr)->s->cigar)[(c)->cigar])
422
423
/*
424
* A feature is a base difference, used for the sequence reference encoding.
425
* (We generate these internally when writing CRAM.)
426
*/
427
typedef
struct
{
428
union
{
429
struct
{
430
int
pos
;
431
int
code
;
432
int
base
;
// substitution code
433
} X;
434
struct
{
435
int
pos;
436
int
code
;
437
int
base;
// actual base & qual
438
int
qual
;
439
} B;
440
struct
{
441
int
pos;
442
int
code
;
443
int
qual;
444
} Q;
445
struct
{
446
int
pos;
447
int
code
;
448
int
len
;
449
int
seq_idx
;
// soft-clip multiple bases
450
} S;
451
struct
{
452
int
pos;
453
int
code
;
454
int
len;
455
int
seq_idx;
// insertion multiple bases
456
}
I
;
457
struct
{
458
int
pos;
459
int
code
;
460
int
base;
// insertion single base
461
} i;
462
struct
{
463
int
pos;
464
int
code
;
465
int
len;
466
} D;
467
struct
{
468
int
pos;
469
int
code
;
470
int
len;
471
} N;
472
struct
{
473
int
pos;
474
int
code
;
475
int
len;
476
} P;
477
struct
{
478
int
pos;
479
int
code
;
480
int
len;
481
}
H
;
482
};
483
}
cram_feature
;
484
485
/*
486
* A slice is really just a set of blocks, but it
487
* is the logical unit for decoding a number of
488
* sequences.
489
*/
490
typedef
struct
cram_slice
{
491
cram_block_slice_hdr
*
hdr
;
492
cram_block
*
hdr_block
;
493
cram_block
**
block
;
494
cram_block
**
block_by_id
;
495
496
/* State used during encoding/decoding */
497
int
last_apos
,
max_apos
;
498
499
/* Identifier used for auto-assigning read names */
500
uint64_t
id
;
501
502
/* Array of decoded cram records */
503
cram_record
*
crecs
;
504
505
/* An dynamically growing buffers for data pointed
506
* to by crecs[] array.
507
*/
508
uint32_t
*
cigar
;
509
uint32_t
cigar_alloc
;
510
uint32_t
ncigar
;
511
cram_block
*
name_blk
;
512
cram_block
*
seqs_blk
;
513
cram_block
*
qual_blk
;
514
cram_block
*
aux_blk
;
515
cram_block
*
base_blk
;
// substitutions (soft-clips for 1.0)
516
cram_block
*
soft_blk
;
// soft-clips
517
518
cram_feature
*
features
;
519
int
nfeatures
;
520
int
afeatures
;
// allocated size of features
521
522
#ifndef TN_external
523
// TN field (Tag Name)
524
uint32_t
*TN;
525
int
nTN, aTN;
// used and allocated size for TN[]
526
#else
527
cram_block
*
tn_blk
;
528
int
tn_id
;
529
#endif
530
531
string_alloc_t
*
pair_keys
;
// Pooled keys for pair hash.
532
khash_t
(m_s2i) *pair;
// for identifying read-pairs in this slice.
533
534
char
*
ref
;
// slice of current reference
535
int
ref_start
;
// start position of current reference;
536
int
ref_end
;
// end position of current reference;
537
538
#ifdef BA_external
539
int
BA_len;
540
int
ba_id;
541
#endif
542
int
ref_id
;
543
}
cram_slice
;
544
545
/*-----------------------------------------------------------------------------
546
* Consider moving reference handling to cram_refs.[ch]
547
*/
548
// from fa.fai / samtools faidx files
549
typedef
struct
ref_entry
{
550
char
*
name
;
551
char
*
fn
;
552
int64_t
length
;
553
int64_t
offset
;
554
int
bases_per_line
;
555
int
line_length
;
556
int64_t
count
;
// for shared references so we know to dealloc seq
557
char
*
seq
;
558
}
ref_entry
;
559
560
KHASH_MAP_INIT_STR
(refs,
ref_entry
*)
561
562
// References structure.
563
typedef struct {
564
string_alloc_t
*
pool
;
// String pool for holding filenames and SN vals
565
566
khash_t
(refs) *h_meta;
// ref_entry*, index by name
567
ref_entry
**
ref_id
;
// ref_entry*, index by ID
568
int
nref
;
// number of ref_entry
569
570
char
*
fn
;
// current file opened
571
FILE *
fp
;
// and the FILE* to go with it.
572
573
int
count
;
// how many cram_fd sharing this refs struct
574
575
pthread_mutex_t
lock
;
// Mutex for multi-threaded updating
576
ref_entry
*
last
;
// Last queried sequence
577
int
last_id
;
// Used in cram_ref_decr_locked to delay free
578
}
refs_t
;
579
580
/*-----------------------------------------------------------------------------
581
* CRAM index
582
*
583
* Detect format by number of entries per line.
584
* 5 => 1.0 (refid, start, nseq, C offset, slice)
585
* 6 => 1.1 (refid, start, span, C offset, S offset, S size)
586
*
587
* Indices are stored in a nested containment list, which is trivial to set
588
* up as the indices are on sorted data so we're appending to the nclist
589
* in sorted order. Basically if a slice entirely fits within a previous
590
* slice then we append to that slices list. This is done recursively.
591
*
592
* Lists are sorted on two dimensions: ref id + slice coords.
593
*/
594
typedef
struct
cram_index
{
595
int
nslice
,
nalloc
;
// total number of slices
596
struct
cram_index
*
e
;
// array of size nslice
597
598
int
refid
;
// 1.0 1.1
599
int
start
;
// 1.0 1.1
600
int
end
;
// 1.1
601
int
nseq
;
// 1.0 - undocumented
602
int
slice
;
// 1.0 landmark index, 1.1 landmark value
603
int
len
;
// 1.1 - size of slice in bytes
604
int64_t
offset
;
// 1.0 1.1
605
}
cram_index
;
606
607
typedef
struct
{
608
int
refid
;
609
int
start
;
610
int
end
;
611
}
cram_range
;
612
613
/*-----------------------------------------------------------------------------
614
*/
615
/* CRAM File handle */
616
617
typedef
struct
spare_bams
{
618
bam_seq_t
**
bams
;
619
struct
spare_bams
*
next
;
620
}
spare_bams
;
621
622
typedef
struct
cram_fd
{
623
cram_FILE *
fp
;
624
int
mode
;
// 'r' or 'w'
625
int
version
;
626
cram_file_def
*
file_def
;
627
SAM_hdr
*
header
;
628
629
char
*
prefix
;
630
int
record_counter
;
631
int
slice_num
;
632
int
err
;
633
634
// Most recent compression header decoded
635
//cram_block_compression_hdr *comp_hdr;
636
//cram_block_slice_hdr *slice_hdr;
637
638
// Current container being processed.
639
cram_container
*
ctr
;
640
641
// positions for encoding or decoding
642
int
first_base
,
last_base
;
643
644
// cached reference portion
645
refs_t
*
refs
;
// ref meta-data structure
646
char
*
ref
, *
ref_free
;
// current portion held in memory
647
int
ref_id
;
648
int
ref_start
;
649
int
ref_end
;
650
char
*
ref_fn
;
// reference fasta filename
651
652
// compression level and metrics
653
int
level
;
654
cram_metrics
*
m
[7];
655
656
// options
657
int
decode_md
;
// Whether to export MD and NM tags
658
int
verbose
;
659
int
seqs_per_slice
;
660
int
slices_per_container
;
661
int
embed_ref
;
662
int
no_ref
;
663
int
ignore_md5
;
664
int
use_bz2
;
665
int
shared_ref
;
666
cram_range
range
;
667
668
// lookup tables, stored here so we can be trivially multi-threaded
669
unsigned
int
bam_flag_swap
[0x1000];
// cram -> bam flags
670
unsigned
int
cram_flag_swap
[0x1000];
// bam -> cram flags
671
unsigned
char
L1
[256];
// ACGT{*} ->0123{4}
672
unsigned
char
L2
[256];
// ACGTN{*}->01234{5}
673
char
cram_sub_matrix
[32][32];
// base substituion codes
674
675
int
index_sz
;
676
cram_index
*
index
;
// array, sizeof index_sz
677
off_t
first_container
;
678
int
eof
;
679
int
last_slice
;
// number of recs encoded in last slice
680
int
multi_seq
;
681
int
unsorted
;
682
int
empty_container
;
// Marker for EOF block
683
684
// thread pool
685
int
own_pool
;
686
t_pool
*
pool
;
687
t_results_queue
*
rqueue
;
688
pthread_mutex_t
metrics_lock
;
689
pthread_mutex_t
ref_lock
;
690
spare_bams
*
bl
;
691
pthread_mutex_t
bam_list_lock
;
692
void
*
job_pending
;
693
int
ooc
;
// out of containers.
694
}
cram_fd
;
695
696
enum
cram_option
{
697
CRAM_OPT_DECODE_MD
,
698
CRAM_OPT_PREFIX
,
699
CRAM_OPT_VERBOSITY
,
700
CRAM_OPT_SEQS_PER_SLICE
,
701
CRAM_OPT_SLICES_PER_CONTAINER
,
702
CRAM_OPT_RANGE
,
703
CRAM_OPT_VERSION
,
704
CRAM_OPT_EMBED_REF
,
705
CRAM_OPT_IGNORE_MD5
,
706
CRAM_OPT_REFERENCE
,
707
CRAM_OPT_MULTI_SEQ_PER_SLICE
,
708
CRAM_OPT_NO_REF
,
709
CRAM_OPT_USE_BZIP2
,
710
CRAM_OPT_SHARED_REF
,
711
CRAM_OPT_NTHREADS
,
712
CRAM_OPT_THREAD_POOL
,
713
};
714
715
/* BF bitfields */
716
/* Corrected in 1.1. Use bam_flag_swap[bf] and BAM_* macros for 1.0 & 1.1 */
717
#define CRAM_FPAIRED 256
718
#define CRAM_FPROPER_PAIR 128
719
#define CRAM_FUNMAP 64
720
#define CRAM_FREVERSE 32
721
#define CRAM_FREAD1 16
722
#define CRAM_FREAD2 8
723
#define CRAM_FSECONDARY 4
724
#define CRAM_FQCFAIL 2
725
#define CRAM_FDUP 1
726
727
#define CRAM_M_REVERSE 1
728
#define CRAM_M_UNMAP 2
729
730
731
/* CF bitfields */
732
#define CRAM_FLAG_PRESERVE_QUAL_SCORES (1<<0)
733
#define CRAM_FLAG_DETACHED (1<<1)
734
#define CRAM_FLAG_MATE_DOWNSTREAM (1<<2)
735
736
/* External IDs used by this implementation (only assumed during writing) */
737
#define CRAM_EXT_IN 0
738
#define CRAM_EXT_QUAL 1
739
#define CRAM_EXT_NAME 2
740
#define CRAM_EXT_TS_NP 3
741
#define CRAM_EXT_TAG 4
742
#define CRAM_EXT_TAG_S "\004"
743
#define CRAM_EXT_BA 5
744
#define CRAM_EXT_TN 6
745
#define CRAM_EXT_SC 7
746
#define CRAM_EXT_REF 8
747
748
#ifdef __cplusplus
749
}
750
#endif
751
752
#endif
/* _CRAM_STRUCTS_H_ */
Generated on Wed Feb 25 2015 08:32:47 for NVBIO by
1.8.4