NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
contrib
htslib
vcf_sweep.c
Go to the documentation of this file.
1
#include "
htslib/vcf_sweep.h
"
2
#include "
htslib/bgzf.h
"
3
4
#define SW_FWD 0
5
#define SW_BWD 1
6
7
struct
_bcf_sweep_t
8
{
9
htsFile
*
file
;
10
bcf_hdr_t
*
hdr
;
11
BGZF
*
fp
;
12
13
int
direction
;
// to tell if the direction has changed
14
int
block_size
;
// the size of uncompressed data to hold in memory
15
bcf1_t
*
rec
;
// bcf buffer
16
int
nrec
,
mrec
;
// number of used records; total size of the buffer
17
int
lrid
,
lpos
,
lnals
,
lals_len
,
mlals
;
// to check uniqueness of a record
18
char
*
lals
;
19
20
uint64_t
*
idx
;
// uncompressed offsets of VCF/BCF records
21
int
iidx
,
nidx
,
midx
;
// i: current offset; n: used; m: allocated
22
int
idx_done
;
// the index is built during the first pass
23
};
24
25
BGZF
*
hts_get_bgzfp
(
htsFile
*fp);
26
int
hts_useek
(
htsFile
*file,
long
uoffset,
int
where);
27
long
hts_utell
(
htsFile
*file);
28
29
static
inline
int
sw_rec_equal(
bcf_sweep_t
*sw,
bcf1_t
*rec)
30
{
31
if
( sw->
lrid
!=rec->
rid
)
return
0;
32
if
( sw->
lpos
!=rec->
pos
)
return
0;
33
if
( sw->
lnals
!=rec->
n_allele
)
return
0;
34
35
char
*t = rec->
d
.
allele
[sw->
lnals
-1];
36
int
len = t - rec->
d
.
allele
[0] + 1;
37
while
( *t ) { t++; len++; }
38
if
( sw->
lals_len
!=len )
return
0;
39
if
( memcmp(sw->
lals
,rec->
d
.
allele
[0],len) )
return
0;
40
return
1;
41
}
42
43
static
void
sw_rec_save(
bcf_sweep_t
*sw,
bcf1_t
*rec)
44
{
45
sw->
lrid
= rec->
rid
;
46
sw->
lpos
= rec->
pos
;
47
sw->
lnals
= rec->
n_allele
;
48
49
char
*t = rec->
d
.
allele
[sw->
lnals
-1];
50
int
len = t - rec->
d
.
allele
[0] + 1;
51
while
( *t ) { t++; len++; }
52
sw->
lals_len
= len;
53
hts_expand
(
char
, len, sw->
mlals
, sw->
lals
);
54
memcpy(sw->
lals
, rec->
d
.
allele
[0], len);
55
}
56
57
static
void
sw_fill_buffer(
bcf_sweep_t
*sw)
58
{
59
if
( !sw->
iidx
)
return
;
60
sw->
iidx
--;
61
62
int
ret =
hts_useek
(sw->
file
, sw->
idx
[sw->
iidx
], 0);
63
assert( ret==0 );
64
65
sw->
nrec
= 0;
66
bcf1_t
*rec = &sw->
rec
[sw->
nrec
];
67
while
( (ret=
bcf_read1
(sw->
file
, sw->
hdr
, rec))==0 )
68
{
69
bcf_unpack
(rec,
BCF_UN_STR
);
70
71
// if not in the last block, stop at the saved record
72
if
( sw->
iidx
+1 < sw->
nidx
&& sw_rec_equal(sw,rec) )
break
;
73
74
sw->
nrec
++;
75
hts_expand0
(
bcf1_t
, sw->
nrec
+1, sw->
mrec
, sw->
rec
);
76
rec = &sw->
rec
[sw->
nrec
];
77
}
78
sw_rec_save(sw, &sw->
rec
[0]);
79
}
80
81
bcf_sweep_t
*
bcf_sweep_init
(
const
char
*fname)
82
{
83
bcf_sweep_t
*sw = (
bcf_sweep_t
*) calloc(1,
sizeof
(
bcf_sweep_t
));
84
sw->
file
=
hts_open
(fname,
"r"
);
85
sw->
fp
=
hts_get_bgzfp
(sw->
file
);
86
bgzf_index_build_init
(sw->
fp
);
87
sw->
hdr
=
bcf_hdr_read
(sw->
file
);
88
sw->
mrec
= 1;
89
sw->
rec
= (
bcf1_t
*) calloc(sw->
mrec
,(
sizeof
(
bcf1_t
)));
90
sw->
block_size
= 1024*1024*3;
91
sw->
direction
=
SW_FWD
;
92
return
sw;
93
}
94
95
void
bcf_empty1
(
bcf1_t
*v);
96
void
bcf_sweep_destroy
(
bcf_sweep_t
*sw)
97
{
98
int
i;
99
for
(i=0; i<sw->
mrec
; i++)
bcf_empty1
(&sw->
rec
[i]);
100
free(sw->
idx
);
101
free(sw->
rec
);
102
free(sw->
lals
);
103
bcf_hdr_destroy
(sw->
hdr
);
104
hts_close
(sw->
file
);
105
free(sw);
106
}
107
108
static
void
sw_seek(
bcf_sweep_t
*sw,
int
direction)
109
{
110
sw->
direction
= direction;
111
if
( direction==
SW_FWD
)
112
hts_useek
(sw->
file
, sw->
idx
[0], 0);
113
else
114
{
115
sw->
iidx
= sw->
nidx
;
116
sw->
nrec
= 0;
117
}
118
}
119
120
bcf1_t
*
bcf_sweep_fwd
(
bcf_sweep_t
*sw)
121
{
122
if
( sw->
direction
==
SW_BWD
) sw_seek(sw,
SW_FWD
);
123
124
long
pos =
hts_utell
(sw->
file
);
125
126
bcf1_t
*rec = &sw->
rec
[0];
127
int
ret =
bcf_read1
(sw->
file
, sw->
hdr
, rec);
128
129
if
( ret!=0 )
// last record, get ready for sweeping backwards
130
{
131
sw->
idx_done
= 1;
132
sw->
fp
->
idx_build_otf
= 0;
133
sw_seek(sw,
SW_BWD
);
134
return
NULL;
135
}
136
137
if
( !sw->
idx_done
)
138
{
139
if
( !sw->
nidx
|| pos - sw->
idx
[sw->
nidx
-1] > sw->
block_size
)
140
{
141
sw->
nidx
++;
142
hts_expand
(
uint64_t
, sw->
nidx
, sw->
midx
, sw->
idx
);
143
sw->
idx
[sw->
nidx
-1] = pos;
144
}
145
}
146
return
rec;
147
}
148
149
bcf1_t
*
bcf_sweep_bwd
(
bcf_sweep_t
*sw)
150
{
151
if
( sw->
direction
==
SW_FWD
) sw_seek(sw,
SW_BWD
);
152
if
( !sw->
nrec
) sw_fill_buffer(sw);
153
if
( !sw->
nrec
)
return
NULL;
154
return
&sw->
rec
[ --sw->
nrec
];
155
}
156
157
bcf_hdr_t
*
bcf_sweep_hdr
(
bcf_sweep_t
*sw) {
return
sw->
hdr
; }
158
Generated on Wed Feb 25 2015 08:32:49 for NVBIO by
1.8.4