NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
vcf.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <nvbio/basic/types.h>
29 #include <nvbio/basic/vector.h>
31 
32 #include <vector>
33 #include <string>
34 
35 #pragma once
36 
37 namespace nvbio {
38 namespace io {
39 
41 {
42  // these indices are stored in base-pairs since variants are extremely short
47 
51  { }
52 
55  : reference_start(reference_start), reference_len(reference_len),
56  variant_start(variant_start), variant_len(variant_len)
57  { }
58 };
59 
61 {
62  // the name of the reference sequence
63  // note: VCF allows this to be an integer ID encoded in a string that references
64  // a contig from an assembly referenced in the header; this is not supported yet
65  std::vector<std::string> reference_sequence_names;
66 
67  // start (x) and stop (y) positions of the variant in the reference sequence (first base in the sequence is position 1)
68  // the "stop" position is either start + len or the contents of the END= info tag
70 
71  // packed reference sequences
73  // packed variant sequences
75  // an index for both references and variants
77 
78  // quality value assigned to each variant
80 
82  {
83  reference_sequences.clear();
84  variants.clear();
85  ref_variant_index.clear();
86  }
87 };
88 
89 // loads variant data from the file named file_name and appends it to output (data already in output is kept); NOTE(review): the bool result presumably signals success -- confirm against the implementation
90 bool loadVCF(SNPDatabase& output, const char *file_name);
91 
92 } // namespace io
93 } // namespace nvbio