NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
utils.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 // utils.h
29 //
30 
31 #pragma once
32 
33 #include <nvbio/basic/numbers.h>
34 #include <nvbio/basic/threads.h>
35 #include <nvbio/basic/cuda/arch.h>
36 
// Makes every name in namespace nvbio visible to the rest of this header.
// NOTE(review): because this is a header (see #pragma once above), the using-directive also
// applies to every file that includes it — confirm that this is intended.
37 using namespace nvbio;
38 
// Anonymous enum; the only enumerator visible in this listing is UNFIXABLE (value 2).
// NOTE(review): the listing's own line numbers jump 39 -> 42 -> 45, so further enumerators
// appear to be elided from this view — confirm against the original header.
39 enum {
42  UNFIXABLE = 2,
45 };
46 
// Number of hash functions (K) used by the Bloom filters for sampled and for trusted k-mers.
// The commented-out definitions are alternative settings kept for reference; only the last
// two definitions are active.
47 //static const uint32 SAMPLED_KMERS_FILTER_K = 9; // optimal number of hashes for a Bloom filter with a false positive rate of 0.01
48 //static const uint32 TRUSTED_KMERS_FILTER_K = 11; // optimal number of hashes for a Bloom filter with a false positive rate of 0.0005
49 //static const uint32 SAMPLED_KMERS_FILTER_K = 7; // optimal number of hashes for a Bloom filter with a false positive rate of 0.01
50 //static const uint32 TRUSTED_KMERS_FILTER_K = 11; // optimal number of hashes for a Bloom filter with a false positive rate of 0.0005
51 static const uint32 SAMPLED_KMERS_FILTER_K = 5; // in theory this value should yield a worse false positive rate than 0.01, yet in practice it does better
52 static const uint32 TRUSTED_KMERS_FILTER_K = 8; // in theory this value should yield a worse false positive rate than 0.0005, yet in practice it does better
53 
55 {
    // Function-call operator: forwards the 64-bit k-mer to hash() and returns its result.
    // NOTE(review): the declaration line of the enclosing type (and the definition of hash)
    // is not visible in this listing — confirm the type name against the original header.
57  uint64 operator() (const uint64 kmer) const { return hash( kmer ); }
58 };
60 {
    // Function-call operator: forwards the 64-bit k-mer to hash2() and returns its result.
    // NOTE(review): the declaration line of the enclosing type (and the definition of hash2)
    // is not visible in this listing — confirm the type name against the original header.
62  uint64 operator() (const uint64 kmer) const { return hash2( kmer ); }
63 };
64 
// Integer constant MAX_READ_LENGTH = 2048, declared through an anonymous enum.
// NOTE(review): presumably the maximum supported read length, in symbols — confirm against its users.
65 enum { MAX_READ_LENGTH = 2048 };
66 
// Rolling, 2-bits-per-symbol encoding of a k-mer of `len` symbols.
//
// Symbols are packed into `code`, with the most recently appended symbol (push_back) in
// bits 0-1 and the first symbol in bits 2*(len-1) .. 2*len-1; `mask` keeps the low 2*len bits.
// Only the two low bits of a symbol are stored; a symbol value >= 4 is treated as invalid and
// its position is tracked in `invalid`, so that is_valid() reports whether the current window
// contains such a symbol.
//
// NOTE(review): `mask` and `code` are used throughout but their declarations are not visible in
// this listing (its line numbers skip 126-127); they are presumably 64-bit unsigned — confirm.
67 struct KmerCode
68 {
    // Default constructor: zero-length k-mer with empty code and mask, no invalid symbol.
70  KmerCode() : mask(0), code(0), len(0), invalid(-1) {}
71 
    // Constructs an empty k-mer of l symbols and builds the mask with the low 2*l bits set
    // (each iteration shifts the mask left by one symbol and fills the two new low bits).
73  KmerCode(const int l) : mask(0), code(0), len(l), invalid(-1)
74  {
75  for (int i = 0 ; i < len; ++i)
76  {
77  mask = mask << 2u;
78  mask = mask | 3u;
79  }
80  }
    // Copy constructor: member-wise copy of mask, code, len and invalid.
82  KmerCode(const KmerCode& k) : mask(k.mask), code(k.code), len(k.len), invalid(k.invalid)
83  {}
84 
    // Clears the packed symbols and the invalid-symbol marker; len and mask are left untouched.
86  void restart() { code = 0ull ; invalid = -1 ; }
87 
    // Appends symbol c at the low end of the code; the symbol shifted past the mask is dropped.
    // Statement order matters: the tracked invalid position is aged first, then reset to 0 if c
    // itself is invalid, and finally cleared once it has moved out of the len-symbol window.
89  void push_back(const uint8 c)
90  {
        // an already tracked invalid symbol moves one position away from the low end
91  if (invalid != -1)
92  invalid++;
93 
        // only the two low bits of c are stored
94  code = ((code << 2ull) & mask) | uint64(c & 3);
95  if (c >= 4)
96  invalid = 0;
97 
        // the tracked invalid symbol has been shifted out of the window
98  if (invalid >= len)
99  invalid = -1;
100  }
    // Prepends symbol c at the high end of the code: the lowest symbol is dropped via
    // shift_right(1), then c is inserted at symbol position len-1.
    // If c is invalid, its position (len-1) replaces any previously tracked invalid position.
    // NOTE(review): assumes len >= 1; with len == 0 the shift count 2*(len-1) wraps to a huge
    // unsigned value — confirm callers never push into a default-constructed KmerCode.
102  void push_front(const uint8 c)
103  {
104  shift_right( 1 );
105 
106  if (c >= 4)
107  invalid = len - 1;
108 
109  code = (code | ((uint64(c & 3)) << (2ull * (len - 1)))) & mask;
110  }
111 
    // Drops the k lowest symbols: the code is shifted right by 2*k bits and masked with the
    // correspondingly narrowed mask. The tracked invalid position moves down by k and is
    // cleared when the invalid symbol itself has been dropped.
    // NOTE(review): assumes k >= 0 and 2*k smaller than the bit width of code/mask — confirm.
113  void shift_right(int k)
114  {
115  if (invalid != -1)
116  invalid -= k;
117 
118  code = (code >> (2ull * k)) & (mask >> (2ull * k));
        // any negative position means the invalid symbol is gone; normalize to the -1 sentinel
119  if (invalid < 0)
120  invalid = -1;
121  }
122 
    // Returns true when no invalid symbol is currently tracked.
124  bool is_valid() const { return invalid == -1; }
125 
    // Number of symbols in the k-mer.
128  int len;
    // Position of the tracked invalid symbol, counted in symbols from the low end of the code
    // (0 = last symbol appended with push_back), or -1 when there is none.
129  int invalid;
130 };
131 
133 {
    // Constructor: zero-initializes the m_reads, m_bps and m_time counters.
    // NOTE(review): the struct's declaration line and the declarations of m_reads and m_bps are
    // not visible in this listing (its line numbers skip 132 and 136-138) — confirm their types.
134  SequenceStats() : m_reads(0), m_bps(0), m_time(0) {}
135 
    // NOTE(review): presumably accumulated processing time; units are not visible here — confirm.
139  float m_time;
140 };