NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
backtrack.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/types.h>
31 #include <nvbio/fmindex/fmindex.h>
32 
33 namespace nvbio {
34 
37 
60 template <typename FMIndex, typename String, typename Stack, typename Delegate>
63  const FMIndex fmi,
64  const String pattern,
65  const uint32 len,
66  const uint32 seed,
67  const uint32 mismatches,
68  Stack stack,
69  Delegate& delegate)
70 {
71  if (mismatches == 0 || seed == len)
72  {
73  const uint2 range = match( fmi, pattern, len );
74 
75  if (range.x <= range.y)
76  delegate( range );
77  }
78  else
79  {
80  uint2 root_range = match(
81  fmi, pattern + len - seed, seed );
82 
83  // check if there is no seed match
84  if (root_range.x > root_range.y)
85  return;
86 
87  // compute the four children
88  uint4 cnt_k, cnt_l;
89  rank4( fmi, make_uint2( root_range.x-1, root_range.y ), &cnt_k, &cnt_l );
90 
91  // initialize the stack size
92  uint32 sp = 0;
93 
94  const uint8 c_pattern = pattern[ len - seed - 1u ];
95 
96  for (uint32 c = 0; c < 4; ++c)
97  {
98  // check if the child node for character 'c' exists
99  if (comp( cnt_k, c ) < comp( cnt_l, c ))
100  {
101  const uint8 cost = (c == c_pattern) ? 0u : 1u;
102 
103  const uint2 range_c = make_uint2(
104  fmi.L2(c) + comp( cnt_k, c ) + 1,
105  fmi.L2(c) + comp( cnt_l, c ) );
106 
107  stack[sp] = make_uint4( range_c.x, range_c.y, cost, len - seed - 1u );
108 
109  ++sp;
110  }
111  }
112 
113  while (sp)
114  {
115  // pop the stack
116  --sp;
117 
118  const uint4 entry_info = stack[sp];
119  const uint8 cost = entry_info.z;
120  const uint32 l = entry_info.w;
121  uint2 range = make_uint2( entry_info.x, entry_info.y );
122 
123  // check if we reached the maximum extension length
124  if (l == 0u)
125  {
126  if (range.x <= range.y)
127  delegate( range );
128  }
129 
130  if (cost < mismatches)
131  {
132  // compute the four children
133  uint4 cnt_k, cnt_l;
134  rank4( fmi, make_uint2( range.x-1, range.y ), &cnt_k, &cnt_l );
135 
136  const uint8 c_pattern = pattern[l-1u];
137 
138  for (uint32 c = 0; c < 4; ++c)
139  {
140  // check if the child node for character 'c' exists
141  if (comp( cnt_k, c ) < comp( cnt_l, c ))
142  {
143  const uint2 range_c = make_uint2(
144  fmi.L2(c) + comp( cnt_k, c ) + 1,
145  fmi.L2(c) + comp( cnt_l, c ) );
146 
147  // compute the extension cost
148  const uint8 new_cost = cost + (c == c_pattern ? 0u : 1u);
149 
150  // copy the band to the stack
151  stack[sp] = make_uint4( range_c.x, range_c.y, new_cost, l-1u );
152 
153  ++sp;
154  }
155  }
156  }
157  else
158  {
159  // perform exact matching of the rest of the string
160  range = match( fmi, pattern, l, range );
161 
162  // update stats
163  if (range.x <= range.y)
164  delegate( range );
165  }
166  }
167  }
168 }
169 
171 
172 } // namespace nvbio