NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
banded_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/types.h>
31 #include <nvbio/alignment/utils.h>
37 namespace nvbio {
38 namespace aln {
39 
40 // a function to compute the alignment score between a pattern and a text string
41 // with banded DP alignment.
42 //
43 // \param aligner alignment algorithm
44 // \param pattern pattern string
45 // \param quals quality string
46 // \param text text string
47 // \param min_score threshold alignment score
48 // \param sink output sink
49 //
50 template <
51  uint32 BAND_LEN,
52  typename aligner_type,
53  typename pattern_string,
54  typename qual_string,
55  typename text_string,
56  typename sink_type>
59  const aligner_type aligner,
60  const pattern_string pattern,
61  const qual_string quals,
62  const text_string text,
63  const int32 min_score,
64  sink_type& sink)
65 {
66  return priv::banded_alignment_score<BAND_LEN>(
67  aligner,
68  pattern,
69  quals,
70  text,
71  min_score,
72  sink );
73 }
74 
75 // a function to compute the alignment score between a pattern and a text string
76 // with banded DP alignment.
77 //
78 // \param aligner alignment algorithm
79 // \param pattern pattern string
80 // \param quals quality string
81 // \param text text string
82 // \param min_score threshold alignment score
83 // \param sink output sink
84 //
85 template <
86  uint32 BAND_LEN,
87  typename aligner_type,
88  typename pattern_string,
89  typename text_string,
90  typename sink_type>
93  const aligner_type aligner,
94  const pattern_string pattern,
95  const text_string text,
96  const int32 min_score,
97  sink_type& sink)
98 {
99  return priv::banded_alignment_score<BAND_LEN>(
100  aligner,
101  pattern,
103  text,
104  min_score,
105  sink );
106 }
107 
108 // a function to compute the alignment score between a pattern and a text string
109 // with banded DP alignment.
110 //
111 // \param aligner alignment algorithm
112 // \param pattern pattern string
113 // \param quals quality string
114 // \param text text string
115 // \param min_score threshold alignment score
116 //
117 // \return best alignment score
118 //
119 template <
120  uint32 BAND_LEN,
121  typename aligner_type,
122  typename pattern_string,
123  typename qual_string,
124  typename text_string>
127  const aligner_type aligner,
128  const pattern_string pattern,
129  const qual_string quals,
130  const text_string text,
131  const int32 min_score)
132 {
133  BestSink<int32> sink;
134  priv::banded_alignment_score<BAND_LEN>(
135  aligner,
136  pattern,
137  quals,
138  text,
139  min_score,
140  sink );
141  return sink.score;
142 }
143 
144 // a function to compute the alignment score between a pattern and a text string
145 // with banded DP alignment.
146 //
147 // \param aligner alignment algorithm
148 // \param pattern pattern string
149 // \param text text string
150 // \param min_score threshold alignment score
151 //
152 // \return best alignment score
153 //
154 template <
155  uint32 BAND_LEN,
156  typename aligner_type,
157  typename pattern_string,
158  typename text_string>
161  const aligner_type aligner,
162  const pattern_string pattern,
163  const text_string text,
164  const int32 min_score)
165 {
166  BestSink<int32> sink;
167  priv::banded_alignment_score<BAND_LEN>(
168  aligner,
169  pattern,
171  text,
172  min_score,
173  sink );
174  return sink.score;
175 }
176 
177 // a function to compute the alignment score between a pattern and a text string
178 // with banded DP alignment in multiple passes, where each pass handles a window
179 // of the pattern and saves a checkpoint for the next.
180 //
181 // \param aligner alignment algorithm
182 // \param pattern pattern string
183 // \param quals quality string
184 // \param text text string
185 // \param min_score threshold alignment score
186 // \param sink output sink
187 //
188 template <
189  uint32 BAND_LEN,
190  typename aligner_type,
191  typename pattern_string,
192  typename qual_string,
193  typename text_string,
194  typename sink_type,
195  typename checkpoint_type>
198  const aligner_type aligner,
199  const pattern_string pattern,
200  const qual_string quals,
201  const text_string text,
202  const int32 min_score,
203  const uint32 window_begin,
204  const uint32 window_end,
205  sink_type& sink,
206  checkpoint_type checkpoint)
207 {
208  return priv::banded_alignment_score<BAND_LEN>(
209  aligner,
210  pattern,
211  quals,
212  text,
213  min_score,
214  window_begin,
215  window_end,
216  sink,
217  checkpoint );
218 }
219 
220 //
221 // Calculate a set of checkpoints of the DP matrix for the alignment between a pattern
222 // and a text.
223 //
224 // \param aligner scoring scheme
225 // \param pattern pattern string (horizontal
226 // \param quals pattern qualities
227 // \param text text string (vertical)
228 // \param min_score minimum score
229 // \param sink output sink
230 // \param checkpoints the set of output checkpointed rows
231 //
232 template <
233  uint32 BAND_LEN,
234  uint32 CHECKPOINTS,
235  typename aligner_type,
236  typename pattern_string,
237  typename qual_string,
238  typename text_string,
239  typename sink_type,
240  typename checkpoint_type>
243  const aligner_type aligner,
244  const pattern_string pattern,
245  const qual_string quals,
246  const text_string text,
247  const int32 min_score,
248  sink_type& sink,
249  checkpoint_type checkpoints)
250 {
251  priv::banded_alignment_checkpoints<BAND_LEN,CHECKPOINTS>(
252  aligner,
253  pattern,
254  quals,
255  text,
256  min_score,
257  sink,
258  checkpoints );
259 }
260 
261 //
262 // Compute the banded Dynamic Programming submatrix between two given checkpoints,
263 // storing its flow at each cell.
264 // The function returns the submatrix width.
265 //
266 // \tparam BAND_LEN size of the DP band
267 //
268 // \tparam CHECKPOINTS number of DP rows between each checkpoint
269 //
270 // \tparam checkpoint_type a class to represent the collection of checkpoints,
271 // represented as a linear array storing each checkpointed
272 // band contiguously.
273 // The class has to provide the const indexing operator[].
274 //
275 // \tparam submatrix_type a class to store the flow submatrix, represented
276 // as a linear array of size (BAND_LEN*CHECKPOINTS).
277 // The class has to provide the non-const indexing operator[].
278 // Note that the submatrix entries can assume only 3 values,
279 // and could hence be packed in 2 bits.
280 //
281 // \param aligner scoring scheme
282 // \param pattern pattern string (horizontal
283 // \param quals pattern qualities
284 // \param text text string (vertical)
285 // \param min_score minimum score
286 // \param checkpoints the set of checkpointed rows
287 // \param checkpoint_id the starting checkpoint used as the beginning of the submatrix
288 // \param submatrix the output submatrix
289 //
290 // \return the submatrix width
291 //
292 template <
293  uint32 BAND_LEN,
294  uint32 CHECKPOINTS,
295  typename aligner_type,
296  typename pattern_string,
297  typename qual_string,
298  typename text_string,
299  typename checkpoint_type,
300  typename submatrix_type>
303  const aligner_type aligner,
304  const pattern_string pattern,
305  const qual_string quals,
306  const text_string text,
307  const int32 min_score,
308  checkpoint_type checkpoints,
309  const uint32 checkpoint_id,
310  submatrix_type submatrix)
311 {
312  return priv::banded_alignment_submatrix<BAND_LEN,CHECKPOINTS>(
313  aligner,
314  pattern,
315  quals,
316  text,
317  min_score,
318  checkpoints,
319  checkpoint_id,
320  submatrix );
321 }
322 
323 //
324 // Backtrace an optimal alignment using a full DP algorithm.
325 //
326 // \tparam CHECKPOINTS number of DP rows between each checkpoint
327 // \tparam aligner_type an \ref Aligner "Aligner" algorithm
328 // \tparam pattern_string a string representing the pattern.
329 // \tparam qual_string an array representing the pattern qualities.
330 // \tparam text_string a string representing the text.
331 // \tparam checkpoints_type an array-like class defining operator[], used to represent a reduced DP score matrix,
332 // containing all the matrix columns whose index is a multiple of CHECKPOINTS;
333 // the type of the matrix cells depends on the aligner, and can be obtained as
334 // typename checkpoints_storage_type<aligner_type>::type
335 // \tparam submatrix_type an array-like class defining operator[], used to represent a temporary DP flow submatrix,
336 // containing all the matrix flow cells between two checkpointed columns.
337 // the number of bits needed for the submatrix cells depends on the aligner, and can be obtained as
338 // direction_vector_traits<aligner_type>::BITS
339 // \tparam backtracer_type a model of \ref Backtracer.
340 //
341 // \param aligner alignment algorithm
342 // \param pattern pattern to be aligned
343 // \param quals pattern quality scores
344 // \param text text to align the pattern to
345 // \param min_score minimum accepted score
346 // \param backtracer backtracking delegate
347 // \param checkpoints temporary checkpoints storage
348 // \param submatrix temporary submatrix storage
349 //
350 // \return reported alignment
351 //
352 template <
353  uint32 BAND_LEN,
354  uint32 CHECKPOINTS,
355  typename aligner_type,
356  typename pattern_string,
357  typename qual_string,
358  typename text_string,
359  typename backtracer_type,
360  typename checkpoints_type,
361  typename submatrix_type>
364  const aligner_type aligner,
365  const pattern_string pattern,
366  const qual_string quals,
367  const text_string text,
368  const int32 min_score,
369  backtracer_type& backtracer,
370  checkpoints_type checkpoints,
371  submatrix_type submatrix)
372 {
373  //
374  // This function performs backtracing in a completely generic fashion that works
375  // for any DP based aligner.
376  // It does so using 2 module-wide generic functions:
377  // * alignment_score_checkpoints()
378  // * alignment_score_submatrix()
379  //
380  // and one aligner-specific function:
381  // * priv::alignment_traceback()
382  //
383  BestSink<int32> best;
384  banded_alignment_score_checkpoints<BAND_LEN,CHECKPOINTS>(
385  aligner, pattern, quals, text, /*pos,*/ min_score, best, checkpoints );
386 
387  const uint32 n_checkpoints = 1u + (pattern.length() + CHECKPOINTS-1)/CHECKPOINTS;
388 
389  uint2 sink = best.sink;
390  if (sink.x == uint32(-1) ||
391  sink.y == uint32(-1))
392  return Alignment<int32>( best.score, make_uint2( uint32(-1), uint32(-1) ), make_uint2( uint32(-1), uint32(-1) ) );
393 
394  // clip the end of the alignment
395  backtracer.clip( pattern.length() - sink.y );
396 
397  // find the checkpoint containing the sink
398  int32 checkpoint_id = n_checkpoints-2;
399 
400  if (aligner_type::TYPE == LOCAL)
401  {
402  for (; checkpoint_id >= 0; --checkpoint_id)
403  {
404  if (checkpoint_id * CHECKPOINTS < sink.y)
405  break;
406  }
407  }
408 
409  //store state (H, E, or F) between checkpoints
410  uint8 state = HSTATE;
411 
412  // backtrack until needed
413  for (; checkpoint_id >= 0; --checkpoint_id)
414  {
415  const uint32 submatrix_height = banded_alignment_score_submatrix<BAND_LEN,CHECKPOINTS>(
416  aligner, pattern, quals, text, /*pos,*/ min_score, checkpoints, checkpoint_id, submatrix );
417 
418  if (priv::banded_alignment_traceback<BAND_LEN,CHECKPOINTS>(
419  aligner, checkpoints, checkpoint_id, submatrix, submatrix_height, state, sink, backtracer ))
420  break;
421  }
422 
423  // clip the beginning of the alignment
424  backtracer.clip( sink.y );
425 
426  return Alignment<int32>( best.score, sink, best.sink );
427 }
428 
429 //
430 // Backtrace an optimal alignment using a full DP algorithm.
431 //
432 // \tparam MAX_PATTERN_LEN maximum pattern length
433 // \tparam MAX_TEXT_LEN maximum text length
434 // \tparam CHECKPOINTS number of DP rows between each checkpoint
435 // \tparam aligner_type an \ref Aligner "Aligner" algorithm
436 // \tparam pattern_string a string representing the pattern.
437 // \tparam qual_string an array representing the pattern qualities.
438 // \tparam text_string a string representing the text.
439 // \tparam backtracer_type a model of \ref Backtracer.
440 //
441 // \param aligner alignment algorithm
442 // \param pattern pattern to be aligned
443 // \param quals pattern quality scores
444 // \param text text to align the pattern to
445 // \param min_score minimum accepted score
446 // \param backtracer backtracking delegate
447 //
448 // \return reported alignment
449 //
450 template <
451  uint32 BAND_LEN,
452  uint32 MAX_PATTERN_LEN,
453  uint32 CHECKPOINTS,
454  typename aligner_type,
455  typename pattern_string,
456  typename qual_string,
457  typename text_string,
458  typename backtracer_type>
461  const aligner_type aligner,
462  const pattern_string pattern,
463  const qual_string quals,
464  const text_string text,
465  const int32 min_score,
466  backtracer_type& backtracer)
467 {
468  typedef typename checkpoint_storage_type<aligner_type>::type checkpoint_type;
469  typedef typename column_storage_type<aligner_type>::type cell_type;
470 
472  const uint32 ELEMENTS_PER_WORD = 32 / BITS;
473 
474  checkpoint_type checkpoints[ BAND_LEN*(1u + (MAX_PATTERN_LEN + CHECKPOINTS-1)/CHECKPOINTS) ];
475 
476  NVBIO_VAR_UNUSED const uint32 SUBMATRIX_WORDS = (BAND_LEN*CHECKPOINTS + ELEMENTS_PER_WORD-1) / ELEMENTS_PER_WORD;
477  uint32 submatrix_base[ SUBMATRIX_WORDS ];
478  PackedStream<uint32*,uint8,BITS,false> submatrix( submatrix_base );
479 
480  return banded_alignment_traceback<BAND_LEN,CHECKPOINTS>(
481  aligner,
482  pattern,
483  quals,
484  text,
485  min_score,
486  backtracer,
487  checkpoints,
488  submatrix );
489 }
490 
491 } // namespace alignment
492 } // namespace nvbio