NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
alignment_test_utils.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 // alignment_test.cu
29 //
30 
31 #pragma once
32 
34 #include <nvbio/basic/timer.h>
35 #include <nvbio/basic/console.h>
36 #include <nvbio/basic/cuda/ldg.h>
42 #include <nvbio/basic/dna.h>
45 #include <nvbio/alignment/sink.h>
46 #include <thrust/device_vector.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <vector>
50 #include <algorithm>
51 
52 namespace nvbio {
53 namespace aln {
54 
56 {
57  ALL = 0xFFFFFFFFu,
58  ED = 1u,
59  SW = 2u,
60  GOTOH = 4u,
61  ED_BANDED = 8u,
62  SW_BANDED = 16u,
63  GOTOH_BANDED = 32u,
64  SW_WARP = 64u,
65  SW_STRIPED = 128u,
66  FUNCTIONAL = 256u,
67 };
68 
69 // make a light-weight string from an ASCII char string
71 {
72  return vector_view<const char*>( uint32(strlen(str)), str );
73 }
74 
75 // run-length encode a string
76 std::string rle(const char* input)
77 {
78  char buffer[1024];
79  buffer[0] = '\0';
80 
81  char prev = '\0';
82  uint32 cnt = 0;
83 
84  for (const char* p = input; *p != '\0'; ++p)
85  {
86  if (*p == prev)
87  ++cnt;
88  else
89  {
90  if (p != input)
91  sprintf( buffer + strlen(buffer), "%u%c", cnt, prev );
92  prev = *p;
93  cnt = 1u;
94  }
95  }
96  if (cnt)
97  sprintf( buffer + strlen(buffer), "%u%c", cnt, prev );
98  return std::string( buffer );
99 }
100 
101 // fill an array with random symbols
102 template <uint32 BITS, typename rand_type>
103 void fill_packed_stream(rand_type& rand, const uint32 value_range, const uint32 n, uint32* storage)
104 {
105  typedef PackedStream<uint32*,uint8,BITS,false> packed_stream_type;
106 
107  packed_stream_type stream( storage );
108 
109  for (uint32 i = 0; i < n; ++i)
110  stream[i] = rand.next() % value_range;
111 }
112 
113 // abstract container for the score matrices
114 template <uint32 N, uint32 M, typename aligner_tag>
115 struct ScoreMatrices {};
116 
117 // ScoreMatrices EditDistanceTag-specialization
118 template <uint32 N, uint32 M>
120 {
121  int32 H[N+1][M+1];
122  char H_flow[N+1][M+1];
123 
124  void print()
125  {
126  for (uint32 i = 0; i <= N; ++i)
127  {
128  fprintf(stderr,"H[%2u] : ", i);
129  for (uint32 j = 0; j <= M; ++j)
130  fprintf(stderr, " %3d", H[i][j]);
131  fprintf(stderr,"\n");
132  }
133  fprintf(stderr,"\n");
134  for (uint32 i = 0; i <= N; ++i)
135  {
136  fprintf(stderr,"Hd[%2u] : ", i);
137  for (uint32 j = 0; j <= M; ++j)
138  fprintf(stderr, " %c", H_flow[i][j] );
139  fprintf(stderr,"\n");
140  }
141  fprintf(stderr,"\n");
142  }
143 };
144 
145 // ScoreMatrices SmithWatermanTag-specialization
146 template <uint32 N, uint32 M>
148 {
149  int32 H[N+1][M+1];
150  char H_flow[N+1][M+1];
151 
152  void print()
153  {
154  for (uint32 i = 0; i <= N; ++i)
155  {
156  fprintf(stderr,"H[%2u] : ", i);
157  for (uint32 j = 0; j <= M; ++j)
158  fprintf(stderr, " %3d", H[i][j]);
159  fprintf(stderr,"\n");
160  }
161  fprintf(stderr,"\n");
162  for (uint32 i = 0; i <= N; ++i)
163  {
164  fprintf(stderr,"Hd[%2u] : ", i);
165  for (uint32 j = 0; j <= M; ++j)
166  fprintf(stderr, " %c", H_flow[i][j] );
167  fprintf(stderr,"\n");
168  }
169  fprintf(stderr,"\n");
170  }
171 };
172 
173 // ScoreMatrices GotohTag-specialization
174 template <uint32 N, uint32 M>
175 struct ScoreMatrices<N,M,aln::GotohTag>
176 {
177  int32 H[N+1][M+1];
178  int32 E[N+1][M+1];
179  int32 F[N+1][M+1];
180  char H_flow[N+1][M+1];
181  char E_flow[N+1][M+1];
182  char F_flow[N+1][M+1];
183 
184  void print()
185  {
186  for (uint32 i = 0; i <= N; ++i)
187  {
188  fprintf(stderr,"H[%2u] : ", i);
189  for (uint32 j = 0; j <= M; ++j)
190  fprintf(stderr, " %3d", H[i][j]);
191  fprintf(stderr,"\n");
192  }
193  fprintf(stderr,"\n");
194  for (uint32 i = 0; i <= N; ++i)
195  {
196  fprintf(stderr,"E[%2u] : ", i);
197  for (uint32 j = 0; j <= M; ++j)
198  fprintf(stderr, " %3d", E[i][j]);
199  fprintf(stderr,"\n");
200  }
201  fprintf(stderr,"\n");
202  for (uint32 i = 0; i <= N; ++i)
203  {
204  fprintf(stderr,"F[%2u] : ", i);
205  for (uint32 j = 0; j <= M; ++j)
206  fprintf(stderr, " %3d", F[i][j]);
207  fprintf(stderr,"\n");
208  }
209  fprintf(stderr,"\n");
210  for (uint32 i = 0; i <= N; ++i)
211  {
212  fprintf(stderr,"Hd[%2u] : ", i);
213  for (uint32 j = 0; j <= M; ++j)
214  fprintf(stderr, " %c", H_flow[i][j] );
215  fprintf(stderr,"\n");
216  }
217  fprintf(stderr,"\n");
218  for (uint32 i = 0; i <= N; ++i)
219  {
220  fprintf(stderr,"Ed[%2u] : ", i);
221  for (uint32 j = 0; j <= M; ++j)
222  fprintf(stderr, " %c", E_flow[i][j] );
223  fprintf(stderr,"\n");
224  }
225  fprintf(stderr,"\n");
226  for (uint32 i = 0; i <= N; ++i)
227  {
228  fprintf(stderr,"Fd[%2u] : ", i);
229  for (uint32 j = 0; j <= M; ++j)
230  fprintf(stderr, " %c", F_flow[i][j] );
231  fprintf(stderr,"\n");
232  }
233  }
234 };
235 
236 template <uint32 M, uint32 N, uint32 BAND_LEN, aln::AlignmentType TYPE, typename scheme_type>
237 int32 ref_banded_sw(const uint8* str, const uint8* ref, const uint32 pos, const aln::SmithWatermanAligner<TYPE, scheme_type> aligner)
238 {
239  const scheme_type& scoring = aligner.scheme;
240 
241  const int32 G = scoring.deletion();
242  const int32 I = scoring.insertion();
243  const int32 S = scoring.mismatch();
244  const int32 V = scoring.match();
245 
246  const uint32 start = pos;
247 
248  int32 best_score = Field_traits<int32>::min();
249 
250  int32 band[BAND_LEN];
251  for (int32 j = 0; j < BAND_LEN; ++j)
252  band[j] = 0;
253 
254  for (int32 i = 0; i < M; ++i)
255  {
256  const uint8 q = str[i];
257 
258  int32 top, left, diagonal, hi;
259 
260  // j == 0 case
261  {
262  const int32 S_ij = (ref[start+i] == q) ? V : S;
263  diagonal = band[0] + S_ij;
264  top = band[1] + G;
265  hi = nvbio::max( top, diagonal );
266  if (TYPE == aln::LOCAL)
267  {
268  hi = nvbio::max( hi, int32(0) ); // clamp to zero
269  best_score = nvbio::max( best_score, hi ); // save the highest score in the matrix
270  }
271  band[0] = hi;
272  }
273 
274  for (uint32 j = 1; j < BAND_LEN-1; ++j)
275  {
276  const int32 S_ij = (ref[start+i+j] == q) ? V : S;
277  diagonal = band[j] + S_ij;
278  top = band[j+1] + G;
279  left = band[j-1] + I;
280  hi = nvbio::max3( top, left, diagonal );
281  if (TYPE == LOCAL)
282  {
283  hi = nvbio::max( hi, int32(0) ); // clamp to zero
284  best_score = nvbio::max( best_score, hi ); // save the highest score in the matrix
285  }
286  band[j] = hi;
287  }
288 
289  // j == BAND_LEN-1 case
290  const int32 S_ij = (ref[start+i+BAND_LEN-1] == q) ? V : S;
291  diagonal = band[BAND_LEN-1] + S_ij;
292  left = band[BAND_LEN-2] + I;
293  hi = nvbio::max( left, diagonal );
294  if (TYPE == LOCAL)
295  {
296  hi = nvbio::max( hi, int32(0) ); // clamp to zero
297  best_score = nvbio::max( best_score, hi ); // save the highest score in the matrix
298  }
299  band[BAND_LEN-1] = hi;
300  }
301 
302  if (TYPE == GLOBAL)
303  best_score = band[ BAND_LEN-1 ];
304  else if (TYPE == SEMI_GLOBAL)
305  {
306  // get the highest score along the long edge of the path graph
307  best_score = band[0];
308  for (uint32 j = 1; j < BAND_LEN; ++j)
309  best_score = nvbio::max( best_score, band[j] );
310  }
311  return best_score;
312 }
313 
314 template <uint32 M, uint32 N, uint32 BAND_LEN, aln::AlignmentType TYPE, typename scheme_type>
315 int32 ref_banded_sw(const uint8* pattern, const uint8* text, const uint32 pos, const aln::GotohAligner<TYPE, scheme_type> aligner)
316 {
317  const scheme_type& scoring = aligner.scheme;
318 
319  const int32 G_o = scoring.pattern_gap_open();
320  const int32 G_e = scoring.pattern_gap_extension();
321  const int32 S = scoring.mismatch();
322  const int32 V = scoring.match();
323 
324  const uint32 start = pos;
325 
326  int32 best_score = Field_traits<int32>::min();
327  int32 infimum = Field_traits<int32>::min() - G_e;
328 
329  int32 H_band[BAND_LEN];
330  int32 F_band[BAND_LEN];
331 
332  H_band[0] = 0;
333  for (uint32 j = 1; j < BAND_LEN; ++j)
334  H_band[j] = TYPE == GLOBAL ? scoring.text_gap_open() + (j-1)*scoring.text_gap_extension() : 0;
335 
336  for (uint32 j = 0; j < BAND_LEN; ++j)
337  F_band[j] = infimum;
338 
339 #define DEBUG_THIS
340 #if defined(DEBUG_THIS)
341  FILE* file = fopen("debug.txt", "w");
342  #define DEBUG_THIS_STATEMENT(x) x
343 #else
344  #define DEBUG_THIS_STATEMENT(x)
345 #endif
346 
347  for (uint32 i = 0; i < M; ++i)
348  {
349  #if defined(DEBUG_THIS)
350  {
351  fprintf(file,"F[%2u] = ", i);
352  for (uint32 j = 0; j < BAND_LEN; ++j)
353  fprintf(file," %3d", F_band[j]);
354  fprintf(file,"\n");
355 
356  fprintf(file,"H[%2u] = ", i);
357  for (uint32 j = 0; j < BAND_LEN; ++j)
358  fprintf(file," %3d", H_band[j]);
359  fprintf(file,"\n\n");
360  }
361  #endif
362 
363  const uint8 q = pattern[i];
364 
365  DEBUG_THIS_STATEMENT( fprintf(file,"U[%2u] = ", i) );
366 
367  // update F for the next row
368  for (uint32 j = 0; j < BAND_LEN-1; ++j)
369  {
370  const int32 ftop = F_band[j+1] + G_e;
371  const int32 htop = H_band[j+1] + G_o;
372  F_band[j] = nvbio::max( ftop, htop );
373  if (i && (j == BAND_LEN-2))
374  assert( F_band[j] == htop );
375 
376  DEBUG_THIS_STATEMENT( fprintf(file," %c", ftop > htop ? 'F' : 'H') );
377  }
378  F_band[BAND_LEN-1] = infimum;
379 
380  //if (i < 8) fprintf(stderr,"\nU[%2u] = ", i);
381  // j == 0 case
382  {
383  const int32 S_ij = (text[start+i] == q) ? V : S;
384  const int32 diagonal = H_band[0] + S_ij;
385  const int32 top = F_band[0];
386  int32 hi = nvbio::max( top, diagonal );
387 
388  if (TYPE == LOCAL)
389  {
390  hi = nvbio::max( hi, int32(0) ); // clamp to zero
391  best_score = nvbio::max( best_score, hi );
392  }
393  H_band[0] = hi;
394 
395  DEBUG_THIS_STATEMENT( fprintf(file," %c", top > diagonal ? 'I' : 'S') );
396  }
397 
398  // compute E_1
399  int32 E_j = H_band[0] + G_o;
400 
401  for (uint32 j = 1; j < BAND_LEN-1; ++j)
402  {
403  const uint32 g = text[start+i+j];
404 
405  const int32 S_ij = (g == q) ? V : S;
406  const int32 diagonal = H_band[j] + S_ij;
407  const int32 top = F_band[j];
408  const int32 left = E_j;
409  int32 hi = nvbio::max3( top, left, diagonal );
410  if (TYPE == LOCAL)
411  {
412  hi = nvbio::max( hi, int32(0) ); // clamp to zero
413  best_score = nvbio::max( best_score, hi );
414  }
415  H_band[j] = hi;
416  DEBUG_THIS_STATEMENT( fprintf(file," %c", (top > left ? (top > diagonal ? 'I' : 'S') : (left > diagonal ? 'D' : 'S'))) );
417 
418  // update E for the next round, i.e. j+1
419  const int32 eleft = E_j + G_e;
420  const int32 ediagonal = hi + G_o;
421  E_j = nvbio::max( ediagonal, eleft );
422  }
423 
424  // j == BAND_LEN-1 case
425  {
426  const uint8 g = text[start+i+BAND_LEN-1];
427 
428  const int32 S_ij = (g == q) ? V : S;
429  const int32 diagonal = H_band[ BAND_LEN-1 ] + S_ij;
430  const int32 left = E_j;
431  int32 hi = nvbio::max( left, diagonal );
432  if (TYPE == LOCAL)
433  {
434  hi = nvbio::max( hi, int32(0) ); // clamp to zero
435  best_score = nvbio::max( best_score, hi );
436  }
437  H_band[ BAND_LEN-1 ] = hi;
438 
439  DEBUG_THIS_STATEMENT( fprintf(file," %c", left > diagonal ? 'D' : 'S') );
440  }
441  DEBUG_THIS_STATEMENT( fprintf(file,"\n") );
442  }
443 
444  DEBUG_THIS_STATEMENT(fclose(file));
445 
446  if (TYPE == GLOBAL)
447  best_score = H_band[ BAND_LEN-1 ];
448  else if (TYPE == SEMI_GLOBAL)
449  {
450  // get the highest score along the long edge of the path graph
451  best_score = H_band[0];
452  for (uint32 j = 1; j < BAND_LEN; ++j)
453  best_score = nvbio::max( best_score, H_band[j] );
454  }
455  return best_score;
456 
457 #if defined(DEBUG_THIS)
458  #undef DEBUG_THIS_STATEMENT
459  #undef DEBUG_THIS
460 #endif
461 }
462 
463 template <uint32 M, uint32 N, aln::AlignmentType TYPE, typename scheme_type>
465  const uint8* str,
466  const uint8* ref,
469 {
470  const scheme_type& scoring = aligner.scheme;
471 
472  const int32 G = scoring.deletion();
473  const int32 I = scoring.insertion();
474  const int32 S = scoring.mismatch();
475  const int32 V = scoring.match();
476 
477  mat->H[0][0] = 0;
478  mat->H_flow[0][0] = '*';
479  for (uint32 j = 1; j <= M; ++j)
480  {
481  mat->H[0][j] = TYPE != LOCAL ? G * j : 0;
482  mat->H_flow[0][j] = '*';
483  }
484  for (uint32 i = 1; i <= N; ++i)
485  {
486  mat->H[i][0] = TYPE == GLOBAL ? G * i : 0;
487  mat->H_flow[i][0] = '*';
488  }
489 
490  int32 best_score = Field_traits<int32>::min();
491 
492  for (uint32 i = 1; i <= N; ++i)
493  {
494  for (uint32 j = 1; j <= M; ++j)
495  {
496  const uint8 r_i = ref[i-1];
497  const uint8 s_j = str[j-1];
498  const int32 S_ij = (r_i == s_j) ? V : S;
499 
500  const int32 top = mat->H[i-1][j] + G;
501  const int32 left = mat->H[i][j-1] + I;
502  const int32 diag = mat->H[i-1][j-1] + S_ij;
503 
504  mat->H[i][j] = nvbio::max3( top, left, diag );
505 
506  mat->H_flow[i][j] = top > left ?
507  (top > diag ? '|' : '\\') :
508  (left > diag ? '-' : '\\');
509 
510  if (TYPE == aln::LOCAL)
511  best_score = nvbio::max( best_score, mat->H[i][j] = nvbio::max( mat->H[i][j], int32(0) ) );
512  }
513  if (TYPE == aln::SEMI_GLOBAL)
514  best_score = nvbio::max( best_score, mat->H[i][M] );
515  }
516  if (TYPE == aln::LOCAL || TYPE == aln::SEMI_GLOBAL)
517  return best_score;
518  else
519  return mat->H[N][M];
520 }
521 
522 template <uint32 M, uint32 N, aln::AlignmentType TYPE>
524  const uint8* str,
525  const uint8* ref,
526  const aln::EditDistanceAligner<TYPE> aligner,
528 {
529  return ref_sw<M,N>(
530  str,
531  ref,
532  aln::make_smith_waterman_aligner<TYPE>( priv::EditDistanceSWScheme() ),
534 }
535 
536 template <uint32 M, uint32 N, aln::AlignmentType TYPE, typename scheme_type>
538  const uint8* str,
539  const uint8* ref,
542 {
543  const scheme_type& scoring = aligner.scheme;
544 
545  const int32 G_o = scoring.pattern_gap_open();
546  const int32 G_e = scoring.pattern_gap_extension();
547  const int32 S = scoring.mismatch();
548  const int32 V = scoring.match();
549 
550  mat->H[0][0] = 0;
551  mat->F[0][0] = (TYPE != LOCAL) ? -100000 : 0;
552  mat->E[0][0] = (TYPE != LOCAL) ? -100000 : 0;
553  mat->H_flow[0][0] = '*';
554  mat->E_flow[0][0] = '*';
555  mat->F_flow[0][0] = '*';
556  for (uint32 j = 1; j <= M; ++j)
557  {
558  mat->H[0][j] = (TYPE != LOCAL) ? G_o + G_e * (j-1) : 0;
559  mat->E[0][j] = mat->F[0][j] = (TYPE != LOCAL) ? -100000 : 0;
560  mat->H_flow[0][j] = '*';
561  mat->E_flow[0][j] = '*';
562  mat->F_flow[0][j] = '*';
563  }
564  for (uint32 i = 1; i <= N; ++i)
565  {
566  mat->H[i][0] = (TYPE == aln::GLOBAL) ? G_o + G_e * (i-1) : 0;
567  mat->E[i][0] = mat->F[i][0] = (TYPE != LOCAL) ? -100000 : 0;
568  mat->H_flow[i][0] = '*';
569  mat->E_flow[i][0] = '*';
570  mat->F_flow[i][0] = '*';
571  }
572 
573  int32 best_score = Field_traits<int32>::min();
574  uint2 best_sink = make_uint2(uint32(-1),uint32(-1));
575 
576  for (uint32 i = 1; i <= N; ++i)
577  {
578  for (uint32 j = 1; j <= M; ++j)
579  {
580  const uint8 r_i = ref[i-1];
581  const uint8 s_j = str[j-1];
582  const int32 S_ij = (r_i == s_j) ? V : S;
583 
584  mat->E_flow[i][j] = mat->E[i][j-1] + G_e > mat->H[i][j-1] + G_o ? '-' : 'H';
585  mat->F_flow[i][j] = mat->F[i-1][j] + G_e > mat->H[i-1][j] + G_o ? '|' : 'H';
586 
587  mat->E[i][j] = nvbio::max( mat->E[i][j-1] + G_e, mat->H[i][j-1] + G_o );
588  mat->F[i][j] = nvbio::max( mat->F[i-1][j] + G_e, mat->H[i-1][j] + G_o );
589 
590  mat->H_flow[i][j] = mat->F[i][j] > mat->E[i][j] ?
591  (mat->F[i][j] > mat->H[i-1][j-1] + S_ij ? 'F' : '\\') :
592  (mat->E[i][j] > mat->H[i-1][j-1] + S_ij ? 'E' : '\\');
593 
594  mat->H[i][j] = nvbio::max3( mat->H[i-1][j-1] + S_ij, mat->E[i][j], mat->F[i][j] );
595 
596  if (TYPE == aln::LOCAL)
597  {
598  mat->H[i][j] = nvbio::max( mat->H[i][j], int32(0) );
599 
600  if (mat->H[i][j] == 0)
601  mat->H_flow[i][j] = '0';
602 
603  if (best_score < mat->H[i][j])
604  {
605  best_score = mat->H[i][j];
606  best_sink = make_uint2( i, j );
607  }
608  }
609  }
610  if (TYPE == aln::SEMI_GLOBAL)
611  {
612  if (best_score < mat->H[i][M])
613  {
614  best_score = mat->H[i][M];
615  best_sink = make_uint2( i, M );
616  }
617  }
618  }
619  if (TYPE == aln::LOCAL || TYPE == aln::SEMI_GLOBAL)
620  return best_score;
621  else
622  return mat->H[N][M];
623 }
624 
625 //
626 // A test Backtracer class, \see Traceback.
627 //
629 {
630  void clear() { aln[0] = '\0'; aln_len = 0; }
631 
633  void clip(const uint32 len)
634  {
635  for (uint32 i = 0; i < len; ++i)
636  aln[aln_len+i] = 'S';
637  aln[aln_len+len] = '\0';
638  }
640  void push(const uint8 op)
641  {
642  aln[ aln_len++ ] = "MID"[op];
643  }
644 
645  // compute the score of the resulting alignment
646  //
647  template <AlignmentType TYPE>
650  const uint32 offset,
651  const uint8* str,
652  const uint8* ref)
653  {
654  int32 score = 0;
655 
656  for (uint32 a = 0, j = 0, k = offset; a < aln_len; ++a)
657  {
658  const char op = aln[aln_len-a-1];
659 
660  if (op == 'S')
661  ++j;
662  else if (op == 'I')
663  {
664  // apply a gap-open or gap-extension penalty according to whether it's the first insertion or not
665  --score;
666 
667  // mover in the pattern
668  ++j;
669  }
670  else if (op == 'D')
671  {
672  // apply a gap-open or gap-extension penalty according to whether it's the first deletion or not
673  --score;
674 
675  // move in the text
676  ++k;
677  }
678  else if (op == 'M')
679  {
680  // add a match or mismatch score
681  score -= (str[j] == ref[k]) ? 0 : 1;
682 
683  // move diagonally
684  ++j; ++k;
685  }
686  }
687  return score;
688  }
689 
690  // compute the score of the resulting alignment
691  //
692  template <AlignmentType TYPE, typename scoring_type>
695  const uint32 offset,
696  const uint8* str,
697  const uint8* ref)
698  {
699  const scoring_type& scoring = aligner.scheme;
700 
701  int32 score = 0;
702 
703  for (uint32 a = 0, j = 0, k = offset; a < aln_len; ++a)
704  {
705  const char op = aln[aln_len-a-1];
706 
707  if (op == 'S')
708  ++j;
709  else if (op == 'I')
710  {
711  // apply a gap-open or gap-extension penalty according to whether it's the first insertion or not
712  score += scoring.insertion();
713 
714  // mover in the pattern
715  ++j;
716  }
717  else if (op == 'D')
718  {
719  // apply a gap-open or gap-extension penalty according to whether it's the first deletion or not
720  score += scoring.deletion();
721 
722  // move in the text
723  ++k;
724  }
725  else if (op == 'M')
726  {
727  // add a match or mismatch score
728  score += (str[j] == ref[k]) ? scoring.match() : scoring.mismatch();
729 
730  // move diagonally
731  ++j; ++k;
732  }
733  }
734  return score;
735  }
736 
737  // compute the score of the resulting alignment
738  //
739  template <AlignmentType TYPE, typename scoring_type>
742  const uint32 offset,
743  const uint8* str,
744  const uint8* ref)
745  {
746  const scoring_type& scoring = aligner.scheme;
747 
748  int32 score = 0;
749  char state = 'S';
750 
751  for (uint32 a = 0, j = 0, k = offset; a < aln_len; ++a)
752  {
753  const char op = aln[aln_len-a-1];
754 
755  if (op == 'S')
756  ++j;
757  else if (op == 'I')
758  {
759  // apply a gap-open or gap-extension penalty according to whether it's the first insertion or not
760  score += (state != op) ? scoring.pattern_gap_open() : scoring.pattern_gap_extension();
761 
762  // mover in the pattern
763  ++j;
764  }
765  else if (op == 'D')
766  {
767  // apply a gap-open or gap-extension penalty according to whether it's the first deletion or not
768  score += (state != op) ? scoring.text_gap_open() : scoring.text_gap_extension();
769 
770  // move in the text
771  ++k;
772  }
773  else if (op == 'M')
774  {
775  // add a match or mismatch score
776  score += (str[j] == ref[k]) ? scoring.match() : scoring.mismatch();
777 
778  // move diagonally
779  ++j; ++k;
780  }
781 
782  // store new state
783  state = op;
784  }
785  return score;
786  }
787 
788  char aln[1024];
790 };
791 
792 } // namespace sw
793 } // namespace nvbio