47 typename scoring_type,
54 const scoring_type& scoring,
58 const int32 min_score,
62 #if __CUDA_ARCH__ >= 350
63 typedef int32 score_type;
69 const uint32 M = str.length();
70 const uint32 N = ref.length();
72 const score_type SCORE_GAP = scoring.deletion();
73 const score_type SCORE_INSERTION = scoring.insertion();
76 score_type h_top, h_left, h_diag, hi;
79 alignment best_alignment = alignment::minimum_value();
86 score_type temp_cache;
87 uint8 reference_cache;
93 const uint32 wi = warp_tid() + 1;
96 for (
uint32 i = warp_tid(); i <= N; i += WARP_SIZE)
97 temp[i] = (
TYPE ==
GLOBAL ? SCORE_GAP * (i + 1) : 0);
99 for (
uint32 warp_block = 0; warp_block < M; warp_block += WARP_SIZE)
102 warp_block_width = (warp_block + WARP_SIZE >= M ? M % WARP_SIZE : WARP_SIZE);
104 const uint32 i = wi + warp_block;
107 h_top = (
TYPE !=
LOCAL ? SCORE_GAP * i : 0);
109 h_diag = (
TYPE !=
LOCAL ? SCORE_GAP * (i - 1) : 0);
112 const uint8 s_i = (i <= M ? str[i - 1] : 0);
113 const uint8 q_i = (i <= M ? quals[i - 1] : 0);
116 for (
uint32 block_diag = 2; block_diag <= warp_block_width + N; block_diag += WARP_SIZE)
119 temp_cache = (block_diag - 2) + warp_tid() < N ? temp[(block_diag - 2) + warp_tid()] : 0;
120 reference_cache = (block_diag - 2) + warp_tid() < N ? ref[(block_diag - 2) + warp_tid()] : 0;
122 for (
uint32 diag = block_diag; diag < block_diag + WARP_SIZE; diag++)
127 const uint32 j = diag - wi;
130 if (wi <= diag_len && j <= N)
135 r_j = reference_cache;
141 const score_type S_ij = (r_j == s_i) ? scoring.match(q_i) : scoring.mismatch(q_i);
146 h_left + SCORE_INSERTION);
163 if (hi > best_alignment.score)
164 best_alignment = alignment(hi, make_uint2(j, i));
174 r_j = __shfl_up(r_j, 1);
176 h_left = __shfl_up(hi, 1);
179 temp_cache = __shfl_down(temp_cache, 1);
180 reference_cache = __shfl_down(reference_cache, 1);
188 __shared__
volatile alignment sm_red [WARP_SIZE * NUM_WARPS * 2];
189 volatile alignment *sm_warp_red = sm_red + WARP_SIZE * warp_id() * 2;
190 cuda::scan<32>(best_alignment, alignment::max_operator(), alignment::minimum_value(), sm_warp_red);
191 best_alignment = cuda::scan_total<32>(sm_warp_red);
196 best_alignment.score = __shfl(hi, warp_block_width - 1);
197 best_alignment.sink = make_uint2(N, M);
200 *sink = best_alignment.sink;
201 return best_alignment.score;
212 typename scoring_type,
213 typename pattern_string,
214 typename qual_string,
215 typename text_string,
220 const pattern_string pattern,
221 const qual_string quals,
222 const text_string
text,
223 const int32 min_score,
227 #if defined(NVBIO_DEVICE_COMPILATION)
228 return sw_alignment_score<BLOCKDIM,TYPE>(