NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
select_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
34 #include <nvbio/io/alignments.h>
40 #include <nvbio/basic/sum_tree.h>
41 
42 namespace nvbio {
43 namespace bowtie2 {
44 namespace cuda {
45 
//
// Initialize the hit-selection pipeline
//
// NOTE(review): the function-name and call lines were dropped by the HTML
// extraction; reconstructed from the visible arguments and the select_t
// naming pattern — confirm against the original nvbio source.
//
template <typename ScoringScheme>
void select_init_t(BestApproxScoringPipelineState<ScoringScheme>& pipeline, const ParamsPOD params)
{
    // forward the pipeline's individual views to the untyped initializer
    select_init(
        pipeline.reads.size(),
        pipeline.reads.name_stream(),
        pipeline.reads.name_index(),
        pipeline.hits,
        pipeline.trys,
        pipeline.rseeds,
        params );
}
61 
64 
67 
70 
///
/// Select the next seed-hit SA row to process for each active read.
///
/// One thread per entry of the active-read queue: pops the front SA row of
/// the read's top seed-hit deque and emits one (read, hit) pair into the
/// output scoring queues.
///
/// \tparam BatchType    read-batch type exposing size()
/// \tparam ContextType  selection context exposing stop(read_id)
///
template <typename BatchType, typename ContextType> __global__
void select_kernel(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const ParamsPOD                 params)
{
    // per-warp scratch word used by alloc() for warp-synchronous slot allocation
    __shared__ volatile uint32 sm_broadcast[BLOCKDIM >> 5];
    volatile uint32& warp_broadcast = sm_broadcast[ warp_id() ];

    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= scoring_queues.active_read_count()) return;

    // fetch the next item from the work queue
    const packed_read read_info = scoring_queues.active_read( thread_id );
    const uint32 read_id  = read_info.read_id;
          uint32 top_flag = read_info.top_flag;

    NVBIO_CUDA_ASSERT( read_id < read_batch.size() );

    // check whether we finished our loop
    if (context.stop( read_id ))
        return;

    typedef SeedHitDequeArrayDeviceView::reference hit_deque_reference;

    // check whether there's any hits left
    hit_deque_reference hit_deque = hits[ read_id ];
    if (hit_deque.size() == 0)
        return;

    // get a reference to the top element in the hit queue
    SeedHit* hit = const_cast<SeedHit*>( &hit_deque.top() );

    // check whether this hit's range became empty
    if (hit->empty())
    {
        // pop the top of the hit queue
        hit_deque.pop_top();

        // stop if that was the last hit for this read
        if (hit_deque.size() == 0)
            return;

        hit = const_cast<SeedHit*>( &hit_deque.top() );

        // no longer on the top seed, unmask the top flag
        top_flag = 0u;
    }

    // fetch next SA row from the selected hit
    const uint32 sa_pos = hit->pop_front();
    const uint32 r_type = hit->get_readtype() ? 1u : 0u;

    // grab a slot in the output read queue (warp-synchronous allocation)
    const uint32 slot = alloc( scoring_queues.active_reads.out_size, &warp_broadcast );
    NVBIO_CUDA_ASSERT( slot < scoring_queues.active_reads.in_size );

    // write the output active read
    scoring_queues.active_reads.out_queue[slot] = packed_read( read_id, top_flag );

    // write the output hit
    HitReference<HitQueuesDeviceView> out_hit( scoring_queues.hits, slot );
    out_hit.read_id = read_id;
    out_hit.loc     = sa_pos;
    out_hit.seed    = packed_seed( hit->get_posinread(), hit->get_indexdir(), r_type, top_flag );

    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_select( read_id ), "select() : selected SA[%u:%u:%u] in slot [%u])\n", sa_pos, hit->get_indexdir(), hit->get_posinread(), slot);
}
143 
// Repeatedly sample the seed-hit probability tree until a non-empty hit is found.
//
// prob_tree  per-hit probability sum-tree for this read
// hits_data  the read's seed-hit array (leaves of prob_tree)
// rseeds     per-read RNG state, updated in place
// read_id    index of the read being processed
//
// Returns the sampled hit index, or 0 if no non-empty hit was drawn after
// a bounded number of attempts.
template <typename ProbTree>
uint32 randomized_select(ProbTree& prob_tree, SeedHit* hits_data, uint32* rseeds, const uint32 read_id)
{
    for (uint32 attempt = 0; attempt < 10; ++attempt)
    {
    #if 1
        // advance the per-read LCG state (Numerical Recipes constants)
        const uint32 state = rseeds[ read_id ] * 1664525u + 1013904223u;
        rseeds[ read_id ] = state;

        // map the state into [0,1]
        const float u = float(state) / float(0xFFFFFFFFu);
    #else
        // advance the QMC sampler
        const float u = radical_inverse( rseeds[ read_id ]++ );
    #endif

        // draw a candidate hit from the probability tree
        const uint32 hit_id = sample( prob_tree, u );

        if (hits_data[ hit_id ].empty() == false)
            return hit_id;

        // rounding errors can leave prob_tree.sum() > 0 while every leaf is
        // actually zero, so an empty hit may be drawn: retry a few times.
    }
    // give up and fall back to the first hit
    return 0;
}
177 
///
/// Randomized single-hit selection: for each active read, pick the next
/// seed-hit by sampling the per-hit probability tree (or keep the top hit
/// while its flag is set) and emit one SA row into the scoring queues.
///
/// \tparam BatchType    read-batch type exposing size()
/// \tparam ContextType  selection context exposing stop(read_id)
///
template <typename BatchType, typename ContextType> __global__
void rand_select_kernel(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    uint32*                         rseeds,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const ParamsPOD                 params)
{
    typedef SumTree< float* > ProbTree;

    // per-warp scratch word used by alloc() for warp-synchronous slot allocation
    __shared__ volatile uint32 sm_broadcast[BLOCKDIM >> 5];
    volatile uint32& warp_broadcast = sm_broadcast[ warp_id() ];

    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= scoring_queues.active_read_count()) return;

    // fetch the next item from the work queue
    const packed_read read_info = scoring_queues.active_read( thread_id );
    const uint32 read_id  = read_info.read_id;
          uint32 top_flag = read_info.top_flag;

    NVBIO_CUDA_ASSERT( read_id < read_batch.size() );

    // check whether we finished our loop
    if (context.stop( read_id ))
        return;

    typedef SeedHitDequeArrayDeviceView::reference hit_deque_reference;

    // check whether there's any hits left
    hit_deque_reference hit_deque = hits[ read_id ];
    if (hit_deque.size() == 0)
        return;

    ProbTree prob_tree( hit_deque.size(), hit_deque.get_probs() );
    SeedHit* hits_data( hit_deque.get_data() );

    // check if the tree still contains some unexplored entries
    if (prob_tree.sum() <= 0.0f)
    {
        // stop traversal
        hits.erase( read_id );
        return;
    }

    // check if the top hit became empty
    if (top_flag && hits_data[0].empty())
        top_flag = 0;

    // keep the deterministic top hit while it lasts, otherwise sample one
    const uint32 hit_id = top_flag ? 0 : randomized_select( prob_tree, hits_data, rseeds, read_id );

    // fetch the selected hit
    SeedHit* hit = &hits_data[ hit_id ];

    // this should never happen if we had infinite precision, but in practice, because of
    // rounding errors, prob_tree.sum() might be non-zero even though its leaf values are
    // all zero: check for it and stop the traversal.
    if (hit->empty())
    {
        hits.erase( read_id );
        return;
    }

    // fetch next SA row from the selected hit
    const uint32 sa_pos = hit->pop_front();
    const uint32 r_type = hit->get_readtype() ? 1u : 0u;

    // if the range for the selected seed hit became empty, zero out its probability
    if (hit->empty())
        prob_tree.set( hit_id, 0.0f );

    // grab a slot in the output read queue (warp-synchronous allocation)
    const uint32 slot = alloc( scoring_queues.active_reads.out_size, &warp_broadcast );
    NVBIO_CUDA_ASSERT( slot < scoring_queues.active_reads.in_size );

    // write the output active read
    scoring_queues.active_reads.out_queue[slot] = packed_read( read_id, top_flag );

    // write the output hit
    HitReference<HitQueuesDeviceView> out_hit( scoring_queues.hits, slot );
    out_hit.read_id = read_id;
    out_hit.loc     = sa_pos;
    out_hit.seed    = packed_seed( hit->get_posinread(), hit->get_indexdir(), r_type, top_flag );

    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_select( read_id ), "select() : selected hit[%u], SA[%u:%u:%u] in slot [%u])\n", hit_id, sa_pos, hit->get_indexdir(), hit->get_posinread(), slot);
}
280 
///
/// Select up to 'n_multi' SA rows for each active read.
///
/// For each read in the input queue this kernel generates:
///   1. one or zero output reads in the main output read queue, and
///   2. zero to 'n_multi' SA rows in the hit queues, all bound to the read's
///      single slot (its 'output lane') in the output read queue.
///
/// \tparam BatchType    read-batch type exposing size()
/// \tparam ContextType  selection context exposing stop(read_id)
///
template <typename BatchType, typename ContextType> __global__
void select_multi_kernel(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const uint32                    n_multi,
    const ParamsPOD                 params)
{
    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= scoring_queues.active_read_count()) return;

    const packed_read read_info = scoring_queues.active_read( thread_id );
    const uint32 read_id  = read_info.read_id;
          uint32 top_flag = read_info.top_flag;

    NVBIO_CUDA_ASSERT( read_id < read_batch.size() );

    // check whether we finished our loop
    if (context.stop( read_id ))
        return;

    typedef SeedHitDequeArrayDeviceView::reference hit_deque_reference;

    // check whether there's any hits left
    hit_deque_reference hit_deque = hits[ read_id ];
    if (hit_deque.size() == 0)
        return;

    typedef ReadHitsBinder<ScoringQueuesDeviceView> read_hits_binder;
    typedef typename read_hits_binder::reference    hit_reference;

    // create a hit binder for this read
    read_hits_binder dst_read_hits( scoring_queues );

    uint32 output_lane     = uint32(-1);
    uint32 n_selected_hits = 0u;

    // NOTE: the following loop relies on fragile, unsupported warp-synchronous
    // logic (mask-less __any; assumes the implicit warp synchrony of pre-Volta
    // scheduling — not safe under independent thread scheduling).
    // To make the work-queue allocations work correctly, all threads within a
    // warp must keep executing the loop until the very last one is done: we
    // achieve this by keeping an 'active' flag and looping while any is set.
    __shared__ volatile uint32 sm_broadcast1[BLOCKDIM >> 5];
    __shared__ volatile uint32 sm_broadcast2[BLOCKDIM >> 5];
    volatile uint32& warp_broadcast1 = sm_broadcast1[ warp_id() ];
    volatile uint32& warp_broadcast2 = sm_broadcast2[ warp_id() ];

    bool active = true;

    for (uint32 i = 0; i < n_multi && __any(active); ++i)
    {
        SeedHit* hit = NULL;

        // check whether this thread is actually active, or idling around
        if (active)
        {
            // get a reference to the top element in the hit queue
            hit = const_cast<SeedHit*>( &hit_deque.top() );

            // check whether this hit's range became empty
            if (hit->empty())
            {
                // pop the top of the hit queue
                hit_deque.pop_top();

                // update the hit reference
                if (hit_deque.size() == 0)
                {
                    active = false;
                    // NOTE: it would be tempting to just break here, but this
                    // would break the warp-synchronous allocations below.
                }
                else
                {
                    hit = const_cast<SeedHit*>( &hit_deque.top() );

                    // no longer on the top seed, unmask the top flag
                    top_flag = 0u;
                }
            }
        }

        // is this thread still active?
        if (active)
        {
            if (output_lane == uint32(-1))
            {
                // grab a slot in the output read queue - we'll call this the 'output_lane'
                output_lane = alloc( scoring_queues.active_reads.out_size, &warp_broadcast1 );

                // bind the read to its new location in the output queue
                dst_read_hits.bind( output_lane );

                NVBIO_CUDA_ASSERT( output_lane < scoring_queues.active_reads.in_size );
            }

            // fetch next SA row from the selected hit
            const uint32 sa_pos = hit->pop_front();
            const uint32 r_type = hit->get_readtype() ? 1u : 0u;

            // grab an output slot for writing our hit
            const uint32 slot = alloc( scoring_queues.hits_pool, &warp_broadcast2 );

            // bind the hit
            dst_read_hits.bind_hit( n_selected_hits, slot );

            hit_reference out_hit = dst_read_hits[ n_selected_hits ];
            out_hit.read_id = read_id;
            out_hit.loc     = sa_pos;
            out_hit.seed    = packed_seed( hit->get_posinread(), hit->get_indexdir(), r_type, top_flag );

            ++n_selected_hits;
            NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_select( read_id ), "select() : selected SA[%u:%u:%u] in slot [%u], output-lane[%u:%u])\n", sa_pos, hit->get_indexdir(), hit->get_posinread(), slot, output_lane, i);
        }
    }

    // write the output read info.
    // NOTE: this must be done at the very end, because only now we know the final state of the top_flag.
    if (output_lane != uint32(-1))
    {
        // setup the output-lane's read-info
        dst_read_hits.set_read_info( packed_read( read_id, top_flag ) );

        // set the number of hits
        dst_read_hits.resize( n_selected_hits );
    }
}
478 
///
/// Randomized batched-hit selection: select up to 'n_multi' SA rows for each
/// active read, sampling seed-hits according to the per-hit probability tree
/// (or keeping the top hit while its flag is set).
///
/// \tparam BatchType    read-batch type exposing size()
/// \tparam ContextType  selection context exposing stop(read_id)
///
template <typename BatchType, typename ContextType> __global__
void rand_select_multi_kernel(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    uint32*                         rseeds,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const uint32                    n_multi,
    const ParamsPOD                 params)
{
    typedef SumTree< float* > ProbTree;

    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= scoring_queues.active_read_count()) return;

    const packed_read read_info = scoring_queues.active_read( thread_id );
    const uint32 read_id  = read_info.read_id;
          uint32 top_flag = read_info.top_flag;

    NVBIO_CUDA_ASSERT( read_id < read_batch.size() );

    // check whether we finished our loop
    if (context.stop( read_id ))
        return;

    typedef SeedHitDequeArrayDeviceView::reference hit_deque_reference;

    // check whether there's any hits left
    hit_deque_reference hit_deque = hits[ read_id ];
    if (hit_deque.size() == 0)
        return;

    ProbTree prob_tree( hit_deque.size(), hit_deque.get_probs() );
    SeedHit* hits_data( hit_deque.get_data() );

    typedef ReadHitsBinder<ScoringQueuesDeviceView> read_hits_binder;
    typedef typename read_hits_binder::reference    hit_reference;

    // create a hit binder for this read
    read_hits_binder dst_read_hits( scoring_queues );

    uint32 output_lane     = uint32(-1);
    uint32 n_selected_hits = 0u;

    for (uint32 i = 0; i < n_multi; ++i)
    {
        // check if the tree still contains some unexplored entries
        if (prob_tree.sum() <= 0.0f)
        {
            // stop traversal
            hits.erase( read_id );
            break;
        }

        // check if the top hit became empty
        if (top_flag && hits_data[0].empty())
            top_flag = 0;

        const uint32 hit_id = top_flag ? 0 : randomized_select( prob_tree, hits_data, rseeds, read_id );

        // fetch the selected hit
        SeedHit* hit = &hits_data[ hit_id ];

        // this should never happen if we had infinite precision, but in practice, because of
        // rounding errors, prob_tree.sum() might be non-zero even though its leaf values are
        // all zero: check for it and stop the traversal.
        // NOTE(review): after erase() the loop continues and the next iteration re-reads
        // prob_tree — confirm erase() leaves the prob/hit storage readable.
        if (hit->empty())
        {
            hits.erase( read_id );
            continue;
        }

        // fetch next SA row from the selected hit
        const uint32 sa_pos = hit->pop_front();
        const uint32 r_type = hit->get_readtype() ? 1u : 0u;

        // if the range for the selected seed hit became empty, zero out its probability
        if (hit->empty())
            prob_tree.set( hit_id, 0.0f );

        if (output_lane == uint32(-1))
        {
            // grab a slot in the output read queue - we'll call this the 'output_lane'
            output_lane = atomicAdd( scoring_queues.active_reads.out_size, 1u );
            NVBIO_CUDA_ASSERT( output_lane < scoring_queues.active_reads.in_size );

            // bind the read to its new location in the output queue
            dst_read_hits.bind( output_lane );
        }

        // grab an output slot for writing our hit
        const uint32 slot = atomicAdd( scoring_queues.hits_pool, 1u );
        NVBIO_CUDA_ASSERT( slot < scoring_queues.hits.read_id.size() );

        // bind the hit
        dst_read_hits.bind_hit( n_selected_hits, slot );

        hit_reference out_hit = dst_read_hits[ n_selected_hits ];
        out_hit.read_id = read_id;
        out_hit.loc     = sa_pos;
        out_hit.seed    = packed_seed( hit->get_posinread(), hit->get_indexdir(), r_type, top_flag );

        ++n_selected_hits;
        NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_select( read_id ), "select() : selected SA[%u:%u:%u] in slot [%u], output-lane[%u:%u])\n", sa_pos, hit->get_indexdir(), hit->get_posinread(), slot, output_lane, i);
    }

    // write the output read info.
    // NOTE: this must be done at the very end, because only now we know the final state of the top_flag.
    if (output_lane != uint32(-1))
    {
        // setup the output-lane's read-info
        dst_read_hits.set_read_info( packed_read( read_id, top_flag ) );

        // set the number of hits
        dst_read_hits.resize( n_selected_hits );
    }
}
608 
612 
//
// Prepare for a round of seed extension by selecting the next SA row
// (non-randomized, single-hit selection): host-side launcher for select_kernel.
//
template <typename BatchType, typename ContextType>
void select(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const ParamsPOD                 params)
{
    // nothing to do — and a zero-sized grid launch is invalid — if the input queue is empty
    if (scoring_queues.active_reads.in_size == 0)
        return;

    const int blocks = (scoring_queues.active_reads.in_size + BLOCKDIM-1) / BLOCKDIM;

    select_kernel<<<blocks, BLOCKDIM>>>(
        read_batch,
        hits,
        context,
        scoring_queues,
        params );
}
633 
//
// Prepare for a round of seed extension by selecting the next SA row
// (randomized, single-hit selection): host-side launcher for rand_select_kernel.
//
template <typename BatchType, typename ContextType>
void rand_select(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    uint32*                         rseeds,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const ParamsPOD                 params)
{
    // nothing to do — and a zero-sized grid launch is invalid — if the input queue is empty
    if (scoring_queues.active_reads.in_size == 0)
        return;

    const int blocks = (scoring_queues.active_reads.in_size + BLOCKDIM-1) / BLOCKDIM;

    rand_select_kernel<<<blocks, BLOCKDIM>>>(
        read_batch,
        hits,
        rseeds,
        context,
        scoring_queues,
        params );
}
656 
//
// Prepare for a round of seed extension by selecting a set of up
// to 'n_multi' next SA rows.
// For each read in the input queue, this kernel generates:
//   1. one or zero output reads, in the main output read queue,
//   2. zero to 'n_multi' SA rows. These are made of three entries,
//      one in 'loc_queue', identifying the corresponding SA index,
//      one in 'seed_queue', storing information about the seed hit,
//      and one in 'parent_queue', storing the index of the "parent"
//      read in the output queue (i.e. the slot where the read is
//      being stored).
//
template <typename BatchType, typename ContextType>
void select_multi(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const uint32                    n_multi,
    const ParamsPOD                 params)
{
    // nothing to do — and a zero-sized grid launch is invalid — if the input queue is empty
    if (scoring_queues.active_reads.in_size == 0)
        return;

    const int blocks = (scoring_queues.active_reads.in_size + BLOCKDIM-1) / BLOCKDIM;

    select_multi_kernel<<<blocks, BLOCKDIM>>>(
        read_batch,
        hits,
        context,
        scoring_queues,
        n_multi,
        params );
}
688 
//
// Prepare for a round of seed extension by selecting a set of up
// to 'n_multi' next SA rows (randomized variant).
// For each read in the input queue, this kernel generates:
//   1. one or zero output reads, in the main output read queue,
//   2. zero to 'n_multi' SA rows. These are made of three entries,
//      one in 'loc_queue', identifying the corresponding SA index,
//      one in 'seed_queue', storing information about the seed hit,
//      and one in 'parent_queue', storing the index of the "parent"
//      read in the output queue (i.e. the slot where the read is
//      being stored).
//
template <typename BatchType, typename ContextType>
void rand_select_multi(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    uint32*                         rseeds,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const uint32                    n_multi,
    const ParamsPOD                 params)
{
    // nothing to do — and a zero-sized grid launch is invalid — if the input queue is empty
    if (scoring_queues.active_reads.in_size == 0)
        return;

    const int blocks = (scoring_queues.active_reads.in_size + BLOCKDIM-1) / BLOCKDIM;

    rand_select_multi_kernel<<<blocks, BLOCKDIM>>>(
        read_batch,
        hits, rseeds,
        context,
        scoring_queues,
        n_multi,
        params );
}
721 
//
// Prepare for a round of seed extension by selecting the next SA rows for each read.
// Dispatches to the proper specialized launcher based on whether we are performing
// single-hit or batched-hit selection, and whether we use randomization or not.
//
template <typename BatchType, typename ContextType>
void select(
    const BatchType                 read_batch,
    SeedHitDequeArrayDeviceView     hits,
    uint32*                         rseeds,
    const ContextType               context,
    ScoringQueuesDeviceView         scoring_queues,
    const uint32                    n_multi,
    const ParamsPOD                 params)
{
    if (params.randomized)
    {
        if (n_multi > 1)
        {
            // randomized batched-hit selection
            rand_select_multi(
                read_batch,
                hits, rseeds,
                context,
                scoring_queues,
                n_multi,
                params );
        }
        else
        {
            // randomized single-hit selection
            rand_select(
                read_batch,
                hits, rseeds,
                context,
                scoring_queues,
                params );
        }
    }
    else
    {
        if (n_multi > 1)
        {
            // non-randomized batched-hit selection
            select_multi(
                read_batch,
                hits,
                context,
                scoring_queues,
                n_multi,
                params );
        }
        else
        {
            // non-randomized single-hit selection
            select(
                read_batch,
                hits,
                context,
                scoring_queues,
                params );
        }
    }
}
788 
//
// Prepare for a round of seed extension by selecting the next SA rows for each read.
//
// NOTE(review): the 'pipeline' parameter line was dropped by the HTML
// extraction; its type is inferred from select_init_t's pipeline usage —
// confirm against the original nvbio source.
//
template <typename ScoringScheme, typename ContextType>
void select_t(
    const ContextType                               context,
    BestApproxScoringPipelineState<ScoringScheme>&  pipeline,
    const ParamsPOD                                 params)
{
    // forward the pipeline's individual views to the dispatching select()
    select(
        pipeline.reads,
        pipeline.hits,
        pipeline.rseeds,
        context,
        pipeline.scoring_queues,
        pipeline.n_hits_per_read,
        params );
}
807 
808 } // namespace cuda
809 } // namespace bowtie2
810 } // namespace nvbio