NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
aligner.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
39 #include <nvbio/io/alignments.h>
45 #include <nvbio/basic/cuda/arch.h>
46 #include <nvbio/basic/cuda/sort.h>
49 #include <nvbio/basic/timer.h>
50 #include <nvbio/basic/console.h>
51 #include <nvbio/basic/options.h>
52 #include <nvbio/basic/threads.h>
53 #include <nvbio/basic/html.h>
54 #include <nvbio/basic/dna.h>
56 #include <nvbio/fmindex/bwt.h>
57 #include <nvbio/fmindex/ssa.h>
58 #include <nvbio/fmindex/fmindex.h>
60 #include <thrust/host_vector.h>
61 #include <thrust/device_vector.h>
62 #include <thrust/scan.h>
63 #include <thrust/sort.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <vector>
68 #include <algorithm>
69 #include <numeric>
70 #include <functional>
71 
72 
73 
74 namespace nvbio {
75 namespace bowtie2 {
76 namespace cuda {
77 
78 struct Aligner
79 {
81 
84 
90 
94 
95 
98 
99  thrust::device_vector<uint8> dp_buffer_dvec;
101 
103 
106 
107  thrust::device_vector<uint32> idx_queue_dvec;
109  thrust::device_vector<uint16> sorting_queue_dvec;
110 
111  thrust::device_vector<uint8> reseed_dvec;
113  thrust::device_vector<uint32> trys_dvec;
115  thrust::device_vector<uint32> rseeds_dvec;
117  thrust::device_vector<uint8> flags_dvec;
120 
121  thrust::device_vector<io::Alignment> best_data_dvec;
122  thrust::device_vector<io::Alignment> best_data_dvec_o;
125  thrust::device_vector<uint8> mapq_dvec;
127 
128  // --- paired-end vectors --------------------------------- //
129  thrust::device_vector<uint32> opposite_queue_dvec;
131 
132  // --- all-mapping vectors -------------------------------- //
133  thrust::device_vector<io::Alignment> buffer_alignments_dvec;
135  thrust::device_vector<uint32> buffer_read_info_dvec;
137  thrust::device_vector<io::Alignment> output_alignments_dvec;
139  thrust::device_vector<uint32> output_read_info_dvec;
141 
142  thrust::device_vector<uint32> hits_count_scan_dvec;
144  thrust::device_vector<uint64> hits_range_scan_dvec;
146  // -------------------------------------------------------- //
147 
150  thrust::device_vector<uint2> cigar_coords_dvec;
152 
153  thrust::device_vector<uint64> hits_stats_dvec;
154  thrust::host_vector<uint64> hits_stats_hvec;
156 
158 
160  // --------------------------------------------------------------------------------------------- //
161 
162  // file object that we're writing into
164 
165  static uint32 band_length(const uint32 max_dist)
166  {
167  //return max_dist*2+1;
168  // compute band length
169  uint32 band_len = 4;
170  while (band_len-1 < max_dist*2+1)
171  band_len *= 2;
172  band_len -= 1;
173  return band_len;
174  }
175 
176  Aligner() : output_file(NULL) {}
177 
178  bool init(const uint32 id, const uint32 batch_size, const Params& params, const EndType type);
179 
180  void keep_stats(const uint32 count, Stats& stats);
181 
182  template <typename scoring_tag>
183  void best_approx(
184  const Params& params,
185  const fmi_type fmi,
186  const rfmi_type rfmi,
187  const UberScoringScheme& scoring_scheme,
188  const io::SequenceDataDevice& reference_data,
189  const io::FMIndexDataDevice& driver_data,
190  const io::SequenceDataDevice& read_data,
191  io::HostOutputBatchSE& cpu_batch,
192  Stats& stats);
193 
194  template <
195  typename scoring_tag,
196  typename scoring_scheme_type>
197  void best_approx_score(
198  const Params& params,
199  const fmi_type fmi,
200  const rfmi_type rfmi,
201  const scoring_scheme_type& scoring_scheme,
202  const io::SequenceDataDevice& reference_data,
203  const io::FMIndexDataDevice& driver_data,
204  const io::SequenceDataDevice& read_data,
205  const uint32 seeding_pass,
206  const uint32 seed_queue_size,
207  const uint32* seed_queue,
208  Stats& stats);
209 
210  template <typename scoring_tag>
211  void best_approx(
212  const Params& params,
213  const FMIndexDef::type fmi,
214  const FMIndexDef::type rfmi,
215  const UberScoringScheme& scoring_scheme,
216  const io::SequenceDataDevice& reference_data,
217  const io::FMIndexDataDevice& driver_data,
218  const io::SequenceDataDevice& read_data1,
219  const io::SequenceDataDevice& read_data2,
220  io::HostOutputBatchPE& cpu_batch,
221  Stats& stats);
222 
223  template <
224  typename scoring_tag,
225  typename scoring_scheme_type>
226  void best_approx_score(
227  const Params& params,
228  const fmi_type fmi,
229  const rfmi_type rfmi,
230  const scoring_scheme_type& scoring_scheme,
231  const io::SequenceDataDevice& reference_data,
232  const io::FMIndexDataDevice& driver_data,
233  const uint32 anchor,
234  const io::SequenceDataDevice& read_data1,
235  const io::SequenceDataDevice& read_data2,
236  const uint32 seeding_pass,
237  const uint32 seed_queue_size,
238  const uint32* seed_queue,
239  Stats& stats);
240 
241  template <typename scoring_tag>
242  void all(
243  const Params& params,
244  const fmi_type fmi,
245  const rfmi_type rfmi,
246  const UberScoringScheme& scoring_scheme,
247  const io::SequenceDataDevice& reference_data,
248  const io::FMIndexDataDevice& driver_data,
249  const io::SequenceDataDevice& read_data,
250  io::HostOutputBatchSE& cpu_batch,
251  Stats& stats);
252 
253  template <typename scoring_scheme_type>
254  void score_all(
255  const Params& params,
256  const fmi_type fmi,
257  const rfmi_type rfmi,
258  const UberScoringScheme& input_scoring_scheme,
259  const scoring_scheme_type& scoring_scheme,
260  const io::SequenceDataDevice& reference_data,
261  const io::FMIndexDataDevice& driver_data,
262  const io::SequenceDataDevice& read_data,
263  io::HostOutputBatchSE& cpu_batch,
264  const uint32 seed_queue_size,
265  const uint32* seed_queue,
266  Stats& stats,
267  uint64& total_alignments);
268 
269  #if defined(__CUDACC__)
270  // return a pointer to an "index" into the given sorted keys
271  //
272  template <typename iterator_type>
273  std::pair<uint32*,uint64*> sort_64_bits(
274  const uint32 count,
275  const iterator_type keys)
276  {
277  thrust::copy(
278  keys,
279  keys + count,
280  thrust::device_ptr<uint64>( (uint64*)raw_pointer( sorting_queue_dvec ) ) );
281 
282  return sort_64_bits( count );
283  }
284  #endif
285 
286  // return a pointer to an "index" into the given sorted keys
287  //
288  std::pair<uint32*,uint64*> sort_64_bits(
289  const uint32 count);
290 
291  // return a pointer to an "index" into the given keys sorted by their hi bits
292  //
294  const uint32 count,
295  const uint32* keys);
296 
297  // sort a set of keys in place
298  //
299  void sort_inplace(
300  const uint32 count,
301  uint32* keys);
302 
303  bool init_alloc(const uint32 BATCH_SIZE, const Params& params, const EndType type, bool do_alloc, std::pair<uint64,uint64>* mem_stats = NULL);
304 };
305 
306 // Compute the total number of matches found
307 void hits_stats(
308  const uint32 batch_size,
309  const SeedHit* hit_data,
310  const uint32* hit_counts,
311  uint64* hit_stats);
312 
314  const uint32* buffer,
315  const uint32 buffer_size,
316  const uint32 begin,
317  const uint32 end,
318  uint32* output);
319 
320 #if defined(__CUDACC__)
321 
322 // initialize a set of alignments
323 //
324 template <typename ReadBatch, typename ScoreFunction>
325 __global__
326 void init_alignments_kernel(
327  const ReadBatch read_batch,
328  const ScoreFunction worst_score_fun,
329  io::Alignment* best_data,
330  const uint32 best_stride,
331  const uint32 mate)
332 {
333  const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
334  if (thread_id >= read_batch.size()) return;
335 
336  // compute the read length
337  const uint2 read_range = read_batch.get_range( thread_id );
338  const uint32 read_len = read_range.y - read_range.x;
339 
340  const int32 worst_score = worst_score_fun( read_len );
341 
342  io::Alignment a1 = io::Alignment( uint32(-1), io::Alignment::max_ed(), worst_score, mate );
343  io::Alignment a2 = io::Alignment( uint32(-1), io::Alignment::max_ed(), worst_score, mate );
344  best_data[ thread_id ] = a1;
345  best_data[ thread_id + best_stride ] = a2;
346 }
347 
348 // initialize a set of alignments
349 //
350 template <typename ReadBatch, typename ScoreFunction>
351 void init_alignments(
352  const ReadBatch read_batch,
353  const ScoreFunction worst_score_fun,
354  io::Alignment* best_data,
355  const uint32 best_stride,
356  const uint32 mate = 0)
357 {
358  const int blocks = (read_batch.size() + BLOCKDIM-1) / BLOCKDIM;
359 
360  init_alignments_kernel<<<blocks, BLOCKDIM>>>(
361  read_batch,
362  worst_score_fun,
363  best_data,
364  best_stride,
365  mate );
366 }
367 
368 #endif // defined(__CUDACC__)
369 
370 // mark unaligned reads that need reseeding
371 //
372 void mark_unaligned(
373  const uint32 n_active_reads,
374  const uint32* active_reads,
375  const io::Alignment* best_data,
376  uint8* reseed);
377 
378 // mark unique unaligned read pairs as discordant
379 //
380 void mark_discordant(
381  const uint32 n_reads,
382  io::Alignment* anchor_data,
383  io::Alignment* opposite_data,
384  const uint32 stride);
385 
386 } // namespace cuda
387 } // namespace bowtie2
388 } // namespace nvbio