NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
nvBowtie.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 // nvBowtie.cpp : Defines the entry point for the console application.
29 //
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <string>
35 #include <nvbio/basic/console.h>
36 #include <nvbio/basic/exceptions.h>
46 #include <nvbio/basic/omp.h>
47 
48 void crcInit();
49 
50 namespace nvbio {
51 namespace bowtie2 {
52 namespace cuda {
53 
54  void test_seed_hit_deques();
55  void test_scoring_queues();
56 
57 } // namespace cuda
58 } // namespace bowtie2
59 } // namespace nvbio
60 
61 using namespace nvbio;
62 
63 // bogus implementation of a function to check if a string is a number
64 bool is_number(const char* str, uint32 len = uint32(-1))
65 {
66  if (str[0] == '-')
67  ++str;
68 
69  for (uint32 l = 0; *str != '\0' && l < len; ++l)
70  {
71  const char c = *str; ++str;
72  if (c == '.') continue;
73  if (c >= '0' && c <= '9') continue;
74  return false;
75  }
76  return true;
77 }
78 
80 {
81  if (stats.n_mapped == 0)
82  return;
83 
84  const std::vector<uint32>& mapped = stats.mapped_log_ed_histogram;
85  log_stats(stderr, " ed :");
86  for (uint32 i = 0; i < mapped.size(); ++i)
87  {
88  if (float(mapped[i])/float(stats.n_mapped) > 1.0e-3f)
89  log_stats_cont(stderr, " %5u ", i ? 1u << (i-1) : 0u );
90  }
91  log_stats_cont(stderr,"\n");
92  log_stats(stderr, " :");
93  for (uint32 i = 0; i < mapped.size(); ++i)
94  {
95  if (float(mapped[i])/float(stats.n_mapped) > 1.0e-3f)
96  log_stats_cont(stderr, " %6.2f%%", 100.0f * float(mapped[i])/float(stats.n_mapped) );
97  }
98  log_stats_cont(stderr,"\n");
99 }
101 {
102  if (stats.n_mapped == 0)
103  return;
104 
105  log_stats(stderr, " mapq :");
106  for (uint32 i = 0; i < 8; ++i)
107  log_stats_cont(stderr, " %5u ", i ? 1u << (i-1) : 0u );
108  log_stats_cont(stderr,"\n");
109  log_stats(stderr, " :");
110  for (uint32 i = 0; i < 8; ++i)
111  log_stats_cont(stderr, " %6.2f%%", 100.0f * float(stats.mapq_log_bins[i])/float(stats.n_mapped) );
112  log_stats_cont(stderr,"\n");
113 }
114 
115 int main(int argc, char* argv[])
116 {
117  //cudaSetDeviceFlags( cudaDeviceMapHost | cudaDeviceLmemResizeToMax );
118 
119  crcInit();
120 
121  if (argc == 1 ||
122  (argc == 2 && strcmp( argv[1], "--help" ) == 0) ||
123  (argc == 2 && strcmp( argv[1], "-h" ) == 0))
124  {
125  log_info(stderr,"nvBowtie [options]\n");
126  log_info(stderr,"options:\n");
127  log_info(stderr," General:\n");
128  log_info(stderr," -U file-name unpaired reads\n");
129  log_info(stderr," -1 file-name first mate reads\n");
130  log_info(stderr," -2 file-name second mate reads\n");
131  log_info(stderr," -S file-name output file (.sam|.bam)\n");
132  log_info(stderr," -x file-name reference index\n");
133  log_info(stderr," --verbosity int [5] verbosity level\n");
134  log_info(stderr," --upto | -u int [-1] maximum number of reads to process\n");
135  log_info(stderr," --trim3 | -3 int [0] trim the first N bases of 3'\n");
136  log_info(stderr," --trim5 | -5 int [0] trim the first N bases of 5'\n");
137  log_info(stderr," --nofw do not align the forward strand\n");
138  log_info(stderr," --norc do not align the reverse-complemented strand\n");
139  log_info(stderr," --device int [0] select the given cuda device(s) (e.g. --device 0 --device 1 ...)\n");
140  log_info(stderr," --file-ref load reference from file\n");
141  log_info(stderr," --server-ref load reference from server\n");
142  log_info(stderr," --phred33 qualities are ASCII characters equal to Phred quality + 33\n");
143  log_info(stderr," --phred64 qualities are ASCII characters equal to Phred quality + 64\n");
144  log_info(stderr," --solexa-quals qualities are in the Solexa format\n");
145  log_info(stderr," --rg-id string add the RG-ID field of the SAM output header\n");
146  log_info(stderr," --rg string,val add an RG-TAG field of the SAM output header\n");
147  log_info(stderr," Paired-End:\n");
148  log_info(stderr," --ff paired mates are forward-forward\n");
149  log_info(stderr," --fr paired mates are forward-reverse\n");
150  log_info(stderr," --rf paired mates are reverse-forward\n");
151  log_info(stderr," --rr paired mates are reverse-reverse\n");
152  log_info(stderr," --minins int [0] minimum insert length\n");
153  log_info(stderr," --minins int [500] maximum insert length\n");
154  log_info(stderr," --overlap allow overlapping mates\n");
155  log_info(stderr," --dovetail allow dovetailing mates\n");
156  log_info(stderr," --no-mixed only report paired alignments\n");
157  log_info(stderr," --ungapped-mates | -ug perform ungapped mate alignment\n");
158  log_info(stderr," Seeding:\n");
159  log_info(stderr," --seed-len | -L int [22] seed lengths\n");
160  log_info(stderr," --seed-freq | -i {G|L|S},x,y seed interval, as x + y*func(read-len) (G=log,L=linear,S=sqrt)\n");
161  log_info(stderr," --max-hits int [100] maximum amount of seed hits\n");
162  log_info(stderr," --max-reseed | -R int [2] number of reseeding rounds\n");
163  log_info(stderr," Extension:\n");
164  log_info(stderr," --all | -a perform all-mapping (i.e. find and report all alignments)\n");
165  log_info(stderr," --local perform local alignment\n");
166  log_info(stderr," --rand randomized seed selection\n");
167  log_info(stderr," --no-rand do not randomize seed hit selection\n");
168  log_info(stderr," --max-dist int [15] maximum edit distance\n");
169  log_info(stderr," --max-effort-init int [15] initial maximum number of consecutive extension failures\n");
170  log_info(stderr," --max-effort | -D int [15] maximum number of consecutive extension failures\n");
171  log_info(stderr," --min-ext int [30] minimum number of extensions per read\n");
172  log_info(stderr," --max-ext int [400] maximum number of extensions per read\n");
173  log_info(stderr," --fast apply the fast presets\n");
174  log_info(stderr," --very-fast apply the very-fast presets\n");
175  log_info(stderr," --sensitive apply the sensitive presets\n");
176  log_info(stderr," --very-sensitive apply the very-sensitive presets\n");
177  log_info(stderr," --fast-local apply the fast presets\n");
178  log_info(stderr," --very-fast-local apply the very-fast presets\n");
179  log_info(stderr," --sensitive-local apply the sensitive presets\n");
180  log_info(stderr," --very-sensitive-local apply the very-sensitive presets\n");
181  log_info(stderr," Scoring:\n");
182  log_info(stderr," --scoring {sw|ed} Smith-Waterman / Edit-Distance scoring\n");
183  log_info(stderr," --score-min {G|L|S},x,y minimum score function, as x + y*func(read-len)\n");
184  log_info(stderr," --ma int match bonus\n");
185  log_info(stderr," --mp int,int mismatch min/max penalties\n");
186  log_info(stderr," --np int N penalty\n");
187  log_info(stderr," --rdg int,int read open/extension gap penalties\n");
188  log_info(stderr," --rfg int,int reference open/extension gap penalties\n");
189  log_info(stderr," Reporting:\n");
190  log_info(stderr," --mapQ-filter | -Q int [0] minimum mapQ threshold\n");
191  exit(0);
192  }
193  else if (argc == 2 && strcmp( argv[1], "-test" ) == 0)
194  {
195  log_visible(stderr, "nvBowtie tests... started\n");
198  log_visible(stderr, "nvBowtie tests... done\n");
199  exit(0);
200  }
201  else if (argc == 2 && strcmp( argv[1], "--version" ) == 0)
202  {
203  fprintf(stderr, "nvBowtie version %s\n", NVBIO_VERSION_STRING);
204  exit(0);
205  }
206 
207  // setup the number of OMP threads
209 
210  uint32 max_reads = uint32(-1);
211  uint32 max_read_len = uint32(-1);
212  uint32 trim3 = 0;
213  uint32 trim5 = 0;
214  //bool debug = false;
215  bool from_file = false;
216  bool paired_end = false;
218  io::QualityEncoding qencoding = io::Phred33;
219  std::vector<int> cuda_devices;
220 
221  std::map<std::string,std::string> string_options;
222 
223  std::string argstr;
224  for (int32 i = 1; i < argc; ++i)
225  {
226  argstr += " ";
227  argstr += argv[i];
228  }
229 
230  std::string rg_id;
231  std::string rg_string;
232 
233  bool legacy_cmdline = true;
234 
235  const char* read_name1 = "";
236  const char* read_name2 = "";
237  const char* reference_name = "";
238  const char* output_name = "";
239 
240  for (int32 i = 1; i < argc; ++i)
241  {
242  if (strcmp( argv[i], "--pe" ) == 0 ||
243  strcmp( argv[i], "-paired-ends" ) == 0 ||
244  strcmp( argv[i], "--paired-ends" ) == 0)
245  paired_end = true;
246  else if (strcmp( argv[i], "--ff" ) == 0)
247  pe_policy = io::PE_POLICY_FF;
248  else if (strcmp( argv[i], "--fr" ) == 0)
249  pe_policy = io::PE_POLICY_FR;
250  else if (strcmp( argv[i], "--rf" ) == 0)
251  pe_policy = io::PE_POLICY_RF;
252  else if (strcmp( argv[i], "--rr" ) == 0)
253  pe_policy = io::PE_POLICY_RR;
254  else if (strcmp( argv[i], "-max-reads" ) == 0 ||
255  strcmp( argv[i], "--max-reads" ) == 0 ||
256  strcmp( argv[i], "-u" ) == 0 ||
257  strcmp( argv[i], "--upto" ) == 0)
258  max_reads = (uint32)atoi( argv[++i] );
259  else if (strcmp( argv[i], "-max-read-len" ) == 0 ||
260  strcmp( argv[i], "--max-read-len" ) == 0)
261  max_read_len = (uint32)atoi( argv[++i] );
262  else if (strcmp( argv[i], "-3") == 0 ||
263  strcmp( argv[i], "--trim3") == 0)
264  trim3 = (uint32)atoi( argv[++i] );
265  else if (strcmp( argv[i], "-5") == 0 ||
266  strcmp( argv[i], "--trim5") == 0)
267  trim5 = (uint32)atoi( argv[++i] );
268  else if (strcmp( argv[i], "-file-ref" ) == 0 ||
269  strcmp( argv[i], "--file-ref" ) == 0)
270  from_file = true;
271  else if (strcmp( argv[i], "-server-ref" ) == 0 ||
272  strcmp( argv[i], "--server-ref" ) == 0)
273  from_file = false;
274  else if (strcmp( argv[i], "-input" ) == 0 ||
275  strcmp( argv[i], "--input" ) == 0)
276  {
277  if (strcmp( argv[i+1], "file" ) == 0)
278  from_file = true;
279  else if (strcmp( argv[i+1], "server" ) == 0)
280  from_file = false;
281  else
282  log_warning(stderr, "unknown \"%s\" input, skipping\n", argv[i+1]);
283 
284  ++i;
285  }
286  else if (strcmp( argv[i], "-phred33" ) == 0 ||
287  strcmp( argv[i], "--phred33" ) == 0)
288  qencoding = io::Phred33;
289  else if (strcmp( argv[i], "-phred64" ) == 0 ||
290  strcmp( argv[i], "--phred64" ) == 0)
291  qencoding = io::Phred64;
292  else if (strcmp( argv[i], "-solexa-quals" ) == 0 ||
293  strcmp( argv[i], "--solexa-quals" ) == 0)
294  qencoding = io::Solexa;
295  else if (strcmp( argv[i], "-device" ) == 0 ||
296  strcmp( argv[i], "--device" ) == 0)
297  cuda_devices.push_back( atoi( argv[++i] ) );
298  else if (strcmp( argv[i], "-verbosity" ) == 0 ||
299  strcmp( argv[i], "--verbosity" ) == 0)
300  set_verbosity( Verbosity( atoi( argv[++i] ) ) );
301  else if (strcmp( argv[i], "-rg-id" ) == 0 ||
302  strcmp( argv[i], "--rg-id" ) == 0)
303  rg_id = argv[++i];
304  else if (strcmp( argv[i], "-rg" ) == 0 ||
305  strcmp( argv[i], "--rg" ) == 0)
306  {
307  rg_string += "\t";
308  rg_string += argv[++i];
309  }
310  else if (strcmp( argv[i], "-1") == 0)
311  {
312  legacy_cmdline = false;
313  paired_end = true;
314  read_name1 = argv[++i];
315  }
316  else if (strcmp( argv[i], "-2") == 0)
317  {
318  legacy_cmdline = false;
319  paired_end = true;
320  read_name2 = argv[++i];
321  }
322  else if (strcmp( argv[i], "-U") == 0)
323  {
324  legacy_cmdline = false;
325  paired_end = false;
326  read_name1 = argv[++i];
327  }
328  else if (strcmp( argv[i], "-S") == 0)
329  {
330  legacy_cmdline = false;
331  output_name = argv[++i];
332  }
333  else if (strcmp( argv[i], "-x") == 0)
334  {
335  legacy_cmdline = false;
336  reference_name = argv[++i];
337  }
338  else if (argv[i][0] == '-')
339  {
340  // add unknown option to the string options
341  const std::string key = std::string( argv[i][1] == '-' ? argv[i] + 2 : argv[i] + 1 );
342  const char* next = argv[i+1];
343 
344  if (is_number(next) || next[0] != '-')
345  {
346  const std::string val = std::string( next ); ++i;
347  string_options.insert( std::make_pair( key, val ) );
348  }
349  else
350  string_options.insert( std::make_pair( key, "1" ) );
351  }
352  }
353 
354  log_info(stderr, "nvBowtie... started\n");
355  log_debug(stderr, " %-16s : %d\n", "max-reads", max_reads);
356  log_debug(stderr, " %-16s : %d\n", "max-length", max_read_len);
357  log_debug(stderr, " %-16s : %s\n", "quals", qencoding == io::Phred33 ? "phred33" :
358  qencoding == io::Phred64 ? "phred64" :
359  "solexa");
360  if (paired_end)
361  {
362  log_debug(stderr, " %-16s : %s\n", "pe-policy",
363  pe_policy == io::PE_POLICY_FF ? "ff" :
364  pe_policy == io::PE_POLICY_FR ? "fr" :
365  pe_policy == io::PE_POLICY_RF ? "rf" :
366  "rr" );
367  }
368  if (string_options.empty() == false)
369  {
370  for (std::map<std::string,std::string>::const_iterator it = string_options.begin(); it != string_options.end(); ++it)
371  log_debug(stderr, " %-16s : %s\n", it->first.c_str(), it->second.c_str());
372  }
373  log_debug(stderr, "\n");
374 
375  int device_count;
376  cudaGetDeviceCount(&device_count);
377  log_verbose(stderr, " cuda devices : %d\n", device_count);
378 
379  // inspect and select cuda devices
380  if (device_count > 0)
381  {
382  if (cuda_devices.empty())
383  {
384  int best_device = 0;
385  cudaDeviceProp best_device_prop;
386  cudaGetDeviceProperties( &best_device_prop, best_device );
387 
388  for (int device = 0; device < device_count; ++device)
389  {
390  cudaDeviceProp device_prop;
391  cudaGetDeviceProperties( &device_prop, device );
392  log_verbose(stderr, " device %d has compute capability %d.%d\n", device, device_prop.major, device_prop.minor);
393  log_verbose(stderr, " SM count : %u\n", device_prop.multiProcessorCount);
394  log_verbose(stderr, " SM clock rate : %u Mhz\n", device_prop.clockRate / 1000);
395  log_verbose(stderr, " memory clock rate : %.1f Ghz\n", float(device_prop.memoryClockRate) * 1.0e-6f);
396 
397  if (device_prop.major >= best_device_prop.major &&
398  device_prop.minor >= best_device_prop.minor)
399  {
400  best_device_prop = device_prop;
401  best_device = device;
402  }
403  }
404  cuda_devices.push_back( best_device );
405  }
406 
407  for (size_t i = 0; i < cuda_devices.size(); ++i)
408  {
409  cudaDeviceProp device_prop;
410  cudaGetDeviceProperties( &device_prop, cuda_devices[i] );
411  log_verbose(stderr, " chosen device %d\n", cuda_devices[i]);
412  log_verbose(stderr, " device name : %s\n", device_prop.name);
413  log_verbose(stderr, " compute capability : %d.%d\n", device_prop.major, device_prop.minor);
414  }
415  }
416  else
417  {
418  log_error(stderr, "no available CUDA devices\n");
419  exit(1);
420  }
421 
422  if (legacy_cmdline)
423  {
424  const uint32 arg_offset = paired_end ? argc-4 : argc-3;
425  reference_name = argv[arg_offset];
426  if (paired_end)
427  {
428  read_name1 = argv[arg_offset+1];
429  read_name2 = argv[arg_offset+2];
430  }
431  else
432  read_name1 = argv[arg_offset+1];
433 
434  output_name = argv[argc-1];
435  }
436 
437  try
438  {
439  //
440  // Parse program options
441  //
442 
443  // WARNING: we don't do any error checking on passed parameters!
444  bowtie2::cuda::Params params;
445  params.pe_policy = pe_policy;
446  {
447  bool init = true;
448  std::string config = string_option( string_options, "config", "" );
449  if (config != "") { bowtie2::cuda::parse_options( params, bowtie2::cuda::load_options( config.c_str() ), init ); init = false; }
450  bowtie2::cuda::parse_options( params, string_options, init );
451 
452  }
455  {
456  log_warning(stderr, "edit-distance scoring is incompatible with local alignment, switching to Smith-Waterman\n");
458  }
459 
460  //
461  // Load the reference
462  //
463 
466  if (from_file)
467  {
468  log_visible(stderr, "loading reference index... started\n");
469  log_info(stderr, " file: \"%s\"\n", reference_name);
470 
471  // load the reference data
472  reference_data = io::load_sequence_file( DNA, reference_name );
473  if (reference_data == NULL)
474  {
475  log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
476  return 1;
477  }
478 
479  log_visible(stderr, "loading reference index... done\n");
480 
482  if (!loader->load( reference_name ))
483  {
484  log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
485  return 1;
486  }
487 
488  driver_data = loader;
489  }
490  else
491  {
492  log_visible(stderr, "mapping reference index... started\n");
493  log_info(stderr, " file: \"%s\"\n", reference_name);
494 
495  // map the reference data
496  reference_data = io::map_sequence_file( reference_name );
497  if (reference_data == NULL)
498  {
499  log_visible(stderr, "mapping reference index... failed\n");
500  log_visible(stderr, "loading reference index... started\n");
501  log_info(stderr, " file: \"%s\"\n", reference_name);
502 
503  // load the reference data
504  reference_data = io::load_sequence_file( DNA, reference_name );
505  if (reference_data == NULL)
506  {
507  log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
508  return 1;
509  }
510 
511  log_visible(stderr, "loading reference index... done\n");
512 
514  if (!loader->load( reference_name ))
515  {
516  log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
517  return 1;
518  }
519 
520  driver_data = loader;
521  }
522  else
523  {
524  log_visible(stderr, "mapping reference index... done\n");
525 
527  if (!loader->load( reference_name ))
528  {
529  log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
530  return 1;
531  }
532 
533  driver_data = loader;
534  }
535  }
536 
537  //
538  // Setup the output file
539  //
540 
542  output_name,
543  paired_end ? io::PAIRED_END : io::SINGLE_END,
544  io::BNT(*reference_data) ) );
545 
546  output_file->set_rg( rg_id.c_str(), rg_string.c_str() );
547  output_file->set_program(
548  "nvBowtie",
549  "nvBowtie",
551  argstr.c_str() );
552 
553  output_file->configure_mapq_evaluator(params.mapq_filter);
554  output_file->header();
555 
556  if (paired_end)
557  {
558  //
559  // Open the input read files
560  //
561 
562  log_visible(stderr, "opening read file [1] \"%s\"\n", read_name1);
565  read_name1,
566  qencoding,
567  max_reads,
568  max_read_len,
569  io::REVERSE,
570  trim3,
571  trim5 )
572  );
573 
574  if (read_data_file1 == NULL || read_data_file1->is_ok() == false)
575  {
576  log_error(stderr, "unable to open read file \"%s\"\n", read_name1);
577  return 1;
578  }
579 
580  log_visible(stderr, "opening read file [2] \"%s\"\n", read_name2);
583  read_name2,
584  qencoding,
585  max_reads,
586  max_read_len,
587  io::REVERSE,
588  trim3,
589  trim5 )
590  );
591 
592  if (read_data_file2 == NULL || read_data_file2->is_ok() == false)
593  {
594  log_error(stderr, "unable to open read file \"%s\"\n", read_name2);
595  return 1;
596  }
597 
598 
599  // print the command line options
600  {
602  params.scoring_scheme.ed.m_score_min :
604 
605  log_verbose(stderr, " mode = %s\n", bowtie2::cuda::mapping_mode( params.mode ));
606  log_verbose(stderr, " scoring = %s\n", bowtie2::cuda::scoring_mode( params.scoring_mode ));
607  log_verbose(stderr, " score-min = %s:%.2f:%.2f\n", score_min.type_string(), score_min.k, score_min.m);
608  log_verbose(stderr, " alignment type = %s\n", params.alignment_type == bowtie2::cuda::LocalAlignment ? "local" : "end-to-end");
609  log_verbose(stderr, " pe-policy = %s\n",
610  pe_policy == io::PE_POLICY_FF ? "ff" :
611  pe_policy == io::PE_POLICY_FR ? "fr" :
612  pe_policy == io::PE_POLICY_RF ? "rf" :
613  "rr" );
614  log_verbose(stderr, " seed length = %u\n", params.seed_len);
615  log_verbose(stderr, " seed interval = (%s, %.3f, %.3f)\n", params.seed_freq.type_symbol(), params.seed_freq.k, params.seed_freq.m);
616  log_verbose(stderr, " seed rounds = %u\n", params.max_reseed);
617  log_verbose(stderr, " max hits = %u\n", params.max_hits);
618  log_verbose(stderr, " max edit dist = %u\n", params.max_dist);
619  log_verbose(stderr, " max effort = %u\n", params.max_effort);
620  log_verbose(stderr, " substitutions = %u\n", params.allow_sub);
621  log_verbose(stderr, " mapq filter = %u\n", params.mapq_filter);
622  log_verbose(stderr, " randomized = %s\n", params.randomized ? "yes" : "no");
623  if (params.allow_sub)
624  log_verbose(stderr, " subseed length = %u\n", params.subseed_len);
625  }
626 
627  //
628  // Setup the compute threads
629  //
630 
631  uint32 batch_size = uint32(-1);
632 
633  std::vector< bowtie2::cuda::Stats > device_stats( cuda_devices.size() );
634  std::vector< SharedPointer<bowtie2::cuda::ComputeThreadPE> > compute_threads( cuda_devices.size() );
635 
636  for (uint32 i = 0; i < cuda_devices.size(); ++i)
637  {
638  device_stats[i] = bowtie2::cuda::Stats( params );
639 
642  i,
643  cuda_devices[i],
644  *reference_data,
645  *driver_data,
646  string_options,
647  params,
648  device_stats[i] ) );
649 
650  batch_size = nvbio::min( batch_size, compute_threads[i]->gauge_batch_size() );
651  }
652 
653  //
654  // Setup the input thread
655  //
656 
657  Timer timer;
658  timer.start();
659 
660  bowtie2::cuda::Stats input_stats( params );
661 
662  bowtie2::cuda::InputThreadPE input_thread( read_data_file1.get(), read_data_file2.get(), input_stats, batch_size, params.avg_read_length );
663  input_thread.create();
664 
665  for (uint32 i = 0; i < cuda_devices.size(); ++i)
666  {
667  compute_threads[i]->set_input( &input_thread );
668  compute_threads[i]->set_output( output_file.get() );
669  compute_threads[i]->create();
670  }
671 
672  uint32 n_reads = 0;
677 
678  for (uint32 i = 0; i < cuda_devices.size(); ++i)
679  {
680  compute_threads[i]->join();
681 
682  n_reads += device_stats[i].n_reads;
683  concordant.merge( device_stats[i].concordant );
684  discordant.merge( device_stats[i].discordant );
685  mate1.merge( device_stats[i].mate1 );
686  mate2.merge( device_stats[i].mate2 );
687  }
688 
689  input_thread.join();
690 
691  timer.stop();
692 
693  log_verbose( stderr, " compute threads joined\n" );
694 
695  // close the output file
696  output_file->close();
697 
698  //
699  // Print statistics
700  //
701 
702  // transfer I/O statistics
703  io::IOStats iostats = output_file->get_aggregate_statistics();
705 
706  log_stats(stderr, " total : %.2f sec (avg: %.3fK reads/s).\n", timer.seconds(), 1.0e-3f * float(n_reads) / timer.seconds());
707  log_stats(stderr, " reads I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", input_stats.read_io.time, 1.0e-6f * input_stats.read_io.avg_speed(), 1.0e-6f * input_stats.read_io.max_speed);
708  log_stats(stderr, " results I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", io.time, 1.0e-6f * io.avg_speed(), 1.0e-6f * io.max_speed);
709 
710  uint32& n_mapped = concordant.n_mapped;
711  uint32& n_unique = concordant.n_unique;
712  uint32& n_ambiguous = concordant.n_ambiguous;
713  uint32& n_nonambiguous = concordant.n_unambiguous;
714  uint32& n_multiple = concordant.n_multiple;
715  {
716  log_stats(stderr, " concordant reads : %.2f %% - of these:\n", 100.0f * float(n_mapped)/float(n_reads) );
717  log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_unique)/float(n_mapped), 100.0f * float(n_mapped - n_multiple)/float(n_reads) );
718  log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_nonambiguous)/float(n_mapped), 100.0f * float(n_nonambiguous)/float(n_reads) );
719  log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_ambiguous)/float(n_mapped), 100.0f * float(n_ambiguous)/float(n_reads) );
720  log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_multiple)/float(n_mapped), 100.0f * float(n_multiple)/float(n_reads) );
721  log_ed( concordant );
722  log_mapq( concordant );
723 
724  log_stats(stderr, " discordant reads : %.2f %%\n", 100.0f * float(discordant.n_mapped)/float(n_reads) );
725  log_ed( discordant );
726  log_mapq( discordant );
727 
728  log_stats(stderr, " mate1 : %.2f %% - of these:\n", 100.0f * float(mate1.n_mapped)/float(n_reads) );
729  if (mate1.n_mapped)
730  {
731  log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_unique)/float(mate1.n_mapped), 100.0f * float(mate1.n_mapped - mate1.n_multiple)/float(n_reads) );
732  log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_unambiguous)/float(mate1.n_mapped), 100.0f * float(mate1.n_unambiguous)/float(n_reads) );
733  log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_ambiguous)/float(mate1.n_mapped), 100.0f * float(mate1.n_ambiguous)/float(n_reads) );
734  log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_multiple)/float(mate1.n_mapped), 100.0f * float(mate1.n_multiple)/float(n_reads) );
735  }
736 
737  log_stats(stderr, " mate2 : %.2f %% - of these:\n", 100.0f * float(mate2.n_mapped)/float(n_reads) );
738  if (mate2.n_mapped)
739  {
740  log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_unique)/float(mate2.n_mapped), 100.0f * float(mate2.n_mapped - mate2.n_multiple)/float(n_reads) );
741  log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_unambiguous)/float(mate2.n_mapped), 100.0f * float(mate2.n_unambiguous)/float(n_reads) );
742  log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_ambiguous)/float(mate2.n_mapped), 100.0f * float(mate2.n_ambiguous)/float(n_reads) );
743  log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_multiple)/float(mate2.n_mapped), 100.0f * float(mate2.n_multiple)/float(n_reads) );
744  }
745  }
746 
747  // generate an html report
748  if (params.report.length())
749  bowtie2::cuda::generate_report_header( n_reads, params, concordant, (uint32)cuda_devices.size(), &device_stats[0], params.report.c_str() );
750  }
751  else
752  {
753  //
754  // Open the input read file
755  //
756 
757  log_visible(stderr, "opening read file \"%s\"\n", read_name1);
760  read_name1,
761  qencoding,
762  max_reads,
763  max_read_len,
764  io::REVERSE,
765  trim3,
766  trim5 )
767  );
768 
769  if (read_data_file == NULL || read_data_file->is_ok() == false)
770  {
771  log_error(stderr, "unable to open read file \"%s\"\n", read_name1);
772  return 1;
773  }
774 
775  // print the command line options
776  {
778  params.scoring_scheme.ed.m_score_min :
780 
781  log_verbose(stderr, " mode = %s\n", bowtie2::cuda::mapping_mode( params.mode ));
782  log_verbose(stderr, " scoring = %s\n", bowtie2::cuda::scoring_mode( params.scoring_mode ));
783  log_verbose(stderr, " score-min = %s:%.2f:%.2f\n", score_min.type_string(), score_min.k, score_min.m);
784  log_verbose(stderr, " alignment type = %s\n", params.alignment_type == bowtie2::cuda::LocalAlignment ? "local" : "end-to-end");
785  log_verbose(stderr, " seed length = %u\n", params.seed_len);
786  log_verbose(stderr, " seed interval = (%s, %.3f, %.3f)\n", params.seed_freq.type_symbol(), params.seed_freq.k, params.seed_freq.m);
787  log_verbose(stderr, " seed rounds = %u\n", params.max_reseed);
788  log_verbose(stderr, " max hits = %u\n", params.max_hits);
789  log_verbose(stderr, " max edit dist = %u\n", params.max_dist);
790  log_verbose(stderr, " max effort = %u\n", params.max_effort);
791  log_verbose(stderr, " substitutions = %u\n", params.allow_sub);
792  log_verbose(stderr, " mapq filter = %u\n", params.mapq_filter);
793  log_verbose(stderr, " randomized = %s\n", params.randomized ? "yes" : "no");
794  if (params.allow_sub)
795  log_verbose(stderr, " subseed length = %u\n", params.subseed_len);
796  }
797 
798  //
799  // Setup the compute threads
800  //
801 
802  uint32 batch_size = uint32(-1);
803 
804  std::vector< bowtie2::cuda::Stats > device_stats( cuda_devices.size() );
805  std::vector< SharedPointer<bowtie2::cuda::ComputeThreadSE> > compute_threads( cuda_devices.size() );
806 
807  for (uint32 i = 0; i < cuda_devices.size(); ++i)
808  {
809  device_stats[i] = bowtie2::cuda::Stats( params );
810 
813  i,
814  cuda_devices[i],
815  *reference_data,
816  *driver_data,
817  string_options,
818  params,
819  device_stats[i] ) );
820 
821  batch_size = nvbio::min( batch_size, compute_threads[i]->gauge_batch_size() );
822  }
823 
824  //
825  // Setup the input thread
826  //
827 
828  Timer timer;
829  timer.start();
830 
831  bowtie2::cuda::Stats input_stats( params );
832 
833  bowtie2::cuda::InputThreadSE input_thread( read_data_file.get(), input_stats, batch_size, params.avg_read_length );
834  input_thread.create();
835 
836  for (uint32 i = 0; i < cuda_devices.size(); ++i)
837  {
838  compute_threads[i]->set_input( &input_thread );
839  compute_threads[i]->set_output( output_file.get() );
840  compute_threads[i]->create();
841  }
842 
843  uint32 n_reads = 0;
845 
846  for (uint32 i = 0; i < cuda_devices.size(); ++i)
847  {
848  compute_threads[i]->join();
849 
850  n_reads += device_stats[i].n_reads;
851  mate1.merge( device_stats[i].mate1 );
852  }
853 
854  input_thread.join();
855 
856  timer.stop();
857 
858  log_verbose( stderr, " compute threads joined\n" );
859 
860  // close the output file
861  output_file->close();
862 
863  //
864  // Print statistics
865  //
866 
867  // transfer I/O statistics
868  io::IOStats iostats = output_file->get_aggregate_statistics();
870 
871  log_stats(stderr, " total : %.2f sec (avg: %.3fK reads/s).\n", timer.seconds(), 1.0e-3f * float(n_reads) / timer.seconds());
872  log_stats(stderr, " reads I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", input_stats.read_io.time, 1.0e-6f * input_stats.read_io.avg_speed(), 1.0e-6f * input_stats.read_io.max_speed);
873  log_stats(stderr, " results I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", io.time, 1.0e-6f * io.avg_speed(), 1.0e-6f * io.max_speed);
874 
875  uint32& n_mapped = mate1.n_mapped;
876  uint32& n_unique = mate1.n_unique;
877  uint32& n_ambiguous = mate1.n_ambiguous;
878  uint32& n_nonambiguous = mate1.n_unambiguous;
879  uint32& n_multiple = mate1.n_multiple;
880  {
881  log_stats(stderr, " mapped reads : %.2f %% - of these:\n", 100.0f * float(n_mapped)/float(n_reads) );
882  log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_unique)/float(n_mapped), 100.0f * float(n_mapped - n_multiple)/float(n_reads) );
883  log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_nonambiguous)/float(n_mapped), 100.0f * float(n_nonambiguous)/float(n_reads) );
884  log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_ambiguous)/float(n_mapped), 100.0f * float(n_ambiguous)/float(n_reads) );
885  log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_multiple)/float(n_mapped), 100.0f * float(n_multiple)/float(n_reads) );
886  log_ed( mate1 );
887  log_mapq( mate1 );
888  }
889 
890  // generate an html report
891  if (params.report.length())
892  bowtie2::cuda::generate_report_header( n_reads, params, mate1, (uint32)cuda_devices.size(), &device_stats[0], params.report.c_str() );
893  }
894 
895  log_info( stderr, "nvBowtie... done\n" );
896  }
897  catch (nvbio::cuda_error e)
898  {
899  log_error(stderr, "caught a nvbio::cuda_error exception:\n");
900  log_error(stderr, " %s\n", e.what());
901  }
902  catch (nvbio::bad_alloc e)
903  {
904  log_error(stderr, "caught a nvbio::bad_alloc exception:\n");
905  log_error(stderr, " %s\n", e.what());
906  }
907  catch (nvbio::logic_error e)
908  {
909  log_error(stderr, "caught a nvbio::logic_error exception:\n");
910  log_error(stderr, " %s\n", e.what());
911  }
912  catch (nvbio::runtime_error e)
913  {
914  log_error(stderr, "caught a nvbio::runtime_error exception:\n");
915  log_error(stderr, " %s\n", e.what());
916  }
917  catch (std::bad_alloc e)
918  {
919  log_error(stderr, "caught a std::bad_alloc exception:\n");
920  log_error(stderr, " %s\n", e.what());
921  }
922  catch (std::logic_error e)
923  {
924  log_error(stderr, "caught a std::logic_error exception:\n");
925  log_error(stderr, " %s\n", e.what());
926  }
927  catch (std::runtime_error e)
928  {
929  log_error(stderr, "caught a std::runtime_error exception:\n");
930  log_error(stderr, " %s\n", e.what());
931  }
932  catch (...)
933  {
934  log_error(stderr, "caught an unknown exception!\n");
935  }
936 
937  return 0;
938 }
939