NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
params.cpp
Go to the documentation of this file.
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <numeric>
6 #include <functional>
7 #include <map>
8 
9 namespace nvbio {
10 namespace bowtie2 {
11 namespace cuda {
12 
13 std::map<std::string,std::string> load_options(const char* name)
14 {
15  std::map<std::string,std::string> options;
16 
17  FILE* file = fopen( name, "r" );
18  if (file == NULL)
19  {
20  log_warning( stderr, "failed opening \"%s\"\n", name );
21  return options;
22  }
23 
24  char key[1024];
25  char value[1024];
26 
27  while (fscanf( file, "%s %s", key, value ) == 2)
28  options[ key ] = std::string( value );
29 
30  fclose( file );
31 
32  return options;
33 }
34 
35 // bogus implementation of a function to check if a string is a number
36 bool is_number(const char* str, uint32 len = uint32(-1))
37 {
38  if (str[0] == '-')
39  ++str;
40 
41  for (uint32 l = 0; *str != '\0' && l < len; ++l)
42  {
43  const char c = *str; ++str;
44  if (c == '.') continue;
45  if (c >= '0' && c <= '9') continue;
46  return false;
47  }
48  return true;
49 }
50 
51 // bogus implementation of a function to check if an option is a function
52 SimpleFunc parse_function(const char* str, const SimpleFunc def)
53 {
54  if (str[1] != ',')
55  return def;
56 
57  if (!(str[0] == 'C' ||
58  str[0] == 'L' ||
59  str[0] == 'G' ||
60  str[0] == 'S'))
61  return def;
62 
63  SimpleFunc ret;
64  ret.type = (str[0] == 'C') ? SimpleFunc::LinearFunc :
65  (str[0] == 'L') ? SimpleFunc::LinearFunc :
66  (str[0] == 'G') ? SimpleFunc::LogFunc :
68 
69  std::string nums = std::string( str + 2 );
70  const size_t c = nums.find(',');
71  if (c == std::string::npos)
72  return def;
73 
74  if (is_number( nums.c_str(), (uint32)c ) == false) return def;
75  if (is_number( nums.c_str() + c + 1 ) == false) return def;
76 
77  const std::string num1 = nums.substr( 0, c );
78  const std::string num2 = std::string( nums.c_str() + c + 1 );
79 
80  ret.k = (float)atof( num1.c_str() );
81  ret.m = (float)atof( nums.c_str() + c + 1 );
82 
83  // take care of transforming constant functions in linear ones
84  if (str[0] == 'C')
85  {
86  //ret.k += ret.m;
87  ret.m = 0.0f;
88  }
89  return ret;
90 }
91 
92 template <typename options_type>
93 SimpleFunc func_option(const options_type& options, const char* name, const SimpleFunc func)
94 {
95  return (options.find( std::string(name) ) != options.end()) ?
96  parse_function( options.find(std::string(name))->second.c_str(), func ) :
97  func;
98 }
99 
100 template <typename options_type>
101 SimpleFunc func_option(const options_type& options, const char* name1, const char* name2, const SimpleFunc func)
102 {
103  return
104  (options.find( std::string(name1) ) != options.end()) ?
105  parse_function( options.find(std::string(name1))->second.c_str(), func ) :
106  (options.find( std::string(name2) ) != options.end()) ?
107  parse_function( options.find(std::string(name2))->second.c_str(), func ) :
108  func;
109 }
110 
111 void parse_options(Params& params, const std::map<std::string,std::string>& options, bool init)
112 {
113  const bool old_local = params.alignment_type == LocalAlignment;
114  const uint32 old_scoring_mode = params.scoring_mode;
115 
116  params.mode = mapping_mode( string_option(options, "mode", init ? "best" : mapping_mode( params.mode )).c_str() ); // mapping mode
117  params.scoring_mode = scoring_mode( string_option(options, "scoring", init ? "sw" : scoring_mode( params.scoring_mode )).c_str() ); // scoring mode
118  params.alignment_type = uint_option(options, "local", init ? 0u : params.alignment_type == LocalAlignment ) ? LocalAlignment : EndToEndAlignment; // local alignment
119  params.keep_stats = bool_option(options, "stats", init ? 1u : params.keep_stats); // keep stats
120  params.max_hits = uint_option(options, "max-hits", init ? 100u : params.max_hits); // too big = memory exhaustion
121  params.max_dist = uint_option(options, "max-dist", init ? 15u : params.max_dist); // must be <= MAX_BAND_LEN/2
122  params.max_effort_init = uint_option(options, "max-effort-init", init ? 15u : params.max_effort_init); // initial scoring effort limit
123  params.max_effort = uint_option(options, "max-effort", "D", init ? 15u : params.max_effort); // scoring effort limit
124  params.min_ext = uint_option(options, "min-ext", init ? 30u : params.min_ext); // min # of extensions
125  params.max_ext = uint_option(options, "max-ext", init ? 400u : params.max_ext); // max # of extensions
126  params.max_reseed = uint_option(options, "max-reseed", "R", init ? 2u : params.max_reseed); // max # of reseeding rounds
127  params.rep_seeds = uint_option(options, "rep-seeds", init ? 300u : params.rep_seeds); // reseeding threshold
128  params.allow_sub = uint_option(options, "N", init ? 0u : params.allow_sub); // allow substitution in seed
129  params.mapq_filter = uint_option(options, "mapQ-filter", "Q", init ? 0u : params.mapq_filter); // filter anything below this
130  params.report = string_option(options, "report", init ? "" : params.report.c_str()); // generate a report file
131  params.scoring_file = string_option(options, "scoring-scheme", init ? "" : params.scoring_file.c_str());
132  params.randomized = bool_option(options, "rand", init ? 1u : params.randomized); // use randomized selection
133  params.randomized =!bool_option(options, "no-rand", !params.randomized); // don't use randomized selection
134  params.top_seed = uint_option(options, "top", init ? 0u : params.top_seed); // explore top seed entirely
135  params.min_read_len = uint_option(options, "min-read-len", init ? 12u : params.min_read_len); // minimum read length
136  params.ungapped_mates = bool_option(options, "ungapped-mates", "ug", init ? 0u : params.ungapped_mates); // ungapped mate alignment
137  params.fw = !bool_option(options, "nofw", init ? false : !params.fw); // fw alignment
138  params.rc = !bool_option(options, "norc", init ? false : !params.rc); // rc alignment
139 
140  // force the all-mapping mode with the '--all|-a' option
141  if (uint_option(options, "all", "a", params.mode == AllMapping))
142  params.mode = AllMapping;
143 
144  // force Edit-Distance scoring with the '--ed' option
145  if (uint_option(options, "ed", params.scoring_mode == EditDistanceMode))
147 
148  // force Smith-Waterman scoring with the '--sw' option
149  if (uint_option(options, "sw", params.scoring_mode == SmithWatermanMode))
151 
152  const bool local = params.alignment_type == LocalAlignment;
153 
154  // set the default seeding values, or reset them if the alignment type has been changed
155  if (init || (local != old_local))
156  {
157  params.seed_len = local ? 20 : 22u;
158  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, (local ? 0.75f : 1.15f) );
159  }
160 
161  params.seed_len = uint_option(options, "seed-len", "L", params.seed_len); // no greater than 32
162  params.seed_freq = func_option( options, "seed-freq", "i", params.seed_freq ); // seed interval
163  params.subseed_len = uint_option(options, "subseed-len", init ? 0u : params.subseed_len); // no greater than 32
164 
165  params.pe_overlap = bool_option(options, "overlap", init ? true : params.pe_overlap); // paired-end overlap
166  params.pe_overlap = !bool_option(options, "no-overlap", !params.pe_overlap); // paired-end overlap
167  params.pe_dovetail = bool_option(options, "dovetail", init ? false : params.pe_dovetail); // paired-end dovetail
168  params.pe_unpaired = !bool_option(options, "no-mixed", init ? false :!params.pe_unpaired); // paired-end no-mixed
169  params.pe_discordant = !bool_option(options, "no-discordant", init ? false :!params.pe_discordant); // paired-end no-discordant
170  params.pe_discordant = bool_option(options, "discordant", params.pe_discordant); // paired-end discordant
171  params.min_frag_len = uint_option(options, "minins", "I", init ? 0u : params.min_frag_len); // paired-end minimum fragment length
172  params.max_frag_len = uint_option(options, "maxins", "X", init ? 500u : params.max_frag_len); // paired-end maximum fragment length
173 
174  // the maximum batch of reads processed in parallel
175  params.max_batch_size = uint_option(options, "batch-size", init ? 1024u : params.max_batch_size ); // maximum batch size
176  params.avg_read_length = uint_option(options, "read-length", init ? AVG_READ_LENGTH : params.avg_read_length ); // average read length
177 
178  // internal controls
179  params.scoring_window = uint_option(options, "scoring-window", init ? 32u : params.scoring_window); // scoring window size
180  params.debug.read_id = (uint32)int_option(options, "debug-read", init ? -1 : (int32)params.debug.read_id); // debug read id
181  params.debug.select = bool_option(options, "debug-select", init ? false: params.debug.select); // debug select kernel
182  params.debug.locate = bool_option(options, "debug-locate", init ? false: params.debug.locate); // debug locate kernel
183  params.debug.score = bool_option(options, "debug-score", init ? true : params.debug.score); // debug score kernel
184  params.debug.score_bad = bool_option(options, "debug-score-bad", init ? false: params.debug.score_bad); // debug score bad
185  params.debug.score_info = bool_option(options, "debug-score-info", init ? false: params.debug.score_info); // debug score info
186  params.debug.reduce = bool_option(options, "debug-reduce", init ? true : params.debug.reduce); // debug reduce kernel
187  params.debug.traceback = bool_option(options, "debug-traceback", init ? true : params.debug.traceback); // debug traceback kernel
188  params.debug.asserts = bool_option(options, "debug-asserts", init ? true : params.debug.asserts); // debug asserts
189 
190  params.persist_batch = int_option(options, "persist-batch", init ? -1 : params.persist_batch); // persist pass
191  params.persist_seeding = int_option(options, "persist-seeding", init ? -1 : params.persist_seeding); // persist pass
192  params.persist_extension = int_option(options, "persist-extension", init ? -1 : params.persist_extension); // persist pass
193  params.persist_file = string_option(options, "persist-file", init ? "" : params.persist_file.c_str() ); // persist file
194 
195  params.no_multi_hits = int_option(options, "no-multi-hits", init ? 0 : params.no_multi_hits ); // disable multi-hit selection
196 
197  params.max_effort_init = nvbio::max( params.max_effort_init, params.max_effort );
198  params.max_ext = nvbio::max( params.max_ext, params.max_effort );
199 
200  UberScoringScheme& sc = params.scoring_scheme;
201 
202  // set the default ED values, or reset them if the scoring mode has been changed
203  if (init || (params.scoring_mode != old_scoring_mode))
204  sc.ed.m_score_min = SimpleFunc( SimpleFunc::LinearFunc, -(float)params.max_dist, 0.0f );
205 
206  // set the default SW values, or reset them if the alignment type has been changed
207  if (init || (local != old_local))
208  {
209  sc.sw = local ?
212  }
213 
214  // load scoring scheme from file
215  if (params.scoring_file != "")
216  sc.sw = load_scoring_scheme( params.scoring_file.c_str(), AlignmentType( params.alignment_type ) );
217 
218  // score-min
219  sc.ed.m_score_min = func_option( options, "score-min", sc.ed.m_score_min );
220  sc.sw.m_score_min = func_option( options, "score-min", sc.sw.m_score_min );
221 
222  // match bonus
223  sc.sw.m_match.m_val = int_option( options, "ma", sc.sw.m_match.m_val );
224 
225  // mismatch penalties
226  const int2 mp = int2_option( options, "mp", make_int2( sc.sw.m_mmp.m_max_val, sc.sw.m_mmp.m_min_val ) );
227  sc.sw.m_mmp.m_max_val = mp.x;
228  sc.sw.m_mmp.m_min_val = mp.y;
229 
230  // np
231  sc.sw.m_np.m_val = int_option( options, "np", sc.sw.m_np.m_val );
232 
233  // read gaps
234  const int2 rdg = int2_option( options, "rdg", make_int2( sc.sw.m_read_gap_const, sc.sw.m_read_gap_coeff ) );
235  sc.sw.m_read_gap_const = rdg.x;
236  sc.sw.m_read_gap_coeff = rdg.y;
237 
238  // reference gaps
239  const int2 rfg = int2_option( options, "rfg", make_int2( sc.sw.m_ref_gap_const, sc.sw.m_ref_gap_coeff ) );
240  sc.sw.m_ref_gap_const = rfg.x;
241  sc.sw.m_ref_gap_coeff = rfg.y;
242 
243  // presets
244  if (params.alignment_type == EndToEndAlignment)
245  {
246  if (uint_option(options, "very-fast", 0u))
247  {
248  params.max_effort = 5u;
249  params.max_reseed = 1u;
250  params.seed_len = 22u;
251  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 0.0f, 2.5f );
252  }
253  if (uint_option(options, "fast", 0u))
254  {
255  params.max_effort = 10u;
256  params.max_reseed = 2u;
257  params.seed_len = 22u;
258  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 0.0f, 2.5f );
259  }
260  if (uint_option(options, "sensitive", 0u))
261  {
262  params.max_effort = 15u;
263  params.max_reseed = 2u;
264  params.seed_len = 22u;
265  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.15f );
266  }
267  if (uint_option(options, "very-sensitive", 0u))
268  {
269  params.max_effort = 20u;
270  params.max_reseed = 3u;
271  params.seed_len = 20u;
272  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
273  }
274 
275  if (uint_option(options, "very-fast-local", 0u))
276  {
277  if (uint_option(options, "end-to-end", 0u ) == 0)
278  {
280  params.max_effort = 5u;
281  params.max_reseed = 1u;
282  params.seed_len = 25u;
283  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 2.0f );
284  }
285  else
286  log_warning(stderr, "--very-fast-local is incompatible with --end-to-end\n");
287  }
288  if (uint_option(options, "fast-local", 0u))
289  {
290  if (uint_option(options, "end-to-end", 0u ) == 0)
291  {
293  params.max_effort = 10u;
294  params.max_reseed = 2u;
295  params.seed_len = 22u;
296  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.75f );
297  }
298  else
299  log_warning(stderr, "--fast-local is incompatible with --end-to-end\n");
300  }
301  if (uint_option(options, "sensitive-local", 0u))
302  {
303  if (uint_option(options, "end-to-end", 0u ) == 0)
304  {
306  params.max_effort = 15u;
307  params.max_reseed = 2u;
308  params.seed_len = 20u;
309  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.75f );
310  }
311  else
312  log_warning(stderr, "--sensitive-local is incompatible with --end-to-end\n");
313  }
314  if (uint_option(options, "very-sensitive-local", 0u))
315  {
316  if (uint_option(options, "end-to-end", 0u ) == 0)
317  {
319  params.max_effort = 20u;
320  params.max_reseed = 3u;
321  params.seed_len = 20u;
322  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
323  }
324  else
325  log_warning(stderr, "--very-sensitive-local is incompatible with --end-to-end\n");
326  }
327  }
328  else
329  {
330  if (uint_option(options, "very-fast", "very-fast-local", 0u))
331  {
332  params.max_effort = 5u;
333  params.max_reseed = 1u;
334  params.seed_len = 25u;
335  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 2.0f );
336  }
337  if (uint_option(options, "fast", "fast-local", 0u))
338  {
339  params.max_effort = 10u;
340  params.max_reseed = 2u;
341  params.seed_len = 22u;
342  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.75f );
343  }
344  if (uint_option(options, "sensitive", "sensitive-local", 0u))
345  {
346  params.max_effort = 15u;
347  params.max_reseed = 2u;
348  params.seed_len = 20u;
349  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.75f );
350  }
351  if (uint_option(options, "very-sensitive", "very-sensitive-local", 0u))
352  {
353  params.max_effort = 20u;
354  params.max_reseed = 3u;
355  params.seed_len = 20u;
356  params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
357  }
358  }
359 }
360 
361 } // namespace cuda
362 } // namespace bowtie2
363 } // namespace nvbio