NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
seeds_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 namespace nvbio {
31 
// Template header taking an input and an output coordinate type.
// NOTE(review): the declaration this header introduces is not present in this
// listing (the embedded numbering skips a line) - presumably the primary
// project_coords_functor template that the specializations further down refine;
// confirm against the original header.
32 template <typename in_coord_type, typename out_coord_type>
34 
// Full specialization whose call operator returns its argument unchanged.
// NOTE(review): the struct header and the argument_type / result_type typedefs
// are missing from this listing, so the coordinate types it applies to cannot
// be determined from here - confirm against the original header.
35 template <>
37 {
40 
   // identity: the input coordinate is returned as-is
42  result_type operator() (const argument_type i) const { return i; }
43 };
// Second full specialization whose call operator returns its argument unchanged.
// NOTE(review): as with the specialization just above, the struct header and
// the argument_type / result_type typedefs are missing from this listing -
// confirm the specialized types against the original header.
44 template <>
46 {
49 
   // identity: the input coordinate is returned as-is
51  result_type operator() (const argument_type i) const { return i; }
52 };
// Partial specialization for a uint32 output: the call operator returns the
// .x member of the input coordinate, so in_coord_type must expose an x member.
// NOTE(review): the result_type typedef is not visible in this listing (the
// embedded numbering skips a line) - presumably `typedef uint32 result_type;`.
53 template <typename in_coord_type>
54 struct project_coords_functor<in_coord_type,uint32>
55 {
56  typedef in_coord_type argument_type;
58 
   // keep only the first component
60  result_type operator() (const argument_type i) const { return i.x; }
61 };
// Partial specialization for a uint2 output: the call operator builds a uint2
// from the x and y members of the input coordinate, so in_coord_type must
// expose both members.
// NOTE(review): the embedded numbering skips a line just before operator();
// any qualifiers it carried are not visible here.
62 template <typename in_coord_type>
63 struct project_coords_functor<in_coord_type,uint2>
64 {
65  typedef in_coord_type argument_type;
66  typedef uint2 result_type;
67 
   // keep the first two components
69  result_type operator() (const argument_type i) const { return make_uint2( i.x, i.y ); }
70 };
// Partial specialization for a uint3 output: the call operator builds a uint3
// from the x, y and z members of the input coordinate, so in_coord_type must
// expose all three members.
// NOTE(review): the embedded numbering skips a line just before operator();
// any qualifiers it carried are not visible here.
71 template <typename in_coord_type>
72 struct project_coords_functor<in_coord_type,uint3>
73 {
74  typedef in_coord_type argument_type;
75  typedef uint3 result_type;
76 
   // keep the first three components
78  result_type operator() (const argument_type i) const { return make_uint3( i.x, i.y, i.z ); }
79 };
// Partial specialization for a uint4 output: the call operator builds a uint4
// from the x, y, z and w members of the input coordinate, so in_coord_type
// must expose all four members.
// NOTE(review): the embedded numbering skips a line just before operator();
// any qualifiers it carried are not visible here.
80 template <typename in_coord_type>
81 struct project_coords_functor<in_coord_type,uint4>
82 {
83  typedef in_coord_type argument_type;
84  typedef uint4 result_type;
85 
   // copy all four components
87  result_type operator() (const argument_type i) const { return make_uint4( i.x, i.y, i.z, i.w ); }
88 };
89 
// Convert the coordinate i to out_coord_type by applying the callable p to it.
// out_coord_type is the first template parameter, so a caller can name only
// the output type and let in_coord_type be deduced from the argument.
// NOTE(review): the declaration of p is missing from this listing (the
// embedded numbering skips a line) - presumably a
// project_coords_functor<in_coord_type,out_coord_type> instance; confirm.
92 template <typename out_coord_type, typename in_coord_type>
94 out_coord_type project_coords(const in_coord_type i)
95 {
97  return p(i);
98 }
99 
100 // A functor to return the coordinates given by a seed_functor
101 //
// It stores a string length and a seeding functor; calling it with a seed
// index forwards to seeder.seed( string_len, idx ) and converts the result to
// coord_type through project_coords.
// NOTE(review): the struct declaration line is missing from this listing (the
// embedded numbering skips a line); the name string_seed_functor is taken from
// the constructor below.
102 template <typename index_type, typename seed_functor, typename coord_type>
104 {
105  typedef index_type argument_type;
106  typedef coord_type result_type;
107 
108  // constructor
109  //
    // _string_len : length handed to the seeder on every call
    // _seeder     : seeding functor, stored by value
111  string_seed_functor(const index_type _string_len, const seed_functor _seeder) :
112  string_len(_string_len), seeder(_seeder) {}
113 
114  // return the coordinate of the i-th seed
115  //
117  coord_type operator() (const index_type idx) const
118  {
     // whatever seeder.seed() returns is projected onto coord_type
119  return project_coords<coord_type>( seeder.seed( string_len, idx ) );
120  }
121 
122  const index_type string_len;
123  const seed_functor seeder;
124 };
125 
126 // A functor to return the localized coordinates given by a seed_functor
127 //
// Given a global seed index spanning a whole string-set, the call operator
// works out which string the seed belongs to and the seed's index within that
// string, then returns the triple (string id, seed.x, seed.y) converted to
// coord_type through project_coords.
// NOTE(review): the struct declaration line, the line holding the
// argument_type typedef and the declaration of the cum_seeds member are missing
// from this listing (the embedded numbering skips them); the name
// localized_seed_functor is taken from the constructor below.
128 template <typename string_set_type, typename seed_functor, typename coord_type>
130 {
132  typedef coord_type result_type;
133 
134  // constructor
135  //
    // _string_set : the string-set being seeded, stored by value
    // _seeder     : seeding functor, stored by value
    // _cum_seeds  : pointer to string_set.size() cumulative seed counts;
    //               presumably an inclusive prefix sum of the per-string seed
    //               counts - verify against the caller
137  localized_seed_functor(const string_set_type _string_set, const seed_functor _seeder, const uint64* _cum_seeds) :
138  string_set(_string_set), seeder(_seeder), cum_seeds(_cum_seeds) {}
139 
140  // return the localized coordinate of the i-th seed
141  //
143  coord_type operator() (const uint64 global_idx) const
144  {
145  // compute the string index
     // upper_bound is expected to return a pointer into cum_seeds, so the
     // pointer difference is the index of the owning string
146  const uint32 string_id = uint32( upper_bound( global_idx, cum_seeds, string_set.size() ) - cum_seeds );
147 
148  // fetch the string length
149  const uint32 string_len = string_set[ string_id ].length();
150 
151  // compute the local string coordinate
     // seeds of string 0 start at offset 0; any other string starts where the
     // previous string's cumulative count ends
152  const uint64 base_offset = string_id ? cum_seeds[ string_id-1 ] : 0u;
     // NOTE(review): the difference is narrowed from 64 to 32 bits here -
     // assumes no single string yields more than 2^32 seeds; confirm
153  const uint32 seed_idx = uint32( global_idx - base_offset );
154 
     // the seeder's result is read as a uint2; the fourth component of the
     // assembled coordinate is always zero
155  const uint2 seed = seeder.seed( string_len, seed_idx );
156  return project_coords<coord_type>( make_uint4( string_id, seed.x, seed.y, 0u ) );
157  }
158 
159  const string_set_type string_set;
160  const seed_functor seeder;
162 };
163 
164 // extract a set of seed coordinates out of a string, according to a given seeding functor
165 //
// string_len : length of the string to seed
// seeder     : seeding functor; seeder( string_len ) gives the number of seeds
// indices    : output vector, resized to the number of seeds; its value_type
//              is the coordinate type written
// returns    : the number of seeds
// NOTE(review): the line carrying the return type and function name is missing
// from this listing (the embedded numbering skips it), as are the head of the
// call that fills `indices` and its last argument.
166 template <typename index_type, typename seed_functor, typename index_vector_type>
168  const index_type string_len,
169  const seed_functor seeder,
170  index_vector_type& indices)
171 {
172  typedef typename index_vector_type::value_type coord_type;
173 
174  // fetch the total number of output q-grams
175  const index_type n_seeds = seeder( string_len );
176 
177  // reserve enough storage
178  indices.resize( n_seeds );
179 
180  // build the list of q-gram indices
    // the visible arguments describe the index range [0, n_seeds) as input and
    // indices.begin() as output; presumably a thrust::transform applying a
    // string_seed_functor - the call head is not visible, confirm
182  thrust::make_counting_iterator<index_type>(0u),
183  thrust::make_counting_iterator<index_type>(0u) + n_seeds,
184  indices.begin(),
186 
187  return n_seeds;
188 }
189 
190 // extract a set of seed coordinates out of a string-set, according to a given seeding functor
191 //
// string_set : the string-set to seed
// seeder     : seeding functor, applied to each string's length in the scan below
// indices    : output vector, resized to the total number of seeds; its
//              system_tag also selects where the temporary cum_seeds vector lives
// returns    : the total number of seeds
// NOTE(review): the line carrying the return type and function name is missing
// from this listing (the embedded numbering skips it), as are the heads of the
// scan and transform calls and the last argument of the transform.
192 template <typename string_set_type, typename seed_functor, typename index_vector_type>
194  const string_set_type string_set,
195  const seed_functor seeder,
196  index_vector_type& indices)
197 {
198  // TODO: use some vector traits...
199  typedef typename index_vector_type::system_tag system_tag;
200  typedef typename index_vector_type::value_type coord_type;
201 
202  const uint32 n_strings = string_set.size();
203 
    // one cumulative seed count per string
204  nvbio::vector<system_tag,uint64> cum_seeds( n_strings );
205 
206  // scan the number of q-grams produced per string
    // the input range maps string index -> string length -> seeder, over
    // n_strings entries; the result is written to cum_seeds. Presumably an
    // inclusive scan, given that the last entry is read as the total below -
    // the call head is not visible, confirm
209  thrust::make_transform_iterator( thrust::make_counting_iterator<uint64>(0u), string_set_length_functor<string_set_type>( string_set ) ),
210  seeder ),
212  thrust::make_transform_iterator( thrust::make_counting_iterator<uint64>(0u), string_set_length_functor<string_set_type>( string_set ) ),
213  seeder ) + n_strings,
214  cum_seeds.begin() );
215 
216  // fetch the total number of q-grams to output
    // NOTE(review): for an empty string-set n_strings-1 wraps around (uint32)
    // and indexes an empty vector - looks like callers must guarantee
    // n_strings > 0; confirm
217  const uint64 n_seeds = cum_seeds[ n_strings-1 ];
218 
219  // reserve enough storage
220  indices.resize( n_seeds );
221 
222  // build the list of q-gram indices
    // the visible arguments describe the index range [0, n_seeds) as input and
    // indices.begin() as output; presumably a thrust::transform applying a
    // localized_seed_functor - the call head is not visible, confirm
    // NOTE(review): the counting iterator is uint32 while n_seeds is uint64 and
    // the localized functor above takes a uint64 index - indices may wrap for
    // more than 2^32 seeds; confirm
224  thrust::make_counting_iterator<uint32>(0u),
225  thrust::make_counting_iterator<uint32>(0u) + n_seeds,
226  indices.begin(),
228 
229  return n_seeds;
230 }
231 
232 } // namespace nvbio