NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
alphabet.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/types.h>
32 
33 namespace nvbio {
34 
37 
64 
/// The alphabets for which an AlphabetTraits specialization is provided.
///
/// NOTE(review): the line introducing this enum was absent from the listing
/// this file was recovered from; the name `Alphabet` is taken from its use as
/// a non-type template parameter (`template <Alphabet ALPHABET>`) just below.
///
enum Alphabet
{
    DNA       = 0u,     ///< 4-symbol DNA alphabet: A,C,G,T
    DNA_N     = 1u,     ///< 5-symbol DNA alphabet: A,C,G,T,N
    DNA_IUPAC = 2u,     ///< 16-symbol DNA alphabet: =,A,C,M,G,R,S,V,T,W,Y,H,K,D,B,N
    PROTEIN   = 3u,     ///< 24-symbol protein alphabet
    RNA       = 4u,     ///< 4-symbol RNA alphabet: A,C,G,U
    RNA_N     = 5u,     ///< 5-symbol RNA alphabet: A,C,G,U,N
    ASCII     = 6u      ///< 256-symbol ASCII alphabet
};
78 
81 template <Alphabet ALPHABET> struct AlphabetTraits {};
82 
85 template <> struct AlphabetTraits<DNA>
86 {
87  static const uint32 SYMBOL_SIZE = 2;
88  static const uint32 SYMBOL_COUNT = 4;
89 
90  enum {
91  A = 0,
92  C = 1,
93  G = 2,
94  T = 3,
95  };
96 };
99 template <> struct AlphabetTraits<DNA_N>
100 {
101  static const uint32 SYMBOL_SIZE = 4;
102  static const uint32 SYMBOL_COUNT = 5;
103 
104  enum {
105  A = 0,
106  C = 1,
107  G = 2,
108  T = 3,
109  N = 4,
110  };
111 };
114 template <> struct AlphabetTraits<DNA_IUPAC>
115 {
116  static const uint32 SYMBOL_SIZE = 4;
117  static const uint32 SYMBOL_COUNT = 16;
118 
119  enum {
120  EQUAL = 0,
121  A = 1,
122  C = 2,
123  M = 3,
124  G = 4,
125  R = 5,
126  S = 6,
127  V = 7,
128  T = 8,
129  W = 9,
130  Y = 10,
131  H = 11,
132  K = 12,
133  D = 13,
134  B = 14,
135  N = 15,
136  };
137 };
140 template <> struct AlphabetTraits<PROTEIN>
141 {
142  static const uint32 SYMBOL_SIZE = 8;
143  static const uint32 SYMBOL_COUNT = 24;
144 
145  enum {
146  A = 0,
147  C = 1,
148  D = 2,
149  E = 3,
150  F = 4,
151  G = 5,
152  H = 6,
153  I = 7,
154  K = 8,
155  L = 9,
156  M = 10,
157  N = 11,
158  O = 12,
159  P = 13,
160  Q = 14,
161  R = 15,
162  S = 16,
163  T = 17,
164  V = 18,
165  W = 19,
166  Y = 20,
167  B = 21,
168  Z = 22,
169  X = 23,
170  };
171 };
174 template <> struct AlphabetTraits<RNA>
175 {
176  static const uint32 SYMBOL_SIZE = 2;
177  static const uint32 SYMBOL_COUNT = 4;
178 
179  enum {
180  A = 0,
181  C = 1,
182  G = 2,
183  U = 3,
184  };
185 };
188 template <> struct AlphabetTraits<RNA_N>
189 {
190  static const uint32 SYMBOL_SIZE = 4;
191  static const uint32 SYMBOL_COUNT = 5;
192 
193  enum {
194  A = 0,
195  C = 1,
196  G = 2,
197  U = 3,
198  N = 4,
199  };
200 };
203 template <> struct AlphabetTraits<ASCII>
204 {
205  static const uint32 SYMBOL_SIZE = 8;
206  static const uint32 SYMBOL_COUNT = 256;
207 };
208 
// NOTE(review): the lines preceding this opening brace (the function's
// signature, taking a parameter named `alphabet`) are absent from this listing;
// recover them from the original header before relying on this block.
//
// Run-time lookup of a per-alphabet size: for every listed alphabet the value
// returned equals that alphabet's AlphabetTraits<...>::SYMBOL_SIZE.
213 {
214  return alphabet == DNA ? 2 :
215  alphabet == DNA_N ? 4 :
216  alphabet == DNA_IUPAC ? 4 :
217  alphabet == PROTEIN ? 8 :
218  alphabet == RNA ? 2 :
219  alphabet == RNA_N ? 4 :
220  alphabet == ASCII ? 8 :
221  8u; // fallback for any value not matched above
222 }
223 
// NOTE(review): the declarations introduced by the two template headers below
// are missing from this listing. to_char<ALPHABET>() and from_char<ALPHABET>()
// are called by the functors further down, so these are presumably their
// declarations - confirm against the original header.
226 template <Alphabet ALPHABET>
228 
231 template <Alphabet ALPHABET>
233 
// NOTE(review): the line holding this declaration's return type and name
// (between the template header and the parameter list) is missing from this
// listing. Judging by the parameters it presumably converts `n` symbols
// starting at `begin` into characters written to `string` - TODO confirm.
236 template <Alphabet ALPHABET, typename SymbolIterator>
238  const SymbolIterator begin,
239  const uint32 n,
240  char* string);
241 
// NOTE(review): the line holding this declaration's return type and name
// (between the template header and the parameter list) is missing from this
// listing. Judging by the parameters it presumably converts the symbols in
// [begin,end) into characters written to `string` - TODO confirm.
244 template <Alphabet ALPHABET, typename SymbolIterator>
246  const SymbolIterator begin,
247  const SymbolIterator end,
248  char* string);
249 
// NOTE(review): the line holding this declaration's return type and name
// (between the template header and the parameter list) is missing from this
// listing. Judging by the parameters it presumably converts the characters in
// [begin,end) into symbols written through `symbols` - TODO confirm.
252 template <Alphabet ALPHABET, typename SymbolIterator>
254  const char* begin,
255  const char* end,
256  SymbolIterator symbols);
257 
// NOTE(review): the line holding this declaration's return type and name
// (between the template header and the parameter list) is missing from this
// listing. It takes only a start pointer and an output iterator, so the end of
// the input is presumably found from the string itself (e.g. a terminating
// NUL) - TODO confirm against the original header.
260 template <Alphabet ALPHABET, typename SymbolIterator>
262  const char* begin,
263  SymbolIterator symbols);
264 
// Function object whose call operator maps a uint8 symbol to a char by
// forwarding to to_char<ALPHABET>().
//
// NOTE(review): the line naming this type and one line after the opening brace
// are missing from this listing. The return type of to_string(Iterator) further
// down refers to to_char_functor<ALPHABET>, which is presumably this type -
// TODO confirm against the original header.
267 template <Alphabet ALPHABET>
269 {
271  typedef char result_type;
272 
// convert a single symbol
275  NVBIO_FORCEINLINE NVBIO_HOST_DEVICE char operator() (const uint8 c) const { return to_char<ALPHABET>( c ); }
276 };
277 
// Function object whose call operator maps a char to a uint8 symbol by
// forwarding to from_char<ALPHABET>().
//
// NOTE(review): the line naming this type and one line after the first typedef
// are missing from this listing. The return type of from_string(Iterator)
// further down refers to from_char_functor<ALPHABET>, which is presumably this
// type - TODO confirm against the original header.
280 template <Alphabet ALPHABET>
282 {
283  typedef char argument_type;
285 
// convert a single character
288  NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint8 operator() (const char c) const { return from_char<ALPHABET>( c ); }
289 };
290 
// Given an iterator, returns a transform_iterator over it whose functor type is
// to_char_functor<ALPHABET> (as stated by the return type).
//
// NOTE(review): the statement inside the body, and one line between the
// template header and the return type, are missing from this listing; as
// shown the function has no return statement. Restore them from the original
// header.
293 template <Alphabet ALPHABET, typename Iterator>
295 transform_iterator< Iterator, to_char_functor<ALPHABET> >
296 to_string(Iterator it)
297 {
299 }
300 
// Given an iterator, returns a transform_iterator over it whose functor type is
// from_char_functor<ALPHABET> (as stated by the return type).
//
// NOTE(review): the statement inside the body, and one line between the
// template header and the return type, are missing from this listing; as
// shown the function has no return statement. Restore them from the original
// header.
303 template <Alphabet ALPHABET, typename Iterator>
305 transform_iterator< Iterator, from_char_functor<ALPHABET> >
306 from_string(Iterator it)
307 {
309 }
310 
313 
314 } // namespace nvbio
315