simd_inl.h
/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

namespace nvbio {

// component-wise constructor: pack one uint4 component into each byte lane
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8::simd4u8(const uint4 v)
{
    m  = v.x;
    m |= (v.y << 8);
    m |= (v.z << 16);
    m |= (v.w << 24);
}

// broadcast constructor: replicate the byte v into all four lanes
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8::simd4u8(const uint8 v)
{
    m  = uint32(v);
    m |= (uint32(v) << 8);
    m |= (uint32(v) << 16);
    m |= (uint32(v) << 24);
}

// per-lane constructor: pack (v1, v2, v3, v4) into lanes 0..3
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8::simd4u8(const uint8 v1, const uint8 v2, const uint8 v3, const uint8 v4)
{
    m  = uint32(v1);
    m |= (uint32(v2) << 8);
    m |= (uint32(v3) << 16);
    m |= (uint32(v4) << 24);
}

// assignment from a uint4, one component per byte lane
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8& simd4u8::operator= (const uint4 v)
{
    m  = v.x;
    m |= (v.y << 8);
    m |= (v.z << 16);
    m |= (v.w << 24);
    return *this;
}

// assignment from a uchar4, one component per byte lane
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8& simd4u8::operator= (const uchar4 v)
{
    m  = uint32(v.x);
    m |= (uint32(v.y) << 8);
    m |= (uint32(v.z) << 16);
    m |= (uint32(v.w) << 24);
    return *this;
}

// per-byte equality: lanes that compare equal are set to 0xFF, all others to 0x00
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator== (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vcmpeq4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        get<0>(op1) == get<0>(op2) ? 0xFFu : 0u,
        get<1>(op1) == get<1>(op2) ? 0xFFu : 0u,
        get<2>(op1) == get<2>(op2) ? 0xFFu : 0u,
        get<3>(op1) == get<3>(op2) ? 0xFFu : 0u );
#endif
}

// per-byte inequality: lanes that differ are set to 0xFF, all others to 0x00
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator!= (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vcmpne4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        get<0>(op1) != get<0>(op2) ? 0xFFu : 0u,
        get<1>(op1) != get<1>(op2) ? 0xFFu : 0u,
        get<2>(op1) != get<2>(op2) ? 0xFFu : 0u,
        get<3>(op1) != get<3>(op2) ? 0xFFu : 0u );
#endif
}
// per-byte unsigned greater-or-equal: true lanes are set to 0xFF, all others to 0x00
// (the host fallback previously used <=, which contradicted vcmpgeu4; fixed to >=)
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator>= (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vcmpgeu4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        get<0>(op1) >= get<0>(op2) ? 0xFFu : 0u,
        get<1>(op1) >= get<1>(op2) ? 0xFFu : 0u,
        get<2>(op1) >= get<2>(op2) ? 0xFFu : 0u,
        get<3>(op1) >= get<3>(op2) ? 0xFFu : 0u );
#endif
}
// per-byte unsigned greater-than: true lanes are set to 0xFF, all others to 0x00
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator> (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vcmpgtu4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        get<0>(op1) > get<0>(op2) ? 0xFFu : 0u,
        get<1>(op1) > get<1>(op2) ? 0xFFu : 0u,
        get<2>(op1) > get<2>(op2) ? 0xFFu : 0u,
        get<3>(op1) > get<3>(op2) ? 0xFFu : 0u );
#endif
}

// per-byte unsigned less-or-equal: true lanes are set to 0xFF, all others to 0x00
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator<= (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vcmpleu4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        get<0>(op1) <= get<0>(op2) ? 0xFFu : 0u,
        get<1>(op1) <= get<1>(op2) ? 0xFFu : 0u,
        get<2>(op1) <= get<2>(op2) ? 0xFFu : 0u,
        get<3>(op1) <= get<3>(op2) ? 0xFFu : 0u );
#endif
}

// per-byte unsigned less-than: true lanes are set to 0xFF, all others to 0x00
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator< (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vcmpltu4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        get<0>(op1) < get<0>(op2) ? 0xFFu : 0u,
        get<1>(op1) < get<1>(op2) ? 0xFFu : 0u,
        get<2>(op1) < get<2>(op2) ? 0xFFu : 0u,
        get<3>(op1) < get<3>(op2) ? 0xFFu : 0u );
#endif
}
// per-byte unsigned addition, with wrap-around
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator+ (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vadd4( op1.m, op2.m ), simd4u8::base_rep_tag() ); // per-byte (un)signed addition, with wrap-around: a + b
#else
    return simd4u8(
        get<0>(op1) + get<0>(op2),
        get<1>(op1) + get<1>(op2),
        get<2>(op1) + get<2>(op2),
        get<3>(op1) + get<3>(op2) );
#endif
}
// in-place per-byte unsigned addition, with wrap-around
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8& operator+= (simd4u8& op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    op1.m = vadd4( op1.m, op2.m ); // per-byte (un)signed addition, with wrap-around: a + b
#else
    op1 = simd4u8(
        get<0>(op1) + get<0>(op2),
        get<1>(op1) + get<1>(op2),
        get<2>(op1) + get<2>(op2),
        get<3>(op1) + get<3>(op2) );
#endif
    return op1;
}
// per-byte unsigned subtraction, with wrap-around
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 operator- (const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vsub4( op1.m, op2.m ), simd4u8::base_rep_tag() ); // per-byte (un)signed subtraction, with wrap-around: a - b
#else
    return simd4u8(
        get<0>(op1) - get<0>(op2),
        get<1>(op1) - get<1>(op2),
        get<2>(op1) - get<2>(op2),
        get<3>(op1) - get<3>(op2) );
#endif
}
// in-place per-byte unsigned subtraction, with wrap-around
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8& operator-= (simd4u8& op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    op1.m = vsub4( op1.m, op2.m ); // per-byte (un)signed subtraction, with wrap-around: a - b
#else
    op1 = simd4u8(
        get<0>(op1) - get<0>(op2),
        get<1>(op1) - get<1>(op2),
        get<2>(op1) - get<2>(op2),
        get<3>(op1) - get<3>(op2) );
#endif
    return op1;
}

// per-byte unsigned maximum
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 max(const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vmaxu4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        nvbio::max( get<0>(op1), get<0>(op2) ),
        nvbio::max( get<1>(op1), get<1>(op2) ),
        nvbio::max( get<2>(op1), get<2>(op2) ),
        nvbio::max( get<3>(op1), get<3>(op2) ) );
#endif
}

// per-byte unsigned minimum
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 min(const simd4u8 op1, const simd4u8 op2)
{
#if defined(NVBIO_DEVICE_COMPILATION)
    return simd4u8( vminu4( op1.m, op2.m ), simd4u8::base_rep_tag() );
#else
    return simd4u8(
        nvbio::min( get<0>(op1), get<0>(op2) ),
        nvbio::min( get<1>(op1), get<1>(op2) ),
        nvbio::min( get<2>(op1), get<2>(op2) ),
        nvbio::min( get<3>(op1), get<3>(op2) ) );
#endif
}

// bitwise AND of the two packed representations (also usable as a per-byte mask)
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 and_op(const simd4u8 op1, const simd4u8 op2)
{
    return simd4u8( op1.m & op2.m, simd4u8::base_rep_tag() );
}

// bitwise OR of the two packed representations
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 or_op(const simd4u8 op1, const simd4u8 op2)
{
    return simd4u8( op1.m | op2.m, simd4u8::base_rep_tag() );
}

// extract lane I (0 <= I <= 3) as a byte value
template <uint32 I>
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
uint8 get(const simd4u8 op)
{
    return (op.m >> (I*8)) & 255u;
}

// overwrite lane I (0 <= I <= 3) with the byte value v; op must be taken
// by reference, otherwise the write would only affect a local copy
template <uint32 I>
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
void set(simd4u8& op, const uint8 v)
{
    op.m &= ~(255u << (I*8));
    op.m |= (uint32(v) << (I*8));
}
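
// A small usage sketch (an added illustration, not part of the original file):
// lane access is a plain shift/mask round-trip, e.g. writing then reading lane 2:
//
//   simd4u8 v( uint8(0) );   // all four lanes start at zero
//   set<2>( v, 42u );        // v.m now equals 42u << 16
//   // get<2>( v ) == 42u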

// per-byte select: for each lane, pick the byte from op1 where the mask byte
// is 0xFF and the byte from op2 where it is 0x00, i.e. the masks produced by
// the comparison operators above
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 ternary_op(const simd4u8 mask, const simd4u8 op1, const simd4u8 op2)
{
    return or_op( and_op( mask, op1 ), and_op( ~mask, op2 ) );
}
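
// A usage sketch (an added illustration; saturated_sub is a hypothetical
// helper, not part of the NVBIO API): combining a comparison mask with
// ternary_op yields per-byte saturated subtraction, max(op1 - op2, 0).
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
simd4u8 saturated_sub(const simd4u8 op1, const simd4u8 op2)
{
    // lanes where op1 > op2 keep the wrapped difference;
    // all other lanes are forced to zero by the select
    return ternary_op( op1 > op2, op1 - op2, simd4u8( uint8(0) ) );
}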

} // namespace nvbio
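
// A minimal host-side usage sketch (an added illustration, not part of the
// original file; make_uint4 is the CUDA vector helper). It exercises
// construction, broadcast, per-byte max/add and lane extraction; note the
// wrap-around in lane 3, where 250 + 10 == 260 wraps to 4:
//
//   const nvbio::simd4u8 a( make_uint4( 1u, 200u, 3u, 250u ) ); // lanes {1,200,3,250}
//   const nvbio::simd4u8 b( nvbio::uint8(10) );                 // broadcast {10,10,10,10}
//   const nvbio::simd4u8 c = nvbio::max( a, b ) + b;            // lanes {20,210,20,4}
//   // nvbio::get<0>( c ) == 20u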