// Index type for an input lane; its width comes from log2_inputs.
// (NVUINTW is presumably an unsigned integer of the given bit width -- confirm.)
95 typedef NVUINTW(log2_inputs) InputIdx;
// Index type for an output lane / destination; its width comes from log2_outputs.
96 typedef NVUINTW(log2_outputs) OutputIdx;
// Integer type wide enough to hold one DataType payload plus one OutputIdx:
// Wrapped<DataType>::width + log2_outputs bits.
98 typedef NVUINTW(Wrapped<DataType>::width + log2_outputs) DataDestType;
// NOTE(review): the embedded numbering jumps from 98 to 108, so the declaration
// that the next two members belong to is not visible in this view -- presumably
// the DataDest container used by push()/xbar()/run(); confirm against the full file.
// Bit width of the packed payload-plus-destination word (same expression as
// the width used for DataDestType above).
108 static const int width = Wrapped<DataType>::width + log2_outputs;
// Packed word written by update_data_dest() and decoded by extract_data_dest().
110 DataDestType data_dest;
112 void update_data_dest() {
113 data_dest =
static_cast<DataDestType
> (data) + (
static_cast<DataDestType
> (dest) << Wrapped<DataType>::width);
116 void extract_data_dest() {
117 data =
static_cast<DataType
> (data_dest);
118 dest =
static_cast<OutputIdx
> (data_dest >> Wrapped<DataType>::width);
// Marshalling hook: takes a Marshaller<Size> by reference.
// NOTE(review): the embedded numbering jumps from 122 to 147, so the body of
// this function is not visible in this view; which members it marshals (and in
// what order) cannot be confirmed from here.
121 template <
unsigned int Size>
122 void Marshall(Marshaller<Size>& m) {
// NOTE(review): the signature enclosing these two loops is not visible (the
// embedded numbering jumps from 122 to 147). Judging by the calls below this is
// presumably the body of a reset routine -- confirm against the full file.
// First loop: runs NumInputs times and resets the input queues.
147#pragma hls_unroll yes
148 for (
unsigned in = 0; in < NumInputs; in++) {
// The call does not use the loop index `in`; the same reset() is issued on
// every iteration.
149 input_queues.reset();
// Second loop: runs NumOutputs times, resetting the output queues and the
// arbiter belonging to each output.
151#pragma hls_unroll yes
152 for (
unsigned out = 0; out < NumOutputs; out++) {
// As above, this call does not depend on `out`.
153 output_queues.reset();
// One arbiter per output, reset individually.
154 arbiters[out].reset();
159 bool isInputEmpty(InputIdx index) {
160 NVHLS_ASSERT_MSG(index <= NumInputs,
"Input index greater than number of inputs");
161 return input_queues.isEmpty(index);
164 bool isOutputEmpty(OutputIdx index) {
165 NVHLS_ASSERT_MSG(index <= NumOutputs,
"Output index greater than number of outputs");
166 return output_queues.isEmpty(index);
169 bool isInputFull(InputIdx index) {
170 NVHLS_ASSERT_MSG(index <= NumInputs,
"Input index greater than number of inputs");
171 return input_queues.isFull(index);
174 bool isOutputFull(OutputIdx index) {
175 NVHLS_ASSERT_MSG(index <= NumOutputs,
"Output index greater than number of outputs");
176 return output_queues.isFull(index);
// Push an entry onto the input queue for lane `src`.
// NOTE(review): the embedded numbering jumps from 180 to 185 and skips 186, so
// the declaration of `tmp` and whatever copies `data`/`dest` into it are not
// visible here. The two input_queues.push() calls below are presumably
// alternatives selected by conditional compilation rather than both executed --
// confirm against the full file.
180 void push(DataType data, InputIdx src, OutputIdx dest) {
// Variant 1: push the `tmp` object itself onto lane `src`.
185 input_queues.push(tmp, src);
// Variant 2: have `tmp` build its packed data_dest word, copy that word into
// a local of type DataDestType, and push the packed word onto lane `src`.
187 tmp.update_data_dest();
188 DataDestType tmp_data_dest;
189 tmp_data_dest = tmp.data_dest;
190 input_queues.push(tmp_data_dest, src);
194 DataType peek(OutputIdx index) {
return output_queues.peek(index); }
197 DataType pop(OutputIdx index) {
return output_queues.pop(index); }
// One arbitration pass: for every ready output, pick at most one requesting
// input and route that input's data to the output.
//   input_data     (read)    candidate entry per input; .dest selects the output, .data is the payload
//   input_valid    (read)    whether input_data[in] carries a request
//   input_consumed (written) per input, the corresponding bit of the accumulated grants
//   data_out       (written) per output, payload of the granted input
//   valid_out      (written) per output, true only when a grant was issued for it
//   output_ready   (read)    outputs that are not ready are skipped by the arbitration step
//   source         (written) per output, index of the granted input
// NOTE(review): closing braces and some intermediate lines are missing from
// this view (the embedded numbering has gaps), so the nesting described in the
// comments below is inferred from statement order -- confirm against the full file.
202 void xbar(DataDest input_data[NumInputs],
bool input_valid[NumInputs],
203 bool input_consumed[NumInputs], DataType data_out[NumOutputs],
204 bool valid_out[NumOutputs],
bool output_ready[NumOutputs], InputIdx source[NumOutputs]) {
// Local staging copies of the per-output results; copied to the caller's
// arrays in the final loops.
207 DataType data_out_tmp[NumOutputs];
208 InputIdx source_tmp[NumOutputs];
// One bit per input; accumulates every grant issued in this pass.
210 NVUINTW(NumInputs) input_consumed_tmp = 0;
211 bool valid_out_tmp[NumOutputs];
// requests[out]: one bit per input, for each output.
219 NVUINTW(NumInputs) requests[NumOutputs];
// requests_transpose[in]: one bit per output, for each input.
221 NVUINTW(NumOutputs) requests_transpose[NumInputs];
// Build each input's request mask.
223#pragma hls_unroll yes
224 for (
unsigned in = 0; in < NumInputs; in++) {
// Start from the valid flag (0 or 1) and shift it left by the entry's
// destination index: the mask has a single bit set at position `dest` when the
// input is valid and is zero otherwise. (Despite its name, `empty` holds the
// request mask.)
228 NVUINTW(NumOutputs) empty = input_valid[in];
229 empty <<= input_data[in].dest;
230 requests_transpose[in] = empty;
// Presumably converts the per-input masks into per-output masks (requests);
// the helper is defined elsewhere -- confirm.
234 transpose<NumInputs,NumOutputs>(requests_transpose, requests);
// Clear every bit of the consumed mask.
243#pragma hls_unroll yes
244 for (
unsigned in = 0; in < NumInputs; in++) {
245 input_consumed_tmp[in] =
false;
// Arbitrate each output independently.
249#pragma hls_unroll yes
250 for (
unsigned out = 0; out < NumOutputs; out++) {
// Default: nothing delivered to this output.
// NOTE(review): no visible line gives data_out_tmp[out] / source_tmp[out] a
// value when no grant is issued; numbered lines 252-253 are missing from this
// view, so an initialisation may exist there -- verify.
251 valid_out_tmp[out] =
false;
// Only ready outputs take part in arbitration.
254 if (output_ready[out]) {
255 NVUINTW(NumInputs) one_hot_grant = 0;
256 InputIdx source_local;
// Ask this output's arbiter to choose among the requesting inputs; the result
// is presumably a one-hot mask over inputs (zero when nothing was requested).
// arbiters[out] is passed for both trailing arguments.
264 one_hot_grant = arbiter_pick<NumInputs>(requests[out], arbiters[out], arbiters[out]);
// Presumably converts the one-hot grant into a binary input index in source_local.
266 one_hot_to_bin<NumInputs, log2_inputs>(one_hot_grant, source_local);
// Record the grant in the accumulated consumed mask.
268 input_consumed_tmp |= one_hot_grant;
// A non-zero grant means an input was selected for this output.
270 if(!(one_hot_grant == 0)) {
271 data_out_tmp[out] = input_data[source_local].data;
272 valid_out_tmp[out] =
true;
273 source_tmp[out] = source_local;
// Copy the staged per-output results to the caller's arrays.
277#pragma hls_unroll yes
278 for(
unsigned int k=0; k < NumOutputs; k++) {
279 data_out[k] = data_out_tmp[k];
280 source[k] = source_tmp[k];
281 valid_out[k] = valid_out_tmp[k];
// Expand the consumed bit mask into the caller's per-input bool array.
283#pragma hls_unroll yes
284 for(
unsigned int k=0; k < NumInputs; k++) {
285 input_consumed[k] = input_consumed_tmp[k];
290 bool isAllInputEmpty() {
291 bool fifo_empty_internal[NumInputs + 1];
292 fifo_empty_internal[0] =
true;
293#pragma hls_unroll yes
294 for (
unsigned i = 0; i < NumInputs; i++) {
295 fifo_empty_internal[i + 1] = (isInputEmpty(i)) & fifo_empty_internal[i];
297 return fifo_empty_internal[NumInputs];
300 bool isAllOutputEmpty() {
301 bool fifo_empty_internal[NumOutputs + 1];
302 fifo_empty_internal[0] =
true;
303#pragma hls_unroll yes
304 for (
unsigned i = 0; i < NumOutputs; i++) {
305 fifo_empty_internal[i + 1] = (isOutputEmpty(i)) & fifo_empty_internal[i];
307 return fifo_empty_internal[NumOutputs];
310 bool isAllInputReady() {
311 bool fifo_ready_internal[NumInputs + 1];
312 fifo_ready_internal[0] =
true;
313#pragma hls_unroll yes
314 for (
unsigned i = 0; i < NumInputs; i++) {
315 fifo_ready_internal[i + 1] = (!isInputFull(i)) & fifo_ready_internal[i];
317 return fifo_ready_internal[NumInputs];
// Pop the output queues, visiting each of the NumOutputs lanes in turn.
// NOTE(review): `valid_out` is not referenced by any visible line, and the
// embedded numbering skips 324 between the loop header and the pop -- presumably
// a per-lane guard on valid_out[i] sat there; confirm against the full file.
321 void pop_all_lanes(
bool valid_out[NumOutputs]) {
322#pragma hls_unroll yes
323 for (
unsigned i = 0; i < NumOutputs; i++) {
// The popped value is discarded.
325 output_queues.pop(i);
// One step of the crossbar: take in new inputs, arbitrate with xbar(), then
// present the results on the outputs.
//   data_in / dest_in / valid_in (read)    per-input payload, destination output and valid flag
//   data_out / valid_out         (written) per-output payload and valid flag
//   ready                        (written) per-input flag reported back to the caller
//   source                       (written) passed to xbar(), which records the granted input per output
// NOTE(review): this view is missing lines (the embedded numbering has gaps):
// the end of the parameter list, every closing brace, what are presumably the
// `else` branches of the LenInputBuffer / LenOutputBuffer tests, and what look
// like preprocessor alternatives. The comments below describe the visible
// statements; the branch structure is inferred -- confirm against the full file.
344 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
345 bool valid_in[NumInputs], DataType data_out[NumOutputs],
346 bool valid_out[NumOutputs],
bool ready[NumInputs], InputIdx source[NumOutputs]
// Local copies of the destination and valid inputs.
350 OutputIdx destin_tmp[NumInputs];
351 bool valid_in_tmp[NumInputs];
353#pragma hls_unroll yes
354 for (
unsigned in = 0; in < NumInputs; in++) {
355 destin_tmp[in] = dest_in[in];
356 valid_in_tmp[in] = valid_in[in];
// Per-input arrays handed to xbar(); every input_data entry is first built
// from the value 0 via BitsToType.
359 DataDest input_data[NumInputs];
360 bool input_valid[NumInputs];
361 bool input_consumed[NumInputs];
362#pragma hls_unroll yes
363 for (
unsigned i = 0; i < NumInputs; i++) {
364 input_data[i] = BitsToType<DataDest>(0);
// Per-output arrays handed to xbar(); every output_data entry is likewise
// built from the value 0.
366 DataType output_data[NumOutputs];
367 bool output_valid[NumOutputs];
368 bool output_ready[NumOutputs];
369#pragma hls_unroll yes
370 for (
unsigned i = 0; i < NumOutputs; i++) {
371 output_data[i] = BitsToType<DataType>(0);
// Input stage when input buffering is configured.
374 if (LenInputBuffer > 0) {
376#pragma hls_unroll yes
377 for (
unsigned in = 0; in < NumInputs; in++) {
// Ready unless the lane's queue is full while a valid entry is being offered.
378 ready[in] = !isInputFull(in) || !valid_in_tmp[in];
// Accept the new entry only when it is valid and the queue has room.
379 if (!isInputFull(in) & valid_in_tmp[in]) {
381 push(data_in[in], in, destin_tmp[in]);
// The lane requests arbitration whenever its queue is non-empty; the head
// entry is peeked here, not popped.
383 input_valid[in] = !isInputEmpty(in);
384 if (input_valid[in]) {
// NOTE(review): the next statement and the two after it both fill
// input_data[in] from the same peek; presumably they are alternatives selected
// by conditional compilation (whole entry vs. packed word that is then
// decoded) -- confirm.
386 input_data[in] = input_queues.peek(in);
388 input_data[in].data_dest = input_queues.peek(in);
389 input_data[in].extract_data_dest();
// Presumably the unbuffered-input alternative (numbering jumps 389 -> 395):
// feed the caller's data, destination and valid flag straight to arbitration.
395#pragma hls_unroll yes
396 for (
unsigned in = 0; in < NumInputs; in++) {
397 input_data[in].data = data_in[in];
398 input_data[in].dest = dest_in[in];
399 input_valid[in] = valid_in_tmp[in];
// NOTE(review): loop header with no visible body (numbered lines 403-404 are missing).
402 for (
unsigned in = 0; in < NumInputs; in++) {
// With output buffering, an output is ready whenever its queue is not full.
405 if (LenOutputBuffer > 0) {
406#pragma hls_unroll yes
407 for (
unsigned out = 0; out < NumOutputs; out++) {
408 output_ready[out] = !isOutputFull(out);
// Presumably the unbuffered-output alternative: every output is always ready.
411#pragma hls_unroll yes
412 for (
unsigned out = 0; out < NumOutputs; out++) {
413 output_ready[out] =
true;
// Arbitrate: fills input_consumed, output_data, output_valid and source.
418 xbar(input_data, input_valid, input_consumed, output_data, output_valid,
419 output_ready, source);
// NOTE(review): loop header with no visible body (numbered lines 421-423 are missing).
420 for (
unsigned out = 0; out < NumOutputs; out++) {
// With input buffering, advance the queue head of every input whose entry was
// granted; that entry was only peeked earlier.
424 if (LenInputBuffer > 0) {
429#pragma hls_unroll yes
430 for (
unsigned in = 0; in < NumInputs; in++) {
431 if (input_consumed[in]) {
432 input_queues.incrHead(in);
// Presumably the unbuffered-input alternative: an input is reported ready
// exactly when its entry was consumed in this pass.
436#pragma hls_unroll yes
437 for (
unsigned in = 0; in < NumInputs; in++) {
438 ready[in] = input_consumed[in];
// With output buffering, queue each granted result, then report the queue
// state on the outputs.
442 if (LenOutputBuffer > 0) {
444#pragma hls_unroll yes
445 for (
unsigned out = 0; out < NumOutputs; out++) {
446 if (output_valid[out]) {
447 output_queues.push(output_data[out], out);
// The output is valid whenever its queue holds something; data_out shows the
// head entry, read with peek().
449 valid_out[out] = !isOutputEmpty(out);
450 if (!isOutputEmpty(out)) {
451 data_out[out] = peek(out);
// NOTE(review): loop header with no visible body (numbered lines 460-462 are missing).
459 for (
unsigned out = 0; out < NumOutputs; out++) {
// Presumably the unbuffered-output alternative: forward xbar()'s results directly.
463#pragma hls_unroll yes
464 for (
unsigned out = 0; out < NumOutputs; out++) {
465 data_out[out] = output_data[out];
466 valid_out[out] = output_valid[out];
475 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
476 bool valid_in[NumInputs], DataType data_out[NumOutputs],
477 bool valid_out[NumOutputs],
bool ready[NumInputs]) {
478 InputIdx source[NumOutputs];
479 run(data_in, dest_in,
481 valid_out, ready, source);