// Index types for the crossbar ports: InputIdx is an NVUINTW sized by
// log2_inputs, OutputIdx an NVUINTW sized by log2_outputs.
86 typedef NVUINTW(log2_inputs) InputIdx;
87 typedef NVUINTW(log2_outputs) OutputIdx;
// Integer type sized to hold a DataType payload together with an output index
// (Wrapped<DataType>::width + log2_outputs).
89 typedef NVUINTW(Wrapped<DataType>::width + log2_outputs) DataDestType;
// Size of the packed payload+destination value: Wrapped<DataType>::width plus
// log2_outputs (the same expression that sizes DataDestType).
99 static const int width = Wrapped<DataType>::width + log2_outputs;
// Packed word combining data and dest; written by update_data_dest() and read
// back by extract_data_dest().
101 DataDestType data_dest;
103 void update_data_dest() {
104 data_dest =
static_cast<DataDestType
> (data) + (
static_cast<DataDestType
> (dest) << Wrapped<DataType>::width);
107 void extract_data_dest() {
108 data =
static_cast<DataType
> (data_dest);
109 dest =
static_cast<OutputIdx
> (data_dest >> Wrapped<DataType>::width);
// Marshalling hook, templated on Size; receives the Marshaller<Size> by
// reference.
// NOTE(review): the function body is not visible in this excerpt — confirm
// which members are passed to the marshaller and in what order.
112 template <
unsigned int Size>
113 void Marshall(Marshaller<Size>& m) {
// NOTE(review): the header of the function enclosing these two loops is not
// visible in this excerpt.
// First loop: calls input_queues.reset() once per input index. The call takes
// no index argument — presumably it resets every bank each time; confirm
// against the queue implementation.
138#pragma hls_unroll yes
139 for (
unsigned in = 0; in < NumInputs; in++) {
140 input_queues.reset();
// Second loop: for every output, calls output_queues.reset() (again without an
// index) and resets that output's arbiter.
142#pragma hls_unroll yes
143 for (
unsigned out = 0; out < NumOutputs; out++) {
144 output_queues.reset();
145 arbiters[out].reset();
150 bool isInputEmpty(InputIdx index) {
151 NVHLS_ASSERT_MSG(index <= NumInputs,
"Input index greater than number of inputs");
152 return input_queues.isEmpty(index);
155 bool isOutputEmpty(OutputIdx index) {
156 NVHLS_ASSERT_MSG(index <= NumOutputs,
"Output index greater than number of outputs");
157 return output_queues.isEmpty(index);
160 bool isInputFull(InputIdx index) {
161 NVHLS_ASSERT_MSG(index <= NumInputs,
"Input index greater than number of inputs");
162 return input_queues.isFull(index);
165 bool isOutputFull(OutputIdx index) {
166 NVHLS_ASSERT_MSG(index <= NumOutputs,
"Output index greater than number of outputs");
167 return output_queues.isFull(index);
// Enqueues one item on the input queue selected by `src`.
// NOTE(review): the lines that declare and fill `tmp` are not visible in this
// excerpt, and two different pushes appear below — presumably they sit in
// alternate preprocessor branches; confirm against the full file.
171 void push(DataType data, InputIdx src, OutputIdx dest) {
// Variant 1: push the `tmp` object itself.
176 input_queues.push(tmp, src);
// Variant 2: pack tmp's fields into tmp.data_dest and push the packed word.
178 tmp.update_data_dest();
179 DataDestType tmp_data_dest;
180 tmp_data_dest = tmp.data_dest;
181 input_queues.push(tmp_data_dest, src);
185 DataType peek(OutputIdx index) {
return output_queues.peek(index); }
188 DataType pop(OutputIdx index) {
return output_queues.pop(index); }
// Crossbar routing step. For every output whose output_ready flag is set, the
// output's arbiter picks among the inputs requesting it, and the picked
// input's data is routed to that output.
//   input_data / input_valid : per-input candidate item and its valid flag.
//   input_consumed           : out, per input; set from the OR of all grants.
//   data_out / valid_out     : out, per output; routed data and its valid flag.
//   output_ready             : per output; arbitration is skipped when false.
//   source                   : out, per output; index of the granted input.
// NOTE(review): several original lines are missing from this excerpt (closing
// braces among them), so the comments describe only the visible statements.
191#pragma map_to_operator [CCORE]
192#pragma ccore_type combinational
193 void xbar(DataDest input_data[NumInputs],
bool input_valid[NumInputs],
194 bool input_consumed[NumInputs], DataType data_out[NumOutputs],
195 bool valid_out[NumOutputs],
bool output_ready[NumOutputs], InputIdx source[NumOutputs]) {
// Local staging copies of the outputs; copied to the real outputs at the end.
198 DataType data_out_tmp[NumOutputs];
199 InputIdx source_tmp[NumOutputs];
// One bit per input, starts at zero.
201 NVUINTW(NumInputs) input_consumed_tmp = 0;
202 bool valid_out_tmp[NumOutputs];
// requests[out] holds one bit per input; requests_transpose[in] holds one bit
// per output.
210 NVUINTW(NumInputs) requests[NumOutputs];
212 NVUINTW(NumOutputs) requests_transpose[NumInputs];
// Build each input's request vector: the valid flag (0 or 1) shifted left by
// the item's dest, so bit `dest` is set only when the input is valid.
214#pragma hls_unroll yes
215 for (
unsigned in = 0; in < NumInputs; in++) {
219 NVUINTW(NumOutputs) empty = input_valid[in];
220 empty <<= input_data[in].dest;
221 requests_transpose[in] = empty;
// Convert the per-input request vectors into per-output request vectors.
225 transpose<NumInputs,NumOutputs>(requests_transpose, requests);
// Clear every consumed bit (the vector was already initialized to 0 above).
234#pragma hls_unroll yes
235 for (
unsigned in = 0; in < NumInputs; in++) {
236 input_consumed_tmp[in] =
false;
// Arbitrate each output independently.
240#pragma hls_unroll yes
241 for (
unsigned out = 0; out < NumOutputs; out++) {
242 valid_out_tmp[out] =
false;
245 if (output_ready[out]) {
246 NVUINTW(NumInputs) one_hot_grant = 0;
247 InputIdx source_local;
// Ask this output's arbiter for a grant and convert it to an input index.
255 one_hot_grant = arbiters[out].pick(requests[out]);
256 one_hot_to_bin<NumInputs, log2_inputs>(one_hot_grant, source_local);
// Accumulate the grant bits so the granted input is reported as consumed.
258 input_consumed_tmp |= one_hot_grant;
// A non-zero grant means some input won this output.
// NOTE(review): in the visible code data_out_tmp[out] and source_tmp[out] are
// assigned only inside this branch, yet every element is copied out below —
// confirm callers ignore data_out/source when valid_out is false.
260 if(!(one_hot_grant == 0)) {
261 data_out_tmp[out] = input_data[source_local].data;
262 valid_out_tmp[out] =
true;
263 source_tmp[out] = source_local;
// Copy the staged per-output results to the output arrays.
267#pragma hls_unroll yes
268 for(
unsigned int k=0; k < NumOutputs; k++) {
269 data_out[k] = data_out_tmp[k];
270 source[k] = source_tmp[k];
271 valid_out[k] = valid_out_tmp[k];
// Expand the consumed bit vector into the per-input bool array.
273#pragma hls_unroll yes
274 for(
unsigned int k=0; k < NumInputs; k++) {
275 input_consumed[k] = input_consumed_tmp[k];
280 bool isAllInputEmpty() {
281 bool fifo_empty_internal[NumInputs + 1];
282 fifo_empty_internal[0] =
true;
283#pragma hls_unroll yes
284 for (
unsigned i = 0; i < NumInputs; i++) {
285 fifo_empty_internal[i + 1] = (isInputEmpty(i)) & fifo_empty_internal[i];
287 return fifo_empty_internal[NumInputs];
290 bool isAllOutputEmpty() {
291 bool fifo_empty_internal[NumOutputs + 1];
292 fifo_empty_internal[0] =
true;
293#pragma hls_unroll yes
294 for (
unsigned i = 0; i < NumOutputs; i++) {
295 fifo_empty_internal[i + 1] = (isOutputEmpty(i)) & fifo_empty_internal[i];
297 return fifo_empty_internal[NumOutputs];
300 bool isAllInputReady() {
301 bool fifo_ready_internal[NumInputs + 1];
302 fifo_ready_internal[0] =
true;
303#pragma hls_unroll yes
304 for (
unsigned i = 0; i < NumInputs; i++) {
305 fifo_ready_internal[i + 1] = (!isInputFull(i)) & fifo_ready_internal[i];
307 return fifo_ready_internal[NumInputs];
// Pops entries from the output queues, iterating over all NumOutputs lanes.
// NOTE(review): valid_out is not referenced in the visible lines, and one
// original line between the loop header and the pop call is missing from this
// excerpt — presumably a guard on valid_out[i]; confirm against the full file.
311 void pop_all_lanes(
bool valid_out[NumOutputs]) {
312#pragma hls_unroll yes
313 for (
unsigned i = 0; i < NumOutputs; i++) {
315 output_queues.pop(i);
// One crossbar step: accepts inputs, routes them through xbar(), and presents
// outputs.
//   data_in / dest_in / valid_in : per-input item, destination and valid flag.
//   data_out / valid_out         : out, per-output item and valid flag.
//   ready                        : out, per-input acceptance flag.
//   source                       : out, per-output granted input index (filled
//                                  by xbar()).
// NOTE(review): many original lines (else branches, closing braces, possibly
// preprocessor directives) are missing from this excerpt; the comments cover
// only the visible statements.
334 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
335 bool valid_in[NumInputs], DataType data_out[NumOutputs],
336 bool valid_out[NumOutputs],
bool ready[NumInputs], InputIdx source[NumOutputs]
// Local copies of the destination and valid inputs.
340 OutputIdx destin_tmp[NumInputs];
341 bool valid_in_tmp[NumInputs];
343#pragma hls_unroll yes
344 for (
unsigned in = 0; in < NumInputs; in++) {
345 destin_tmp[in] = dest_in[in];
346 valid_in_tmp[in] = valid_in[in];
// Arrays handed to xbar(); input_data and output_data start from an all-zero
// bit pattern.
349 DataDest input_data[NumInputs];
350 bool input_valid[NumInputs];
351 bool input_consumed[NumInputs];
352#pragma hls_unroll yes
353 for (
unsigned i = 0; i < NumInputs; i++) {
354 input_data[i] = BitsToType<DataDest>(0);
356 DataType output_data[NumOutputs];
357 bool output_valid[NumOutputs];
358 bool output_ready[NumOutputs];
359#pragma hls_unroll yes
360 for (
unsigned i = 0; i < NumOutputs; i++) {
361 output_data[i] = BitsToType<DataType>(0);
// Buffered-input path: ready is deasserted only when the queue is full while a
// valid input is offered; a valid input is pushed when there is room; the
// queue head (if any) becomes this lane's candidate for xbar().
364 if (LenInputBuffer > 0) {
366#pragma hls_unroll yes
367 for (
unsigned in = 0; in < NumInputs; in++) {
368 ready[in] = !isInputFull(in) || !valid_in_tmp[in];
369 if (!isInputFull(in) & valid_in_tmp[in]) {
371 push(data_in[in], in, destin_tmp[in]);
373 input_valid[in] = !isInputEmpty(in);
374 if (input_valid[in]) {
// NOTE(review): two alternative ways of reading the queue head appear below
// (whole object vs. packed word plus extract_data_dest()) — presumably
// alternate preprocessor branches; confirm.
376 input_data[in] = input_queues.peek(in);
378 input_data[in].data_dest = input_queues.peek(in);
379 input_data[in].extract_data_dest();
// Path that feeds the inputs straight to xbar() without queueing (presumably
// the else branch for LenInputBuffer == 0).
385#pragma hls_unroll yes
386 for (
unsigned in = 0; in < NumInputs; in++) {
387 input_data[in].data = data_in[in];
388 input_data[in].dest = dest_in[in];
389 input_valid[in] = valid_in_tmp[in];
// NOTE(review): the body of this loop is not visible in this excerpt.
392 for (
unsigned in = 0; in < NumInputs; in++) {
// Output readiness: with output buffering an output is ready when its queue is
// not full; the second loop marks every output ready.
395 if (LenOutputBuffer > 0) {
396#pragma hls_unroll yes
397 for (
unsigned out = 0; out < NumOutputs; out++) {
398 output_ready[out] = !isOutputFull(out);
401#pragma hls_unroll yes
402 for (
unsigned out = 0; out < NumOutputs; out++) {
403 output_ready[out] =
true;
// Arbitrate and route.
408 xbar(input_data, input_valid, input_consumed, output_data, output_valid,
409 output_ready, source);
// NOTE(review): the body of this loop is not visible in this excerpt.
410 for (
unsigned out = 0; out < NumOutputs; out++) {
// With input buffering, advance the head of every queue whose item was
// consumed; the second loop instead reports input_consumed directly as ready.
414 if (LenInputBuffer > 0) {
419#pragma hls_unroll yes
420 for (
unsigned in = 0; in < NumInputs; in++) {
421 if (input_consumed[in]) {
422 input_queues.incrHead(in);
426#pragma hls_unroll yes
427 for (
unsigned in = 0; in < NumInputs; in++) {
428 ready[in] = input_consumed[in];
// With output buffering, routed items are pushed onto the output queues;
// valid_out reflects queue occupancy and data_out is read with peek().
432 if (LenOutputBuffer > 0) {
434#pragma hls_unroll yes
435 for (
unsigned out = 0; out < NumOutputs; out++) {
436 if (output_valid[out]) {
437 output_queues.push(output_data[out], out);
439 valid_out[out] = !isOutputEmpty(out);
440 if (!isOutputEmpty(out)) {
441 data_out[out] = peek(out);
// NOTE(review): the body of this loop is not visible in this excerpt.
449 for (
unsigned out = 0; out < NumOutputs; out++) {
// Pass xbar() results straight through to the outputs.
453#pragma hls_unroll yes
454 for (
unsigned out = 0; out < NumOutputs; out++) {
455 data_out[out] = output_data[out];
456 valid_out[out] = output_valid[out];
// Overload of run() without the `source` output: declares a local source array
// and forwards to the run() overload that takes one.
// NOTE(review): one line of the forwarded argument list is missing from this
// excerpt.
465 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
466 bool valid_in[NumInputs], DataType data_out[NumOutputs],
467 bool valid_out[NumOutputs],
bool ready[NumInputs]) {
468 InputIdx source[NumOutputs];
469 run(data_in, dest_in,
471 valid_out, ready, source);