16 #ifndef ARBITRATED_CROSSBAR_H
17 #define ARBITRATED_CROSSBAR_H
19 #include <nvhls_types.h>
22 #include <one_hot_to_bin.h>
23 #include <hls_globals.h>
24 #include <nvhls_marshaller.h>
25 #include <nvhls_message.h>
// Template parameters of the crossbar:
//   DataType        - payload type routed from inputs to outputs.
//   NumInputs       - number of input lanes (loops below iterate 0 .. NumInputs-1).
//   NumOutputs      - number of output lanes (loops below iterate 0 .. NumOutputs-1).
//   LenInputBuffer  - run() only uses the input queues when this is > 0.
//   LenOutputBuffer - run() only uses the output queues when this is > 0.
64 template <
typename DataType,
unsigned int NumInputs,
unsigned int NumOutputs,
65 unsigned int LenInputBuffer,
unsigned int LenOutputBuffer>
// Index types used to name an input lane and an output lane.
// NOTE(review): log2_inputs / log2_outputs are defined outside this excerpt;
// presumably they are the bit widths needed to index NumInputs / NumOutputs — confirm.
73 typedef NVUINTW(log2_inputs) InputIdx;
74 typedef NVUINTW(log2_outputs) OutputIdx;
// Integer type sized for the payload bits plus a destination index
// (Wrapped<DataType>::width + log2_outputs bits); used for the packed data_dest word.
76 typedef NVUINTW(Wrapped<DataType>::width + log2_outputs) DataDestType;
// Bit width of the packed representation: payload width plus destination index width.
86 static const int width = Wrapped<DataType>::width + log2_outputs;
// Packed (data, dest) word: written by update_data_dest(), read by extract_data_dest().
88 DataDestType data_dest;
90 void update_data_dest() {
91 data_dest =
static_cast<DataDestType
> (data) + (
static_cast<DataDestType
> (dest) << Wrapped<DataType>::width);
94 void extract_data_dest() {
95 data =
static_cast<DataType
> (data_dest);
96 dest =
static_cast<OutputIdx
> (data_dest >> Wrapped<DataType>::width);
// Marshalling hook, templated on the marshaller size; receives the Marshaller by reference.
// NOTE(review): the body is not visible in this excerpt; presumably it passes the
// data and dest fields through `m` — confirm against the full source.
99 template <
unsigned int Size>
100 void Marshall(Marshaller<Size>& m) {
// Reset fragment (the enclosing function signature is not visible in this excerpt).
// First loop: calls input_queues.reset() once per input lane.
// NOTE(review): reset() is called without a lane index, so every iteration issues
// the same call — confirm whether a single call would be sufficient.
125 #pragma hls_unroll yes
126 for (
unsigned in = 0; in < NumInputs; in++) {
127 input_queues.reset();
// Second loop: calls output_queues.reset() and resets the arbiter of each output lane.
129 #pragma hls_unroll yes
130 for (
unsigned out = 0; out < NumOutputs; out++) {
131 output_queues.reset();
132 arbiters[out].reset();
137 bool isInputEmpty(InputIdx index) {
138 NVHLS_ASSERT_MSG(index <= NumInputs,
"Input index greater than number of inputs");
139 return input_queues.isEmpty(index);
142 bool isOutputEmpty(OutputIdx index) {
143 NVHLS_ASSERT_MSG(index <= NumOutputs,
"Output index greater than number of outputs");
144 return output_queues.isEmpty(index);
147 bool isInputFull(InputIdx index) {
148 NVHLS_ASSERT_MSG(index <= NumInputs,
"Input index greater than number of inputs");
149 return input_queues.isFull(index);
152 bool isOutputFull(OutputIdx index) {
153 NVHLS_ASSERT_MSG(index <= NumOutputs,
"Output index greater than number of outputs");
154 return output_queues.isFull(index);
// Enqueues one entry on the input queue of lane `src`.
//   data - payload to enqueue.
//   src  - input lane whose queue receives the entry.
//   dest - output lane the payload is destined for.
158 void push(DataType data, InputIdx src, OutputIdx dest) {
// NOTE(review): `tmp` is declared and filled on lines not visible in this excerpt;
// presumably it is a DataDest built from `data` and `dest` — confirm.
// NOTE(review): two different pushes appear below (the struct itself vs. the packed
// word); presumably they are alternatives selected by a preprocessor conditional
// that is not visible here — confirm.
163 input_queues.push(tmp, src);
// Pack tmp.data / tmp.dest into tmp.data_dest, then enqueue the packed word.
165 tmp.update_data_dest();
166 DataDestType tmp_data_dest;
167 tmp_data_dest = tmp.data_dest;
168 input_queues.push(tmp_data_dest, src);
172 DataType peek(OutputIdx index) {
return output_queues.peek(index); }
175 DataType pop(OutputIdx index) {
return output_queues.pop(index); }
// One arbitration step of the crossbar.
// Reads:
//   input_data   - per-input (data, dest) entries.
//   input_valid  - per-input flag; only valid inputs raise a request.
//   output_ready - per-output flag; arbitration is skipped for outputs that are not ready.
// Writes:
//   input_consumed - set for every input that received a grant.
//   data_out       - payload of the granted input, for outputs that granted one.
//   valid_out      - true for outputs that granted an input, false otherwise.
//   source         - index of the granted input; only written for outputs that granted one.
178 void xbar(DataDest input_data[NumInputs],
bool input_valid[NumInputs],
179 bool input_consumed[NumInputs], DataType data_out[NumOutputs],
180 bool valid_out[NumOutputs],
bool output_ready[NumOutputs], InputIdx source[NumOutputs]) {
// requests[out] is a bit vector over inputs: bit `in` is set when input `in` requests output `out`.
188 NVUINTW(NumInputs) requests[NumOutputs];
// requests_transpose[in] is the same information organised per input (one bit per output).
190 NVUINTW(NumOutputs) requests_transpose[NumInputs];
// Build the per-input request vectors: start from the valid bit (0 or 1) and shift it
// to the position of the requested destination.
192 #pragma hls_unroll yes
193 for (
unsigned in = 0; in < NumInputs; in++) {
197 NVUINTW(NumOutputs) empty = input_valid[in];
198 empty <<= input_data[in].dest;
199 requests_transpose[in] = empty;
// Transpose into per-output request vectors, bit by bit.
203 #pragma hls_unroll yes
204 for (
unsigned out = 0; out < NumOutputs; out++) {
205 #pragma hls_unroll yes
206 for (
unsigned in = 0; in < NumInputs; in++) {
207 requests[out][in] = requests_transpose[in][out];
// Nothing is consumed until an output grants it.
218 #pragma hls_unroll yes
219 for (
unsigned in = 0; in < NumInputs; in++) {
220 input_consumed[in] =
false;
// Per-output arbitration and data forwarding.
224 #pragma hls_unroll yes
225 for (
unsigned out = 0; out < NumOutputs; out++) {
226 valid_out[out] =
false;
228 NVUINTW(NumInputs) one_hot_grant = 0;
229 InputIdx source_local;
// Only ready outputs arbitrate; otherwise one_hot_grant stays 0.
235 if (output_ready[out]) {
238 one_hot_grant = arbiters[out].pick(requests[out]);
// NOTE(review): presumably converts the one-hot grant into a binary input index — confirm.
239 one_hot_to_bin<NumInputs, log2_inputs>(one_hot_grant, source_local);
// Accumulate the consumed flags across outputs.
244 for (
unsigned in = 0; in < NumInputs; in++) {
246 input_consumed[in] = input_consumed[in] | (one_hot_grant[in] == 1);
// Forward the granted input's payload only when a grant was actually issued.
250 if ((!(one_hot_grant == 0)) && (output_ready[out])) {
251 data_out[out] = input_data[source_local].data;
252 valid_out[out] =
true;
253 source[out] = source_local;
259 bool isAllInputEmpty() {
260 bool fifo_empty_internal[NumInputs + 1];
261 fifo_empty_internal[0] =
true;
262 #pragma hls_unroll yes
263 for (
unsigned i = 0; i < NumInputs; i++) {
264 fifo_empty_internal[i + 1] = (isInputEmpty(i)) & fifo_empty_internal[i];
266 return fifo_empty_internal[NumInputs];
269 bool isAllOutputEmpty() {
270 bool fifo_empty_internal[NumOutputs + 1];
271 fifo_empty_internal[0] =
true;
272 #pragma hls_unroll yes
273 for (
unsigned i = 0; i < NumOutputs; i++) {
274 fifo_empty_internal[i + 1] = (isOutputEmpty(i)) & fifo_empty_internal[i];
276 return fifo_empty_internal[NumOutputs];
279 bool isAllInputReady() {
280 bool fifo_ready_internal[NumInputs + 1];
281 fifo_ready_internal[0] =
true;
282 #pragma hls_unroll yes
283 for (
unsigned i = 0; i < NumInputs; i++) {
284 fifo_ready_internal[i + 1] = (!isInputFull(i)) & fifo_ready_internal[i];
286 return fifo_ready_internal[NumInputs];
// Pops the output queues, iterating over all NumOutputs lanes.
//   valid_out - per-output flags.
// NOTE(review): valid_out is not referenced on the lines visible here and a line is
// missing just before the pop; presumably the pop is guarded by valid_out[i] — confirm.
290 void pop_all_lanes(
bool valid_out[NumOutputs]) {
291 #pragma hls_unroll yes
292 for (
unsigned i = 0; i < NumOutputs; i++) {
294 output_queues.pop(i);
// Top-level step of the crossbar: accepts new inputs, arbitrates through xbar(),
// and presents the outputs.
// Inputs:
//   data_in  - per-input payloads.
//   dest_in  - per-input destination output index.
//   valid_in - per-input valid flags.
// Outputs:
//   data_out  - per-output payloads.
//   valid_out - per-output valid flags.
//   ready     - per-input flags reported back to the producer (see below for both modes).
//   source    - per-output index of the granted input, as written by xbar().
// NOTE(review): several lines (closing braces, `else` branches, and apparently
// preprocessor conditionals) are not visible in this excerpt; the notes below
// describe only the statements that are shown.
313 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
314 bool valid_in[NumInputs], DataType data_out[NumOutputs],
315 bool valid_out[NumOutputs],
bool ready[NumInputs], InputIdx source[NumOutputs]
// Local copies of the destination and valid inputs.
319 OutputIdx destin_tmp[NumInputs];
320 bool valid_in_tmp[NumInputs];
322 #pragma hls_unroll yes
323 for (
unsigned in = 0; in < NumInputs; in++) {
324 destin_tmp[in] = dest_in[in];
325 valid_in_tmp[in] = valid_in[in];
// Per-input arrays handed to xbar(); input_data starts from an all-zero bit pattern.
328 DataDest input_data[NumInputs];
329 bool input_valid[NumInputs];
330 bool input_consumed[NumInputs];
331 #pragma hls_unroll yes
332 for (
unsigned i = 0; i < NumInputs; i++) {
333 input_data[i] = BitsToType<DataDest>(0);
// Per-output arrays handed to xbar(); output_data starts from an all-zero bit pattern.
335 DataType output_data[NumOutputs];
336 bool output_valid[NumOutputs];
337 bool output_ready[NumOutputs];
338 #pragma hls_unroll yes
339 for (
unsigned i = 0; i < NumOutputs; i++) {
340 output_data[i] = BitsToType<DataType>(0);
// Input side with queuing enabled.
343 if (LenInputBuffer > 0) {
345 #pragma hls_unroll yes
346 for (
unsigned in = 0; in < NumInputs; in++) {
// An input is reported ready when its queue has room or when it offered nothing.
347 ready[in] = !isInputFull(in) || !valid_in_tmp[in];
// Enqueue a valid input only when its queue is not full.
348 if (!isInputFull(in) & valid_in_tmp[in]) {
350 push(data_in[in], in, destin_tmp[in]);
// The queue head is the arbitration candidate; it is only peeked here and the
// head is advanced after xbar() if the entry was consumed.
352 input_valid[in] = !isInputEmpty(in);
353 if (input_valid[in]) {
// NOTE(review): the next statement and the two after it read the same queue head in
// two different forms; presumably they are alternatives under a preprocessor
// conditional that is not visible here — confirm.
355 input_data[in] = input_queues.peek(in);
357 input_data[in].data_dest = input_queues.peek(in);
358 input_data[in].extract_data_dest();
// Inputs are fed straight to the arbiter without queuing.
// NOTE(review): presumably the else-branch of the LenInputBuffer check above — confirm.
364 #pragma hls_unroll yes
365 for (
unsigned in = 0; in < NumInputs; in++) {
366 input_data[in].data = data_in[in];
367 input_data[in].dest = dest_in[in];
368 input_valid[in] = valid_in_tmp[in];
// NOTE(review): the body of this loop is not visible in this excerpt.
371 for (
unsigned in = 0; in < NumInputs; in++) {
// Output side with queuing enabled: an output can accept data when its queue is not full.
374 if (LenOutputBuffer > 0) {
375 #pragma hls_unroll yes
376 for (
unsigned out = 0; out < NumOutputs; out++) {
377 output_ready[out] = !isOutputFull(out);
// Every output is marked ready.
// NOTE(review): presumably the else-branch of the LenOutputBuffer check above — confirm.
380 #pragma hls_unroll yes
381 for (
unsigned out = 0; out < NumOutputs; out++) {
382 output_ready[out] =
true;
// Arbitrate and route: fills input_consumed, output_data, output_valid and source.
387 xbar(input_data, input_valid, input_consumed, output_data, output_valid,
388 output_ready, source);
// NOTE(review): the body of this loop is not visible in this excerpt.
389 for (
unsigned out = 0; out < NumOutputs; out++) {
// With input queuing: advance the head of every queue whose entry was granted.
393 if (LenInputBuffer > 0) {
398 #pragma hls_unroll yes
399 for (
unsigned in = 0; in < NumInputs; in++) {
400 if (input_consumed[in]) {
401 input_queues.incrHead(in);
// Here ready mirrors whether the input was consumed by xbar().
// NOTE(review): presumably the else-branch (no input queuing) — confirm.
405 #pragma hls_unroll yes
406 for (
unsigned in = 0; in < NumInputs; in++) {
407 ready[in] = input_consumed[in];
// With output queuing: enqueue routed data, then present the queue head.
411 if (LenOutputBuffer > 0) {
413 #pragma hls_unroll yes
414 for (
unsigned out = 0; out < NumOutputs; out++) {
415 if (output_valid[out]) {
416 output_queues.push(output_data[out], out);
// Emptiness is checked after the push above; the output is only peeked here.
418 valid_out[out] = !isOutputEmpty(out);
419 if (!isOutputEmpty(out)) {
420 data_out[out] = peek(out);
// NOTE(review): the body of this loop is not visible in this excerpt.
428 for (
unsigned out = 0; out < NumOutputs; out++) {
// The xbar() results are copied straight to the outputs.
// NOTE(review): presumably the else-branch (no output queuing) — confirm.
432 #pragma hls_unroll yes
433 for (
unsigned out = 0; out < NumOutputs; out++) {
434 data_out[out] = output_data[out];
435 valid_out[out] = output_valid[out];
// Overload of run() for callers that do not need the granted source indices:
// declares a local `source` array and forwards to the other run() overload.
// Parameters have the same meaning as in that overload.
444 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
445 bool valid_in[NumInputs], DataType data_out[NumOutputs],
446 bool valid_out[NumOutputs],
bool ready[NumInputs]) {
// Scratch array that receives the per-output source indices; it is not returned to the caller.
447 InputIdx source[NumOutputs];
// NOTE(review): one line of this argument list is not visible in this excerpt
// (presumably `valid_in, data_out,`) — confirm.
448 run(data_in, dest_in,
450 valid_out, ready, source);
Crossbar with conflict arbitration and input queuing.
void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs], bool valid_in[NumInputs], DataType data_out[NumOutputs], bool valid_out[NumOutputs], bool ready[NumInputs], InputIdx source[NumOutputs])
Top-Level function for Arbitrated Crossbar.
void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs], bool valid_in[NumInputs], DataType data_out[NumOutputs], bool valid_out[NumOutputs], bool ready[NumInputs])
Top-Level function for Arbitrated Crossbar that does not return source.
#define NVHLS_ASSERT_MSG(X, MSG)
Compute index width of a constant.