MatchLib
All Classes Namespaces Files Functions Modules Pages
arbitrated_crossbar.h
1/*
2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License")
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef ARBITRATED_CROSSBAR_H
17#define ARBITRATED_CROSSBAR_H
18
19#include <nvhls_types.h>
20#include <fifo.h>
21#include <Arbiter.h>
22#include <one_hot_to_bin.h>
23#include <hls_globals.h>
24#include <nvhls_marshaller.h>
25#include <nvhls_message.h>
26
27// Need to add in virtual output queueing at the input. Can replace the input
28// FIFOs with a Queue class which internally has multiple parallel FIFOs and
29// keeps track of where to put each new input
64template <typename DataType, unsigned int NumInputs, unsigned int NumOutputs,
65 unsigned int LenInputBuffer, unsigned int LenOutputBuffer>
67
68 public:
69 // Define int types for input and output indices
70 static const int log2_inputs = nvhls::index_width<NumInputs>::val;
71 static const int log2_outputs = nvhls::index_width<NumOutputs>::val;
72
73 typedef NVUINTW(log2_inputs) InputIdx;
74 typedef NVUINTW(log2_outputs) OutputIdx;
75 #ifdef SKIP_LV2TYPE
76 typedef NVUINTW(Wrapped<DataType>::width + log2_outputs) DataDestType;
77 #endif
78
79 private:
80 // Convenience class which stores a data and destination
81 // This is what is stored in the input FIFOs
82 class DataDest : public nvhls_message {
83 public:
84 DataType data;
85 OutputIdx dest;
86 static const int width = Wrapped<DataType>::width + log2_outputs;
87 #ifdef SKIP_LV2TYPE
88 DataDestType data_dest;
89
90 void update_data_dest() {
91 data_dest = static_cast<DataDestType> (data) + (static_cast<DataDestType> (dest) << Wrapped<DataType>::width);
92 }
93
94 void extract_data_dest() {
95 data = static_cast<DataType> (data_dest);
96 dest = static_cast<OutputIdx> (data_dest >> Wrapped<DataType>::width);
97 }
98 #endif
99 template <unsigned int Size>
100 void Marshall(Marshaller<Size>& m) {
101 #ifdef SKIP_LV2TYPE
102 m& data_dest;
103 #else
104 m& data;
105 m& dest;
106 #endif
107 }
108 };
109
110 // Input + output FIFOs, arbiters
// NOTE(review): the declarations guarded by the conditional below are not
// present in this listing (its embedded numbering skips 112, 114 and 116).
// The methods of this class use members named input_queues and output_queues,
// so their declarations presumably belong here -- restore them from the
// original header before building.
111 #ifndef SKIP_LV2TYPE
113 #else
115 #endif
117
// One arbiter per output lane: xbar() calls arbiters[out].pick() on the
// request vector collected for output `out`.
118 Arbiter<NumInputs> arbiters[NumOutputs];
119
120 public:
121 ArbitratedCrossbar() { reset(); }
122
123 // reset function: reset all the queues
124 void reset() {
125#pragma hls_unroll yes
126 for (unsigned in = 0; in < NumInputs; in++) {
127 input_queues.reset();
128 }
129#pragma hls_unroll yes
130 for (unsigned out = 0; out < NumOutputs; out++) {
131 output_queues.reset();
132 arbiters[out].reset();
133 }
134 }
135
136 // The next few functions report status of a given input or output lane
137 bool isInputEmpty(InputIdx index) {
138 NVHLS_ASSERT_MSG(index <= NumInputs, "Input index greater than number of inputs");
139 return input_queues.isEmpty(index);
140 }
141
142 bool isOutputEmpty(OutputIdx index) {
143 NVHLS_ASSERT_MSG(index <= NumOutputs, "Output index greater than number of outputs");
144 return output_queues.isEmpty(index);
145 }
146
147 bool isInputFull(InputIdx index) {
148 NVHLS_ASSERT_MSG(index <= NumInputs, "Input index greater than number of inputs");
149 return input_queues.isFull(index);
150 }
151
152 bool isOutputFull(OutputIdx index) {
153 NVHLS_ASSERT_MSG(index <= NumOutputs, "Output index greater than number of outputs");
154 return output_queues.isFull(index);
155 }
156
157 // Add data to a specified input lane, with a specified destination lane
158 void push(DataType data, InputIdx src, OutputIdx dest) {
159 DataDest tmp;
160 tmp.data = data;
161 tmp.dest = dest;
162 #ifndef SKIP_LV2TYPE
163 input_queues.push(tmp, src);
164 #else
165 tmp.update_data_dest();
166 DataDestType tmp_data_dest;
167 tmp_data_dest = tmp.data_dest;
168 input_queues.push(tmp_data_dest, src);
169 #endif
170 }
171
172 DataType peek(OutputIdx index) { return output_queues.peek(index); }
173
174 // Pop the data from a specified output lane
175 DataType pop(OutputIdx index) { return output_queues.pop(index); }
176
177 // Run the crossbar (not the queues)
178 void xbar(DataDest input_data[NumInputs], bool input_valid[NumInputs],
179 bool input_consumed[NumInputs], DataType data_out[NumOutputs],
180 bool valid_out[NumOutputs], bool output_ready[NumOutputs], InputIdx source[NumOutputs]) {
181
182 // For each input lane, read the data at the head of the queue, and store it
183 // in a temporary array
184 // Also check the destination requested and form a request matrix (along
185 // with its transpose)
186 // Doing this facilitates writing it in one dimension and reading it in
187 // another
188 NVUINTW(NumInputs) requests[NumOutputs]; // requests for all outputs
189
190 NVUINTW(NumOutputs) requests_transpose[NumInputs];
191
192#pragma hls_unroll yes
193 for (unsigned in = 0; in < NumInputs; in++) {
194 // The request from one input lane is just a one-hot representation of its
195 // destination field
196 // but should be 0 if this input lane is empty
197 NVUINTW(NumOutputs) empty = input_valid[in];
198 empty <<= input_data[in].dest;
199 requests_transpose[in] = empty;
200 }
201
202// Form transpose request matrix
203#pragma hls_unroll yes
204 for (unsigned out = 0; out < NumOutputs; out++) {
205#pragma hls_unroll yes
206 for (unsigned in = 0; in < NumInputs; in++) {
207 requests[out][in] = requests_transpose[in][out];
208 }
209 }
210
211// Keep track of which input queues need to be popped
212// This signal is the OR of all grant bits sent by output lanes to this
213// input lane
214// Need to do this so that all queues are popped once at the end in a input
215// lane loop
216// This removes a bottleneck in HLS where it was inferring a dependency
217// among pops
218#pragma hls_unroll yes
219 for (unsigned in = 0; in < NumInputs; in++) {
220 input_consumed[in] = false;
221 }
222
223// Loop over output lanes: run arbiter, then resolve contention
224#pragma hls_unroll yes
225 for (unsigned out = 0; out < NumOutputs; out++) {
226 valid_out[out] = false;
227
228 NVUINTW(NumInputs) one_hot_grant = 0;
229 InputIdx source_local;
230
231 // Stall the arbiters and the crossbar if the output is full
232 // This is also needed to get any pipelining (otherwise the tool will
233 // infer that you want to write in a single cycle)
234 // For some reason separating these two if statements gives better results
235 if (output_ready[out]) {
236
237 // Run through the Arbiter pick() function, convert to binary
238 one_hot_grant = arbiters[out].pick(requests[out]);
239 one_hot_to_bin<NumInputs, log2_inputs>(one_hot_grant, source_local);
240 }
241
242// Grant logic on input queues (OR gate)
243#pragma hls_unroll
244 for (unsigned in = 0; in < NumInputs; in++) {
245 // pop_inputs[in] = pop_inputs[in] | (one_hot_grant[in] == 1);
246 input_consumed[in] = input_consumed[in] | (one_hot_grant[in] == 1);
247 }
248
249 // XBAR (using the data that was staged in the temporary array input_data)
250 if ((!(one_hot_grant == 0)) && (output_ready[out])) {
251 data_out[out] = input_data[source_local].data;
252 valid_out[out] = true;
253 source[out] = source_local;
254 }
255 }
256 } // end xbar() function
257
258 public:
259 bool isAllInputEmpty() {
260 bool fifo_empty_internal[NumInputs + 1];
261 fifo_empty_internal[0] = true;
262#pragma hls_unroll yes
263 for (unsigned i = 0; i < NumInputs; i++) {
264 fifo_empty_internal[i + 1] = (isInputEmpty(i)) & fifo_empty_internal[i];
265 }
266 return fifo_empty_internal[NumInputs];
267 }
268
269 bool isAllOutputEmpty() {
270 bool fifo_empty_internal[NumOutputs + 1];
271 fifo_empty_internal[0] = true;
272#pragma hls_unroll yes
273 for (unsigned i = 0; i < NumOutputs; i++) {
274 fifo_empty_internal[i + 1] = (isOutputEmpty(i)) & fifo_empty_internal[i];
275 }
276 return fifo_empty_internal[NumOutputs];
277 }
278
279 bool isAllInputReady() {
280 bool fifo_ready_internal[NumInputs + 1];
281 fifo_ready_internal[0] = true;
282#pragma hls_unroll yes
283 for (unsigned i = 0; i < NumInputs; i++) {
284 fifo_ready_internal[i + 1] = (!isInputFull(i)) & fifo_ready_internal[i];
285 }
286 return fifo_ready_internal[NumInputs];
287 }
288
289 // Pop the data from all selected output lanes, data is already got from peek
290 void pop_all_lanes(bool valid_out[NumOutputs]) {
291#pragma hls_unroll yes
292 for (unsigned i = 0; i < NumOutputs; i++) {
293 if (valid_out[i]) {
294 output_queues.pop(i);
295 }
296 }
297 return;
298 }
304 // Top-Level function for Arbitrated Crossbar that returns source
305 // Interface description
306 // data_in - Array of inputs containing data
307 // dest_in - Array of inputs containing destination information for each input
308 // valid_in - Array of inputs indicating if the input is valid
309 // data_out - Array of outputs containing data
310 // valid_out - Array of outputs indicating if the output is valid
311 // ready - Array of outputs indicating if an input was ready. This also
312 // indicates of the input was successfully received by arbitrated Xbar
313 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
314 bool valid_in[NumInputs], DataType data_out[NumOutputs],
315 bool valid_out[NumOutputs], bool ready[NumInputs], InputIdx source[NumOutputs]
316 ) {
317 // Need to read data into temporary variables to avoid scheduling problem in
318 // Catapult
319 OutputIdx destin_tmp[NumInputs];
320 bool valid_in_tmp[NumInputs];
321
322#pragma hls_unroll yes
323 for (unsigned in = 0; in < NumInputs; in++) {
324 destin_tmp[in] = dest_in[in];
325 valid_in_tmp[in] = valid_in[in];
326 }
327
328 DataDest input_data[NumInputs];
329 bool input_valid[NumInputs];
330 bool input_consumed[NumInputs];
331#pragma hls_unroll yes
332 for (unsigned i = 0; i < NumInputs; i++) {
333 input_data[i] = BitsToType<DataDest>(0);
334 }
335 DataType output_data[NumOutputs];
336 bool output_valid[NumOutputs];
337 bool output_ready[NumOutputs];
338#pragma hls_unroll yes
339 for (unsigned i = 0; i < NumOutputs; i++) {
340 output_data[i] = BitsToType<DataType>(0);
341 }
342
343 if (LenInputBuffer > 0) {
344// If lane is ready and input data is valid, write to it
345#pragma hls_unroll yes
346 for (unsigned in = 0; in < NumInputs; in++) {
347 ready[in] = !isInputFull(in) || !valid_in_tmp[in];
348 if (!isInputFull(in) & valid_in_tmp[in]) {
349 // push(datain_tmp[in], in, destin_tmp[in]);
350 push(data_in[in], in, destin_tmp[in]);
351 }
352 input_valid[in] = !isInputEmpty(in);
353 if (input_valid[in]) {
354 #ifndef SKIP_LV2TYPE
355 input_data[in] = input_queues.peek(in);
356 #else
357 input_data[in].data_dest = input_queues.peek(in);
358 input_data[in].extract_data_dest();
359 #endif
360 }
361 }
362
363 } else {
364#pragma hls_unroll yes
365 for (unsigned in = 0; in < NumInputs; in++) {
366 input_data[in].data = data_in[in];
367 input_data[in].dest = dest_in[in];
368 input_valid[in] = valid_in_tmp[in];
369 }
370 }
371 for (unsigned in = 0; in < NumInputs; in++) {
372 //DCOUT("DUT - Input: " << in << "\t valid: " << input_valid[in] << "\t dest: " << input_data[in].dest << "\t data: " << input_data[in].data << endl);
373 }
374 if (LenOutputBuffer > 0) {
375#pragma hls_unroll yes
376 for (unsigned out = 0; out < NumOutputs; out++) {
377 output_ready[out] = !isOutputFull(out);
378 }
379 } else {
380#pragma hls_unroll yes
381 for (unsigned out = 0; out < NumOutputs; out++) {
382 output_ready[out] = true;
383 }
384 }
385
386 // Process the XBAR and arbiters
387 xbar(input_data, input_valid, input_consumed, output_data, output_valid,
388 output_ready, source);
389 for (unsigned out = 0; out < NumOutputs; out++) {
390 //DCOUT("DUT - Output: " << out << "\t valid: " << output_valid[out] << "\t data: " << output_data[out] << "\tReady: " << output_ready[out] << endl);
391 }
392
393 if (LenInputBuffer > 0) {
394// Increment the head pointer of the input queues if their requests were granted
395// Do this in a single loop over input lanes so it's clear to the tool that each
396// input channel
397// is only popped once
398#pragma hls_unroll yes
399 for (unsigned in = 0; in < NumInputs; in++) {
400 if (input_consumed[in]) {
401 input_queues.incrHead(in);
402 }
403 }
404 } else {
405#pragma hls_unroll yes
406 for (unsigned in = 0; in < NumInputs; in++) {
407 ready[in] = input_consumed[in];
408 }
409 }
410
411 if (LenOutputBuffer > 0) {
412// Read from each output channel if it is not empty
413#pragma hls_unroll yes
414 for (unsigned out = 0; out < NumOutputs; out++) {
415 if (output_valid[out]) {
416 output_queues.push(output_data[out], out);
417 }
418 valid_out[out] = !isOutputEmpty(out);
419 if (!isOutputEmpty(out)) {
420 data_out[out] = peek(out);
421 }
422 /*peek only
423 if (!isOutputEmpty(out)) {
424 data_out[out] = pop(out);
425 }
426 */
427 }
428 for (unsigned out = 0; out < NumOutputs; out++) {
429 //DCOUT("DUT - ArbXbar Output: " << out << "\t valid: " << valid_out[out] << "\t data: " << data_out[out] << endl);
430 }
431 } else {
432#pragma hls_unroll yes
433 for (unsigned out = 0; out < NumOutputs; out++) {
434 data_out[out] = output_data[out];
435 valid_out[out] = output_valid[out];
436 }
437 }
438 } // end run() function
444 void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs],
445 bool valid_in[NumInputs], DataType data_out[NumOutputs],
446 bool valid_out[NumOutputs], bool ready[NumInputs]) {
447 InputIdx source[NumOutputs];
448 run(data_in, dest_in,
449 valid_in, data_out,
450 valid_out, ready, source);
451 } // end run() function
452
453
454}; // end ArbitratedCrossbar class
455
456#endif // end ARBITRATED_CROSSBAR_H
A generalized implementation of a generic n-way round-robin arbiter.
Definition Arbiter.h:61
Crossbar with conflict arbitration and input queuing.
Configurable FIFO class.
Definition fifo.h:65
void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs], bool valid_in[NumInputs], DataType data_out[NumOutputs], bool valid_out[NumOutputs], bool ready[NumInputs], InputIdx source[NumOutputs])
Top-Level function for Arbitrated Crossbar.
void run(DataType data_in[NumInputs], OutputIdx dest_in[NumInputs], bool valid_in[NumInputs], DataType data_out[NumOutputs], bool valid_out[NumOutputs], bool ready[NumInputs])
Top-Level function for Arbitrated Crossbar that does not return source.
#define NVHLS_ASSERT_MSG(X, MSG)
#define NVUINTW(width)
Definition nvhls_types.h:35
Compute index width of a constant.
Definition nvhls_int.h:285