// Derived compile-time parameters. CapacityInBytes, NumInputs and NumBanks are
// referenced here but declared outside this excerpt (presumably template
// parameters of the enclosing class — confirm).
// Debug level handed to every CDCOUT call in this excerpt.
62 static const int kDebugLevel = 2;
// Address width, derived from CapacityInBytes - 1 via nvhls::nbits
// (presumably the number of bits needed to hold that value — confirm).
64 static const int addr_width =
nvhls::nbits<CapacityInBytes - 1>::val;
// Width of the bank selector; pinned to 1 when there is only one bank.
65 static const int log2_nbanks = (NumBanks == 1) ? 1 :
nvhls::nbits<NumBanks - 1>::val;
// Width of the input-channel selector; pinned to 1 when there is only one input.
66 static const int log2_inputs = (NumInputs == 1) ? 1 :
nvhls::nbits<NumInputs - 1>::val;
// n & (n - 1) clears the lowest set bit, so this is true when NumBanks has at
// most one bit set.
68 static const bool is_nbanks_power_of_2 = (NumBanks & (NumBanks - 1)) == 0;
// Per-bank address width. A power-of-two bank count above 1 drops the
// log2_nbanks selector bits; every other case keeps one extra bit. For
// NumBanks == 1 that yields the full addr_width, because log2_nbanks is 1.
69 static const int bank_addr_width = (is_nbanks_power_of_2 && (NumBanks > 1)) ? (addr_width - log2_nbanks) : (addr_width - log2_nbanks + 1);
// Fixed-width types sized from the constants above. NVUINTW is defined
// elsewhere (presumably an unsigned integer of the given bit width — confirm).
72 typedef NVUINTW(log2_nbanks) bank_sel_t;
73 typedef NVUINTW(bank_addr_width) bank_addr_t;
74 typedef NVUINTW(log2_inputs) input_sel_t;
// NOTE(review): the enclosing struct header is not visible in this excerpt.
// These members presumably belong to bank_req_t, whose input_chan field is
// read and written by the functions further down — confirm.
// Input channel associated with the request.
80 input_sel_t input_chan;
// Packed width: 1 bit + bank address bits + wrapped DataType width + input
// selector bits.
81 static const int width = 1 + bank_addr_width + Wrapped<DataType>::width + log2_inputs;
// Marshalling hook, templated on the marshaller Size; its body is not visible
// in this excerpt.
83 template <
unsigned int Size>
84 void Marshall(Marshaller<Size>& m) {
// NOTE(review): the enclosing struct header is not visible in this excerpt.
// This presumably belongs to bank_rsp_t (fields valid and rdata are used on
// that type further down) — confirm.
// Packed width: 1 bit + wrapped DataType width.
94 static const int width = 1 + Wrapped<DataType>::width;
// Marshalling hook, templated on the marshaller Size; its body is not visible
// in this excerpt.
96 template <
unsigned int Size>
97 void Marshall(Marshaller<Size>& m) {
// Translates one client request into per-input bank requests.
// For every input channel this fills bank_sel (target bank), bank_req (store
// flag, bank-local address, write data, originating channel) and
// bank_req_valid (copied from the client's valid flag).
// NOTE(review): some lines of this function (else branches, closing braces)
// are not visible in this excerpt; the comments describe only what is shown.
114 void compute_bank_request(
req_t &curr_cli_req,
bank_req_t bank_req[NumInputs],
115 bank_sel_t bank_sel[NumInputs],
116 bool bank_req_valid[NumInputs]) {
// One iteration per input channel; the pragma asks the HLS tool to unroll.
119 #pragma hls_unroll yes
120 for (
unsigned in_chan = 0; in_chan < NumInputs; in_chan++) {
// Target bank: forced to 0 when there is a single input.
// NOTE(review): the guard tests NumInputs, not NumBanks — confirm this is
// intended.
123 if (NumInputs == 1) {
124 bank_sel[in_chan] = 0;
// Otherwise the bank is derived from the address. The power-of-two branch
// body is not visible here; the visible fallback is address modulo NumBanks.
126 if (is_nbanks_power_of_2) {
129 bank_sel[in_chan] = curr_cli_req.addr[in_chan] % NumBanks;
// A request is a store only when its channel is valid and the request type
// is STORE.
133 bank_req[in_chan].do_store = (curr_cli_req.valids[in_chan] ==
true) &&
134 (curr_cli_req.type.val == CLITYPE_T::STORE);
// Bank-local address: the unmodified client address for a single input.
136 if (NumInputs == 1) {
137 bank_req[in_chan].addr = curr_cli_req.addr[in_chan];
// Power-of-two bank count: slice addr_width - log2_nbanks bits out of the
// address, with log2_nbanks passed as the second get_slc argument
// (presumably the starting bit position — confirm against nvhls::get_slc).
139 if (is_nbanks_power_of_2) {
140 bank_req[in_chan].addr =
nvhls::get_slc<addr_width - log2_nbanks>(curr_cli_req.addr[in_chan], log2_nbanks);
// Visible fallback: integer division of the address by NumBanks.
142 bank_req[in_chan].addr = curr_cli_req.addr[in_chan] / NumBanks;
// Write data is copied only for stores.
146 if (bank_req[in_chan].do_store) {
147 bank_req[in_chan].wdata = curr_cli_req.data[in_chan];
// Tag the bank request with the input channel it came from.
151 input_sel_t input_idx = in_chan;
152 bank_req[in_chan].input_chan = input_idx;
// The bank request is valid exactly when the client marked this channel valid.
155 bank_req_valid[in_chan] = (curr_cli_req.valids[in_chan] ==
true);
// Applies at most one request to each bank and produces one response per bank.
// bank_req / bank_req_valid are indexed by bank; bank_rsp[bank].valid is set
// to true only on the load path shown below.
// NOTE(review): the else lines and closing braces are not visible in this
// excerpt; the branch structure described here is inferred from the visible
// statements — confirm.
159 void banks_load_store(bank_req_t bank_req[NumBanks],
160 bool bank_req_valid[NumBanks],
161 bank_rsp_t bank_rsp[NumBanks]) {
// One iteration per bank; the pragma asks the HLS tool to unroll.
162 #pragma hls_unroll yes
163 for (
unsigned bank = 0; bank < NumBanks; bank++) {
164 if (bank_req_valid[bank] ==
true) {
// Load: read the bank at the requested address and flag the response valid.
165 if (!bank_req[bank].do_store) {
166 bank_rsp[bank].valid =
true;
167 bank_rsp[bank].rdata = banks.read(bank_req[bank].addr, bank);
// Store (presumably the else branch): write the data; no response is flagged.
169 banks.write(bank_req[bank].addr, bank, bank_req[bank].wdata);
170 bank_rsp[bank].valid =
false;
// No valid request for this bank (presumably the outer else): no response.
173 bank_rsp[bank].valid =
false;
// Resets the scratchpad by delegating to request_xbar.reset(); nothing else
// is reset here.
181 void reset() { request_xbar.reset(); }
// Searches the per-bank winners for a valid response whose request came from a
// given input channel.
// Note: the parameter named `bank` is compared against input_chan, and the
// caller visible in this file passes an input index, so it identifies an
// input channel rather than a bank.
// The assignments to `res` and `en` are not visible in this excerpt;
// presumably res receives the matching bank index and en flags a hit — confirm.
// The pragmas request mapping to a combinational CCORE operator.
183 #pragma map_to_operator [CCORE]
184 #pragma ccore_type combinational
185 void pseudo_cam(
const bank_req_t bank_req_winner[NumBanks],
const bank_rsp_t bank_rsp[NumBanks],
186 unsigned bank,
unsigned &res,
bool &en) {
// Scan every bank slot.
189 for (
unsigned i = 0; i < NumBanks; i++)
// Match: this bank's winner came from the requested channel and its response
// is valid.
191 if ( (bank_req_winner[i].input_chan == bank) && bank_rsp[i].valid)
// load_store overload that takes a raw client request.
// It is guarded by #ifdef HLS_ALGORITHMICC; the matching #else/#endif is not
// visible in this excerpt.
// The visible part allocates per-input bank request arrays and fills them via
// compute_bank_request; the rest of the body is not shown.
199 #ifdef HLS_ALGORITHMICC
200 void load_store(req_t &curr_cli_req, rsp_t &load_rsp,
201 bool input_ready[NumInputs]) {
// Per-input scratch arrays filled by compute_bank_request.
202 bank_req_t bank_req[NumInputs];
203 bank_sel_t bank_sel[NumInputs];
204 bool bank_req_valid[NumInputs];
206 compute_bank_request(curr_cli_req, bank_req, bank_sel, bank_req_valid);
// load_store overload that takes already-computed per-input bank requests.
// Visible flow: print the inputs, pass them through request_xbar.run to get
// one winning request per bank plus input_ready, run the bank accesses with
// banks_load_store, then route bank read data back to the inputs through
// crossbar and copy the result into load_rsp.
// NOTE(review): several lines are not visible in this excerpt (e.g. the head
// of the first per-input CDCOUT and the res/en handling after pseudo_cam).
208 void load_store(bank_req_t bank_req[NumInputs],
209 bank_sel_t bank_sel[NumInputs],
210 bool bank_req_valid[NumInputs],
211 rsp_t &load_rsp,
bool input_ready[NumInputs]) {
// Debug dump of every input request at kDebugLevel.
213 CDCOUT(
"\tinputs:" << endl, kDebugLevel);
214 for (
unsigned i = 0; i < NumInputs; ++i) {
216 <<
" valid=" << bank_req_valid[i]
217 <<
" select=" << bank_sel[i]
218 <<
" addr=" << bank_req[i].addr
219 <<
" wdata=" << bank_req[i].wdata
220 <<
" load=" << !bank_req[i].do_store
221 <<
" store=" << bank_req[i].do_store
222 <<
" input=" << bank_req[i].input_chan << endl, kDebugLevel);
224 CDCOUT(
"\t------" << endl, kDebugLevel);
// Hand the requests to request_xbar. Its outputs are a winner and valid flag
// per bank, plus input_ready per input (presumably arbitration among inputs
// targeting the same bank — confirm against the request_xbar type).
226 bank_req_t bank_req_winner[NumBanks];
227 bool bank_req_winner_valid[NumBanks];
228 request_xbar.
run(bank_req, bank_sel, bank_req_valid, bank_req_winner,
229 bank_req_winner_valid, input_ready);
// Debug dump of the per-bank winners.
231 CDCOUT(
"\t\tbank winner transactions:" << endl, kDebugLevel);
232 for (
unsigned i = 0; i < NumBanks; ++i) {
233 CDCOUT(
"\t\t" << i <<
" :"
234 <<
" valid=" << bank_req_winner_valid[i]
235 <<
" addr=" << bank_req_winner[i].addr
236 <<
" wdata=" << bank_req_winner[i].wdata
237 <<
" load=" << !bank_req_winner[i].do_store
238 <<
" store=" << bank_req_winner[i].do_store
239 <<
" input=" << bank_req_winner[i].input_chan << endl, kDebugLevel);
241 CDCOUT(
"\t\t------" << endl, kDebugLevel);
// Debug dump of the per-input ready flags returned by request_xbar.run.
242 CDCOUT(
"\t\tinput_ready:" << endl, kDebugLevel);
243 for (
unsigned i = 0; i < NumInputs; ++i) {
244 CDCOUT(
"\t\t" << i <<
" : ready=" << input_ready[i] << endl, kDebugLevel);
246 CDCOUT(
"\t\t------" << endl, kDebugLevel);
// Perform the winning load/store on each bank and collect the responses.
248 bank_rsp_t bank_rsp[NumBanks];
249 banks_load_store(bank_req_winner, bank_req_winner_valid, bank_rsp);
// Buffers for the response crossbar: bank-side data/valid in, per-input
// source selection, and per-input data/valid out.
252 DataType data_in[NumBanks];
253 bool valid_in[NumBanks];
254 bank_sel_t source[NumInputs];
255 bool valid_src[NumInputs];
256 DataType data_out[NumInputs];
257 bool valid_out[NumInputs];
// Start with no input having a valid source.
259 #pragma hls_unroll yes
260 for (
unsigned out = 0; out < NumInputs; out++) {
261 valid_src[out] =
false;
// Crossbar inputs come directly from the bank responses.
264 #pragma hls_unroll yes
265 for (
unsigned bank = 0; bank < NumBanks; bank++) {
266 valid_in[bank] = bank_rsp[bank].valid;
267 data_in[bank] = bank_rsp[bank].rdata;
// For each input, look up which bank (if any) holds its response. res and en
// are declared and consumed on lines not visible here; presumably they set
// source[inp] and valid_src[inp] — confirm.
270 #pragma hls_unroll yes
271 for (
unsigned inp = 0; inp < NumInputs; inp++) {
274 pseudo_cam(bank_req_winner, bank_rsp, inp, res, en);
// Route bank data to the inputs according to source / valid_src.
279 crossbar<DataType, NumBanks, NumInputs>(data_in, valid_in, source,
280 valid_src, data_out, valid_out);
// Publish the routed data and valid flags in the load response.
282 #pragma hls_unroll yes
283 for (
unsigned out = 0; out < NumInputs; out++) {
284 load_rsp.valids[out] = valid_out[out];
285 load_rsp.data[out] = data_out[out];