Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4826f26
Initial MVAU-Tiling commit into dev.
Apr 8, 2026
64d52f7
MLO GEMM fixes
Apr 14, 2026
60e8eab
precommit run.
Apr 14, 2026
4d3eaf8
removed MMAU from python code. Kept RTL under mvu_tiled for future in…
d-kor May 19, 2026
7c14297
Merge remote-tracking branch 'origin/dev' into feature/tiling_mlo
d-kor May 19, 2026
ad4cb78
pre-commit cleanup.
d-kor May 19, 2026
4a756f9
hls mvau interface update.
May 20, 2026
c23b1a1
revert the folded_input_shape for dynamic matmuls.
May 20, 2026
ae94a2c
Merge branch 'dev' into feature/tiling_mlo
preusser Jun 1, 2026
f7c9a2a
Refine replacing replay and transposes by input_gen and adding a test…
preusser Jun 1, 2026
1c3a9cb
Replace add_tree by add_multi.
preusser Jun 1, 2026
859de8e
Style fixes.
preusser Jun 1, 2026
0dacdd8
Review of fetch_weights.
preusser Jun 2, 2026
ad2a3df
Reviewed load buffers.
preusser Jun 2, 2026
c11d518
[CustomOp] Remove other occurrences of obsolete rtl file
auphelia Jun 5, 2026
aea144e
small fix, remove ram_p_c.
Jun 9, 2026
fdccda8
Merge pull request #1594 from Xilinx/review/tiling_mlo
auphelia Jun 10, 2026
7a1007f
[Tiled MVU] Adding an assertion to guard against configurations that …
STFleming Jun 17, 2026
826ff8c
[Tiled MVAU] cleaning up code comment (copilot suggestion)
STFleming Jun 18, 2026
1fb5ae8
Merge pull request #1606 from Xilinx/feature/tiling_mlo_th_assertion
auphelia Jun 22, 2026
700902d
Merge branch 'dev' into feature/tiling_mlo
auphelia Jun 22, 2026
4bdb7f8
Linting
auphelia Jun 22, 2026
c735896
Merge branch 'dev' into feature/tiling_mlo
auphelia Jun 30, 2026
2aab9ce
[MVAU] Remove MMU files and unused node attribute for gemm type
auphelia Jul 1, 2026
4fba43b
[finn-rtllib] Update copyright headers to new format
auphelia Jul 1, 2026
c542ee5
[FINNLoop] Fix axi-mm parameter handle to not accidentally pick up el…
auphelia Jul 1, 2026
2320c18
[MVAU] Turn required mlo external_mem setting into loop rolling respo…
auphelia Jul 1, 2026
60ce6ed
[Tests] Call SpecializeLayers in loop rolling test
auphelia Jul 1, 2026
71b472b
[Tests] Integrate tiled MatMul tests into existing tests
auphelia Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions finn-rtllib/dwc/hdl/axis_dwc.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
//
// This file is subject to the Xilinx Design License Agreement located
// in the LICENSE.md file in the root directory of this repository.
//
// This file contains confidential and proprietary information of Xilinx, Inc.
// and is protected under U.S. and international copyright and other
// intellectual property laws.
//
// DISCLAIMER
// This disclaimer is not a license and does not grant any rights to the materials
// distributed herewith. Except as otherwise provided in a valid license issued to
// you by Xilinx, and to the maximum extent permitted by applicable law: (1) THESE
// MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX HEREBY
// DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
// INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR
// FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
// in contract or tort, including negligence, or under any other theory of
// liability) for any loss or damage of any kind or nature related to, arising
// under or in connection with these materials, including for any direct, or any
// indirect, special, incidental, or consequential loss or damage (including loss
// of data, profits, goodwill, or any type of loss or damage suffered as a result
// of any action brought by a third party) even if such damage or loss was
// reasonably foreseeable or Xilinx had been advised of the possibility of the
// same.
//
// CRITICAL APPLICATIONS
// Xilinx products are not designed or intended to be fail-safe, or for use in
// any application requiring failsafe performance, such as life-support or safety
// devices or systems, Class III medical devices, nuclear facilities, applications
// related to the deployment of airbags, or any other applications that could lead
// to death, personal injury, or severe property or environmental damage
// (individually and collectively, "Critical Applications"). Customer assumes the
// sole risk and liability of any use of Xilinx products in Critical Applications,
// subject only to applicable laws and regulations governing limitations on product
// liability.
//
// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT ALL TIMES.

// -----------------------------------------------------------------------------
// axis_dwc
//
// Thin AXI-Stream wrapper around a single axis_fifo_adapter instance. It maps
// the aclk / aresetn / *_DATA_BITS naming used at this level onto the
// clk / rst / *_DATA_WIDTH naming of the wrapped module.
//
// Parameters
//   DEPTH        forwarded unchanged to axis_fifo_adapter.DEPTH
//   S_DATA_BITS  width of s_axis_tdata, forwarded as S_DATA_WIDTH
//   M_DATA_BITS  width of m_axis_tdata, forwarded as M_DATA_WIDTH
//
// Notes
//   - tkeep port widths are computed as *_DATA_BITS/8 (integer division).
//     NOTE(review): presumably both data widths are meant to be multiples of
//     8 so that this matches the keep width used inside axis_fifo_adapter --
//     confirm against that module's parameter defaults.
//   - The input sidebands tid/tdest/tuser and pause_req are tied to '0; the
//     output sidebands tid/tdest/tuser are left unconnected.
// -----------------------------------------------------------------------------
module axis_dwc #(
    parameter integer DEPTH       = 512,
    parameter integer S_DATA_BITS = 32,
    parameter integer M_DATA_BITS = 8
) (
    input  logic                       aclk,
    input  logic                       aresetn,

    // Slave (input) stream
    input  logic                       s_axis_tvalid,
    output logic                       s_axis_tready,
    input  logic [S_DATA_BITS-1:0]     s_axis_tdata,
    input  logic [S_DATA_BITS/8-1:0]   s_axis_tkeep,
    input  logic                       s_axis_tlast,

    // Master (output) stream
    output logic                       m_axis_tvalid,
    input  logic                       m_axis_tready,
    output logic [M_DATA_BITS-1:0]     m_axis_tdata,
    output logic [M_DATA_BITS/8-1:0]   m_axis_tkeep,
    output logic                       m_axis_tlast
);

    // The wrapped module takes an active-high reset; this module's is active-low.
    logic rst_active_high;
    assign rst_active_high = ~aresetn;

    axis_fifo_adapter #(
        .S_DATA_WIDTH (S_DATA_BITS),
        .M_DATA_WIDTH (M_DATA_BITS),
        .DEPTH        (DEPTH)
    ) inst_fifo_adapter (
        // Clock and reset
        .clk            (aclk),
        .rst            (rst_active_high),

        // Input stream: handshake first, then payload
        .s_axis_tvalid  (s_axis_tvalid),
        .s_axis_tready  (s_axis_tready),
        .s_axis_tdata   (s_axis_tdata),
        .s_axis_tkeep   (s_axis_tkeep),
        .s_axis_tlast   (s_axis_tlast),

        // Output stream: handshake first, then payload
        .m_axis_tvalid  (m_axis_tvalid),
        .m_axis_tready  (m_axis_tready),
        .m_axis_tdata   (m_axis_tdata),
        .m_axis_tkeep   (m_axis_tkeep),
        .m_axis_tlast   (m_axis_tlast),

        // Unused input sidebands and pause request are held at zero
        .s_axis_tid     ('0),
        .s_axis_tdest   ('0),
        .s_axis_tuser   ('0),
        .pause_req      ('0),

        // Unused output sidebands are intentionally left open
        .m_axis_tid     (),
        .m_axis_tdest   (),
        .m_axis_tuser   ()
    );

endmodule : axis_dwc
61 changes: 20 additions & 41 deletions finn-rtllib/dynload/hdl/dynamic_load.sv
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ module dynamic_load #(
int unsigned WEIGHT_WIDTH,
int unsigned MH,
int unsigned MW,
int unsigned N_REPS
int unsigned N_REPS,
parameter RAM_STYLE = "distributed"
)(
input logic ap_clk,
input logic ap_rst_n,
Expand All @@ -60,8 +61,6 @@ localparam int unsigned N_TLS = SF*NF;

localparam int unsigned SIMD_BITS = (SIMD == 1) ? 1 : $clog2(SIMD);
localparam int unsigned WGT_ADDR_BITS = (N_TLS == 1) ? 1 : $clog2(N_TLS);
localparam int unsigned RAM_BITS = (WEIGHT_WIDTH + 7)/8 * 8;
localparam int unsigned WGT_EN_BITS = RAM_BITS / 8;
localparam int unsigned NF_BITS = (NF == 1) ? 1 : $clog2(NF);
localparam int unsigned SF_BITS = (SF == 1) ? 1 : $clog2(SF);
localparam int unsigned N_TLS_BITS = (N_TLS == 1) ? 1 : $clog2(N_TLS);
Expand All @@ -85,9 +84,8 @@ logic[N_TLS_BITS-1:0] curr_sf_C = '0, curr_sf_N;
logic[SIMD_BITS-1:0] curr_simd_C = '0, curr_simd_N;

// -- Signals
logic [1:0][PE-1:0][SIMD-1:0][WGT_EN_BITS-1:0] a_we; // Bank enables
logic [1:0][SIMD-1:0] a_we;
logic [1:0][WGT_ADDR_BITS-1:0] a_addr;
logic [1:0][PE-1:0][SIMD-1:0][WEIGHT_WIDTH-1:0] a_data_in;

// -- Offsets
for(genvar i = 0; i < NF; i++) begin
Expand Down Expand Up @@ -147,29 +145,16 @@ always_comb begin : DP_PROC_WR

// Buffers
a_we = '0;
for(int i = 0; i < 2; i++) begin
for(int i = 0; i < 2; i++)
a_addr[i] = offsets[curr_nf_C] + curr_sf_C;
for(int j = 0; j < PE; j++)
for(int k = 0; k < SIMD; k++)
a_data_in[i][j][k] = idat[j];
end

// Write and count
case (state_wr_C)
ST_WR_0, ST_WR_1: begin
irdy = 1'b1;

if(ivld) begin
for(int i = 0; i < PE; i++) begin
for(int j = 0; j < SIMD; j++) begin
if(curr_simd_C == j) begin
if(state_wr_C == ST_WR_0)
a_we[0][i][j] = '1;
else
a_we[1][i][j] = '1;
end
end
end
a_we[state_wr_C == ST_WR_1][curr_simd_C] = 1;

curr_nf_N = (curr_nf_C == NF-1) ? 0 : curr_nf_C + 1;
curr_simd_N = (curr_nf_C == NF-1) ? ((curr_simd_C == SIMD-1) ? 0 : curr_simd_C + 1) : curr_simd_C;
Expand Down Expand Up @@ -295,29 +280,23 @@ assign ovld = vld_C;
assign odat = odat_C;

// ----------------------------------------------------------------------------
// Matrix
// Weight RAMs
// ----------------------------------------------------------------------------

for(genvar i = 0; i < 2; i++) begin
for(genvar j = 0; j < PE; j++) begin
for(genvar k = 0; k < SIMD; k++) begin
ram_p_c #(
.ADDR_BITS(WGT_ADDR_BITS),
.DATA_BITS(RAM_BITS),
.RAM_STYLE("distributed")
) inst_ram_tp_c (
.clk(ap_clk),
.a_en(1'b1),
.a_we(a_we[i][j][k]),
.a_addr(a_addr[i]),
.b_en(ordy),
.b_addr(b_addr[i]),
.a_data_in(a_data_in[i][j][k]),
.a_data_out(),
.b_data_out(odat_ram[i][j][k])
);
for(genvar i = 0; i < 2; i++) begin : genBank
for(genvar k = 0; k < SIMD; k++) begin : genSimd
(* RAM_STYLE = RAM_STYLE *)
logic [PE-1:0][WEIGHT_WIDTH-1:0] Ram[2**WGT_ADDR_BITS];
logic [PE-1:0][WEIGHT_WIDTH-1:0] RdReg;

always_ff @(posedge ap_clk) begin
if(a_we[i][k]) Ram[a_addr[i]] <= idat;
if(ordy) begin
RdReg <= Ram[b_addr[i]];
foreach(RdReg[p]) odat_ram[i][p][k] <= RdReg[p];
end
end
end
end
end : genSimd
end : genBank

endmodule : dynamic_load
Loading
Loading