feat: implement pipelined module chaining for performance benchmarking

- Introduces a pipelined module and top-level wrapper for
  performance benchmarking.
- Chains multiple pipeline modules in series to increase throughput.
- Adds generics to control pipeline depth, data width, and
  register balancing.
- Includes optional CE/RST enables and pipeline buffer.
This commit is contained in:
2025-07-11 10:05:02 +00:00
parent 96833c0f77
commit 88753b62f4
2 changed files with 226 additions and 94 deletions

View File

@@ -11,24 +11,36 @@ use ieee.math_real.all;
entity Pipeline_pb is
generic (
--@ Number of pipeline stages
G_PipelineStages : integer := 10;
--@ Number of pipeline stages inside each module
G_PipelineStages : integer := 2;
--@ Data width
G_Width : integer := 32;
G_Width : integer := 8;
--@ Register balancing attribute<br>
--@ - "no" : No register balancing <br>
--@ - "yes": Register balancing in both directions <br>
--@ - "forward": Moves a set of FFs at the inputs of a LUT to a single FF at its output. <br>
--@ - "backward": Moves a single FF at the output of a LUT to a set of FFs at its inputs.
G_RegisterBalancing : string := "yes"
G_RegisterBalancing : string := "yes";
--@ Enable pipeline buffer
--@ - true : Use pipeline buffer
--@ - false : Direct connection (bypass)
G_EnablePipelineBuffer : boolean := true;
--@ How many Pipeline modules shall be chained?
G_PipelineModules : integer := 250;
--@ Enable chip enable signal
G_Enable_CE : boolean := false;
--@ Enable reset signal
G_Enable_RST : boolean := false
);
port (
I_CLK : in std_logic;
I_RST : in std_logic;
I_CE : in std_logic;
---
I_Data : in std_logic_vector(G_Width - 1 downto 0);
I_Valid : in std_logic;
O_Ready : out std_logic;
---
O_Data : out std_logic_vector(G_Width - 1 downto 0);
O_Valid : out std_logic;
I_Ready : in std_logic
@@ -36,119 +48,116 @@ entity Pipeline_pb is
end entity Pipeline_pb;
architecture RTL of Pipeline_pb is
-- Keep attribute: Prevents the synthesis tool from removing the entity if is "true".
---------------------------------------------------------------------------
-- Attribute helpers
---------------------------------------------------------------------------
attribute keep : string;
-- IOB attribute: Attaches the FF to the IOB if is "true".
attribute IOB : string;
-- General Interace
signal R_RST : std_logic;
signal R_CE : std_logic;
-- Attribute
---------------------------------------------------------------------------
-- Bench‐wrapper FFs (synchronous IO)
---------------------------------------------------------------------------
signal R_RST : std_logic := '0';
signal R_CE : std_logic := '1';
attribute keep of R_RST, R_CE : signal is "true";
attribute IOB of R_RST, R_CE : signal is "false";
-- Input Interface
signal R_DataIn : std_logic_vector(G_Width-1 downto 0);
signal R_ValidIn : std_logic;
signal R_ReadyOut : std_logic;
-- Attribute
attribute keep of R_DataIn, R_ValidIn, R_ReadyOut : signal is "true";
attribute IOB of R_DataIn, R_ValidIn, R_ReadyOut : signal is "false";
attribute keep of R_DataIn, R_ValidIn : signal is "true";
attribute IOB of R_DataIn, R_ValidIn : signal is "false";
-- Output Interface
signal R_DataOut : std_logic_vector(G_Width-1 downto 0);
signal R_ValidOut : std_logic;
signal R_ReadyIn : std_logic;
-- Attribute
attribute keep of R_DataOut, R_ValidOut, R_ReadyIn : signal is "true";
attribute IOB of R_DataOut, R_ValidOut, R_ReadyIn : signal is "false";
signal C_Pipeline0Enable : std_logic;
signal C_Pipeline1Enable : std_logic;
---------------------------------------------------------------------------
-- Chaining arrays (sentinel element @0 and @G_PipelineModules)
---------------------------------------------------------------------------
type T_DataArray is array(0 to G_PipelineModules) of std_logic_vector(G_Width-1 downto 0);
signal S_Data : T_DataArray;
signal S_Valid : std_logic_vector(0 to G_PipelineModules);
signal S_Ready : std_logic_vector(0 to G_PipelineModules);
signal R_Valid : std_logic;
signal R_Ready : std_logic;
signal R_Data : std_logic_vector(G_Width - 1 downto 0);
begin
BenchmarkEnvironmentFFs : process (I_CLK)
GEN_Enable_CE : if G_Enable_CE = true generate
process(I_CLK)
begin
if rising_edge(I_CLK) then
-- General Interace
R_RST <= I_RST;
R_CE <= I_CE;
end if;
end process;
end generate GEN_Enable_CE;
-- Input Interface
GEN_Enable_RST : if G_Enable_RST = true generate
process(I_CLK)
begin
if rising_edge(I_CLK) then
R_RST <= I_RST;
end if;
end process;
end generate GEN_Enable_RST;
-----------------------------------------------------------------------
-- Wrapper FFs: register all top‑level ports once for fair timing
-----------------------------------------------------------------------
BenchFF : process(I_CLK)
begin
if rising_edge(I_CLK) then
--- Register inputs
R_DataIn <= I_Data;
R_ValidIn <= I_Valid;
O_Ready <= R_ReadyOut;
-- Output Interface
O_Data <= R_DataOut;
O_Valid <= R_ValidOut;
O_Ready <= S_Ready(0);
--- Register outputs
R_DataOut <= S_Data (G_PipelineModules);
R_ValidOut <= S_Valid(G_PipelineModules);
R_ReadyIn <= I_Ready;
end if;
end process;
PipelineControllerIn : entity work.PipelineController
O_Data <= R_DataOut;
O_Valid <= R_ValidOut;
-----------------------------------------------------------------------
-- Bind sentinel 0 with registered inputs
-----------------------------------------------------------------------
S_Data (0) <= R_DataIn;
S_Valid(0) <= R_ValidIn;
-----------------------------------------------------------------------
-- Bind last sentinel with registered outputs
-----------------------------------------------------------------------
S_Ready(G_PipelineModules) <= R_ReadyIn;
-----------------------------------------------------------------------
-- Generate N pipeline modules in series
-----------------------------------------------------------------------
gen_modules : for i in 0 to G_PipelineModules-1 generate
P_MOD : entity work.Pipeline_pb_Module
generic map(
G_PipelineStages => G_PipelineStages,
G_ResetActiveAt => '1'
G_Width => G_Width,
G_RegisterBalancing => G_RegisterBalancing,
G_EnablePipelineBuffer => G_EnablePipelineBuffer
)
port map(
I_CLK => I_CLK,
I_RST => R_RST,
I_CE => R_CE,
O_Enable => C_Pipeline0Enable,
I_Valid => R_ValidIn,
O_Ready => R_ReadyOut,
O_Valid => R_Valid,
I_Ready => R_Ready
-- Up‑stream side
I_Data => S_Data (i),
I_Valid => S_Valid(i),
O_Ready => S_Ready(i),
-- Down‑stream side
O_Data => S_Data (i+1),
O_Valid => S_Valid(i+1),
I_Ready => S_Ready(i+1)
);
PipelineRegisterIn : entity work.PipelineRegister
generic map(
G_PipelineStages => G_PipelineStages,
G_Width => G_Width,
G_RegisterBalancing => G_RegisterBalancing
)
port map(
I_CLK => I_CLK,
I_Enable => C_Pipeline0Enable,
I_Data => R_DataIn,
O_Data => R_Data
);
---------
PipelineControllerOut : entity work.PipelineController
generic map(
G_PipelineStages => G_PipelineStages,
G_ResetActiveAt => '1'
)
port map(
I_CLK => I_CLK,
I_RST => R_RST,
I_CE => R_CE,
O_Enable => C_Pipeline1Enable,
I_Valid => R_Valid,
O_Ready => R_Ready,
O_Valid => R_ValidOut,
I_Ready => R_ReadyIn
);
PipelineRegisterOut : entity work.PipelineRegister
generic map(
G_PipelineStages => G_PipelineStages,
G_Width => G_Width,
G_RegisterBalancing => G_RegisterBalancing
)
port map(
I_CLK => I_CLK,
I_Enable => C_Pipeline1Enable,
I_Data => R_Data,
O_Data => R_DataOut
);
end generate gen_modules;
end architecture RTL;

123
src/Pipeline_pb_Module.vhd Normal file
View File

@@ -0,0 +1,123 @@
--@ Performance Benchmarking Environment
--@ This file is a wrapper for the module under test. It encapsulates
--@ the module with flip-flops to create a synchronous interface,
--@ which is necessary to evaluate the synthesis results of the module.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
--@ Single pipeline module with a valid/ready handshake on the upstream
--@ (I_Data/I_Valid/O_Ready) and downstream (O_Data/O_Valid/I_Ready) side.
entity Pipeline_pb_Module is
generic (
--@ Number of pipeline stages
--@ (forwarded to the PipelineController and PipelineRegister instances)
G_PipelineStages : integer := 10;
--@ Data width
--@ (width in bits of I_Data and O_Data)
G_Width : integer := 32;
--@ Register balancing attribute<br>
--@ - "no" : No register balancing <br>
--@ - "yes": Register balancing in both directions <br>
--@ - "forward": Moves a set of FFs at the inputs of a LUT to a single FF at its output. <br>
--@ - "backward": Moves a single FF at the output of a LUT to a set of FFs at its inputs.
--@ (forwarded unchanged to the PipelineRegister instance)
G_RegisterBalancing : string := "no";
--@ Enable pipeline buffer
--@ - true : Use pipeline buffer
--@ - false : Direct connection (bypass)
G_EnablePipelineBuffer : boolean := false
);
port (
--@ Clock, forwarded to every instantiated sub-entity
I_CLK : in std_logic;
--@ Reset, forwarded to the controllers, which are instantiated
--@ with G_ResetActiveAt => '1'
I_RST : in std_logic;
--@ Chip enable, forwarded to the controllers
I_CE : in std_logic;
--@ Upstream data word
I_Data : in std_logic_vector(G_Width - 1 downto 0);
--@ Upstream valid, connected to the input PipelineController
I_Valid : in std_logic;
--@ Upstream ready, driven by the input PipelineController
O_Ready : out std_logic;
--@ Downstream data word
O_Data : out std_logic_vector(G_Width - 1 downto 0);
--@ Downstream valid
O_Valid : out std_logic;
--@ Downstream ready
I_Ready : in std_logic
);
end entity Pipeline_pb_Module;
--@ RTL implementation of one pipeline module:
--@ input controller + input register, an example combinational operation,
--@ and an optional pipeline buffer stage on the output.
architecture RTL of Pipeline_pb_Module is
    -- Enable for the input pipeline register, driven by PipelineControllerIn.
    signal C_Pipeline0Enable : std_logic;
    -- Enable vector for the optional pipeline buffer. It has no driver when
    -- the buffer is not generated, hence the explicit default value.
    signal C_PipelineBufferEnable : std_logic_vector(1 downto 0) := (others => '0');

    -- Handshake between the input controller and the output stage.
    signal R_Valid : std_logic;
    signal R_Ready : std_logic;

    -- Output of the input pipeline register.
    signal R_Data : std_logic_vector(G_Width - 1 downto 0);
    -- Result of the example operation applied to R_Data.
    signal C_Data : std_logic_vector(G_Width - 1 downto 0);
begin

    ---------------------------------------------------------------------------
    -- Input stage: handshake controller and data register
    ---------------------------------------------------------------------------
    PipelineControllerIn : entity work.PipelineController
        generic map(
            G_PipelineStages => G_PipelineStages,
            G_ResetActiveAt  => '1'
        )
        port map(
            I_CLK    => I_CLK,
            I_RST    => I_RST,
            I_CE     => I_CE,
            O_Enable => C_Pipeline0Enable,
            I_Valid  => I_Valid,
            O_Ready  => O_Ready,
            O_Valid  => R_Valid,
            I_Ready  => R_Ready
        );

    PipelineRegisterIn : entity work.PipelineRegister
        generic map(
            G_PipelineStages    => G_PipelineStages,
            G_Width             => G_Width,
            G_RegisterBalancing => G_RegisterBalancing
        )
        port map(
            I_CLK    => I_CLK,
            I_Enable => C_Pipeline0Enable,
            I_Data   => I_Data,
            O_Data   => R_Data
        );

    ---------------------------------------------------------------------------
    -- Example operation, can be replaced with actual logic.
    -- The addition wraps modulo 2**G_Width (numeric_std unsigned arithmetic).
    ---------------------------------------------------------------------------
    C_Data <= std_logic_vector(unsigned(R_Data) + 3);

    ---------------------------------------------------------------------------
    -- Output stage, selected by G_EnablePipelineBuffer
    ---------------------------------------------------------------------------
    -- Buffered output: C_Data passes through the pipeline buffer.
    gen_pipeline_buffer : if G_EnablePipelineBuffer generate
        PipelineBufferController : entity work.PipelineBufferController
            generic map(
                G_ResetActiveAt => '1'
            )
            port map(
                I_CLK    => I_CLK,
                I_RST    => I_RST,
                I_CE     => I_CE,
                O_Enable => C_PipelineBufferEnable,
                I_Valid  => R_Valid,
                O_Ready  => R_Ready,
                O_Valid  => O_Valid,
                I_Ready  => I_Ready
            );

        PipelineBuffer : entity work.PipelineBuffer
            generic map(
                G_Width => G_Width
            )
            port map(
                I_CLK    => I_CLK,
                I_Enable => C_PipelineBufferEnable,
                I_Data   => C_Data,
                O_Data   => O_Data
            );
    end generate gen_pipeline_buffer;

    -- Direct connection when the pipeline buffer is disabled.
    gen_direct_connection : if not G_EnablePipelineBuffer generate
        -- Only the buffer is bypassed; the handshake passes straight through.
        O_Valid <= R_Valid;
        R_Ready <= I_Ready;
        -- Drive the output from C_Data (not R_Data) so the example operation
        -- is applied in both configurations and is not optimized away when
        -- the buffer is disabled.
        O_Data  <= C_Data;
    end generate gen_direct_connection;

end architecture RTL;