From 70317e8bed6bdfcad1f2046dca0169262c74d5fa Mon Sep 17 00:00:00 2001 From: Michael Oshiro Date: Thu, 10 Oct 2024 07:43:40 -0400 Subject: [PATCH 1/5] Updated project_generation_scripts and tf_merge_streamer.vhd to synchronize bx with output, synchronize bx 0 with other bxs, and synchronize reading of memories by tf_merge_streamer --- .../common/hdl/tf_merge_streamer.vhd | 87 ++++++++++++------- emData/project_generation_scripts | 2 +- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/IntegrationTests/common/hdl/tf_merge_streamer.vhd b/IntegrationTests/common/hdl/tf_merge_streamer.vhd index d13a442b5f0..32c38c82dea 100644 --- a/IntegrationTests/common/hdl/tf_merge_streamer.vhd +++ b/IntegrationTests/common/hdl/tf_merge_streamer.vhd @@ -4,7 +4,7 @@ --! at the end of the first half of the TF algo. --! @author Jason Fan (jf847@cornell.edu) --! @date 2024-02-29 ---! @version v.1.0 +--! @version v.1.1 --=========================================================================== --! 
Standard library @@ -35,6 +35,7 @@ entity tf_merge_streamer is ); port ( bx_in : in std_logic_vector(2 downto 0 ); + bx_in_vld : in std_logic; rst: in std_logic; clk : in std_logic; --output read enable to tf_mem modules @@ -61,10 +62,12 @@ end entity tf_merge_streamer; architecture RTL of tf_merge_streamer is constant MAX_INPUTS : integer := 4; - constant pipe_stages : integer := 4; + constant pipe_stages : integer := 3; + constant LOG2_RAM_DEPTH : integer := CLOGB2(RAM_DEPTH); type mem_count_arr is array(NUM_INPUTS-1 downto 0) of integer; - type toread_arr is array(pipe_stages-1 downto 0) of integer; + type toread_arr is array(pipe_stages downto 0) of integer; + type bx_arr is array(pipe_stages downto 0) of std_logic_vector(2 downto 0); --nent and din are repackaged from odd input type into --arrays @@ -72,66 +75,86 @@ architecture RTL of tf_merge_streamer is type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0); signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0'); - signal readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0'); + signal bx_pipe : bx_arr := (others => (others => '0')); + signal addr_arr_int : std_logic_vector(NUM_INPUTS*LOG2_RAM_DEPTH-1 downto 0) := (others => '0'); begin process(clk) variable nent_arr: nent_array; variable din_arr: din_array; variable bx_last :integer :=0; + variable bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0) variable mem_count : mem_count_arr := (others => 0); variable current_page: natural := 0; variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock + variable readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0'); variable toread : toread_arr := (others => 0); begin if rising_edge(clk) then - nent_arr := (nent3,nent2,nent1,nent0); --repackage nent 
and din as arrays + if (bx_in_vld = '1') then + bx_in_latch := bx_in; + end if; + + nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays din_arr := (din3, din2, din1, din0); - bx_change := (bx_last /= to_integer(unsigned(bx_in))); + bx_change := (bx_last /= to_integer(unsigned(bx_in_latch))); if (bx_change) then --reset with rst signal or a change in bx - -- check if bx changes and update page to read from + -- check if bx changes and update page to read from mem_count := (others => 0); + toread(0) := (NUM_INPUTS-1) mod NUM_INPUTS; end if ; - current_page := to_integer(unsigned(bx_in)) mod NUM_PAGES; - --check if memory read counter is less than nentries + current_page := to_integer(unsigned(bx_in_latch)) mod NUM_PAGES; + + --check if memory read counter is less than nentries + --this sets readmask to 1 for any inputs that still have words to read for i in 0 to NUM_INPUTS-1 loop if ((mem_count(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then - readmask(i) <= '1'; - else - readmask(i) <= '0'; - end if; + readmask(i) := '1'; + else + readmask(i) := '0'; + end if; end loop; + if (to_integer(unsigned(readmask)) = 0) then - valid(0) <= '0'; + valid(0) <= '0'; else - for j in 0 to NUM_INPUTS-1 loop - if readmask((j + toread(0) + 1) mod NUM_INPUTS) = '1' then - toread(0) := (j + toread(0) + 1 ) mod NUM_INPUTS; - exit; - end if; - end loop; - addr_arr(((toread(0)+1)*clogb2(RAM_DEPTH))-1 downto (toread(0))*clogb2(RAM_DEPTH)) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(toread(0)), clogb2(RAM_DEPTH))); - valid(0) <= '1'; - mem_count(toread(0)) := mem_count(toread(0)) + 1; + --loop through starting with the next input in front of the current to-read (round-robin) + for j in 0 to NUM_INPUTS-1 loop + if readmask((j + toread(0) + 1) mod NUM_INPUTS) = '1' then + toread(0) := (j + toread(0) + 1 ) mod NUM_INPUTS; + exit; + end if; + end loop; + addr_arr_int(((toread(0)+1)*LOG2_RAM_DEPTH)-1 downto (toread(0))*LOG2_RAM_DEPTH) <= 
std_logic_vector(to_unsigned(current_page*page_length + mem_count(toread(0)), LOG2_RAM_DEPTH)); + valid(0) <= '1'; + mem_count(toread(0)) := mem_count(toread(0)) + 1; end if; + --generate output a few clocks after address is set to account for delay in RAMs if valid(pipe_stages-1) ='1' then - if (NUM_EXTRA_BITS > 0) then - merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages-1),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages-1)); - else - merged_dout <= '1' & din_arr(toread(pipe_stages-2)); - end if ; + if (NUM_EXTRA_BITS > 0) then + merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages)); + else + merged_dout <= '1' & din_arr(toread(pipe_stages-1)); + end if ; else - merged_dout <= (others => '0'); + merged_dout <= (others => '0'); end if; - bx_last := to_integer(unsigned(bx_in)); - bx_out <= bx_in; - for j in 0 to pipe_stages-2 loop + + bx_last := to_integer(unsigned(bx_in_latch)); + bx_pipe(0) <= bx_in_latch; + bx_out <= bx_pipe(pipe_stages-1); + toread(pipe_stages) := toread(pipe_stages-1); + for j in pipe_stages-2 downto 0 loop toread(j+1) := toread(j); valid(j+1) <= valid(j); + bx_pipe(j+1) <= bx_pipe(j); end loop; end if; end process; + + addr_arr <= addr_arr_int; + end RTL; diff --git a/emData/project_generation_scripts b/emData/project_generation_scripts index 68630ed519a..efa78e05637 160000 --- a/emData/project_generation_scripts +++ b/emData/project_generation_scripts @@ -1 +1 @@ -Subproject commit 68630ed519a558e5b8fab75b2a944255c60eab60 +Subproject commit efa78e05637fe7c6850094bf7bd86d9082ae4abb From 96f835b1bee302c4ae208fd5db032d7d5648bcf9 Mon Sep 17 00:00:00 2001 From: Michael Oshiro Date: Thu, 10 Oct 2024 07:58:44 -0400 Subject: [PATCH 2/5] Remove commented out code and fix tabs --- .../common/hdl/tf_merge_streamer.vhd | 168 +++++++++--------- emData/project_generation_scripts | 2 +- 2 files changed, 85 insertions(+), 85 deletions(-) diff --git 
a/IntegrationTests/common/hdl/tf_merge_streamer.vhd b/IntegrationTests/common/hdl/tf_merge_streamer.vhd index 32c38c82dea..3c9a5ea0eba 100644 --- a/IntegrationTests/common/hdl/tf_merge_streamer.vhd +++ b/IntegrationTests/common/hdl/tf_merge_streamer.vhd @@ -25,37 +25,37 @@ use unisim.vcomponents.all; use work.tf_pkg.all; entity tf_merge_streamer is - generic ( - RAM_WIDTH : natural := 72; - NUM_PAGES : natural := 8; - RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH; - NUM_INPUTS : natural := 4; - NUM_EXTRA_BITS: natural := 2; - ADDR_WIDTH : natural := 7 - ); - port ( - bx_in : in std_logic_vector(2 downto 0 ); + generic ( + RAM_WIDTH: natural := 72; + NUM_PAGES : natural := 8; + RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH; + NUM_INPUTS : natural := 4; + NUM_EXTRA_BITS: natural := 2; + ADDR_WIDTH : natural := 7 + ); + port ( + bx_in : in std_logic_vector(2 downto 0 ); bx_in_vld : in std_logic; - rst: in std_logic; - clk : in std_logic; + rst: in std_logic; + clk : in std_logic; --output read enable to tf_mem modules - enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0); - bx_out : out std_logic_vector(2 downto 0); + enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0); + bx_out : out std_logic_vector(2 downto 0); --output merged stream, includes input word, up to 2 bits that encode the --original module, and a valid bit (from LSB to MSB) - merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0); + merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0); --input data,nent and addresses are best suited for unconstrained arrays --but this is not supported in vivado 2019 --module always accepts 4 input memories, but will not use all of them - din0: in std_logic_vector(RAM_WIDTH-1 downto 0); - din1: in std_logic_vector(RAM_WIDTH-1 downto 0); - din2: in std_logic_vector(RAM_WIDTH-1 downto 0); - din3: in std_logic_vector(RAM_WIDTH-1 downto 0); - nent0: in t_arr_7b(0 to NUM_PAGES-1); - nent1: in t_arr_7b(0 to NUM_PAGES-1); - nent2: in 
t_arr_7b(0 to NUM_PAGES-1); - nent3: in t_arr_7b(0 to NUM_PAGES-1); - addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0) + din0: in std_logic_vector(RAM_WIDTH-1 downto 0); + din1: in std_logic_vector(RAM_WIDTH-1 downto 0); + din2: in std_logic_vector(RAM_WIDTH-1 downto 0); + din3: in std_logic_vector(RAM_WIDTH-1 downto 0); + nent0: in t_arr_7b(0 to NUM_PAGES-1); + nent1: in t_arr_7b(0 to NUM_PAGES-1); + nent2: in t_arr_7b(0 to NUM_PAGES-1); + nent3: in t_arr_7b(0 to NUM_PAGES-1); + addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0) ) ; end entity tf_merge_streamer; @@ -65,95 +65,95 @@ architecture RTL of tf_merge_streamer is constant pipe_stages : integer := 3; constant LOG2_RAM_DEPTH : integer := CLOGB2(RAM_DEPTH); - type mem_count_arr is array(NUM_INPUTS-1 downto 0) of integer; - type toread_arr is array(pipe_stages downto 0) of integer; + type mem_count_arr is array(NUM_INPUTS-1 downto 0) of integer; + type toread_arr is array(pipe_stages downto 0) of integer; type bx_arr is array(pipe_stages downto 0) of std_logic_vector(2 downto 0); --nent and din are repackaged from odd input type into --arrays - type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1); - type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0); + type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1); + type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0); - signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0'); + signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0'); signal bx_pipe : bx_arr := (others => (others => '0')); - signal addr_arr_int : std_logic_vector(NUM_INPUTS*LOG2_RAM_DEPTH-1 downto 0) := (others => '0'); + signal addr_arr_int : std_logic_vector(NUM_INPUTS*LOG2_RAM_DEPTH-1 downto 0) := (others => '0'); begin - process(clk) - variable nent_arr: nent_array; - variable din_arr: 
din_array; - variable bx_last :integer :=0; + process(clk) + variable nent_arr: nent_array; + variable din_arr: din_array; + variable bx_last :integer :=0; variable bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0) - variable mem_count : mem_count_arr := (others => 0); - variable current_page: natural := 0; - variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock - variable readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0'); + variable mem_count : mem_count_arr := (others => 0); + variable current_page: natural := 0; + variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock + variable readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0'); - variable toread : toread_arr := (others => 0); + variable toread : toread_arr := (others => 0); - begin + begin if rising_edge(clk) then if (bx_in_vld = '1') then bx_in_latch := bx_in; end if; - nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays - din_arr := (din3, din2, din1, din0); - bx_change := (bx_last /= to_integer(unsigned(bx_in_latch))); - if (bx_change) then --reset with rst signal or a change in bx - -- check if bx changes and update page to read from - mem_count := (others => 0); + nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays + din_arr := (din3, din2, din1, din0); + bx_change := (bx_last /= to_integer(unsigned(bx_in_latch))); + if (bx_change) then --reset with rst signal or a change in bx + -- check if bx changes and update page to read from + mem_count := (others => 0); toread(0) := (NUM_INPUTS-1) mod NUM_INPUTS; - end if ; - current_page := to_integer(unsigned(bx_in_latch)) mod NUM_PAGES; + end if ; + current_page := to_integer(unsigned(bx_in_latch)) mod NUM_PAGES; --check 
if memory read counter is less than nentries --this sets readmask to 1 for any inputs that still have words to read - for i in 0 to NUM_INPUTS-1 loop - if ((mem_count(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then - readmask(i) := '1'; - else - readmask(i) := '0'; - end if; - end loop; - - if (to_integer(unsigned(readmask)) = 0) then - valid(0) <= '0'; - else + for i in 0 to NUM_INPUTS-1 loop + if ((mem_count(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then + readmask(i) := '1'; + else + readmask(i) := '0'; + end if; + end loop; + + if (to_integer(unsigned(readmask)) = 0) then + valid(0) <= '0'; + else --loop through starting with the next input in front of the current to-read (round-robin) - for j in 0 to NUM_INPUTS-1 loop - if readmask((j + toread(0) + 1) mod NUM_INPUTS) = '1' then - toread(0) := (j + toread(0) + 1 ) mod NUM_INPUTS; - exit; - end if; - end loop; - addr_arr_int(((toread(0)+1)*LOG2_RAM_DEPTH)-1 downto (toread(0))*LOG2_RAM_DEPTH) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(toread(0)), LOG2_RAM_DEPTH)); + for j in 0 to NUM_INPUTS-1 loop + if readmask((j + toread(0) + 1) mod NUM_INPUTS) = '1' then + toread(0) := (j + toread(0) + 1 ) mod NUM_INPUTS; + exit; + end if; + end loop; + addr_arr_int(((toread(0)+1)*LOG2_RAM_DEPTH)-1 downto (toread(0))*LOG2_RAM_DEPTH) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(toread(0)), LOG2_RAM_DEPTH)); valid(0) <= '1'; mem_count(toread(0)) := mem_count(toread(0)) + 1; - end if; + end if; --generate output a few clocks after address is set to account for delay in RAMs - if valid(pipe_stages-1) ='1' then - if (NUM_EXTRA_BITS > 0) then - merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages)); - else - merged_dout <= '1' & din_arr(toread(pipe_stages-1)); - end if ; - else - merged_dout <= (others => '0'); - end if; - - bx_last := to_integer(unsigned(bx_in_latch)); + if valid(pipe_stages-1) 
='1' then + if (NUM_EXTRA_BITS > 0) then + merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages)); + else + merged_dout <= '1' & din_arr(toread(pipe_stages-1)); + end if ; + else + merged_dout <= (others => '0'); + end if; + + bx_last := to_integer(unsigned(bx_in_latch)); bx_pipe(0) <= bx_in_latch; - bx_out <= bx_pipe(pipe_stages-1); - toread(pipe_stages) := toread(pipe_stages-1); + bx_out <= bx_pipe(pipe_stages-1); + toread(pipe_stages) := toread(pipe_stages-1); for j in pipe_stages-2 downto 0 loop - toread(j+1) := toread(j); - valid(j+1) <= valid(j); + toread(j+1) := toread(j); + valid(j+1) <= valid(j); bx_pipe(j+1) <= bx_pipe(j); end loop; - end if; - end process; + end if; + end process; addr_arr <= addr_arr_int; diff --git a/emData/project_generation_scripts b/emData/project_generation_scripts index efa78e05637..20898b279f9 160000 --- a/emData/project_generation_scripts +++ b/emData/project_generation_scripts @@ -1 +1 @@ -Subproject commit efa78e05637fe7c6850094bf7bd86d9082ae4abb +Subproject commit 20898b279f9256ef172cf256ba029c00f201dc1a From 3be1e0acc35a85ca17190a5e32b6a728c1eb5251 Mon Sep 17 00:00:00 2001 From: Michael Oshiro Date: Wed, 23 Oct 2024 09:24:39 -0400 Subject: [PATCH 3/5] Added optimizations to help meet timing: changed tf_merge_streamer and transitioned TPAR memories to URAM --- IntegrationTests/common/hdl/tf_mem.vhd | 5 +- .../common/hdl/tf_merge_streamer.vhd | 150 +++++++++++++----- emData/project_generation_scripts | 2 +- 3 files changed, 112 insertions(+), 45 deletions(-) diff --git a/IntegrationTests/common/hdl/tf_mem.vhd b/IntegrationTests/common/hdl/tf_mem.vhd index 08a305c6ce8..2ee40dd9b43 100644 --- a/IntegrationTests/common/hdl/tf_mem.vhd +++ b/IntegrationTests/common/hdl/tf_mem.vhd @@ -36,7 +36,8 @@ entity tf_mem is INIT_HEX : boolean := true; --! Read init file in hex (default) or bin RAM_PERFORMANCE : string := "HIGH_PERFORMANCE";--! 
Select "HIGH_PERFORMANCE" (2 clk latency) or "LOW_LATENCY" (1 clk latency) NAME : string := "MEMNAME"; --! Name of mem for printout - DEBUG : boolean := false --! If true prints debug info + DEBUG : boolean := false; --! If true prints debug info + MEM_TYPE : string := "block" --! specifies RAM type (block/ultra) ); port ( clka : in std_logic; --! Write clock @@ -101,7 +102,7 @@ signal sv_RAM_row : std_logic_vector(RAM_WIDTH-1 downto 0) := (others =>'0'); -- ########################### Attributes ########################### attribute ram_style : string; -attribute ram_style of sa_RAM_data : signal is "block"; +attribute ram_style of sa_RAM_data : signal is MEM_TYPE; begin diff --git a/IntegrationTests/common/hdl/tf_merge_streamer.vhd b/IntegrationTests/common/hdl/tf_merge_streamer.vhd index 3c9a5ea0eba..a9986680f52 100644 --- a/IntegrationTests/common/hdl/tf_merge_streamer.vhd +++ b/IntegrationTests/common/hdl/tf_merge_streamer.vhd @@ -65,9 +65,10 @@ architecture RTL of tf_merge_streamer is constant pipe_stages : integer := 3; constant LOG2_RAM_DEPTH : integer := CLOGB2(RAM_DEPTH); - type mem_count_arr is array(NUM_INPUTS-1 downto 0) of integer; - type toread_arr is array(pipe_stages downto 0) of integer; + type mem_count_arr is array(MAX_INPUTS-1 downto 0) of integer; + type toread_arr is array(pipe_stages-1 downto 0) of integer range 0 to 3; type bx_arr is array(pipe_stages downto 0) of std_logic_vector(2 downto 0); + type addr_arr_arr is array(MAX_INPUTS-1 downto 0) of std_logic_vector(LOG2_RAM_DEPTH-1 downto 0); --nent and din are repackaged from odd input type into --arrays @@ -76,66 +77,127 @@ architecture RTL of tf_merge_streamer is signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0'); signal bx_pipe : bx_arr := (others => (others => '0')); - signal addr_arr_int : std_logic_vector(NUM_INPUTS*LOG2_RAM_DEPTH-1 downto 0) := (others => '0'); + --signal addr_arr_int : std_logic_vector(NUM_INPUTS*LOG2_RAM_DEPTH-1 downto 0) := (others => '0'); 
+ signal addr_arr_int : addr_arr_arr := (others => (others => '0')); + signal bx_last : std_logic_vector(2 downto 0) := "111"; + signal bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0) + signal mem_count : mem_count_arr := (others => 0); + signal toread : toread_arr := (others => 0); + signal current_page: natural := 7 mod NUM_PAGES; + signal readmask : std_logic_vector(MAX_INPUTS-1 downto 0) := (others => '0'); begin process(clk) variable nent_arr: nent_array; variable din_arr: din_array; - variable bx_last :integer :=0; - variable bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0) - variable mem_count : mem_count_arr := (others => 0); - variable current_page: natural := 0; variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock - variable readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0'); - - variable toread : toread_arr := (others => 0); + variable nextread : integer range 0 to 3 := 0; + variable mem_count_next : mem_count_arr := (others => 0); begin if rising_edge(clk) then if (bx_in_vld = '1') then - bx_in_latch := bx_in; + bx_in_latch <= bx_in; + current_page <= to_integer(unsigned(bx_in)) mod NUM_PAGES; end if; nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays din_arr := (din3, din2, din1, din0); - bx_change := (bx_last /= to_integer(unsigned(bx_in_latch))); + bx_change := (bx_last /= bx_in_latch); + if (bx_change) then --reset with rst signal or a change in bx - -- check if bx changes and update page to read from - mem_count := (others => 0); - toread(0) := (NUM_INPUTS-1) mod NUM_INPUTS; - end if ; - current_page := to_integer(unsigned(bx_in_latch)) mod NUM_PAGES; + mem_count <= (others => 0); + toread(0) <= (NUM_INPUTS-1) 
mod NUM_INPUTS; + valid(0) <= '0'; + + --check if memory read counter is less than nentries + --this sets readmask to 1 for any inputs that still have words to read + for i in 0 to NUM_INPUTS-1 loop + if (0 < to_integer(unsigned(nent_arr(i)(current_page)))) then + readmask(i) <= '1'; + else + readmask(i) <= '0'; + end if; + end loop; + + else + --only check for valid reads on non BX change clocks + --this gives up a clock cycle, but reduces logic levels downstream + + for i in 0 to NUM_INPUTS-1 loop + mem_count_next(i) := mem_count(i); + end loop; - --check if memory read counter is less than nentries - --this sets readmask to 1 for any inputs that still have words to read - for i in 0 to NUM_INPUTS-1 loop - if ((mem_count(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then - readmask(i) := '1'; + if (to_integer(unsigned(readmask)) = 0) then + valid(0) <= '0'; else - readmask(i) := '0'; + valid(0) <= '1'; + --loop through starting with the next input in front of the current to-read (round-robin) + --the giant if block is gross, but for loop with exit seems to generate way too many logic levels + --hope: this figures out nextread in a single lookup + if (toread(0) = 0) then + if (readmask(1) = '1') then + nextread := 1; + elsif (readmask(2) = '1') then + nextread := 2; + elsif (readmask(3) = '1') then + nextread := 3; + elsif (readmask(0) = '1') then + nextread := 0; + end if; + elsif (toread(0) = 1) then + if (readmask(2) = '1') then + nextread := 2; + elsif (readmask(3) = '1') then + nextread := 3; + elsif (readmask(0) = '1') then + nextread := 0; + elsif (readmask(1) = '1') then + nextread := 1; + end if; + elsif (toread(0) = 2) then + if (readmask(3) = '1') then + nextread := 3; + elsif (readmask(0) = '1') then + nextread := 0; + elsif (readmask(1) = '1') then + nextread := 1; + elsif (readmask(2) = '1') then + nextread := 2; + end if; + elsif (toread(0) = 3) then + if (readmask(0) = '1') then + nextread := 0; + elsif (readmask(1) = '1') then + nextread 
:= 1; + elsif (readmask(2) = '1') then + nextread := 2; + elsif (readmask(3) = '1') then + nextread := 3; + end if; + end if; + addr_arr_int(nextread) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(nextread), LOG2_RAM_DEPTH)); + mem_count(nextread) <= mem_count(nextread) + 1; + toread(0) <= nextread; + mem_count_next(nextread) := mem_count_next(nextread)+1; end if; - end loop; - if (to_integer(unsigned(readmask)) = 0) then - valid(0) <= '0'; - else - --loop through starting with the next input in front of the current to-read (round-robin) - for j in 0 to NUM_INPUTS-1 loop - if readmask((j + toread(0) + 1) mod NUM_INPUTS) = '1' then - toread(0) := (j + toread(0) + 1 ) mod NUM_INPUTS; - exit; + --check if memory read counter is less than nentries + --this sets readmask to 1 for any inputs that still have words to read + for i in 0 to NUM_INPUTS-1 loop + if ((mem_count_next(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then + readmask(i) <= '1'; + else + readmask(i) <= '0'; end if; end loop; - addr_arr_int(((toread(0)+1)*LOG2_RAM_DEPTH)-1 downto (toread(0))*LOG2_RAM_DEPTH) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(toread(0)), LOG2_RAM_DEPTH)); - valid(0) <= '1'; - mem_count(toread(0)) := mem_count(toread(0)) + 1; - end if; + + end if ; --generate output a few clocks after address is set to account for delay in RAMs if valid(pipe_stages-1) ='1' then if (NUM_EXTRA_BITS > 0) then - merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages)); + merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages-1),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages-1)); else merged_dout <= '1' & din_arr(toread(pipe_stages-1)); end if ; @@ -143,18 +205,22 @@ begin merged_dout <= (others => '0'); end if; - bx_last := to_integer(unsigned(bx_in_latch)); + bx_last <= bx_in_latch; bx_pipe(0) <= bx_in_latch; - bx_out <= bx_pipe(pipe_stages-1); - toread(pipe_stages) := 
toread(pipe_stages-1); + bx_out <= bx_pipe(pipe_stages); for j in pipe_stages-2 downto 0 loop - toread(j+1) := toread(j); valid(j+1) <= valid(j); + toread(j+1) <= toread(j); + end loop; + for j in pipe_stages-1 downto 0 loop bx_pipe(j+1) <= bx_pipe(j); end loop; end if; end process; - addr_arr <= addr_arr_int; + GEN_ADDR: for i in 0 to NUM_INPUTS-1 generate + begin + addr_arr(LOG2_RAM_DEPTH*(i+1)-1 downto LOG2_RAM_DEPTH*i) <= addr_arr_int(i); + end generate; end RTL; diff --git a/emData/project_generation_scripts b/emData/project_generation_scripts index 20898b279f9..dbd9e736a83 160000 --- a/emData/project_generation_scripts +++ b/emData/project_generation_scripts @@ -1 +1 @@ -Subproject commit 20898b279f9256ef172cf256ba029c00f201dc1a +Subproject commit dbd9e736a833614604537cd17486d9f89dfd648b From 70e8b5d5e1ac630f20087cdb43afa6d5484d08e5 Mon Sep 17 00:00:00 2001 From: Michael Oshiro Date: Fri, 25 Oct 2024 14:39:45 -0400 Subject: [PATCH 4/5] Incorporate suggestions from Jason (change if to reverse loop) --- .../common/hdl/tf_merge_streamer.vhd | 413 ++++++++---------- 1 file changed, 187 insertions(+), 226 deletions(-) diff --git a/IntegrationTests/common/hdl/tf_merge_streamer.vhd b/IntegrationTests/common/hdl/tf_merge_streamer.vhd index a9986680f52..aee9335d809 100644 --- a/IntegrationTests/common/hdl/tf_merge_streamer.vhd +++ b/IntegrationTests/common/hdl/tf_merge_streamer.vhd @@ -1,226 +1,187 @@ ---=========================================================================== ---! @file ---! @brief Module which reads and streams out the contents of the memories ---! at the end of the first half of the TF algo. ---! @author Jason Fan (jf847@cornell.edu) ---! @date 2024-02-29 ---! @version v.1.1 ---=========================================================================== - ---! Standard library -library ieee; ---! Standard package -use ieee.std_logic_1164.all; ---! Signed/unsigned calculations -use ieee.numeric_std.all; ---! Standard functions -library std; ---! 
Standard TextIO functions -use std.textio.all; - ---! Xilinx library -library unisim; ---! Xilinx package -use unisim.vcomponents.all; -use work.tf_pkg.all; - -entity tf_merge_streamer is - generic ( - RAM_WIDTH: natural := 72; - NUM_PAGES : natural := 8; - RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH; - NUM_INPUTS : natural := 4; - NUM_EXTRA_BITS: natural := 2; - ADDR_WIDTH : natural := 7 - ); - port ( - bx_in : in std_logic_vector(2 downto 0 ); - bx_in_vld : in std_logic; - rst: in std_logic; - clk : in std_logic; - --output read enable to tf_mem modules - enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0); - bx_out : out std_logic_vector(2 downto 0); - --output merged stream, includes input word, up to 2 bits that encode the - --original module, and a valid bit (from LSB to MSB) - merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0); - --input data,nent and addresses are best suited for unconstrained arrays - --but this is not supported in vivado 2019 - --module always accepts 4 input memories, but will not use all of them - din0: in std_logic_vector(RAM_WIDTH-1 downto 0); - din1: in std_logic_vector(RAM_WIDTH-1 downto 0); - din2: in std_logic_vector(RAM_WIDTH-1 downto 0); - din3: in std_logic_vector(RAM_WIDTH-1 downto 0); - nent0: in t_arr_7b(0 to NUM_PAGES-1); - nent1: in t_arr_7b(0 to NUM_PAGES-1); - nent2: in t_arr_7b(0 to NUM_PAGES-1); - nent3: in t_arr_7b(0 to NUM_PAGES-1); - addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0) -) ; -end entity tf_merge_streamer; - -architecture RTL of tf_merge_streamer is - - constant MAX_INPUTS : integer := 4; - constant pipe_stages : integer := 3; - constant LOG2_RAM_DEPTH : integer := CLOGB2(RAM_DEPTH); - - type mem_count_arr is array(MAX_INPUTS-1 downto 0) of integer; - type toread_arr is array(pipe_stages-1 downto 0) of integer range 0 to 3; - type bx_arr is array(pipe_stages downto 0) of std_logic_vector(2 downto 0); - type addr_arr_arr is array(MAX_INPUTS-1 downto 0) of 
std_logic_vector(LOG2_RAM_DEPTH-1 downto 0); - - --nent and din are repackaged from odd input type into - --arrays - type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1); - type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0); - - signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0'); - signal bx_pipe : bx_arr := (others => (others => '0')); - --signal addr_arr_int : std_logic_vector(NUM_INPUTS*LOG2_RAM_DEPTH-1 downto 0) := (others => '0'); - signal addr_arr_int : addr_arr_arr := (others => (others => '0')); - signal bx_last : std_logic_vector(2 downto 0) := "111"; - signal bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0) - signal mem_count : mem_count_arr := (others => 0); - signal toread : toread_arr := (others => 0); - signal current_page: natural := 7 mod NUM_PAGES; - signal readmask : std_logic_vector(MAX_INPUTS-1 downto 0) := (others => '0'); - -begin - process(clk) - variable nent_arr: nent_array; - variable din_arr: din_array; - variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock - variable nextread : integer range 0 to 3 := 0; - variable mem_count_next : mem_count_arr := (others => 0); - - begin - if rising_edge(clk) then - if (bx_in_vld = '1') then - bx_in_latch <= bx_in; - current_page <= to_integer(unsigned(bx_in)) mod NUM_PAGES; - end if; - - nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays - din_arr := (din3, din2, din1, din0); - bx_change := (bx_last /= bx_in_latch); - - if (bx_change) then --reset with rst signal or a change in bx - mem_count <= (others => 0); - toread(0) <= (NUM_INPUTS-1) mod NUM_INPUTS; - valid(0) <= '0'; - - --check if memory read counter is less than nentries - --this sets readmask to 1 for any inputs that still have words to read - for 
i in 0 to NUM_INPUTS-1 loop - if (0 < to_integer(unsigned(nent_arr(i)(current_page)))) then - readmask(i) <= '1'; - else - readmask(i) <= '0'; - end if; - end loop; - - else - --only check for valid reads on non BX change clocks - --this gives up a clock cycle, but reduces logic levels downstream - - for i in 0 to NUM_INPUTS-1 loop - mem_count_next(i) := mem_count(i); - end loop; - - if (to_integer(unsigned(readmask)) = 0) then - valid(0) <= '0'; - else - valid(0) <= '1'; - --loop through starting with the next input in front of the current to-read (round-robin) - --the giant if block is gross, but for loop with exit seems to generate way too many logic levels - --hope: this figures out nextread in a single lookup - if (toread(0) = 0) then - if (readmask(1) = '1') then - nextread := 1; - elsif (readmask(2) = '1') then - nextread := 2; - elsif (readmask(3) = '1') then - nextread := 3; - elsif (readmask(0) = '1') then - nextread := 0; - end if; - elsif (toread(0) = 1) then - if (readmask(2) = '1') then - nextread := 2; - elsif (readmask(3) = '1') then - nextread := 3; - elsif (readmask(0) = '1') then - nextread := 0; - elsif (readmask(1) = '1') then - nextread := 1; - end if; - elsif (toread(0) = 2) then - if (readmask(3) = '1') then - nextread := 3; - elsif (readmask(0) = '1') then - nextread := 0; - elsif (readmask(1) = '1') then - nextread := 1; - elsif (readmask(2) = '1') then - nextread := 2; - end if; - elsif (toread(0) = 3) then - if (readmask(0) = '1') then - nextread := 0; - elsif (readmask(1) = '1') then - nextread := 1; - elsif (readmask(2) = '1') then - nextread := 2; - elsif (readmask(3) = '1') then - nextread := 3; - end if; - end if; - addr_arr_int(nextread) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(nextread), LOG2_RAM_DEPTH)); - mem_count(nextread) <= mem_count(nextread) + 1; - toread(0) <= nextread; - mem_count_next(nextread) := mem_count_next(nextread)+1; - end if; - - --check if memory read counter is less than nentries - 
--this sets readmask to 1 for any inputs that still have words to read - for i in 0 to NUM_INPUTS-1 loop - if ((mem_count_next(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then - readmask(i) <= '1'; - else - readmask(i) <= '0'; - end if; - end loop; - - end if ; - - --generate output a few clocks after address is set to account for delay in RAMs - if valid(pipe_stages-1) ='1' then - if (NUM_EXTRA_BITS > 0) then - merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages-1),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages-1)); - else - merged_dout <= '1' & din_arr(toread(pipe_stages-1)); - end if ; - else - merged_dout <= (others => '0'); - end if; - - bx_last <= bx_in_latch; - bx_pipe(0) <= bx_in_latch; - bx_out <= bx_pipe(pipe_stages); - for j in pipe_stages-2 downto 0 loop - valid(j+1) <= valid(j); - toread(j+1) <= toread(j); - end loop; - for j in pipe_stages-1 downto 0 loop - bx_pipe(j+1) <= bx_pipe(j); - end loop; - end if; - end process; - - GEN_ADDR: for i in 0 to NUM_INPUTS-1 generate - begin - addr_arr(LOG2_RAM_DEPTH*(i+1)-1 downto LOG2_RAM_DEPTH*i) <= addr_arr_int(i); - end generate; - -end RTL; +--=========================================================================== +--! @file +--! @brief Module which reads and streams out the contents of the memories +--! at the end of the first half of the TF algo. +--! @author Jason Fan (jf847@cornell.edu) +--! @date 2024-02-29 +--! @version v.1.1 +--=========================================================================== + +--! Standard library +library ieee; +--! Standard package +use ieee.std_logic_1164.all; +--! Signed/unsigned calculations +use ieee.numeric_std.all; +--! Standard functions +library std; +--! Standard TextIO functions +use std.textio.all; + +--! Xilinx library +library unisim; +--! 
Xilinx package
+use unisim.vcomponents.all;
+use work.tf_pkg.all;
+
+--! @brief Round-robin merger of up to four paged input memories into one stream.
+--!
+--! On every rising clock edge the module:
+--!   * latches bx_in (and the page derived from it) while bx_in_vld = '1';
+--!   * restarts the readout whenever the latched bx differs from the value
+--!     seen on the previous clock (counters cleared, readmask rebuilt);
+--!   * otherwise issues at most one read address, picking the next input that
+--!     still has unread entries in round-robin order;
+--!   * emits the addressed word on merged_dout once the matching valid flag
+--!     has travelled through the pipe_stages-deep delay line.
+--!
+--! merged_dout layout (MSB to LSB): valid bit, NUM_EXTRA_BITS bits holding the
+--! index of the source input, RAM_WIDTH bits of data. It is all zeros on
+--! clocks without valid data.
+--!
+--! NOTE(review): within this architecture rst is never read, the ADDR_WIDTH
+--! generic is never used and enb_arr is never assigned - confirm that this is
+--! intended by the instantiating design.
+entity tf_merge_streamer is
+  generic (
+    RAM_WIDTH: natural := 72;
+    NUM_PAGES : natural := 8;
+    RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH;
+    NUM_INPUTS : natural := 4;
+    NUM_EXTRA_BITS: natural := 2;
+    ADDR_WIDTH : natural := 7
+  );
+  port (
+    bx_in : in std_logic_vector(2 downto 0 );
+    bx_in_vld : in std_logic;
+    rst: in std_logic;
+    clk : in std_logic;
+    --output read enable to tf_mem modules
+    enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0);
+    bx_out : out std_logic_vector(2 downto 0);
+    --output merged stream, includes input word, up to 2 bits that encode the
+    --original module, and a valid bit (from LSB to MSB)
+    merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0);
+    --input data,nent and addresses are best suited for unconstrained arrays
+    --but this is not supported in vivado 2019
+    --module always accepts 4 input memories, but will not use all of them
+    din0: in std_logic_vector(RAM_WIDTH-1 downto 0);
+    din1: in std_logic_vector(RAM_WIDTH-1 downto 0);
+    din2: in std_logic_vector(RAM_WIDTH-1 downto 0);
+    din3: in std_logic_vector(RAM_WIDTH-1 downto 0);
+    nent0: in t_arr_7b(0 to NUM_PAGES-1);
+    nent1: in t_arr_7b(0 to NUM_PAGES-1);
+    nent2: in t_arr_7b(0 to NUM_PAGES-1);
+    nent3: in t_arr_7b(0 to NUM_PAGES-1);
+    --one LOG2_RAM_DEPTH-wide read address per input, input 0 in the low bits
+    addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0)
+) ;
+end entity tf_merge_streamer;
+
+architecture RTL of tf_merge_streamer is
+
+  --number of din/nent port pairs the entity physically has
+  constant MAX_INPUTS : integer := 4;
+  --clocks between issuing an address and registering the word on merged_dout
+  constant pipe_stages : integer := 3;
+  constant LOG2_RAM_DEPTH : integer := CLOGB2(RAM_DEPTH);
+
+  type mem_count_arr is array(MAX_INPUTS-1 downto 0) of integer;
+  --index of the input selected for reading, one entry per pipeline stage
+  type toread_arr is array(pipe_stages-1 downto 0) of integer range 0 to MAX_INPUTS-1;
+  type bx_arr is array(pipe_stages downto 0) of std_logic_vector(2 downto 0);
+  type addr_arr_arr is array(MAX_INPUTS-1 downto 0) of std_logic_vector(LOG2_RAM_DEPTH-1 downto 0);
+
+  --nent and din are repackaged from odd input type into
+  --arrays
+  type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1);
+  type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0);
+
+  signal valid : std_logic_vector(pipe_stages-1 downto 0) := (others => '0');
+  signal bx_pipe : bx_arr := (others => (others => '0'));
+  signal addr_arr_int : addr_arr_arr := (others => (others => '0'));
+  signal bx_last : std_logic_vector(2 downto 0) := "111";
+  signal bx_in_latch : std_logic_vector(2 downto 0) := "111"; --since output triggered by BX change, initializing bx_in_latch to 7 will start write on first valid bx (0)
+  --number of words already requested from each input for the current page
+  signal mem_count : mem_count_arr := (others => 0);
+  signal toread : toread_arr := (others => 0);
+  signal current_page: natural := 7 mod NUM_PAGES;
+  --bit i is '1' while input i still has unread entries in the current page
+  signal readmask : std_logic_vector(MAX_INPUTS-1 downto 0) := (others => '0');
+
+begin
+  process(clk)
+    variable nent_arr: nent_array;
+    variable din_arr: din_array;
+    variable bx_change : boolean := false; -- indicates to the module whether or not the bx has changed compared to the previous clock
+    variable nextread : integer range 0 to MAX_INPUTS-1 := 0;
+    --candidate input inspected by the round-robin scan
+    variable cand : integer range 0 to MAX_INPUTS-1 := 0;
+    --mem_count as it will be after this clock, used to refresh readmask
+    variable mem_count_next : mem_count_arr := (others => 0);
+
+  begin
+    if rising_edge(clk) then
+      --latch bx and its page together so both change on the same clock
+      if (bx_in_vld = '1') then
+        bx_in_latch <= bx_in;
+        current_page <= to_integer(unsigned(bx_in)) mod NUM_PAGES;
+      end if;
+
+      --positional aggregates on a "downto" range: element 3 first, element 0 last
+      nent_arr := (nent3,nent2,nent1,nent0); --repackage nent and din as arrays
+      din_arr := (din3, din2, din1, din0);
+      bx_change := (bx_last /= bx_in_latch);
+
+      if (bx_change) then --restart readout on a change in bx (rst is not used here)
+        mem_count <= (others => 0);
+        --starting from the last input makes input 0 the first one tried
+        toread(0) <= NUM_INPUTS-1;
+        valid(0) <= '0';
+
+        --check if memory read counter is less than nentries
+        --this sets readmask to 1 for any inputs that still have words to read
+        for i in 0 to NUM_INPUTS-1 loop
+          if (0 < to_integer(unsigned(nent_arr(i)(current_page)))) then
+            readmask(i) <= '1';
+          else
+            readmask(i) <= '0';
+          end if;
+        end loop;
+
+      else
+        --only check for valid reads on non BX change clocks
+        --this gives up a clock cycle, but reduces logic levels downstream
+
+        for i in 0 to NUM_INPUTS-1 loop
+          mem_count_next(i) := mem_count(i);
+        end loop;
+
+        if (to_integer(unsigned(readmask)) = 0) then
+          valid(0) <= '0';
+        else
+          valid(0) <= '1';
+          --loop through starting with the next input in front of the current to-read (round-robin)
+          --candidates are visited as toread, toread-1, ... and a later hit
+          --overrides an earlier one, so the effective priority is
+          --toread+1, toread+2, ..., toread (mod with a positive right operand
+          --never returns a negative value)
+          for i in 0 to MAX_INPUTS-1 loop
+            cand := (toread(0) - i) mod MAX_INPUTS;
+            if (readmask(cand) = '1') then
+              nextread := cand;
+            end if;
+          end loop;
+          addr_arr_int(nextread) <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(nextread), LOG2_RAM_DEPTH));
+          mem_count(nextread) <= mem_count(nextread) + 1;
+          toread(0) <= nextread;
+          mem_count_next(nextread) := mem_count_next(nextread)+1;
+        end if;
+
+        --check if memory read counter is less than nentries
+        --this sets readmask to 1 for any inputs that still have words to read
+        for i in 0 to NUM_INPUTS-1 loop
+          if ((mem_count_next(i)) < to_integer(unsigned(nent_arr(i)(current_page)))) then
+            readmask(i) <= '1';
+          else
+            readmask(i) <= '0';
+          end if;
+        end loop;
+
+      end if ;
+
+      --generate output a few clocks after address is set to account for delay in RAMs
+      if valid(pipe_stages-1) ='1' then
+        if (NUM_EXTRA_BITS > 0) then
+          merged_dout <= '1' & std_logic_vector(to_unsigned(toread(pipe_stages-1),NUM_EXTRA_BITS)) & din_arr(toread(pipe_stages-1));
+        else
+          merged_dout <= '1' & din_arr(toread(pipe_stages-1));
+        end if ;
+      else
+        merged_dout <= (others => '0');
+      end if;
+
+      --advance the delay lines; bx_pipe has one more stage than valid/toread
+      bx_last <= bx_in_latch;
+      bx_pipe(0) <= bx_in_latch;
+      bx_out <= bx_pipe(pipe_stages);
+      for j in pipe_stages-2 downto 0 loop
+        valid(j+1) <= valid(j);
+        toread(j+1) <= toread(j);
+      end loop;
+      for j in pipe_stages-1 downto 0 loop
+        bx_pipe(j+1) <= bx_pipe(j);
+      end loop;
+    end if;
+  end process;
+
+  --flatten the per-input addresses into the single addr_arr output vector
+  GEN_ADDR: for i in 0 to NUM_INPUTS-1 generate
+  begin
+    addr_arr(LOG2_RAM_DEPTH*(i+1)-1 downto LOG2_RAM_DEPTH*i) <= addr_arr_int(i);
+  end generate;
+
+end RTL;
From
48951b97a6719b896b1b19ed41f54174106a4e9f Mon Sep 17 00:00:00 2001 From: Michael Oshiro Date: Mon, 28 Oct 2024 08:47:22 -0400 Subject: [PATCH 5/5] Merged project_generation_scripts PR --- emData/project_generation_scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emData/project_generation_scripts b/emData/project_generation_scripts index dbd9e736a83..174c325751e 160000 --- a/emData/project_generation_scripts +++ b/emData/project_generation_scripts @@ -1 +1 @@ -Subproject commit dbd9e736a833614604537cd17486d9f89dfd648b +Subproject commit 174c325751ecbc03f6836990e3186ff1908f08af