Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various FPU and warning fixes #427

Merged
merged 5 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions countbits.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@ architecture behaviour of bit_counter is
begin
countzero_r: process(clk)
begin
if rising_edge(clk) and stall = '0' then
inp_r <= inp;
sum_r <= sum;
if rising_edge(clk) then
if stall = '0' then
inp_r <= inp;
sum_r <= sum;
end if;
end if;
end process;

Expand Down
5 changes: 1 addition & 4 deletions fetch1.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,6 @@ architecture behaviour of fetch1 is
signal itlb_pte : tlb_pte_t;
signal itlb_hit : std_ulogic;

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
Expand Down Expand Up @@ -155,7 +152,7 @@ begin
attribute ram_style of btc_memory : signal is "block";

signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);
attribute ram_style of btc_valids : signal is "distributed";
-- attribute ram_style of btc_valids : signal is "distributed";

signal btc_wr : std_ulogic;
signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
Expand Down
18 changes: 9 additions & 9 deletions fpga/arty_a7.xdc
Original file line number Diff line number Diff line change
Expand Up @@ -171,15 +171,15 @@ set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_po
set_property -dict { PACKAGE_PIN P15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io33 }];
set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io34 }];
set_property -dict { PACKAGE_PIN N16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io35 }];
set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io36 }];
set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io37 }];
set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io38 }];
set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io39 }];
set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io40 }];
set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io41 }];
set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io42 }]; # A
set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io43 }]; # SCL
set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io44 }]; # SDA
#set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io36 }];
#set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io37 }];
#set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io38 }];
#set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io39 }];
#set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io40 }];
#set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io41 }];
#set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io42 }]; # A
#set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io43 }]; # SCL
#set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io44 }]; # SDA
#set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { shield_rst }];

#set_property -dict { PACKAGE_PIN C1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_ss }];
Expand Down
12 changes: 8 additions & 4 deletions fpga/top-arty.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ architecture behaviour of toplevel is
signal ddram_clk_p_vec : std_logic_vector(0 downto 0);
signal ddram_clk_n_vec : std_logic_vector(0 downto 0);

signal uart1_rxd : std_ulogic;
signal uart1_txd : std_ulogic;

-- Fixup various memory sizes based on generics
function get_bram_size return natural is
begin
Expand Down Expand Up @@ -266,8 +269,8 @@ begin
uart0_rxd => uart_main_rx,

-- UART1 signals
--uart1_txd => uart_pmod_tx,
--uart1_rxd => uart_pmod_rx,
uart1_txd => uart1_txd,
uart1_rxd => uart1_rxd,

-- SPI signals
spi_flash_sck => spi_sck,
Expand Down Expand Up @@ -302,7 +305,7 @@ begin
wishbone_dma_out => wb_sddma_out
);

--uart_pmod_rts_n <= '0';
uart1_txd <= '1';

-- SPI Flash
--
Expand Down Expand Up @@ -415,8 +418,9 @@ begin
);

-- Generate SoC reset
soc_rst_gen: process(system_clk)
soc_rst_gen: process(system_clk, ext_rst_n)
begin
-- XXX why does this need to be an asynchronous reset?
if ext_rst_n = '0' then
soc_rst <= '1';
elsif rising_edge(system_clk) then
Expand Down
14 changes: 10 additions & 4 deletions fpu.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,6 @@ begin
v.denorm := '0';
v.is_subtract := '0';
v.add_bsmall := '0';
v.doing_ftdiv := "00";
v.int_ovf := '0';
v.div_close := '0';

Expand Down Expand Up @@ -1007,7 +1006,7 @@ begin
elsif new_exp < min_exp then
exp_tiny := '1';
end if;
if is_X(new_exp) or is_X(min_exp) then
if is_X(new_exp) or is_X(max_exp) then
exp_huge := 'X';
elsif new_exp > max_exp then
exp_huge := '1';
Expand Down Expand Up @@ -1038,6 +1037,7 @@ begin

v.update_fprf := '0';
v.first := '0';
v.doing_ftdiv := "00";
v.opsel_a := AIN_R;
opsel_ainv <= '0';
opsel_mask <= '0';
Expand Down Expand Up @@ -1147,8 +1147,10 @@ begin
v.instr_done := '1';

when DO_FTDIV =>
v.instr_done := '1';
v.cr_result := "0000";
-- set result_exp to the exponent of B
re_sel2 <= REXP2_B;
re_set_result <= '1';
if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
(r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
v.cr_result(2) := '1';
Expand All @@ -1157,6 +1159,7 @@ begin
r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
(r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
v.cr_result(1) := '1';
v.instr_done := '1';
else
v.doing_ftdiv := "11";
v.first := '1';
Expand All @@ -1173,7 +1176,7 @@ begin
end if;
if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
v.cr_result(1) := '0';
v.cr_result(1) := '1';
end if;

when DO_FCMP =>
Expand Down Expand Up @@ -2148,6 +2151,9 @@ begin
v.state := NORMALIZE;

when FTDIV_1 =>
-- We go through this state up to two times; the first sees if
-- B.exponent is in the range [-1021,1020], and the second tests
-- whether B.exp - A.exp is in the range [-1022,1020].
v.cr_result(1) := exp_tiny or exp_huge;
-- set shift to a.exp
rs_sel2 <= RSH2_A;
Expand Down
11 changes: 6 additions & 5 deletions icache.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,12 @@ begin
variable snoop_addr : real_addr_t;
variable next_raddr : real_addr_t;
begin
replace_way := to_unsigned(0, WAY_BITS);
if NUM_WAYS > 1 then
-- Get victim way from plru
replace_way := plru_victim;
end if;
if rising_edge(clk) then
replace_way := to_unsigned(0, WAY_BITS);
if NUM_WAYS > 1 then
-- Get victim way from plru
replace_way := plru_victim;
end if;
-- Read tags using NIA for next cycle
if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then
next_raddr := i_in.next_rpn & i_in.next_nia(MIN_LG_PGSZ - 1 downto 0);
Expand Down Expand Up @@ -649,6 +649,7 @@ begin
begin
if rising_edge(clk) then
ev.icache_miss <= '0';
ev.itlb_miss_resolved <= '0';
r.recv_valid <= '0';
-- On reset, clear all valid bits to force misses
if rst = '1' then
Expand Down
3 changes: 1 addition & 2 deletions microwatt.core
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,13 @@ filesets:
- fpga/pp_soc_uart.vhd
- fpga/pp_utilities.vhd
- fpga/firmware.hex : {copyto : firmware.hex, file_type : user}
- nonrandom.vhdl
file_type : vhdlSource-2008

xilinx_specific:
files:
- xilinx-mult.vhdl : {file_type : vhdlSource-2008}
- xilinx-mult-32s.vhdl : {file_type : vhdlSource-2008}
- fpga/fpga-random.vhdl : {file_type : vhdlSource-2008}
- fpga/fpga-random.xdc : {file_type : xdc}

debug_xilinx:
files:
Expand Down
60 changes: 60 additions & 0 deletions tests/fpu/fpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1665,6 +1665,65 @@ int fpu_test_25(void)
return 0;
}

/*
 * Test vectors for test26 (ftdiv / ftsqrt condition-register results).
 *
 * Each entry supplies two raw 64-bit operand images and the CR field
 * values that test26 expects to read back after executing
 * "ftdiv 5,5,6" and "ftsqrt 4,5" on them.
 *
 * test26 loads the operands with lfd from offsets 0 and 8 of the entry,
 * so val_a and val_b must remain the first two members, in this order.
 * NOTE(review): this relies on unsigned long being 8 bytes - confirm for
 * the target ABI.
 */
struct ftvals {
/* Operand image loaded into FPR 5: FRA of ftdiv and the ftsqrt operand. */
unsigned long val_a;
/* Operand image loaded into FPR 6: FRB of ftdiv. */
unsigned long val_b;
/* Expected value of CR field 5, i.e. (mfcr >> 8) & 0xf, set by ftdiv. */
int cr_ftdiv;
/* Expected value of CR field 4 (mfcr bits from bit 12 up), set by ftsqrt. */
int cr_ftsqrt;
} ftvals[] = {
{ 0x3ff0000000000000, 0x3ff0000000000000, 0, 0 },
{ 0x0000000000000000, 0x3ff0000000000000, 0, 6 },
{ 0xfff0000000000000, 0x3ff0000000000000, 6, 6 },
{ 0x7ff1234560000000, 0x3ff0000000000000, 2, 2 },
{ 0x3ff0000000000000, 0xfff0000000000000, 6, 0 },
{ 0x3ff0000000000000, 0x8000000000000000, 6, 0 },
{ 0x3ff0000000000000, 0x7ff9234560000000, 2, 0 },
{ 0x3ff0000000000000, 0x0020000000000000, 0, 0 },
{ 0x3ff0000000000000, 0x0010000000000000, 2, 0 },
{ 0x3ff0000000000000, 0x0001000000000000, 6, 0 },
{ 0x3ff0000000000000, 0x7fb1234500000000, 0, 0 },
{ 0x3ff0000000000000, 0x7fc1234500000000, 2, 0 },
{ 0x3ff0000000000000, 0x7fd1234500000000, 2, 0 },
{ 0x3ff0000000000000, 0x7fe1234500000000, 2, 0 },
{ 0x6000000000000000, 0x2000000000000000, 2, 0 },
{ 0x5ff0000000000000, 0x2000000000000000, 2, 0 },
{ 0x5fe0000000000000, 0x2000000000000000, 0, 0 },
{ 0x2000000000000000, 0x5fc0000000000000, 0, 0 },
{ 0x2000000000000000, 0x5fd0000000000000, 2, 0 },
{ 0x0360000000000000, 0x4320000000000000, 0, 0 },
{ 0x0350000000000000, 0x4310000000000000, 2, 2 },
{ 0x0010000000000000, 0x3fd0000000000000, 2, 2 },
{ 0x0001000000000000, 0x3fd0000000000000, 2, 6 },
{ 0xbff0000000000000, 0x3ff0000000000000, 0, 2 },
{ 0x3fd0000000000000, 0x0001000000000000, 6, 0 },
};

/*
 * Test 26: verify the condition-register results of ftdiv and ftsqrt.
 *
 * For every entry of ftvals[] the two operand images are loaded into
 * FPR 5 and FPR 6, "ftdiv 5,5,6" writes its result to CR field 5 and
 * "ftsqrt 4,5" writes its result to CR field 4; the whole CR is then
 * read with mfcr and the two fields are compared with the expected
 * values stored in the table.
 *
 * arg is unused; it is presumably only present to match the callback
 * signature expected by trapit() - confirm against trapit's prototype.
 *
 * Returns 0 if every entry matches, otherwise the 1-based index of the
 * first failing entry (after printing the index and the raw CR value).
 */
int test26(long arg)
{
	long i;
	int cr;
	struct ftvals *vp = ftvals;

	set_fpscr(FPS_RN_NEAR);
	for (i = 0; i < sizeof(ftvals) / sizeof(ftvals[0]); ++i, ++vp) {
		asm("lfd 5,0(%1); lfd 6,8(%1); ftdiv 5,5,6; ftsqrt 4,5; mfcr %0" :
		    "=r" (cr) : "b" (&vp->val_a) : "cr4", "cr5");
		/*
		 * In the mfcr image CR field 5 occupies bits 11..8 and CR
		 * field 4 occupies bits 15..12.  Each field is exactly four
		 * bits wide, so both are masked with 0xf; a wider mask would
		 * also pick up the low bit of CR field 3, which this test
		 * does not control.
		 */
		if (((cr >> 8) & 0xf) != vp->cr_ftdiv ||
		    ((cr >> 12) & 0xf) != vp->cr_ftsqrt) {
			print_hex(i, 2, " ");
			print_hex(cr, 8, " ");
			return i + 1;
		}
	}
	return 0;
}

/*
 * Driver for test 26: calls enable_fp() and then runs test26 through
 * trapit() with an argument of 0, handing trapit's result back to the
 * caller unchanged.
 */
int fpu_test_26(void)
{
	int result;

	enable_fp();
	result = trapit(0, test26);
	return result;
}

int fail = 0;

void do_test(int num, int (*test)(void))
Expand Down Expand Up @@ -1715,6 +1774,7 @@ int main(void)
do_test(23, fpu_test_23);
do_test(24, fpu_test_24);
do_test(25, fpu_test_25);
do_test(26, fpu_test_26);

return fail;
}
Binary file modified tests/test_fpu.bin
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_fpu.console_out
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ test 22:PASS
test 23:PASS
test 24:PASS
test 25:PASS
test 26:PASS
5 changes: 2 additions & 3 deletions xics.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -386,15 +386,14 @@ begin
reg_write: process(clk)
variable be_in : std_ulogic_vector(31 downto 0);
begin
-- Byteswapped input
be_in := bswap(wb_in.dat);

if rising_edge(clk) then
if rst = '1' then
for i in 0 to SRC_NUM - 1 loop
xives(i) <= (pri => pri_masked);
end loop;
elsif wb_valid = '1' and wb_in.we = '1' then
-- Byteswapped input
be_in := bswap(wb_in.dat);
if reg_is_xive then
-- TODO: When adding support for other bits, make sure to
-- properly implement wb_in.sel to allow partial writes.
Expand Down
8 changes: 5 additions & 3 deletions xilinx-mult-32s.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,11 @@ begin

process(clk)
begin
if rising_edge(clk) and stall = '0' then
m_out.valid <= m_in.valid;
product_lo <= m01_p(5 downto 0) & m00_p(16 downto 0);
if rising_edge(clk) then
if stall = '0' then
m_out.valid <= m_in.valid;
product_lo <= m01_p(5 downto 0) & m00_p(16 downto 0);
end if;
end if;
end process;

Expand Down
Loading