neorv32/rtl/core/neorv32_cpu_alu.vhd

376 lines
18 KiB
VHDL

-- #################################################################################################
-- # << NEORV32 CPU - Arithmetic/Logic Unit >> #
-- # ********************************************************************************************* #
-- # Main data/address ALU and ALU co-processors (= multi-cycle function units). #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
-- # The NEORV32 RISC-V Processor, https://github.com/stnolting/neorv32 #
-- # Copyright (c) 2024, Stephan Nolting. All rights reserved. #
-- # #
-- # Redistribution and use in source and binary forms, with or without modification, are #
-- # permitted provided that the following conditions are met: #
-- # #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
-- # conditions and the following disclaimer. #
-- # #
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
-- # conditions and the following disclaimer in the documentation and/or other materials #
-- # provided with the distribution. #
-- # #
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
-- # endorse or promote products derived from this software without specific prior written #
-- # permission. #
-- # #
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
-- #################################################################################################
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library neorv32;
use neorv32.neorv32_package.all;
-- ALU top entity: main data/address ALU plus the multi-cycle co-processors that are
-- attached to it. The boolean generics select which optional co-processors are generated.
entity neorv32_cpu_alu is
  generic (
    -- ISA extension switches --
    CPU_EXTENSION_RISCV_B      : boolean; -- bit-manipulation extension
    CPU_EXTENSION_RISCV_M      : boolean; -- mul/div extension (also enables the divider hardware)
    CPU_EXTENSION_RISCV_Zicond : boolean; -- integer conditional operations
    CPU_EXTENSION_RISCV_Zmmul  : boolean; -- multiply-only sub-extension of M
    CPU_EXTENSION_RISCV_Zfinx  : boolean; -- 32-bit floating-point extension (uses the integer registers)
    CPU_EXTENSION_RISCV_Zxcfu  : boolean; -- custom (instructions) functions unit
    -- tuning switches --
    FAST_MUL_EN                : boolean; -- map the M extension's multiplier to DSPs
    FAST_SHIFT_EN              : boolean  -- use a barrel shifter for shift operations
  );
  port (
    -- global control --
    clk_i       : in  std_ulogic; -- global clock, rising edge
    rstn_i      : in  std_ulogic; -- global reset, low-active, async
    ctrl_i      : in  ctrl_bus_t; -- main control bus
    -- CSR interface (forwarded to the FPU and CFU co-processors) --
    csr_we_i    : in  std_ulogic; -- global write enable
    csr_addr_i  : in  std_ulogic_vector(11 downto 0); -- address
    csr_wdata_i : in  std_ulogic_vector(XLEN-1 downto 0); -- write data
    csr_rdata_o : out std_ulogic_vector(XLEN-1 downto 0); -- read data (OR of all co-processor read-backs)
    -- operand inputs --
    rs1_i       : in  std_ulogic_vector(XLEN-1 downto 0); -- rf source 1
    rs2_i       : in  std_ulogic_vector(XLEN-1 downto 0); -- rf source 2
    rs3_i       : in  std_ulogic_vector(XLEN-1 downto 0); -- rf source 3
    rs4_i       : in  std_ulogic_vector(XLEN-1 downto 0); -- rf source 4
    pc_i        : in  std_ulogic_vector(XLEN-1 downto 0); -- current PC
    imm_i       : in  std_ulogic_vector(XLEN-1 downto 0); -- immediate
    -- results --
    cmp_o       : out std_ulogic_vector(1 downto 0); -- comparator status (rs1 vs. rs2)
    res_o       : out std_ulogic_vector(XLEN-1 downto 0); -- ALU result
    add_o       : out std_ulogic_vector(XLEN-1 downto 0); -- address computation result (raw adder output)
    -- status --
    cp_done_o   : out std_ulogic -- co-processor operation done?
  );
end entity neorv32_cpu_alu;
architecture neorv32_cpu_cpu_rtl of neorv32_cpu_alu is
-- comparator: operands widened by one bit, plus the 2-bit status vector --
signal cmp_rs1, cmp_rs2 : std_ulogic_vector(XLEN downto 0);
signal cmp              : std_ulogic_vector(1 downto 0); -- comparator status

-- selected ALU operands and their one-bit-extended versions --
signal opa   : std_ulogic_vector(XLEN-1 downto 0);
signal opb   : std_ulogic_vector(XLEN-1 downto 0);
signal opa_x : std_ulogic_vector(XLEN downto 0);
signal opb_x : std_ulogic_vector(XLEN downto 0);

-- intermediate results --
signal addsub_res : std_ulogic_vector(XLEN downto 0);   -- adder/subtracter output; MSB is the carry/borrow
signal cp_res     : std_ulogic_vector(XLEN-1 downto 0); -- merged co-processor result

-- co-processor interface: one slot per co-processor (0..5) --
type cp_data_t is array (0 to 5) of std_ulogic_vector(XLEN-1 downto 0);
signal cp_result : cp_data_t;                                           -- per-slot result
signal cp_start  : std_ulogic_vector(5 downto 0);                       -- per-slot trigger
signal cp_valid  : std_ulogic_vector(5 downto 0);                       -- per-slot done flag
signal cp_shamt  : std_ulogic_vector(index_size_f(XLEN)-1 downto 0);    -- shift amount

-- CSR proxy: FPU --
signal fpu_csr_en    : std_ulogic;                         -- CSR address selects the FPU
signal fpu_csr_we    : std_ulogic;                         -- gated write enable
signal fpu_csr_rd    : std_ulogic_vector(XLEN-1 downto 0); -- raw read data from the unit
signal csr_rdata_fpu : std_ulogic_vector(XLEN-1 downto 0); -- gated read-back

-- CSR proxy: CFU --
signal cfu_csr_en    : std_ulogic;                         -- CSR address selects the CFU
signal cfu_csr_we    : std_ulogic;                         -- gated write enable
signal cfu_csr_rd    : std_ulogic_vector(XLEN-1 downto 0); -- raw read data from the unit
signal csr_rdata_cfu : std_ulogic_vector(XLEN-1 downto 0); -- gated read-back
begin
-- Comparator Unit (for conditional branches) ---------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Each operand is widened by one MSB: a copy of its sign bit for signed compares, or '0'
-- when ctrl_i.alu_unsigned is set. One signed "<" then serves both compare flavours.
cmp_rs1(XLEN)            <= rs1_i(rs1_i'left) and (not ctrl_i.alu_unsigned);
cmp_rs1(XLEN-1 downto 0) <= rs1_i;
cmp_rs2(XLEN)            <= rs2_i(rs2_i'left) and (not ctrl_i.alu_unsigned);
cmp_rs2(XLEN-1 downto 0) <= rs2_i;

-- status flags --
comparator: process(rs1_i, rs2_i, cmp_rs1, cmp_rs2)
begin
  -- equality uses the raw (non-extended) register values
  if (rs1_i = rs2_i) then
    cmp(cmp_equal_c) <= '1';
  else
    cmp(cmp_equal_c) <= '0';
  end if;
  -- less-than on the extended operands: signed or unsigned comparison
  if (signed(cmp_rs1) < signed(cmp_rs2)) then
    cmp(cmp_less_c) <= '1';
  else
    cmp(cmp_less_c) <= '0';
  end if;
end process comparator;

-- export status --
cmp_o <= cmp;
-- ALU Input Operand Select ---------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- operand A: program counter when alu_opa_mux is '1', register source 1 otherwise
with ctrl_i.alu_opa_mux select opa <=
  pc_i  when '1',
  rs1_i when others;

-- operand B: immediate when alu_opb_mux is '1', register source 2 otherwise
with ctrl_i.alu_opb_mux select opb <=
  imm_i when '1',
  rs2_i when others;
-- Adder/Subtracter Core ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- extend both operands by one MSB: sign bit for signed operation, '0' for unsigned operation
opa_x(XLEN)            <= opa(opa'left) and (not ctrl_i.alu_unsigned);
opa_x(XLEN-1 downto 0) <= opa;
opb_x(XLEN)            <= opb(opb'left) and (not ctrl_i.alu_unsigned);
opb_x(XLEN-1 downto 0) <= opb;

-- bit 0 of the ALU operation code picks subtraction ('1') or addition (anything else)
addsub_core: process(opa_x, opb_x, ctrl_i)
  variable lhs_v, rhs_v : unsigned(XLEN downto 0);
begin
  lhs_v := unsigned(opa_x);
  rhs_v := unsigned(opb_x);
  if (ctrl_i.alu_op(0) = '1') then
    addsub_res <= std_ulogic_vector(lhs_v - rhs_v);
  else
    addsub_res <= std_ulogic_vector(lhs_v + rhs_v);
  end if;
end process addsub_core;

-- direct output of adder result (lower XLEN bits only; the extension bit is dropped)
add_o <= addsub_res(XLEN-1 downto 0);
-- ALU Operation Select -------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Combinational result multiplexer driven by ctrl_i.alu_op.
alu_core: process(ctrl_i, addsub_res, cp_res, rs1_i, opb)
begin
  -- default: adder/subtracter result; covers add, sub and every operation code not listed below
  res_o <= addsub_res(XLEN-1 downto 0);

  case ctrl_i.alu_op is
    when alu_op_cp_c => -- co-processor result
      res_o <= cp_res;
    when alu_op_slt_c => -- set-less-than: all zero except bit 0 = carry/borrow of the subtraction
      res_o    <= (others => '0');
      res_o(0) <= addsub_res(addsub_res'left);
    when alu_op_movb_c => -- pass operand B through
      res_o <= opb;
    when alu_op_xor_c =>
      res_o <= rs1_i xor opb;
    when alu_op_or_c =>
      res_o <= rs1_i or opb;
    when alu_op_and_c =>
      res_o <= rs1_i and opb;
    when others => -- keep the default (don't care)
      null;
  end case;
end process alu_core;
-- **************************************************************************************************************************
-- ALU Co-Processors
-- **************************************************************************************************************************

-- co-processor select / start trigger --
-- > each "cp_start" bit is high for one cycle to trigger operation of the according co-processor
cp_start <= ctrl_i.alu_cp_trig;

-- merge of all co-processor slots --
-- > "cp_valid" has to be set (for one cycle) one cycle before CP output data (cp_result) is valid
-- > "cp_result" has to be always zero unless the specific co-processor has been actually triggered,
--   which is what makes a plain OR of all slots sufficient as result multiplexer
cp_merge: process(cp_valid, cp_result)
  variable done_v : std_ulogic;
  variable data_v : std_ulogic_vector(XLEN-1 downto 0);
begin
  done_v := '0';
  data_v := (others => '0');
  for i in cp_result'range loop
    done_v := done_v or cp_valid(i);
    data_v := data_v or cp_result(i);
  end loop;
  cp_done_o <= done_v; -- multi-cycle co-processor operation done?
  cp_res    <= data_v; -- co-processor result
end process cp_merge;

-- co-processor CSR read-back --
-- > "csr_rdata_*" has to be always zero unless the specific co-processor is actually being accessed
csr_rdata_o <= csr_rdata_cfu or csr_rdata_fpu;

-- shift amount: low-order bits of operand B --
cp_shamt <= opb(cp_shamt'range);
-- Co-Processor 0: Shifter Unit (Base ISA) ------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Always instantiated (no generate condition); occupies co-processor slot 0.
neorv32_cpu_cp_shifter_inst: entity neorv32.neorv32_cpu_cp_shifter
  generic map (
    FAST_SHIFT_EN => FAST_SHIFT_EN -- use barrel shifter for shift operations
  )
  port map (
    -- clock, reset and control --
    clk_i   => clk_i,        -- global clock, rising edge
    rstn_i  => rstn_i,       -- global reset, low-active, async
    ctrl_i  => ctrl_i,       -- main control bus
    -- handshake --
    start_i => cp_start(0),  -- trigger operation
    valid_o => cp_valid(0),  -- data output valid
    -- operands --
    rs1_i   => rs1_i,        -- rf source 1
    shamt_i => cp_shamt,     -- shift amount
    -- result --
    res_o   => cp_result(0)  -- operation result
  );
-- Co-Processor 1: Integer Multiplication/Division Unit ('M' ISA Extension) ---------------
-- -------------------------------------------------------------------------------------------
-- Generated when the full 'M' extension or the multiply-only 'Zmmul' subset is enabled;
-- occupies co-processor slot 1.
neorv32_cpu_cp_muldiv_inst_true:
if (CPU_EXTENSION_RISCV_M or CPU_EXTENSION_RISCV_Zmmul) generate
  neorv32_cpu_cp_muldiv_inst: entity neorv32.neorv32_cpu_cp_muldiv
    generic map (
      DIVISION_EN => CPU_EXTENSION_RISCV_M, -- implement divider hardware (full 'M' only)
      FAST_MUL_EN => FAST_MUL_EN            -- use DSPs for faster multiplication
    )
    port map (
      -- clock, reset and control --
      clk_i   => clk_i,        -- global clock, rising edge
      rstn_i  => rstn_i,       -- global reset, low-active, async
      ctrl_i  => ctrl_i,       -- main control bus
      -- handshake --
      start_i => cp_start(1),  -- trigger operation
      valid_o => cp_valid(1),  -- data output valid
      -- operands --
      rs1_i   => rs1_i,        -- rf source 1
      rs2_i   => rs2_i,        -- rf source 2
      -- result --
      res_o   => cp_result(1)  -- operation result
    );
end generate;

-- unit not implemented: tie slot 1 to zero so it stays neutral in the OR-merge
neorv32_cpu_cp_muldiv_inst_false:
if not (CPU_EXTENSION_RISCV_M or CPU_EXTENSION_RISCV_Zmmul) generate
  cp_valid(1)  <= '0';
  cp_result(1) <= (others => '0');
end generate;
-- Co-Processor 2: Bit-Manipulation Unit ('B' ISA Extension) ------------------------------
-- -------------------------------------------------------------------------------------------
-- Generated only when the 'B' extension is enabled; occupies co-processor slot 2.
neorv32_cpu_cp_bitmanip_inst_true:
if (CPU_EXTENSION_RISCV_B = true) generate
  neorv32_cpu_cp_bitmanip_inst: entity neorv32.neorv32_cpu_cp_bitmanip
    generic map (
      FAST_SHIFT_EN => FAST_SHIFT_EN -- use barrel shifter for shift operations
    )
    port map (
      -- clock, reset and control --
      clk_i   => clk_i,        -- global clock, rising edge
      rstn_i  => rstn_i,       -- global reset, low-active, async
      ctrl_i  => ctrl_i,       -- main control bus
      -- handshake --
      start_i => cp_start(2),  -- trigger operation
      valid_o => cp_valid(2),  -- data output valid
      -- operands --
      cmp_i   => cmp,          -- comparator status
      rs1_i   => rs1_i,        -- rf source 1
      rs2_i   => rs2_i,        -- rf source 2
      shamt_i => cp_shamt,     -- shift amount
      -- result --
      res_o   => cp_result(2)  -- operation result
    );
end generate;

-- unit not implemented: tie slot 2 to zero so it stays neutral in the OR-merge
neorv32_cpu_cp_bitmanip_inst_false:
if (CPU_EXTENSION_RISCV_B = false) generate
  cp_valid(2)  <= '0';
  cp_result(2) <= (others => '0');
end generate;
-- Co-Processor 3: Single-Precision Floating-Point Unit ('Zfinx' ISA Extension) -----------
-- -------------------------------------------------------------------------------------------
-- Generated only when the 'Zfinx' extension is enabled; occupies co-processor slot 3 and
-- owns a block of four CSR addresses.
neorv32_cpu_cp_fpu_inst_true:
if (CPU_EXTENSION_RISCV_Zfinx = true) generate

  neorv32_cpu_cp_fpu_inst: entity neorv32.neorv32_cpu_cp_fpu
    port map (
      -- clock, reset and control --
      clk_i       => clk_i,                    -- global clock, rising edge
      rstn_i      => rstn_i,                   -- global reset, low-active, async
      ctrl_i      => ctrl_i,                   -- main control bus
      -- handshake --
      start_i     => cp_start(3),              -- trigger operation
      valid_o     => cp_valid(3),              -- data output valid
      -- CSR interface --
      csr_we_i    => fpu_csr_we,               -- write enable (already address-gated)
      csr_addr_i  => csr_addr_i(1 downto 0),   -- address (two LSBs select within the block)
      csr_wdata_i => csr_wdata_i,              -- write data
      csr_rdata_o => fpu_csr_rd,               -- read data
      -- operands --
      cmp_i       => cmp,                      -- comparator status
      rs1_i       => rs1_i,                    -- rf source 1
      rs2_i       => rs2_i,                    -- rf source 2
      rs3_i       => rs3_i,                    -- rf source 3
      -- result --
      res_o       => cp_result(3)              -- operation result
    );

  -- CSR proxy --
  -- The unit is selected when the upper ten address bits match those of csr_fflags_c.
  -- Writes are only forwarded, and read data only passed on, while the unit is selected;
  -- otherwise the read-back is zero so it stays neutral in the CSR read-data OR.
  fpu_csr_proxy: process(csr_addr_i, csr_we_i, fpu_csr_rd)
    variable sel_v : std_ulogic;
  begin
    if (csr_addr_i(11 downto 2) = csr_fflags_c(11 downto 2)) then
      sel_v := '1';
    else
      sel_v := '0';
    end if;
    fpu_csr_en <= sel_v;
    fpu_csr_we <= sel_v and csr_we_i;
    if (sel_v = '1') then
      csr_rdata_fpu <= fpu_csr_rd;
    else
      csr_rdata_fpu <= (others => '0');
    end if;
  end process fpu_csr_proxy;

end generate;

-- unit not implemented: tie slot 3 and its CSR read-back to zero
neorv32_cpu_cp_fpu_inst_false:
if (CPU_EXTENSION_RISCV_Zfinx = false) generate
  cp_valid(3)   <= '0';
  cp_result(3)  <= (others => '0');
  csr_rdata_fpu <= (others => '0');
end generate;
-- Co-Processor 4: Custom (Instructions) Functions Unit ('Zxcfu' ISA Extension) -----------
-- -------------------------------------------------------------------------------------------
-- Generated only when the 'Zxcfu' extension is enabled; occupies co-processor slot 4 and
-- owns a block of four CSR addresses.
neorv32_cpu_cp_cfu_inst_true:
if (CPU_EXTENSION_RISCV_Zxcfu = true) generate

  neorv32_cpu_cp_cfu_inst: entity neorv32.neorv32_cpu_cp_cfu
    port map (
      -- clock, reset and control --
      clk_i       => clk_i,                    -- global clock, rising edge
      rstn_i      => rstn_i,                   -- global reset, low-active, async
      ctrl_i      => ctrl_i,                   -- main control bus
      -- handshake --
      start_i     => cp_start(4),              -- trigger operation
      valid_o     => cp_valid(4),              -- data output valid
      -- CSR interface --
      csr_we_i    => cfu_csr_we,               -- write enable (already address-gated)
      csr_addr_i  => csr_addr_i(1 downto 0),   -- address (two LSBs select within the block)
      csr_wdata_i => csr_wdata_i,              -- write data
      csr_rdata_o => cfu_csr_rd,               -- read data
      -- operands --
      rs1_i       => rs1_i,                    -- rf source 1
      rs2_i       => rs2_i,                    -- rf source 2
      rs3_i       => rs3_i,                    -- rf source 3
      rs4_i       => rs4_i,                    -- rf source 4
      -- result --
      res_o       => cp_result(4)              -- operation result
    );

  -- CSR proxy --
  -- The unit is selected when the upper ten address bits match those of csr_cfureg0_c.
  -- Writes are only forwarded, and read data only passed on, while the unit is selected;
  -- otherwise the read-back is zero so it stays neutral in the CSR read-data OR.
  cfu_csr_proxy: process(csr_addr_i, csr_we_i, cfu_csr_rd)
    variable sel_v : std_ulogic;
  begin
    if (csr_addr_i(11 downto 2) = csr_cfureg0_c(11 downto 2)) then
      sel_v := '1';
    else
      sel_v := '0';
    end if;
    cfu_csr_en <= sel_v;
    cfu_csr_we <= sel_v and csr_we_i;
    if (sel_v = '1') then
      csr_rdata_cfu <= cfu_csr_rd;
    else
      csr_rdata_cfu <= (others => '0');
    end if;
  end process cfu_csr_proxy;

end generate;

-- unit not implemented: tie slot 4 and its CSR read-back to zero
neorv32_cpu_cp_cfu_inst_false:
if (CPU_EXTENSION_RISCV_Zxcfu = false) generate
  cp_valid(4)   <= '0';
  cp_result(4)  <= (others => '0');
  csr_rdata_cfu <= (others => '0');
end generate;
-- Co-Processor 5: Integer Conditional Operations Unit ('Zicond' ISA Extension) -----------
-- -------------------------------------------------------------------------------------------
-- Generated only when the 'Zicond' extension is enabled; occupies co-processor slot 5.
neorv32_cpu_cp_cond_inst_true:
if (CPU_EXTENSION_RISCV_Zicond = true) generate
  neorv32_cpu_cp_cond_inst: entity neorv32.neorv32_cpu_cp_cond
    port map (
      -- clock, reset and control --
      clk_i   => clk_i,        -- global clock, rising edge
      rstn_i  => rstn_i,       -- global reset, low-active, async
      ctrl_i  => ctrl_i,       -- main control bus
      -- handshake --
      start_i => cp_start(5),  -- trigger operation
      valid_o => cp_valid(5),  -- data output valid
      -- operands --
      rs1_i   => rs1_i,        -- rf source 1
      rs2_i   => rs2_i,        -- rf source 2
      -- result --
      res_o   => cp_result(5)  -- operation result
    );
end generate;

-- unit not implemented: tie slot 5 to zero so it stays neutral in the OR-merge
neorv32_cpu_cp_cond_inst_false:
if (CPU_EXTENSION_RISCV_Zicond = false) generate
  cp_valid(5)  <= '0';
  cp_result(5) <= (others => '0');
end generate;
end neorv32_cpu_cpu_rtl;