VHDL:Implement a complete single-cycle RISC processor using VHDL
loop.vhd
-- Single-cycle RV32I-subset processor.
-- Executes I-type and R-type ALU instructions, stores and conditional
-- branches. The ALU result of the current instruction is exposed on y.
library IEEE;
use IEEE.std_logic_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

library work;
use work.common.all;

entity lab5 is
    port (reset : in  std_logic;
          clk   : in  std_logic;
          y     : out word);
end lab5;

architecture behavioral of lab5 is
    signal alu_func   : alu_func_t := ALU_NONE;
    signal alu_A      : word := x"00000000";  -- ALU input 1 (regfile output rega, addressed by rs1)
    signal alu_B      : word := x"00000000";  -- ALU input 2 (selected by op2sel)
    signal alu_out    : word := x"00000000";
    signal reg_B      : word := x"00000000";  -- regfile output regb, addressed by rs2
    signal imm        : word := x"00000000";  -- sign-extended I-type immediate
    signal imm_rd     : word := x"00000000";  -- sign-extended S-type (store) immediate
    signal ir         : word := x"00000000";  -- current instruction word
    signal dmem_out   : word := x"00000000";
    signal rf_wdata   : word := x"00000000";
    signal branch_imm : unsigned(word'range) := x"00000000";

    -- instruction fields
    signal opcode : opcode_t;
    signal funct3 : std_logic_vector(2 downto 0);
    signal funct7 : std_logic_vector(6 downto 0);
    signal rs1    : std_logic_vector(4 downto 0);
    signal rs2    : std_logic_vector(4 downto 0);
    signal rd     : std_logic_vector(4 downto 0);
    signal pc     : unsigned(word'range) := x"00000000";

    -- control signals
    signal regwrite : std_logic;
    signal wbsel    : std_logic;                     -- '0': write back ALU result, '1': data memory output
    signal memwrite : std_logic;
    signal op2sel   : std_logic_vector(1 downto 0);  -- "00": rs2, "01": I-immediate, otherwise: S-immediate
    signal pcsel    : std_logic;                     -- '0': pc + 4, '1': pc + branch_imm

    component alu is
        port (alu_func : in  alu_func_t;
              op1      : in  word;
              op2      : in  word;
              result   : out word);
    end component alu;

    component imem is
        port (addr : in  std_logic_vector(3 downto 0);
              dout : out word);
    end component imem;

    component dmem is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              raddr : in  std_logic_vector(5 downto 0);
              dout  : out word;
              waddr : in  std_logic_vector(5 downto 0);
              din   : in  word;
              we    : in  std_logic);
    end component dmem;

    -- NOTE(review): the regfile entity is not part of this file; the datapath
    -- presumably relies on it having a combinational read -- confirm.
    component regfile is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              addra : in  std_logic_vector(4 downto 0);
              addrb : in  std_logic_vector(4 downto 0);
              rega  : out word;
              regb  : out word;
              addrw : in  std_logic_vector(4 downto 0);
              dataw : in  word;
              we    : in  std_logic);
    end component regfile;

begin
    -- datapath
    alu0 : alu port map (
        alu_func => alu_func,
        op1      => alu_A,
        op2      => alu_B,
        result   => alu_out);

    -- Word-addressed instruction fetch: pc bits 5..2 index the 16-entry ROM.
    imem0 : imem port map (
        addr => std_logic_vector(pc(5 downto 2)),
        dout => ir);

    -- Word-addressed data memory: ALU result bits 7..2 index 64 words.
    dmem0 : dmem port map (
        reset => reset,
        clk   => clk,
        raddr => alu_out(7 downto 2),
        dout  => dmem_out,
        waddr => alu_out(7 downto 2),
        din   => reg_B,
        we    => memwrite);

    rf0 : regfile port map (
        reset => reset,
        clk   => clk,
        addra => rs1,
        addrb => rs2,
        rega  => alu_A,
        regb  => reg_B,
        addrw => rd,
        dataw => rf_wdata,
        we    => regwrite);

    -- Second ALU operand mux.
    alu_B <= reg_B when op2sel = "00" else
             imm   when op2sel = "01" else
             imm_rd;

    -- Write-back mux. The memory leg used to be wired to reg_B, which left
    -- dmem_out unused; it now selects the data memory output. No instruction
    -- decoded below sets wbsel to '1' yet.
    rf_wdata <= alu_out when wbsel = '0' else dmem_out;

    -- instruction fields
    imm(31 downto 12) <= (others => ir(31));
    imm(11 downto 0)  <= ir(31 downto 20);

    -- S-type immediate: ir(31 downto 25) & ir(11 downto 7), sign-extended.
    imm_rd(31 downto 12) <= (others => funct7(6));
    imm_rd(11 downto 5)  <= funct7;
    imm_rd(4 downto 0)   <= rd;

    rs1    <= ir(19 downto 15);
    rs2    <= ir(24 downto 20);
    rd     <= ir(11 downto 7);
    funct3 <= ir(14 downto 12);
    funct7 <= ir(31 downto 25);
    opcode <= ir(6 downto 0);

    -- SB-type immediate, sign-extended, bit 0 always zero.
    branch_imm(31 downto 13) <= (others => ir(31));
    branch_imm(12 downto 0)  <= unsigned(ir(31) & ir(7) & ir(30 downto 25) & ir(11 downto 8) & '0');

    -- purpose: hardwired control; derives all control signals from the
    --          instruction and, for branches, from the two register operands
    -- type   : combinational
    -- inputs : ir, funct3, opcode, alu_A, reg_B
    -- outputs: regwrite, op2sel, memwrite, wbsel, pcsel, alu_func
    -- alu_A and reg_B are in the sensitivity list because the branch
    -- comparison reads them; without them pcsel is not re-evaluated when the
    -- register values change after the instruction fields have settled.
    decode_proc : process (ir, funct3, opcode, alu_A, reg_B) is
    begin
        -- defaults: no writes, rs2 as second operand, sequential PC
        regwrite <= '0';
        op2sel   <= "00";
        memwrite <= '0';
        wbsel    <= '0';
        pcsel    <= '0';
        alu_func <= ALU_NONE;

        case opcode is
            when OP_ITYPE =>
                regwrite <= '1';
                op2sel   <= "01";
                case funct3 is
                    when "000" => alu_func <= ALU_ADD;
                    when "001" => alu_func <= ALU_SLL;
                    when "010" => alu_func <= ALU_SLT;
                    when "011" => alu_func <= ALU_SLTU;
                    when "100" => alu_func <= ALU_XOR;
                    when "110" => alu_func <= ALU_OR;
                    when "111" => alu_func <= ALU_AND;
                    when "101" =>
                        -- ir(30) distinguishes SRAI from SRLI
                        if ir(30) = '1' then
                            alu_func <= ALU_SRA;
                        else
                            alu_func <= ALU_SRL;
                        end if;
                    when others => null;
                end case;

            when OP_RTYPE =>
                regwrite <= '1';
                case funct3 is
                    when "000" =>
                        -- ir(30) distinguishes SUB from ADD
                        if ir(30) = '1' then
                            alu_func <= ALU_SUB;
                        else
                            alu_func <= ALU_ADD;
                        end if;
                    when "001" => alu_func <= ALU_SLL;
                    when "010" => alu_func <= ALU_SLT;
                    when "011" => alu_func <= ALU_SLTU;
                    when "100" => alu_func <= ALU_XOR;
                    when "101" =>
                        if ir(30) = '1' then
                            alu_func <= ALU_SRA;
                        else
                            alu_func <= ALU_SRL;
                        end if;
                    when "110" => alu_func <= ALU_OR;
                    when "111" => alu_func <= ALU_AND;
                    when others => null;
                end case;

            when OP_BRANCH =>
                -- Dedicated comparison logic; the ALU is not involved.
                case funct3 is
                    when BEQ =>
                        if alu_A = reg_B then
                            pcsel <= '1';
                        end if;
                    when BNE =>
                        if alu_A /= reg_B then
                            pcsel <= '1';
                        end if;
                    when BLT =>
                        if signed(alu_A) < signed(reg_B) then
                            pcsel <= '1';
                        end if;
                    when BGE =>
                        if signed(alu_A) >= signed(reg_B) then
                            pcsel <= '1';
                        end if;
                    when BLTU =>
                        if unsigned(alu_A) < unsigned(reg_B) then
                            pcsel <= '1';
                        end if;
                    when BGEU =>
                        if unsigned(alu_A) >= unsigned(reg_B) then
                            pcsel <= '1';
                        end if;
                    when others => null;
                end case;

            when OP_STORE =>
                -- Address = rs1 + S-type immediate. op2sel must select imm_rd;
                -- with the default "00" the address would be rs1 + rs2.
                -- funct3 (store width) is not decoded: a whole word is written.
                memwrite <= '1';
                op2sel   <= "10";
                alu_func <= ALU_ADD;

            when others => null;
        end case;
    end process decode_proc;

    y <= alu_out;

    -- purpose: program counter register
    -- type   : sequential, asynchronous reset
    -- inputs : reset, clk, pcsel, branch_imm
    -- outputs: pc
    acc : process (reset, clk)
    begin
        if reset = '1' then
            pc <= (others => '0');
        elsif rising_edge(clk) then
            if pcsel = '1' then
                pc <= pc + branch_imm;  -- modular add of the sign-extended offset
            else
                pc <= pc + 4;
            end if;
        end if;
    end process acc;

end architecture;
loop_tb.vhd
library IEEE;
use IEEE.std_logic_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

library work;
use work.common.all;

-- Testbench for lab5: applies reset for two clock periods, lets the
-- processor run for 56 more, then stops the simulation. It does not check
-- any result; cpuout is only there to be observed in the waveform.
entity lab5_tb is
end lab5_tb;

architecture behavioral of lab5_tb is
    constant clk_period : time := 10 ns;

    signal clk    : std_logic;
    signal reset  : std_logic;
    signal cpuout : word;

    component lab5 is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              y     : out word);
    end component;

begin

    -- device under test
    u0 : lab5
        port map (reset => reset,
                  clk   => clk,
                  y     => cpuout);

    -- free-running clock, low for the first half period
    proc_clock : process
    begin
        clk <= '0';
        wait for clk_period / 2;
        clk <= '1';
        wait for clk_period / 2;
    end process proc_clock;

    -- reset pulse followed by a fixed-length run
    proc_stimuli : process
    begin
        reset <= '1';
        wait for clk_period * 2;

        reset <= '0';
        wait for clk_period * 56;

        -- a failure-severity assertion is used to halt the simulator
        assert false
            report "success - end of simulation"
            severity failure;
    end process proc_stimuli;

end architecture;
imem.vhd
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

-- Instruction ROM: 16 words, combinational read.
--   addr : word index (0..15)
--   dout : instruction word stored at that index
entity imem is
    port (addr : in  std_logic_vector(3 downto 0);
          dout : out word);
end imem;

architecture behavioral of imem is
    type rom_arr is array (0 to 15) of word;

    -- Program image; comments give the byte address and the assembly source.
    constant mem : rom_arr := (
        x"00000793",  -- 0000  li   a5,0
        x"00C00693",  -- 0004  li   a3,12
                      -- .L2:
        x"00279713",  -- 0008  sll  a4,a5,2
        x"00F72023",  -- 000c  sw   a5,0(a4)
        x"00178793",  -- 0010  add  a5,a5,1
        x"FED79AE3",  -- 0014  bne  a5,a3,.L2
        x"00000513",  -- 0018  li   a0,0
        x"00008067",  -- 001c  ret
        x"00000013",  -- nop
        x"00000013",  -- nop
        x"00000013",  -- nop
        x"00000013",  -- nop
        x"00000013",  -- nop
        x"00000013",  -- nop
        x"00000013",  -- nop
        x"00000013"); -- nop
begin
    -- numeric_std conversion replaces the non-standard
    -- std_logic_unsigned.conv_integer used previously.
    dout <= mem(to_integer(unsigned(addr)));
end behavioral;
alu.vhd
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.common.all;

-- Combinational 32-bit ALU.
--   alu_func : operation selector (ALU_* constants from work.common)
--   op1, op2 : operands
--   result   : operation result; op1 is passed through for any selector
--              without a dedicated operation (including ALU_NONE)
entity alu is
    port (alu_func : in  alu_func_t;
          op1      : in  word;
          op2      : in  word;
          result   : out word);
end entity alu;

architecture behavioral of alu is
begin

    -- purpose: arithmetic and logic
    -- type   : combinational
    -- inputs : alu_func, op1, op2
    -- outputs: result
    alu_proc : process (alu_func, op1, op2) is
        constant ONE  : word := x"00000001";
        constant ZERO : word := x"00000000";
        variable a_s, b_s : signed(word'range);    -- operands viewed as signed
        variable a_u, b_u : unsigned(word'range);  -- operands viewed as unsigned
        variable res      : word;
    begin
        a_s := signed(op1);
        b_s := signed(op2);
        a_u := unsigned(op1);
        b_u := unsigned(op2);

        -- pass-through unless one of the selectors below matches
        res := op1;

        if alu_func = ALU_ADD then
            res := std_logic_vector(a_s + b_s);
        elsif alu_func = ALU_ADDU then
            res := std_logic_vector(a_u + b_u);
        elsif alu_func = ALU_SUB then
            res := std_logic_vector(a_s - b_s);
        elsif alu_func = ALU_SUBU then
            res := std_logic_vector(a_u - b_u);
        elsif alu_func = ALU_SLT then
            -- signed set-less-than
            if a_s < b_s then
                res := ONE;
            else
                res := ZERO;
            end if;
        elsif alu_func = ALU_SLTU then
            -- unsigned set-less-than
            if a_u < b_u then
                res := ONE;
            else
                res := ZERO;
            end if;
        elsif alu_func = ALU_AND then
            res := op1 and op2;
        elsif alu_func = ALU_OR then
            res := op1 or op2;
        elsif alu_func = ALU_XOR then
            res := op1 xor op2;
        elsif alu_func = ALU_SLL then
            -- only the low five bits of op2 give the shift amount
            res := std_logic_vector(shift_left(a_u, to_integer(b_u(4 downto 0))));
        elsif alu_func = ALU_SRA then
            -- shifting the signed view replicates the sign bit
            res := std_logic_vector(shift_right(a_s, to_integer(b_u(4 downto 0))));
        elsif alu_func = ALU_SRL then
            res := std_logic_vector(shift_right(a_u, to_integer(b_u(4 downto 0))));
        end if;

        result <= res;
    end process alu_proc;

end architecture behavioral;
common.vhd
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

-- Shared types, encodings and simulation print helpers for the processor.
package common is

    -- definition for a machine word
    subtype word is std_logic_vector(31 downto 0);
    subtype reg_addr_t is std_logic_vector(4 downto 0);

    -- ALU operation selectors
    subtype alu_func_t is std_logic_vector(3 downto 0);
    constant ALU_NONE : alu_func_t := "0000";
    constant ALU_ADD  : alu_func_t := "0001";
    constant ALU_ADDU : alu_func_t := "0010";
    constant ALU_SUB  : alu_func_t := "0011";
    constant ALU_SUBU : alu_func_t := "0100";
    constant ALU_SLT  : alu_func_t := "0101";
    constant ALU_SLTU : alu_func_t := "0110";
    constant ALU_AND  : alu_func_t := "0111";
    constant ALU_OR   : alu_func_t := "1000";
    constant ALU_XOR  : alu_func_t := "1001";
    constant ALU_SLL  : alu_func_t := "1010";
    constant ALU_SRA  : alu_func_t := "1011";
    constant ALU_SRL  : alu_func_t := "1100";

    -- branch kinds
    subtype branch_type_t is std_logic_vector(2 downto 0);
    constant BEQ  : branch_type_t := "000";
    constant BNE  : branch_type_t := "001";
    constant BLT  : branch_type_t := "100";
    constant BGE  : branch_type_t := "101";
    constant BLTU : branch_type_t := "110";
    constant BGEU : branch_type_t := "111";

    subtype load_type_t is std_logic_vector(2 downto 0);
    constant LOAD_NONE : load_type_t := "000";
    constant LB        : load_type_t := "001";
    constant LH        : load_type_t := "010";
    constant LW        : load_type_t := "011";
    constant LBU       : load_type_t := "100";
    constant LHU       : load_type_t := "101";

    subtype store_type_t is std_logic_vector(1 downto 0);
    constant STORE_NONE : store_type_t := "00";
    constant SB         : store_type_t := "01";
    constant SH         : store_type_t := "10";
    constant SW         : store_type_t := "11";

    subtype system_type_t is std_logic_vector(2 downto 0);
    constant SYSTEM_ECALL  : system_type_t := "000";
    constant SYSTEM_EBREAK : system_type_t := "001";
    constant SYSTEM_CSRRW  : system_type_t := "010";
    constant SYSTEM_CSRRS  : system_type_t := "011";
    constant SYSTEM_CSRRC  : system_type_t := "100";
    constant SYSTEM_CSRRWI : system_type_t := "101";
    constant SYSTEM_CSRRSI : system_type_t := "110";
    constant SYSTEM_CSRRCI : system_type_t := "111";

    -- major opcodes (instruction bits 6 downto 0)
    subtype opcode_t is std_logic_vector(6 downto 0);
    constant OP_ITYPE  : opcode_t := "0010011";
    constant OP_RTYPE  : opcode_t := "0110011";
    constant OP_STORE  : opcode_t := "0100011";
    constant OP_BRANCH : opcode_t := "1100011";

    -- print a string with a newline
    procedure println (str : in string);
    -- print a vector as a row of element characters, with a newline
    procedure print (slv : in std_logic_vector);
    -- append a vector to a line, one character per element, left to right
    procedure write(l : inout line; slv : in std_logic_vector);
    -- hexadecimal image of a vector, most significant digit first
    function hstr(slv : std_logic_vector) return string;

    -- instruction formats
    type r_insn_t is (R_ADD, R_SLT, R_SLTU, R_AND, R_OR, R_XOR, R_SLL, R_SRL, R_SUB, R_SRA);
    type i_insn_t is (I_JALR, I_LB, I_LH, I_LW, I_LBU, I_LHU, I_ADDI, I_SLTI, I_SLTIU, I_XORI, I_ORI, I_ANDI, I_SLLI, I_SRLI, I_SRAI);
    type s_insn_t is (S_SB, S_SH, S_SW);
    type sb_insn_t is (SB_BEQ, SB_BNE, SB_BLT, SB_BGE, SB_BLTU, SB_BGEU);
    type u_insn_t is (U_LUI, U_AUIPC);
    type uj_insn_t is (UJ_JAL);

    -- ADDI x0, x0, 0
    constant NOP : word := "00000000000000000000000000010011";

end package common;

package body common is

    -- Returns the hexadecimal image of slv, most significant digit first.
    -- The leftmost element of slv is treated as the most significant bit,
    -- whatever the direction or bounds of its range, and the value is
    -- zero-extended on the left to a whole number of hex digits.
    -- A nibble of all 'Z', all 'U' or all 'X' is shown as 'z', 'u' or 'x';
    -- any other nibble containing a non-0/1 element is shown as '?'.
    -- A null vector yields the empty string.
    function hstr(slv : std_logic_vector) return string is
        -- number of hex digits, rounded up
        constant hexlen : natural := (slv'length + 3) / 4;
        -- Zero-padded, descending, 0-based copy of slv. Sized from the
        -- argument so vectors of any width are accepted.
        variable padded  : std_logic_vector(hexlen * 4 - 1 downto 0) := (others => '0');
        variable hex     : string(1 to hexlen);
        variable fourbit : std_logic_vector(3 downto 0);
    begin
        if slv'length = 0 then
            return "";
        end if;

        -- Array assignment matches elements by position, so this normalises
        -- ascending or non-zero-based ranges.
        padded(slv'length - 1 downto 0) := slv;

        for i in hexlen - 1 downto 0 loop
            fourbit := padded(i * 4 + 3 downto i * 4);
            -- digit i (counted from the least significant end) lands at
            -- string position hexlen - i
            case fourbit is
                when "0000" => hex(hexlen - i) := '0';
                when "0001" => hex(hexlen - i) := '1';
                when "0010" => hex(hexlen - i) := '2';
                when "0011" => hex(hexlen - i) := '3';
                when "0100" => hex(hexlen - i) := '4';
                when "0101" => hex(hexlen - i) := '5';
                when "0110" => hex(hexlen - i) := '6';
                when "0111" => hex(hexlen - i) := '7';
                when "1000" => hex(hexlen - i) := '8';
                when "1001" => hex(hexlen - i) := '9';
                when "1010" => hex(hexlen - i) := 'A';
                when "1011" => hex(hexlen - i) := 'B';
                when "1100" => hex(hexlen - i) := 'C';
                when "1101" => hex(hexlen - i) := 'D';
                when "1110" => hex(hexlen - i) := 'E';
                when "1111" => hex(hexlen - i) := 'F';
                when "ZZZZ" => hex(hexlen - i) := 'z';
                when "UUUU" => hex(hexlen - i) := 'u';
                when "XXXX" => hex(hexlen - i) := 'x';
                when others => hex(hexlen - i) := '?';
            end case;
        end loop;

        return hex;
    end hstr;

    -- print a string with a newline
    procedure println (str : in string) is
        variable l : line;
    begin
        write(l, str);
        writeline(output, l);
    end procedure println;

    -- Appends one character per element of slv to l, in range order.
    -- Every std_ulogic value is written; previously elements other than
    -- '0', '1', 'X' and 'U' were silently dropped, which shortened the
    -- printed vector.
    procedure write(l : inout line; slv : in std_logic_vector) is
        variable c : character;
    begin
        for i in slv'range loop
            case slv(i) is
                when 'U' => c := 'U';
                when 'X' => c := 'X';
                when '0' => c := '0';
                when '1' => c := '1';
                when 'Z' => c := 'Z';
                when 'W' => c := 'W';
                when 'L' => c := 'L';
                when 'H' => c := 'H';
                when '-' => c := '-';
            end case;
            write(l, c);  -- resolves to the textio character overload
        end loop;
    end procedure write;

    -- print a vector, one character per element, followed by a newline
    procedure print (slv : in std_logic_vector) is
        variable l : line;
    begin
        write(l, slv);
        writeline(output, l);
    end procedure print;

end package body common;
dmem.vhd
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.common.all;

-- Data memory: 64 words, one synchronous write port and one asynchronous
-- read port.
--   reset : not referenced by this architecture
--   clk   : write clock (rising edge)
--   raddr : word index of the location presented on dout
--   dout  : contents of the location addressed by raddr
--   waddr : word index written when we = '1'
--   din   : data to write
--   we    : write enable
entity dmem is
    port (reset : in  std_logic;
          clk   : in  std_logic;
          raddr : in  std_logic_vector(5 downto 0);
          dout  : out word;
          waddr : in  std_logic_vector(5 downto 0);
          din   : in  word;
          we    : in  std_logic);
end entity dmem;

architecture rtl of dmem is
    type ram_t is array (0 to 63) of word;

    -- storage, zero-initialised; there is no reset path that clears it
    signal ram : ram_t := (others => (others => '0'));
begin

    -- purpose: synchronous write port
    -- type   : sequential
    -- inputs : clk, we, waddr, din
    -- outputs: ram
    write_proc : process (clk) is
    begin
        if rising_edge(clk) then
            if we = '1' then
                ram(to_integer(unsigned(waddr))) <= din;
            end if;
        end if;
    end process write_proc;

    -- asynchronous read
    dout <= ram(to_integer(unsigned(raddr)));

end architecture rtl;
dmem_tb.vhd
library IEEE;
use IEEE.std_logic_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

library work;
use work.common.all;

-- Testbench for dmem: writes the value j to every address j (0..63), then
-- reads every address back and fails if the value read differs from j.
entity dmem_tb is
end dmem_tb;

architecture behavioral of dmem_tb is
    constant clk_period : time := 10 ns;

    signal clk   : std_logic;
    signal reset : std_logic;

    -- stimulus
    signal din   : word := (others => '0');
    signal waddr : std_logic_vector(5 downto 0) := (others => '0');
    signal raddr : std_logic_vector(5 downto 0) := (others => '0');
    signal wen   : std_logic := '0';

    -- response
    signal dout : word;

    component dmem is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              raddr : in  std_logic_vector(5 downto 0);
              dout  : out word;
              waddr : in  std_logic_vector(5 downto 0);
              din   : in  word;
              we    : in  std_logic);
    end component dmem;

begin

    -- device under test
    u0 : dmem
        port map (reset => reset,
                  clk   => clk,
                  raddr => raddr,
                  dout  => dout,
                  waddr => waddr,
                  din   => din,
                  we    => wen);

    -- free-running clock, low for the first half period
    proc_clock : process
    begin
        clk <= '0';
        wait for clk_period / 2;
        clk <= '1';
        wait for clk_period / 2;
    end process proc_clock;

    -- Stimulus changes on falling edges so that it is stable at the rising
    -- edge on which dmem samples it.
    proc_set : process
    begin
        -- one clock period of reset
        wait until falling_edge(clk);
        reset <= '1';
        wait until falling_edge(clk);
        reset <= '0';

        -- write phase: location k receives the value k
        wen <= '1';
        for k in 0 to 63 loop
            din   <= std_logic_vector(to_unsigned(k, din'length));
            waddr <= std_logic_vector(to_unsigned(k, waddr'length));
            wait until falling_edge(clk);
        end loop;
        wen <= '0';

        wait for clk_period * 1;

        -- read phase: check each location half a period after addressing it
        for k in 0 to 63 loop
            raddr <= std_logic_vector(to_unsigned(k, raddr'length));
            wait until falling_edge(clk);
            assert (to_integer(unsigned(dout)) = k)
                report "error 'dout' is " & integer'image(to_integer(unsigned(dout)))
                severity failure;
        end loop;

        -- a failure-severity assertion is used to halt the simulator
        assert false
            report "success - end of simulation"
            severity failure;
    end process proc_set;

end architecture;
l5-Single Cycle Processor.pdf
ELEC3608 Computer Architecture
Philip H.W. Leong School of Electrical and Information Engineering,
The University of Sydney
Single Cycle Processor
Load Instructions
› Use ALU for address calculation › Mux to select data for regfile: mem or ALU
2
WBSel ALU / Mem
Op2Sel
base
offset
OpCode
ALU Control
ALU
0x4 Add
clk
addr inst
Inst. Memory
PC
RegWriteEn
clk
rd1
GPRs
rs1 rs2
wa wd rd2
we
Sign Ext
clk
MemWrite
addr
wdata
rdataData Memory
we
I-type: imm[11:0] rs1 f3 rd opcode | offset[11:0] base width dest LOAD
Load: (dest) ← M[(base) + offset]
<11:7>
Store Instructions
› ALU for address calculation; no write-back to regfile › Tell memory it is a write → set MemWrite to ‘1’
3
WBSel ALU / Mem
Op2Sel
base
offset
OpCode
ALU Control
ALU
0x4 Add
clk
addr inst
Inst. Memory
PC
RegWriteEn
clk
rd1
GPRs
rs1 rs2
wa wd rd2
we
Sign Ext
clk
MemWrite
addr
wdata
rdataData Memory
we
S-type: imm[11:5] rs2 rs1 f3 imm[4:0] opcode | Store: M[(base) + offset] ← (src) | offset[11:5] src base width offset[4:0] STORE
Determining ALU functions
› All basic integer R-R instructions have opcode = OP (“0110011”) - only funct3 and funct7 are needed to determine needed ALU function:
- E.g. funct3 000 → Add, 001 → ShiftLeft, 100 → XOR; funct7 0100000 → Sub, …
› Immediate instructions - same ALU function, but different encoding - ADDI is same as ADD, except no need to check for Sub in funct7
- opcode = OP-IMM (“0010011”)
› Need opcode to help determine ALU function - More cases like these come up later…
4
Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51
31 27 26 25 24 20 19 15 14 12 11 7 6 0
funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type
imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type
imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type
RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm
imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm
imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm
imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm
000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2
0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd
Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51
31 27 26 25 24 20 19 15 14 12 11 7 6 0
funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type
imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type
imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type
RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm
imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm
imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm
imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm
000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2
0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd
Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51
31 27 26 25 24 20 19 15 14 12 11 7 6 0
funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type
imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type
imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type
RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm
imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm
imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm
imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm
000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2
0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd
Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51
31 27 26 25 24 20 19 15 14 12 11 7 6 0
funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type
imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type
imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type
RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm
imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm
imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm
imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm
000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2
0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd
ALU Instructions Datapath
5
<30,14:12,6:0>
Op2Sel Reg / Imm
Sign Ext
OpCode
0x4 Add
clk
addr inst
Inst. Memory
PC ALU
RegWriteEn clk
rd1
GPRs
rs1 rs2
wa wd rd2
we<19:15> <24:20>
ALU Control
<11:7>
Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51
31 27 26 25 24 20 19 15 14 12 11 7 6 0
funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type
imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type
imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type
RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm
imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm
imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm
imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm
000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2
0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd
<31:20>
RISC-V Conditional Branches
› Requires: - 1. Logic to compare register values (rs1 and rs2)
- 2. Datapath to calculate branch target address relative to PC
› Current implementation: dedicated logic for both 1 and 2 - Dedicated comparison logic (=, <, [≠, ≥])
- Dedicated adder for jump target calculation
› May use ALU for (2) above - Performance tradeoff…
6
SB-type: imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode
offset[12,10:5] src2 src1 BEQ/BNE/BLT[U]/BGE[U]
offset[11,4:1] BRANCH
if (rs1 BR_OP rs2) then jump to PC + branch_imm
Look at target addresses
1 .text
2 0000 63822000 beq x1,x2,main
3 0004 63842000 main: beq x1,x2,next
4 0008 B3003100 add x1,x2,x3
5 000c B3003100 next: add x1,x2,x3
6 0010 63862000 beq x1,x2,next2
7 0014 B3003100 add x1,x2,x3
8 0018 B3003100 add x1,x2,x3
9 001c 13000000 next2: nop
7
SB-type: imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode
offset[12,10:5] src2 src1 BEQ/BNE/BLT[U]/BGE[U]
offset[11,4:1] BRANCH
Conditional Branches (BEQ/BNE/BLT/BGE/BLTU/BGEU)
8
0x4
Add
PCSel
clk
WBSelMemWrite
addr
wdata
rdata Data Memory
we
Op2SelOpCode
Bcomp?
clk
clk
addr inst
Inst. Memory
PC rd1
GPRs
rs1 rs2
wa wd rd2
we
Branch Imm
ALU
ALU Control
Add
br
pc+4
RegWrEn
Br Logic
RISC-V Unconditional JAL
UJ-type: imm[20|10:1|11|19:12] rd opcode | offset[20:1] dest JAL
jump to PC + j_imm; rd ← PC+4
0x4
Add
PCSel
clk
WBSelMemWrite
addr
wdata
rdata Data Memory
we
Op2SelOpCode
Bcomp?
clk
clk
addr inst
Inst. Memory
PC rd1
GPRs
rs1 rs2
wa wd rd2
we
Branch Imm
ALU
ALU Control
Add
brjmp
pc+4 RegWrEn
Br Logic
Jump Imm
9
JALR
10
0x4
Add
PCSel
clk
WBSelMemWrite
addr
wdata
rdata Data Memory
we
Op2SelOpCode
Bcomp?
clk
clk
addr inst
Inst. Memory
PC rd1
GPRs
rs1 rs2
wa wd rd2
we
ALU
ALU Control
Add
brjmp
pc+4
RegWrEn
Br Logic
I-type: imm[11:0] rs1 f3 rd opcode | offset[11:0] base 000 dest JALR
jump to imm + (rs1); rd ← PC+4
offset Sign Ext
jmpreg
Full RISCV1Stage Datapath
11
+4
Instruction Mem
Reg File
IType Sign Extend
Decoder Data Mem
ir[24:20]
br/jmp pc+4
pc _s
el
ir[31:20]
rs1
ALU
Control Signals
wb _s
el
Reg File
rf_ we
n
va l
m em
_r w
PC
tohost testrig_tohost
cpr_en
m em
_v al
addr wdata
rdata
Inst
BrJmp TargGen
ir[19:15]
ir[31], ir[7], ir[30:25], ir[11:8]
PC+4 jalr
0
rs2
Branch CondGen
br_eq? br_lt?
control status registers
Execute Stage
br_ltu?
PC
addr
BType Sign Extend
JumpReg TargGen
Op2Sel
Op1Sel AluFun
da ta
wa
w d
en
addr da ta
ir[11:7]
Figure 6: The RV32 1-Stage Processor.
13
Single-Cycle Hardwired Control
We will assume clock period is sufficiently long for all of the following steps to propagate: 1. Instruction fetch 2. Decode and register fetch 3. ALU operation 4. Data fetch if required 5. Register write-back setup time
$\Rightarrow\; t_C > t_{IFetch} + t_{RFetch} + t_{ALU} + t_{DMem} + t_{RWB}$
At the rising edge of the following clock, the PC, register file and memory are updated
12
Hardwired Control is pure Combinational Logic
› Decoding instruction determines the setting of various muxes and ALU function
› Simple decoding helps to make faster hardware
13
combinational logic
op code
Bcomp?
Op2Sel AluFunc MemWrite WBSel RegWriteEn PCSel
Hardwired Control Table (Excerpt)
Instruction Op2Sel AluFunc WBSel RegWriteEn MemWrite PCSel
ADD SUB ADDI SLL LW SW BEQ JAL JALR
14
RS2
RS2
IMI
RS2
IMI
IMS
IMB
IMJ
IMI
ADD
SUB
ADD
SLL
ADD
ADD
X
X
X
ALU
ALU
ALU
ALU
MEM
X
X
PC+4
PC+4
T
T
T
T
T
F
F
T
T
F
F
F
F
F
T
N
N
N
PC+4
PC+4
PC+4
PC+4
PC+4
PC+4
PC+4/BA
JA
JRA
• Op2Sel: rs2, {I,B,J}-type immediate IM{I, B, J} • AluFunc: Add, Sub, Shift, XOR, etc • WBSel: what values to write to rd
Lab4
15
Lab4
16