VHDL:Implement a complete single-cycle RISC processor using VHDL

Wuweijia
Lab5.zip

loop.vhd

-- Single-cycle processor core: executes I-type and R-type ALU instructions,
-- stores, and conditional branches.
--
-- Fixes relative to the previous revision:
--   * Stores now select the S-type immediate as ALU operand 2 (op2sel = "10").
--     Previously op2sel stayed at "00", so the store address was computed as
--     rs1 + rs2 instead of rs1 + offset.
--   * decode_proc is sensitive to alu_A and reg_B, which it reads for the
--     branch comparison; without them pcsel could go stale in simulation.
--   * The write-back mux selects dmem_out (not reg_B) when wbsel = '1'.
--     No instruction decoded here sets wbsel yet, so this is behaviour-neutral
--     until loads are added.
--   * The partially driven, never-read signal sw_imm was removed; imm_rd
--     already holds the sign-extended S-type immediate.
--   * BLT/BGE/BLTU/BGEU are decoded in addition to BEQ/BNE.
library IEEE;
use IEEE.std_logic_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

library work;
use work.common.all;

entity lab5 is
    port (reset : in  std_logic;   -- active high; asynchronously clears the PC
          clk   : in  std_logic;
          y     : out word);       -- copy of the ALU result, for observation
end lab5;

architecture behavioral of lab5 is

    signal alu_func   : alu_func_t := ALU_NONE;
    signal alu_A      : word := x"00000000";  -- ALU input 1 (regfile output rega)
    signal alu_B      : word := x"00000000";  -- ALU input 2 (chosen by op2sel)
    signal alu_out    : word := x"00000000";
    signal reg_B      : word := x"00000000";  -- regfile output regb; also store data
    signal imm        : word := x"00000000";  -- sign-extended I-type immediate
    signal imm_rd     : word := x"00000000";  -- sign-extended S-type immediate
    signal ir         : word := x"00000000";  -- current instruction word
    signal dmem_out   : word := x"00000000";
    signal rf_wdata   : word := x"00000000";
    signal branch_imm : unsigned(word'range) := x"00000000";

    -- instruction fields
    signal opcode : opcode_t;
    signal funct3 : std_logic_vector(2 downto 0);
    signal funct7 : std_logic_vector(6 downto 0);
    signal rs1    : std_logic_vector(4 downto 0);
    signal rs2    : std_logic_vector(4 downto 0);
    signal rd     : std_logic_vector(4 downto 0);
    signal pc     : unsigned(word'range) := x"00000000";

    -- control signals
    signal regwrite : std_logic;
    signal wbsel    : std_logic;                     -- '0': ALU result, '1': data memory
    signal memwrite : std_logic;
    signal op2sel   : std_logic_vector(1 downto 0);  -- "00": reg_B, "01": imm, else imm_rd
    signal pcsel    : std_logic;                     -- '1': take branch (pc + branch_imm)

    component alu is
        port (alu_func : in  alu_func_t;
              op1      : in  word;
              op2      : in  word;
              result   : out word);
    end component alu;

    component imem is
        port (addr : in  std_logic_vector(3 downto 0);
              dout : out word);
    end component imem;

    component dmem is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              raddr : in  std_logic_vector(5 downto 0);
              dout  : out word;
              waddr : in  std_logic_vector(5 downto 0);
              din   : in  word;
              we    : in  std_logic);
    end component dmem;

    component regfile is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              addra : in  std_logic_vector(4 downto 0);
              addrb : in  std_logic_vector(4 downto 0);
              rega  : out word;
              regb  : out word;
              addrw : in  std_logic_vector(4 downto 0);
              dataw : in  word;
              we    : in  std_logic);
    end component regfile;

begin

    -- datapath
    alu0 : alu port map (
        alu_func => alu_func,
        op1      => alu_A,
        op2      => alu_B,
        result   => alu_out);

    -- Instructions are word aligned, so pc(1 downto 0) is dropped.
    imem0 : imem port map (
        addr => std_logic_vector(pc(5 downto 2)),
        dout => ir);

    -- Data memory is word addressed: the two low byte-address bits are dropped.
    dmem0 : dmem port map (
        reset => reset,
        clk   => clk,
        raddr => alu_out(7 downto 2),
        dout  => dmem_out,
        waddr => alu_out(7 downto 2),
        din   => reg_B,
        we    => memwrite);

    rf0 : regfile port map (
        reset => reset,
        clk   => clk,
        addra => rs1,
        addrb => rs2,
        rega  => alu_A,
        regb  => reg_B,
        addrw => rd,
        dataw => rf_wdata,
        we    => regwrite);

    -- ALU operand 2 mux
    alu_B <= reg_B when op2sel = "00" else
             imm   when op2sel = "01" else
             imm_rd;

    -- write-back mux
    rf_wdata <= alu_out when wbsel = '0' else dmem_out;

    -- instruction fields
    rs1    <= ir(19 downto 15);
    rs2    <= ir(24 downto 20);
    rd     <= ir(11 downto 7);
    funct3 <= ir(14 downto 12);
    funct7 <= ir(31 downto 25);
    opcode <= ir(6 downto 0);

    -- I-type immediate: ir[31:20], sign extended.
    imm <= std_logic_vector(resize(signed(ir(31 downto 20)), word'length));

    -- S-type immediate: ir[31:25] & ir[11:7], sign extended.
    imm_rd <= std_logic_vector(resize(
                  signed(std_logic_vector'(ir(31 downto 25) & ir(11 downto 7))),
                  word'length));

    -- Branch immediate: ir[31] & ir[7] & ir[30:25] & ir[11:8] & '0', sign
    -- extended. It is added to the PC as an unsigned value; two's-complement
    -- wrap-around makes negative offsets work.
    branch_imm <= unsigned(resize(
                      signed(std_logic_vector'(ir(31) & ir(7) & ir(30 downto 25)
                                               & ir(11 downto 8) & '0')),
                      word'length));

    -- Hardwired control: purely combinational decode of the current
    -- instruction. alu_A and reg_B are in the sensitivity list because the
    -- branch comparison reads them.
    decode_proc : process (ir, funct3, opcode, alu_A, reg_B) is
    begin
        -- defaults: no register/memory write, operand 2 from the register
        -- file, write back the ALU result, fall through to pc + 4
        regwrite <= '0';
        op2sel   <= "00";
        memwrite <= '0';
        wbsel    <= '0';
        pcsel    <= '0';
        alu_func <= ALU_NONE;

        case opcode is
            when OP_ITYPE =>
                regwrite <= '1';
                op2sel   <= "01";
                case funct3 is
                    when "000" => alu_func <= ALU_ADD;
                    when "001" => alu_func <= ALU_SLL;
                    when "010" => alu_func <= ALU_SLT;
                    when "011" => alu_func <= ALU_SLTU;
                    when "100" => alu_func <= ALU_XOR;
                    when "101" =>
                        -- ir(30) distinguishes arithmetic from logical shift
                        if ir(30) = '1' then
                            alu_func <= ALU_SRA;
                        else
                            alu_func <= ALU_SRL;
                        end if;
                    when "110" => alu_func <= ALU_OR;
                    when "111" => alu_func <= ALU_AND;
                    when others => null;
                end case;

            when OP_RTYPE =>
                regwrite <= '1';
                case funct3 is
                    when "000" =>
                        -- ir(30) distinguishes SUB from ADD
                        if ir(30) = '1' then
                            alu_func <= ALU_SUB;
                        else
                            alu_func <= ALU_ADD;
                        end if;
                    when "001" => alu_func <= ALU_SLL;
                    when "010" => alu_func <= ALU_SLT;
                    when "011" => alu_func <= ALU_SLTU;
                    when "100" => alu_func <= ALU_XOR;
                    when "101" =>
                        if ir(30) = '1' then
                            alu_func <= ALU_SRA;
                        else
                            alu_func <= ALU_SRL;
                        end if;
                    when "110" => alu_func <= ALU_OR;
                    when "111" => alu_func <= ALU_AND;
                    when others => null;
                end case;

            when OP_BRANCH =>
                -- Dedicated comparison logic on the two register-file outputs;
                -- pcsel stays '0' unless the condition holds.
                case funct3 is
                    when BEQ =>
                        if alu_A = reg_B then
                            pcsel <= '1';
                        end if;
                    when BNE =>
                        if alu_A /= reg_B then
                            pcsel <= '1';
                        end if;
                    when BLT =>
                        if signed(alu_A) < signed(reg_B) then
                            pcsel <= '1';
                        end if;
                    when BGE =>
                        if signed(alu_A) >= signed(reg_B) then
                            pcsel <= '1';
                        end if;
                    when BLTU =>
                        if unsigned(alu_A) < unsigned(reg_B) then
                            pcsel <= '1';
                        end if;
                    when BGEU =>
                        if unsigned(alu_A) >= unsigned(reg_B) then
                            pcsel <= '1';
                        end if;
                    when others => null;
                end case;

            when OP_STORE =>
                -- address = rs1 + S-type immediate; store data is reg_B
                memwrite <= '1';
                op2sel   <= "10";
                alu_func <= ALU_ADD;

            when others => null;
        end case;
    end process;

    y <= alu_out;

    -- Program counter register.
    acc : process (reset, clk)
    begin
        if reset = '1' then
            pc <= (others => '0');
        elsif rising_edge(clk) then
            if pcsel = '1' then
                pc <= pc + branch_imm;
            else
                pc <= pc + 4;
            end if;
        end if;
    end process;

end architecture;

loop_tb.vhd

-- Testbench for lab5: holds reset for two clock periods, lets the design run
-- for 56 more, then stops the simulation with a failure-severity assertion.
library IEEE;
use IEEE.std_logic_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

library work;
use work.common.all;

entity lab5_tb is
end lab5_tb;

architecture behavioral of lab5_tb is

    constant clk_period : time := 10 ns;

    signal clk    : std_logic;
    signal reset  : std_logic;
    signal cpuout : word;   -- value driven by the y port of the design under test

    component lab5 is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              y     : out word);
    end component;

begin

    -- design under test
    u0 : lab5 port map (
        reset => reset,
        clk   => clk,
        y     => cpuout);

    -- Free-running clock: low for the first half of each period, high for
    -- the second half.
    proc_clock : process
    begin
        clk <= '0', '1' after clk_period / 2;
        wait for clk_period;
    end process;

    -- Reset is asserted for the first two periods; the run ends after a
    -- total of 2 + 56 periods.
    proc_stimuli : process
    begin
        reset <= '1', '0' after clk_period * 2;
        wait for clk_period * 58;
        -- a failure-severity assertion is used to stop the simulator
        assert false report "success - end of simulation" severity failure;
    end process;

end architecture;

imem.vhd

-- Instruction memory: a 16-word combinational ROM holding the test program.
--
-- The address conversion uses the standard ieee.numeric_std package instead
-- of the non-standard ieee.std_logic_unsigned / conv_integer, matching the
-- other design units in this project.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

entity imem is
    port (addr : in  std_logic_vector(3 downto 0);  -- word index, 0 to 15
          dout : out word);                         -- instruction at that index
end imem;

architecture behavioral of imem is

    type rom_arr is array (0 to 15) of word;

    -- Program listing (byte address, mnemonic) as produced by the assembler.
    constant mem : rom_arr := (
        x"00000793",   -- 0000       li   a5,0
        x"00C00693",   -- 0004       li   a3,12
        x"00279713",   -- 0008  .L2: sll  a4,a5,2
        x"00F72023",   -- 000c       sw   a5,0(a4)
        x"00178793",   -- 0010       add  a5,a5,1
        x"FED79AE3",   -- 0014       bne  a5,a3,.L2
        x"00000513",   -- 0018       li   a0,0
        x"00008067",   -- 001c       ret
        others => x"00000013");  -- remaining eight words: nop

begin

    -- asynchronous read
    dout <= mem(to_integer(unsigned(addr)));

end behavioral;

alu.vhd

-- Combinational ALU. The operation is selected by alu_func (ALU_* constants
-- from work.common); any unlisted selector passes op1 through unchanged.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.common.all;

entity alu is
    port (alu_func : in  alu_func_t;
          op1      : in  word;
          op2      : in  word;
          result   : out word);
end entity alu;

architecture behavioral of alu is

    -- Encode a comparison outcome as the word 1 (true) or 0 (false).
    function flag (cond : boolean) return word is
    begin
        if cond then
            return x"00000001";
        end if;
        return x"00000000";
    end function flag;

begin

    -- purpose: arithmetic and logic
    -- type   : combinational
    -- inputs : alu_func, op1, op2
    -- outputs: result
    alu_proc : process (alu_func, op1, op2) is
        variable a_s, b_s : signed(word'range);
        variable a_u, b_u : unsigned(word'range);
        variable shamt    : natural;
    begin
        a_s := signed(op1);
        b_s := signed(op2);
        a_u := unsigned(op1);
        b_u := unsigned(op2);

        case alu_func is
            -- Two's-complement addition and subtraction produce the same bit
            -- pattern for signed and unsigned operands.
            when ALU_ADD | ALU_ADDU =>
                result <= std_logic_vector(a_u + b_u);
            when ALU_SUB | ALU_SUBU =>
                result <= std_logic_vector(a_u - b_u);

            when ALU_SLT  => result <= flag(a_s < b_s);
            when ALU_SLTU => result <= flag(a_u < b_u);

            when ALU_AND => result <= op1 and op2;
            when ALU_OR  => result <= op1 or op2;
            when ALU_XOR => result <= op1 xor op2;

            -- Only the low five bits of op2 give the shift distance.
            when ALU_SLL | ALU_SRA | ALU_SRL =>
                shamt := to_integer(b_u(4 downto 0));
                if alu_func = ALU_SLL then
                    result <= std_logic_vector(shift_left(a_u, shamt));
                elsif alu_func = ALU_SRA then
                    -- signed operand: the sign bit is replicated
                    result <= std_logic_vector(shift_right(a_s, shamt));
                else
                    -- unsigned operand: zeros are shifted in
                    result <= std_logic_vector(shift_right(a_u, shamt));
                end if;

            when others =>
                result <= op1;
        end case;
    end process alu_proc;

end architecture behavioral;

common.vhd

-- Shared types, encodings and small print helpers for the processor.
--
-- Fixes relative to the previous revision:
--   * hstr sizes its buffers from slv'length. It used slv'left, which only
--     worked for "N downto 0" vectors (a literal argument has an ascending
--     range) and it overflowed its fixed 16-character buffer above 64 bits.
--   * write emits one character for every std_logic value; it used to drop
--     'Z', 'W', 'L', 'H' and '-' silently, shortening the printed vector.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

package common is

    -- definition for a machine word
    subtype word is std_logic_vector(31 downto 0);
    subtype reg_addr_t is std_logic_vector(4 downto 0);

    -- ALU operation selectors
    subtype alu_func_t is std_logic_vector(3 downto 0);
    constant ALU_NONE : alu_func_t := "0000";
    constant ALU_ADD  : alu_func_t := "0001";
    constant ALU_ADDU : alu_func_t := "0010";
    constant ALU_SUB  : alu_func_t := "0011";
    constant ALU_SUBU : alu_func_t := "0100";
    constant ALU_SLT  : alu_func_t := "0101";
    constant ALU_SLTU : alu_func_t := "0110";
    constant ALU_AND  : alu_func_t := "0111";
    constant ALU_OR   : alu_func_t := "1000";
    constant ALU_XOR  : alu_func_t := "1001";
    constant ALU_SLL  : alu_func_t := "1010";
    constant ALU_SRA  : alu_func_t := "1011";
    constant ALU_SRL  : alu_func_t := "1100";

    -- branch selectors
    subtype branch_type_t is std_logic_vector(2 downto 0);
    constant BEQ  : branch_type_t := "000";
    constant BNE  : branch_type_t := "001";
    constant BLT  : branch_type_t := "100";
    constant BGE  : branch_type_t := "101";
    constant BLTU : branch_type_t := "110";
    constant BGEU : branch_type_t := "111";

    subtype load_type_t is std_logic_vector(2 downto 0);
    constant LOAD_NONE : load_type_t := "000";
    constant LB        : load_type_t := "001";
    constant LH        : load_type_t := "010";
    constant LW        : load_type_t := "011";
    constant LBU       : load_type_t := "100";
    constant LHU       : load_type_t := "101";

    subtype store_type_t is std_logic_vector(1 downto 0);
    constant STORE_NONE : store_type_t := "00";
    constant SB         : store_type_t := "01";
    constant SH         : store_type_t := "10";
    constant SW         : store_type_t := "11";

    subtype system_type_t is std_logic_vector(2 downto 0);
    constant SYSTEM_ECALL  : system_type_t := "000";
    constant SYSTEM_EBREAK : system_type_t := "001";
    constant SYSTEM_CSRRW  : system_type_t := "010";
    constant SYSTEM_CSRRS  : system_type_t := "011";
    constant SYSTEM_CSRRC  : system_type_t := "100";
    constant SYSTEM_CSRRWI : system_type_t := "101";
    constant SYSTEM_CSRRSI : system_type_t := "110";
    constant SYSTEM_CSRRCI : system_type_t := "111";

    -- major opcodes (instruction bits 6 downto 0)
    subtype opcode_t is std_logic_vector(6 downto 0);
    constant OP_ITYPE  : opcode_t := "0010011";
    constant OP_RTYPE  : opcode_t := "0110011";
    constant OP_STORE  : opcode_t := "0100011";
    constant OP_BRANCH : opcode_t := "1100011";

    -- print a string with a newline
    procedure println (str : in string);
    -- print a vector, one character per element, with a newline
    procedure print (slv : in std_logic_vector);
    -- append a vector to a line, one character per element
    procedure write(l : inout line; slv : in std_logic_vector);
    -- hexadecimal text of a vector, leftmost element most significant
    function hstr(slv : std_logic_vector) return string;

    -- instruction formats
    type r_insn_t is (R_ADD, R_SLT, R_SLTU, R_AND, R_OR, R_XOR, R_SLL, R_SRL, R_SUB, R_SRA);
    type i_insn_t is (I_JALR, I_LB, I_LH, I_LW, I_LBU, I_LHU, I_ADDI, I_SLTI, I_SLTIU, I_XORI, I_ORI, I_ANDI, I_SLLI, I_SRLI, I_SRAI);
    type s_insn_t is (S_SB, S_SH, S_SW);
    type sb_insn_t is (SB_BEQ, SB_BNE, SB_BLT, SB_BGE, SB_BLTU, SB_BGEU);
    type u_insn_t is (U_LUI, U_AUIPC);
    type uj_insn_t is (UJ_JAL);

    -- ADDI x0, x0, 0
    constant NOP : word := "00000000000000000000000000010011";

end package common;

package body common is

    -- Returns the hexadecimal representation of slv.
    --
    -- The leftmost element is treated as the most significant bit whatever
    -- the index range or direction of slv. When the length is not a multiple
    -- of four, the value is zero-padded on the left. A nibble that is all
    -- 'Z', all 'U' or all 'X' prints as 'z', 'u' or 'x'; any other nibble
    -- containing a non-'0'/'1' element prints as '?'. An empty vector gives
    -- an empty string.
    function hstr(slv : std_logic_vector) return string is
        constant hexlen  : natural := (slv'length + 3) / 4;
        -- normalised view of slv: descending, ending at index 0
        alias norm       : std_logic_vector(slv'length - 1 downto 0) is slv;
        variable padded  : std_logic_vector(hexlen * 4 - 1 downto 0) := (others => '0');
        variable hex     : string(1 to hexlen);
        variable fourbit : std_logic_vector(3 downto 0);
        variable digit   : character;
    begin
        padded(norm'range) := norm;

        for i in hexlen - 1 downto 0 loop
            fourbit := padded(i * 4 + 3 downto i * 4);
            case fourbit is
                when "0000" => digit := '0';
                when "0001" => digit := '1';
                when "0010" => digit := '2';
                when "0011" => digit := '3';
                when "0100" => digit := '4';
                when "0101" => digit := '5';
                when "0110" => digit := '6';
                when "0111" => digit := '7';
                when "1000" => digit := '8';
                when "1001" => digit := '9';
                when "1010" => digit := 'A';
                when "1011" => digit := 'B';
                when "1100" => digit := 'C';
                when "1101" => digit := 'D';
                when "1110" => digit := 'E';
                when "1111" => digit := 'F';
                when "ZZZZ" => digit := 'z';
                when "UUUU" => digit := 'u';
                when "XXXX" => digit := 'x';
                when others => digit := '?';
            end case;
            -- nibble hexlen-1 is the most significant and goes in position 1
            hex(hexlen - i) := digit;
        end loop;

        return hex;
    end hstr;

    -- print a string with a newline
    procedure println (str : in string) is
        variable l : line;
    begin
        write(l, str);
        writeline(output, l);
    end procedure println;

    -- Appends one character per element of slv to l, in index order of
    -- slv'range. Every std_logic value is printed using its literal
    -- character.
    procedure write(l : inout line; slv : in std_logic_vector) is
        -- characters in std_ulogic declaration order
        constant SYMBOLS : string(1 to 9) := "UX01ZWLH-";
    begin
        for i in slv'range loop
            write(l, SYMBOLS(std_ulogic'pos(slv(i)) + 1));
        end loop;
    end procedure write;

    -- print a vector with a newline
    procedure print (slv : in std_logic_vector) is
        variable l : line;
    begin
        write(l, slv);
        writeline(output, l);
    end procedure print;

end package body common;

dmem.vhd

-- Data memory: 64 words with one write port and one read port.
--   * write: on the rising edge of clk, when we = '1', din is stored at waddr
--   * read : asynchronous; dout follows the word at raddr
-- The reset port is not used by this architecture; the storage is only given
-- an all-zero initial value at elaboration.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.common.all;

entity dmem is
    port (reset : in  std_logic;
          clk   : in  std_logic;
          raddr : in  std_logic_vector(5 downto 0);
          dout  : out word;
          waddr : in  std_logic_vector(5 downto 0);
          din   : in  word;
          we    : in  std_logic);
end entity dmem;

architecture rtl of dmem is

    constant DEPTH : positive := 64;

    type storage_t is array (0 to DEPTH - 1) of word;
    signal storage : storage_t := (others => (others => '0'));

begin

    -- purpose: synchronous write port
    -- type   : sequential
    -- inputs : clk, we, waddr, din
    write_port : process (clk) is
    begin
        if rising_edge(clk) then
            if we = '1' then
                storage(to_integer(unsigned(waddr))) <= din;
            end if;
        end if;
    end process write_port;

    -- asynchronous read port
    dout <= storage(to_integer(unsigned(raddr)));

end architecture rtl;

dmem_tb.vhd

-- Testbench for dmem: writes the value j to every address j (0 to 63), then
-- reads each address back and checks it, and finally stops the simulation
-- with a failure-severity assertion.
library IEEE;
use IEEE.std_logic_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

library work;
use work.common.all;

entity dmem_tb is
end dmem_tb;

architecture behavioral of dmem_tb is

    constant clk_period : time := 10 ns;

    signal clk   : std_logic;
    signal reset : std_logic;

    -- stimulus
    signal din   : word := (others => '0');
    signal waddr : std_logic_vector(5 downto 0) := (others => '0');
    signal raddr : std_logic_vector(5 downto 0) := (others => '0');
    signal wen   : std_logic := '0';

    -- response
    signal dout : word;

    component dmem is
        port (reset : in  std_logic;
              clk   : in  std_logic;
              raddr : in  std_logic_vector(5 downto 0);
              dout  : out word;
              waddr : in  std_logic_vector(5 downto 0);
              din   : in  word;
              we    : in  std_logic);
    end component dmem;

begin

    -- design under test
    u0 : dmem port map (
        reset => reset,
        clk   => clk,
        raddr => raddr,
        dout  => dout,
        waddr => waddr,
        din   => din,
        we    => wen);

    -- Free-running clock: low for the first half of each period, high for
    -- the second half.
    proc_clock : process
    begin
        clk <= '0', '1' after clk_period / 2;
        wait for clk_period;
    end process;

    proc_set : process
        variable addr : natural;

        -- Suspend until the next falling clock edge. Stimulus changes on
        -- falling edges, away from the rising edge used for writes.
        procedure next_falling_edge is
        begin
            wait until falling_edge(clk);
        end procedure next_falling_edge;
    begin
        -- one-cycle reset pulse
        next_falling_edge;
        reset <= '1';
        next_falling_edge;
        reset <= '0';

        -- write phase: address j receives the value j
        wen  <= '1';
        addr := 0;
        while addr <= 63 loop
            din   <= std_logic_vector(to_unsigned(addr, din'length));
            waddr <= std_logic_vector(to_unsigned(addr, waddr'length));
            next_falling_edge;
            addr := addr + 1;
        end loop;
        wen <= '0';

        wait for clk_period;

        -- read phase: each address must return its own index
        addr := 0;
        while addr <= 63 loop
            raddr <= std_logic_vector(to_unsigned(addr, raddr'length));
            next_falling_edge;
            assert (to_integer(unsigned(dout)) = addr)
                report "error 'dout' is " & integer'image(to_integer(unsigned(dout)))
                severity failure;
            addr := addr + 1;
        end loop;

        -- a failure-severity assertion is used to stop the simulator
        assert false report "success - end of simulation" severity failure;
    end process;

end architecture;

l5-Single Cycle Processor.pdf

ELEC3608 Computer Architecture

Philip H.W. Leong School of Electrical and Information Engineering,

The University of Sydney

Single Cycle Processor

Load Instructions

› Use ALU for address calculation › Mux to select data for regfile: mem or ALU

2

WBSel ALU / Mem

Op2Sel

base

offset

OpCode

ALU Control

ALU

0x4 Add

clk

addr inst

Inst. Memory

PC

RegWriteEn

clk

rd1

GPRs

rs1 rs2

wa wd rd2

we

Sign Ext

clk

MemWrite

addr

wdata

rdata Data Memory

we

I-type: imm[11:0] rs1 f3 rd opcode — fields: offset[11:0] base width dest LOAD

Load: (dest) ← M[(base) + offset]

<11:7>

Store Instructions

› ALU for address calculation; no write-back to regfile › Tell memory it is a write → set MemWrite to ‘1’

3

WBSel ALU / Mem

Op2Sel

base

offset

OpCode

ALU Control

ALU

0x4 Add

clk

addr inst

Inst. Memory

PC

RegWriteEn

clk

rd1

GPRs

rs1 rs2

wa wd rd2

we

Sign Ext

clk

MemWrite

addr

wdata

rdata Data Memory

we

S-type: imm[11:5] rs2 rs1 f3 imm[4:0] opcode — fields: offset[11:5] src base width offset[4:0] STORE. Store: M[(base) + offset] ← (src)

Determining ALU functions

› All basic integer R-R instructions have opcode = OP (“0110011”) - only funct3 and funct7 are needed to determine needed ALU function:

- E.g. funct3 000 → Add, 001 → ShiftLeft, 100 → XOR; funct7 0100000 → Sub, …

› Immediate instructions - same ALU function, but different encoding - ADDI is same as ADD, except no need to check for Sub in funct7

- opcode = OP-IMM (“0010011”)

› Need opcode to help determine ALU function - More cases like these come up later…

4

Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51

31 27 26 25 24 20 19 15 14 12 11 7 6 0

funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type

imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type

imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type

RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm

imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm

imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm

imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm

imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm

imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm

000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2

0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd

Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51

31 27 26 25 24 20 19 15 14 12 11 7 6 0

funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type

imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type

imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type

RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm

imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm

imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm

imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm

imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm

imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm

000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2

0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd

Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51

31 27 26 25 24 20 19 15 14 12 11 7 6 0

funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type

imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type

imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type

RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm

imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm

imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm

imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm

imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm

imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm

000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2

0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd

Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51

31 27 26 25 24 20 19 15 14 12 11 7 6 0

funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type

imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type

imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type

RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm

imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm

imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm

imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm

imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm

imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm

000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2

0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd

ALU Instructions Datapath

5

<30,14:12,6:0>

Op2Sel Reg / Imm

Sign Ext

OpCode

0x4 Add

clk

addr inst

Inst. Memory

PC ALU

RegWriteEn clk

rd1

GPRs

rs1 rs2

wa wd rd2

we<19:15> <24:20>

ALU Control

<11:7>

Copyright © 2010–2014, The Regents of the University of California. All rights reserved. 51

31 27 26 25 24 20 19 15 14 12 11 7 6 0

funct7 rs2 rs1 funct3 rd opcode R-type imm[11:0] rs1 funct3 rd opcode I-type

imm[11:5] rs2 rs1 funct3 imm[4:0] opcode S-type imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode SB-type

imm[31:12] rd opcode U-type imm[20|10:1|11|19:12] rd opcode UJ-type

RV32I Base Instruction Set imm[31:12] rd 0110111 LUI rd,imm imm[31:12] rd 0010111 AUIPC rd,imm

imm[20|10:1|11|19:12] rd 1101111 JAL rd,imm imm[11:0] rs1 000 rd 1100111 JALR rd,rs1,imm

imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ rs1,rs2,imm imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE rs1,rs2,imm imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT rs1,rs2,imm imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE rs1,rs2,imm imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU rs1,rs2,imm imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU rs1,rs2,imm

imm[11:0] rs1 000 rd 0000011 LB rd,rs1,imm imm[11:0] rs1 001 rd 0000011 LH rd,rs1,imm imm[11:0] rs1 010 rd 0000011 LW rd,rs1,imm imm[11:0] rs1 100 rd 0000011 LBU rd,rs1,imm imm[11:0] rs1 101 rd 0000011 LHU rd,rs1,imm

imm[11:5] rs2 rs1 000 imm[4:0] 0100011 SB rs1,rs2,imm imm[11:5] rs2 rs1 001 imm[4:0] 0100011 SH rs1,rs2,imm imm[11:5] rs2 rs1 010 imm[4:0] 0100011 SW rs1,rs2,imm

imm[11:0] rs1 000 rd 0010011 ADDI rd,rs1,imm imm[11:0] rs1 010 rd 0010011 SLTI rd,rs1,imm imm[11:0] rs1 011 rd 0010011 SLTIU rd,rs1,imm imm[11:0] rs1 100 rd 0010011 XORI rd,rs1,imm imm[11:0] rs1 110 rd 0010011 ORI rd,rs1,imm imm[11:0] rs1 111 rd 0010011 ANDI rd,rs1,imm

000000 shamt rs1 001 rd 0010011 SLLI rd,rs1,shamt 000000 shamt rs1 101 rd 0010011 SRLI rd,rs1,shamt 010000 shamt rs1 101 rd 0010011 SRAI rd,rs1,shamt 0000000 rs2 rs1 000 rd 0110011 ADD rd,rs1,rs2 0100000 rs2 rs1 000 rd 0110011 SUB rd,rs1,rs2 0000000 rs2 rs1 001 rd 0110011 SLL rd,rs1,rs2 0000000 rs2 rs1 010 rd 0110011 SLT rd,rs1,rs2 0000000 rs2 rs1 011 rd 0110011 SLTU rd,rs1,rs2 0000000 rs2 rs1 100 rd 0110011 XOR rd,rs1,rs2 0000000 rs2 rs1 101 rd 0110011 SRL rd,rs1,rs2 0100000 rs2 rs1 101 rd 0110011 SRA rd,rs1,rs2 0000000 rs2 rs1 110 rd 0110011 OR rd,rs1,rs2 0000000 rs2 rs1 111 rd 0110011 AND rd,rs1,rs2

0000 pred succ 00000 000 00000 0001111 FENCE 0000 0000 0000 00000 001 00000 0001111 FENCE.I 0000000 00000 00000 000 00000 1110011 SCALL 0000000 00001 00000 000 00000 1110011 SBREAK 1100000 00000 00000 010 rd 1110011 RDCYCLE rd 1100000 00001 00000 010 rd 1110011 RDTIME rd 1100000 00010 00000 010 rd 1110011 RDINSTRET rd

<31:20>

RISC-V Conditional Branches

› Requires: - 1. Logic to compare register values (rs1 and rs2)

- 2. Datapath to calculate branch target address relative to PC

› Current implementation: dedicated logic for both 1 and 2 - Dedicated comparison logic (=, <, [≠, ≥])

- Dedicated adder for jump target calculation

› May use ALU for (2) above - Performance tradeoff…

6

SB-type: imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode — fields: offset[12,10:5] src2 src1 BEQ/BNE

BLT[U] BGE[U]

offset[11,4:1] BRANCH

if (rs1 BR_OP rs2) then jump to PC + branch_imm

Look at target addresses

1 .text

2 0000 63822000 beq x1,x2,main

3 0004 63842000 main: beq x1,x2,next

4 0008 B3003100 add x1,x2,x3

5 000c B3003100 next: add x1,x2,x3

6 0010 63862000 beq x1,x2,next2

7 0014 B3003100 add x1,x2,x3

8 0018 B3003100 add x1,x2,x3

9 001c 13000000 next2: nop

7

SB-type: imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode — fields: offset[12,10:5] src2 src1 BEQ/BNE

BLT[U] BGE[U]

offset[11,4:1] BRANCH

Conditional Branches (BEQ/BNE/BLT/BGE/BLTU/BGEU)

8

0x4

Add

PCSel

clk

WBSelMemWrite

addr

wdata

rdata Data Memory

we

Op2SelOpCode

Bcomp?

clk

clk

addr inst

Inst. Memory

PC rd1

GPRs

rs1 rs2

wa wd rd2

we

Branch Imm

ALU

ALU Control

Add

br

pc+4

RegWrEn

Br Logic

RISC-V Unconditional JAL

UJ-type: offset[20:1] dest JAL

jump to PC + j_imm; rd ← PC+4. Encoding: imm[20|10:1|11|19:12] rd opcode

0x4

Add

PCSel

clk

WBSelMemWrite

addr

wdata

rdata Data Memory

we

Op2SelOpCode

Bcomp?

clk

clk

addr inst

Inst. Memory

PC rd1

GPRs

rs1 rs2

wa wd rd2

we

Branch Imm

ALU

ALU Control

Add

brjmp

pc+4 RegWrEn

Br Logic

Jump Imm

9

JALR

10

0x4

Add

PCSel

clk

WBSelMemWrite

addr

wdata

rdata Data Memory

we

Op2SelOpCode

Bcomp?

clk

clk

addr inst

Inst. Memory

PC rd1

GPRs

rs1 rs2

wa wd rd2

we

ALU

ALU Control

Add

brjmp

pc+4

RegWrEn

Br Logic

I-type: imm[11:0] rs1 f3 rd opcode — fields: offset[11:0] base 000 dest JALR

jump to imm + (rs1); rd ← PC+4

offset Sign Ext

jmpreg

Full RISCV1Stage Datapath

11

+4

Instruction Mem

Reg File

IType Sign Extend

Decoder Data Mem

ir[24:20]

br/jmp pc+4

pc_sel

ir[31:20]

rs1

ALU

Control Signals

wb_sel

Reg File

rf_ we

n

va l

mem_rw

PC

tohost testrig_tohost

cpr_en

mem_val

addr wdata

rdata

Inst

BrJmp TargGen

ir[19:15]

ir[31], ir[7], ir[30:25], ir[11:8]

PC+4 jalr

0

rs2

Branch CondGen

br_eq? br_lt?

control status registers

Execute Stage

br_ltu?

PC

addr

BType Sign Extend

JumpReg TargGen

Op2Sel

Op1Sel AluFun

da ta

wa

w d

en

addr da ta

ir[11:7]

Figure 6: The RV32 1-Stage Processor.

13

Single-Cycle Hardwired Control

We will assume clock period is sufficiently long for all of the following steps to propagate: 1. Instruction fetch 2. Decode and register fetch 3. ALU operation 4. Data fetch if required 5. Register write-back setup time

$\Rightarrow\; t_C > t_{IFetch} + t_{RFetch} + t_{ALU} + t_{DMem} + t_{RWB}$

At the rising edge of the following clock, the PC, register file and memory are updated

12

Hardwired Control is pure Combinational Logic

› Decoding instruction determines the setting of various muxes and ALU function

› Simple decoding helps to make faster hardware

13

combinational logic

op code

Bcomp?

Op2Sel AluFunc MemWrite WBSel RegWriteEn PCSel

Hardwired Control Table (Excerpt)

Instruction Op2Sel AluFunc WBSel RegWriteEn MemWrite PCSel

ADD SUB ADDI SLL LW SW BEQ JAL JALR

14

RS2

RS2

IMI

RS2

IMI

IMS

IMB

IMJ

IMI

ADD

SUB

ADD

SLL

ADD

ADD

X

X

X

ALU

ALU

ALU

ALU

MEM

X

X

PC+4

PC+4

T

T

T

T

T

F

F

T

T

F

F

F

F

F

T

N

N

N

PC+4

PC+4

PC+4

PC+4

PC+4

PC+4

PC+4/BA

JA

JRA

• Op2Sel: rs2, {I,B,J}-type immediate IM{I, B, J} • AluFunc: Add, Sub, Shift, XOR, etc • WBSel: what values to write to rd

Lab4

15

Lab4

16