CPU设计实战:LoongArch版 lab6——20条指令单周期CPU
创始人
2024-11-11 17:39:21

        本文是为了记录我学习 LoongArch、准备参加龙芯杯的过程,所有实验均来自 LoongArch CPU 设计实验。

        debug到一半才想起来写个博客,就不写过程了,下个lab再写过程。

        这次 debug 卡我时间最长的是通用寄存器的写使能信号。当时一直没找到问题所在,甚至把 golden_trace 重新生成了一遍又一遍,对照 test 检查了各个环节,卡了很长时间。我一度以为是之前哪一步做错了,或者是环境出了问题,但一步步排查下来都不是;最后还是靠着“机器永远是对的”这句话,才终于查到了错。

        其他的 bug 就还好,只要认真跟着实验手册走,按逻辑检查,并且查阅龙芯架构参考手册,就能很快找出来——手册上的每一句话都很重要。

        下面是两个模块的代码,运行已通过仿真验证。

        mycpu_top 

// mycpu_top: single-cycle CPU core that decodes 20 LoongArch instructions
// (add.w, sub.w, slt, sltu, nor, and, or, xor, slli.w, srli.w, srai.w,
//  addi.w, ld.w, st.w, jirl, b, bl, beq, bne, lu12i.w).
//
// Each cycle the core presents `pc` on the instruction SRAM port, decodes the
// word returned on inst_sram_rdata, reads the register file, runs the ALU,
// optionally accesses the data SRAM, and writes the result back.
// NOTE(review): this assumes both SRAMs return read data combinationally in
// the same cycle the address is presented - confirm against the SoC wrapper.
//
// Ports:
//   clk, resetn        - clock and active-low reset (registered internally).
//   inst_sram_*        - instruction memory port; never written by this core.
//   data_sram_*        - data memory port used by ld.w / st.w.
//   debug_wb_*         - write-back trace: PC, register write strobe
//                        (replicated x4), destination register and data.
module mycpu_top(
    input  wire        clk,
    input  wire        resetn,
    // inst sram interface
    output wire        inst_sram_we,
    output wire [31:0] inst_sram_addr,
    output wire [31:0] inst_sram_wdata,
    input  wire [31:0] inst_sram_rdata,
    // data sram interface
    output wire        data_sram_we,
    output wire [31:0] data_sram_addr,
    output wire [31:0] data_sram_wdata,
    input  wire [31:0] data_sram_rdata,
    // trace debug interface
    output wire [31:0] debug_wb_pc,
    output wire [ 3:0] debug_wb_rf_we,
    output wire [ 4:0] debug_wb_rf_wnum,
    output wire [31:0] debug_wb_rf_wdata
);

// ---------------------------------------------------------------------------
// Reset / valid
// ---------------------------------------------------------------------------
// Active-high reset, registered from the active-low input.
reg         reset;
always @(posedge clk) reset <= ~resetn;

// valid is low while reset is asserted and high afterwards; it gates every
// architectural side effect (branch redirect, register write, memory write).
reg         valid;
always @(posedge clk) begin
    if (reset) begin
        valid <= 1'b0;
    end
    else begin
        valid <= 1'b1;
    end
end

// ---------------------------------------------------------------------------
// Signal declarations
// ---------------------------------------------------------------------------
wire [31:0] seq_pc;
wire [31:0] nextpc;
wire        br_taken;
wire [31:0] br_target;
wire [31:0] inst;
reg  [31:0] pc;

wire [11:0] alu_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        dst_is_r1;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire [ 4:0] dest;
wire [31:0] rj_value;
wire [31:0] rkd_value;
wire        rj_eq_rd;      // declared explicitly; no reliance on implicit nets
wire [31:0] imm;
wire [31:0] br_offs;
wire [31:0] jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;

wire [31:0] alu_src1;
wire [31:0] alu_src2;
wire [31:0] alu_result;

wire [31:0] mem_result;
wire [31:0] final_result;

// ---------------------------------------------------------------------------
// Program counter
// ---------------------------------------------------------------------------
assign seq_pc = pc + 32'h4;
assign nextpc = br_taken ? br_target : seq_pc;

always @(posedge clk) begin
    if (reset) begin
        pc <= 32'h1bfffffc;     // trick: to make nextpc be 0x1c000000 during reset
    end
    else begin
        pc <= nextpc;
    end
end

// ---------------------------------------------------------------------------
// Instruction fetch (read-only port)
// ---------------------------------------------------------------------------
assign inst_sram_we    = 1'b0;
assign inst_sram_addr  = pc;
assign inst_sram_wdata = 32'b0;
assign inst            = inst_sram_rdata;

// ---------------------------------------------------------------------------
// Field extraction and decode
// ---------------------------------------------------------------------------
assign op_31_26 = inst[31:26];
assign op_25_22 = inst[25:22];
assign op_21_20 = inst[21:20];
assign op_19_15 = inst[19:15];

assign rd = inst[ 4: 0];
assign rj = inst[ 9: 5];
assign rk = inst[14:10];

assign i12 = inst[21:10];
assign i20 = inst[24: 5];
assign i16 = inst[25:10];
assign i26 = {inst[ 9: 0], inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26), .out(op_31_26_d));
decoder_4_16 u_dec1(.in(op_25_22), .out(op_25_22_d));
decoder_2_4  u_dec2(.in(op_21_20), .out(op_21_20_d));
decoder_5_32 u_dec3(.in(op_19_15), .out(op_19_15_d));

assign inst_add_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or      = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w  = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl    = op_31_26_d[6'h13];
assign inst_b       = op_31_26_d[6'h14];
assign inst_bl      = op_31_26_d[6'h15];
assign inst_beq     = op_31_26_d[6'h16];
assign inst_bne     = op_31_26_d[6'h17];
assign inst_lu12i_w = op_31_26_d[6'h05] & ~inst[25];

// ---------------------------------------------------------------------------
// ALU control (one bit per operation)
// ---------------------------------------------------------------------------
// Bit 0 (add) also serves address generation for ld.w/st.w and the
// link-address computation (pc + 4) for jirl/bl.
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl  | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

// ---------------------------------------------------------------------------
// Immediate generation
// ---------------------------------------------------------------------------
assign need_ui5  = inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12 = inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16 = inst_jirl | inst_beq | inst_bne;
assign need_si20 = inst_lu12i_w;
assign need_si26 = inst_b | inst_bl;
assign src2_is_4 = inst_jirl | inst_bl;

// ui5 shifts reuse the si12 path: inst[14:10] lands in imm[4:0].
assign imm = src2_is_4 ? 32'h4                :
             need_si20 ? {i20[19:0], 12'b0}   :
  /*need_ui5 || need_si12*/ {{20{i12[11]}}, i12[11:0]};

// Branch offsets are word offsets: sign-extend and shift left by 2.
assign br_offs   = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0}
                             : {{14{i16[15]}}, i16[15:0], 2'b0};
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

// ---------------------------------------------------------------------------
// Operand / destination selection
// ---------------------------------------------------------------------------
// beq/bne/st.w read rd as their second source register instead of rk.
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w
                     | inst_srli_w
                     | inst_srai_w
                     | inst_addi_w
                     | inst_ld_w
                     | inst_st_w
                     | inst_lu12i_w
                     | inst_jirl
                     | inst_bl;

assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;      // bl always links into r1
// NOTE(review): gr_we is defined by exclusion, so any encoding that is not
// decoded above also asserts it.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;
assign dest          = dst_is_r1 ? 5'd1 : rd;

// ---------------------------------------------------------------------------
// Register file
// ---------------------------------------------------------------------------
assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;

regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
);

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;

// ---------------------------------------------------------------------------
// Branch resolution
// ---------------------------------------------------------------------------
assign rj_eq_rd  = (rj_value == rkd_value);

// Gated by valid so that no redirect happens before the first real fetch.
assign br_taken  = (   inst_beq  &&  rj_eq_rd
                    || inst_bne  && !rj_eq_rd
                    || inst_jirl
                    || inst_bl
                    || inst_b
                   ) && valid;

// beq/bne/bl/b are PC-relative; jirl jumps to rj + offset.
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (pc + br_offs)
                                                 /*inst_jirl*/ : (rj_value + jirl_offs);

// ---------------------------------------------------------------------------
// ALU
// ---------------------------------------------------------------------------
assign alu_src1 = src1_is_pc  ? pc[31:0] : rj_value;
// The constant 4 used by jirl/bl already arrives through imm (src2_is_4),
// and both instructions set src2_is_imm, so no separate bl case is needed.
assign alu_src2 = src2_is_imm ? imm      : rkd_value;

alu u_alu(
    .alu_op     (alu_op    ),
    .alu_src1   (alu_src1  ),
    .alu_src2   (alu_src2  ),
    .alu_result (alu_result)
);

// ---------------------------------------------------------------------------
// Data memory access
// ---------------------------------------------------------------------------
assign data_sram_we    = mem_we && valid;
assign data_sram_addr  = alu_result;
assign data_sram_wdata = rkd_value;

// ---------------------------------------------------------------------------
// Write-back
// ---------------------------------------------------------------------------
assign mem_result   = data_sram_rdata;
assign final_result = res_from_mem ? mem_result : alu_result;

assign rf_we    = gr_we && valid;
assign rf_waddr = dest;
assign rf_wdata = final_result;

// ---------------------------------------------------------------------------
// Debug trace
// ---------------------------------------------------------------------------
// Driven straight from the write-back signals. A continuous assignment must
// not feed a net back into itself to "hold" a value (that is a combinational
// loop), so pc/wnum/wdata are only meaningful while debug_wb_rf_we is
// non-zero.
assign debug_wb_pc       = pc;
assign debug_wb_rf_we    = {4{rf_we}};
assign debug_wb_rf_wnum  = dest;
assign debug_wb_rf_wdata = final_result;

endmodule

        alu

// alu: purely combinational 32-bit ALU.
//
// alu_op carries one select bit per operation:
//   [0] add   [1] sub   [2] slt   [3] sltu  [4] and   [5] nor
//   [6] or    [7] xor   [8] sll   [9] srl   [10] sra  [11] lui
// The result is an AND-OR mux over the per-operation results, so exactly one
// bit is expected to be set: with no bit set the output is zero, and with
// several bits set the selected results are ORed together.
//
// alu_src1 is the left operand (and the value being shifted); alu_src2 is the
// right operand, whose low five bits give the shift amount.
module alu(
  input  wire [11:0] alu_op,
  input  wire [31:0] alu_src1,
  input  wire [31:0] alu_src2,
  output wire [31:0] alu_result
);

  // ---- operation selects --------------------------------------------------
  wire sel_add  = alu_op[ 0];
  wire sel_sub  = alu_op[ 1];
  wire sel_slt  = alu_op[ 2];   // signed set-less-than
  wire sel_sltu = alu_op[ 3];   // unsigned set-less-than
  wire sel_and  = alu_op[ 4];
  wire sel_nor  = alu_op[ 5];
  wire sel_or   = alu_op[ 6];
  wire sel_xor  = alu_op[ 7];
  wire sel_sll  = alu_op[ 8];   // logical left shift
  wire sel_srl  = alu_op[ 9];   // logical right shift
  wire sel_sra  = alu_op[10];   // arithmetic right shift
  wire sel_lui  = alu_op[11];   // pass alu_src2 through

  // ---- shared adder -------------------------------------------------------
  // sub/slt/sltu all compute src1 - src2 as src1 + ~src2 + 1.
  wire        subtract = sel_sub | sel_slt | sel_sltu;
  wire [31:0] addend   = subtract ? ~alu_src2 : alu_src2;
  wire [31:0] sum;
  wire        carry;
  assign {carry, sum} = alu_src1 + addend + subtract;

  // ---- comparisons --------------------------------------------------------
  wire a_neg = alu_src1[31];
  wire b_neg = alu_src2[31];
  // src1 < src2 (signed): src1 negative and src2 non-negative, or the signs
  // agree and the difference is negative.
  wire lt_signed   = (a_neg & ~b_neg) | (~(a_neg ^ b_neg) & sum[31]);
  // src1 < src2 (unsigned): the subtraction produced no carry-out.
  wire lt_unsigned = ~carry;

  wire [31:0] slt_value  = {31'b0, lt_signed};
  wire [31:0] sltu_value = {31'b0, lt_unsigned};

  // ---- bitwise logic ------------------------------------------------------
  wire [31:0] and_value = alu_src1 & alu_src2;
  wire [31:0] or_value  = alu_src1 | alu_src2;
  wire [31:0] nor_value = ~or_value;
  wire [31:0] xor_value = alu_src1 ^ alu_src2;
  wire [31:0] lui_value = alu_src2;

  // ---- shifts -------------------------------------------------------------
  wire [ 4:0] shamt     = alu_src2[4:0];
  wire [31:0] sll_value = alu_src1 << shamt;
  // Right shifts share one 64-bit shifter: the upper half is filled with the
  // sign bit for sra and with zeros for srl.
  wire [31:0] sr_fill   = {32{sel_sra & alu_src1[31]}};
  wire [63:0] sr_wide   = {sr_fill, alu_src1[31:0]} >> shamt;
  wire [31:0] sr_value  = sr_wide[31:0];

  // ---- result mux ---------------------------------------------------------
  assign alu_result =
        ({32{sel_add | sel_sub}} & sum)
      | ({32{sel_slt          }} & slt_value)
      | ({32{sel_sltu         }} & sltu_value)
      | ({32{sel_and          }} & and_value)
      | ({32{sel_nor          }} & nor_value)
      | ({32{sel_or           }} & or_value)
      | ({32{sel_xor          }} & xor_value)
      | ({32{sel_lui          }} & lui_value)
      | ({32{sel_sll          }} & sll_value)
      | ({32{sel_srl | sel_sra}} & sr_value);

endmodule

相关内容

热门资讯

裸辞做“一人公司”,我后悔了 去年这个时候,一位以色列程序员正在东南亚旅行。他顺手把一个在脑子里转了很久的想法做成了产品,一个让任...
南京建成国内首个Pre-6G试... 4月21日,2026全球6G技术与产业生态大会在南京开幕。全息互动技术展台前,一名远在北京的工作人员...
超梵求职受邀参加“2025抖音... 超梵求职受邀参加“2025抖音巨量引擎成人教育行业生态大会”,探讨分享优质内容传播,服务万千学员。 ...
摩托罗拉Razr 2026(R... IT之家 4 月 22 日消息,摩托罗拉宣布新一代 Razr 折叠手机将于 4 月 29 日在美国发...
库克卸任,特纳斯领航:苹果新纪... 苹果首席执行官蒂姆·库克将卸任,硬件工程主管约翰·特纳斯将接任,苹果公司今天宣布此事。 库克将在夏季...