本文是为了记录我为参加龙芯杯而学习 LoongArch 的过程,所有实验均来自 LoongArch CPU 设计实验。
debug到一半才想起来写个博客,就不写过程了,下个lab再写过程。
这次debug卡我时间最长的就是通用寄存器的写使能信号:当时一直没找到问题所在,甚至把golden_trace重新生成了一遍又一遍,对照test做了各种检查,卡了很长时间。我一度以为是之前哪一步搞错了,或者是环境之类的问题,但一步步排查下来并没有;最后是“机器永远是对的”这句话让我终于查到了错。
其他的bug就还好,如果认真跟着实验手册走,按逻辑检查并且查阅龙芯架构参考手册,就能马上找出来——手册上的每一句话都很重要。
下面是两个模块的代码,运行已通过仿真验证。

mycpu_top
// mycpu_top: CPU core with no pipeline registers between fetch and
// write-back. Each cycle the instruction at `pc` is decoded, executed in the
// ALU, optionally accesses data memory, and writes the register file.
//
// Decoded instructions (20): add.w sub.w slt sltu nor and or xor
//                            slli.w srli.w srai.w addi.w ld.w st.w
//                            jirl b bl beq bne lu12i.w
//
// Ports:
//   clk, resetn        - clock and active-low reset (registered once before use)
//   inst_sram_*        - instruction memory; never written (we = 0, wdata = 0)
//   data_sram_*        - data memory; address comes from the ALU result
//   debug_wb_*         - write-back trace; only meaningful while
//                        debug_wb_rf_we is non-zero
module mycpu_top(
    input  wire        clk,
    input  wire        resetn,
    // inst sram interface
    output wire        inst_sram_we,
    output wire [31:0] inst_sram_addr,
    output wire [31:0] inst_sram_wdata,
    input  wire [31:0] inst_sram_rdata,
    // data sram interface
    output wire        data_sram_we,
    output wire [31:0] data_sram_addr,
    output wire [31:0] data_sram_wdata,
    input  wire [31:0] data_sram_rdata,
    // trace debug interface
    output wire [31:0] debug_wb_pc,
    output wire [ 3:0] debug_wb_rf_we,
    output wire [ 4:0] debug_wb_rf_wnum,
    output wire [31:0] debug_wb_rf_wdata
);

    // ------------------------------------------------------------------
    // Reset synchronisation and "valid" flag
    // ------------------------------------------------------------------
    reg reset;
    always @(posedge clk) reset <= ~resetn;

    // valid is low while reset is asserted; it gates every architectural
    // side effect (branches, memory writes, register writes).
    reg valid;
    always @(posedge clk) begin
        if (reset) begin
            valid <= 1'b0;
        end
        else begin
            valid <= 1'b1;
        end
    end

    // ------------------------------------------------------------------
    // Signal declarations
    // ------------------------------------------------------------------
    wire [31:0] seq_pc;
    wire [31:0] nextpc;
    wire        br_taken;
    wire [31:0] br_target;
    wire [31:0] inst;
    reg  [31:0] pc;

    wire [11:0] alu_op;
    wire        src1_is_pc;
    wire        src2_is_imm;
    wire        res_from_mem;
    wire        dst_is_r1;
    wire        gr_we;
    wire        mem_we;
    wire        src_reg_is_rd;
    wire [ 4:0] dest;
    wire [31:0] rj_value;
    wire [31:0] rkd_value;
    wire        rj_eq_rd;      // explicitly declared (was an implicit net)
    wire [31:0] imm;
    wire [31:0] br_offs;
    wire [31:0] jirl_offs;

    wire [ 5:0] op_31_26;
    wire [ 3:0] op_25_22;
    wire [ 1:0] op_21_20;
    wire [ 4:0] op_19_15;
    wire [ 4:0] rd;
    wire [ 4:0] rj;
    wire [ 4:0] rk;
    wire [11:0] i12;
    wire [19:0] i20;
    wire [15:0] i16;
    wire [25:0] i26;

    wire [63:0] op_31_26_d;
    wire [15:0] op_25_22_d;
    wire [ 3:0] op_21_20_d;
    wire [31:0] op_19_15_d;

    wire        inst_add_w;
    wire        inst_sub_w;
    wire        inst_slt;
    wire        inst_sltu;
    wire        inst_nor;
    wire        inst_and;
    wire        inst_or;
    wire        inst_xor;
    wire        inst_slli_w;
    wire        inst_srli_w;
    wire        inst_srai_w;
    wire        inst_addi_w;
    wire        inst_ld_w;
    wire        inst_st_w;
    wire        inst_jirl;
    wire        inst_b;
    wire        inst_bl;
    wire        inst_beq;
    wire        inst_bne;
    wire        inst_lu12i_w;

    wire        need_si20;
    wire        need_si26;
    wire        src2_is_4;

    wire [ 4:0] rf_raddr1;
    wire [31:0] rf_rdata1;
    wire [ 4:0] rf_raddr2;
    wire [31:0] rf_rdata2;
    wire        rf_we;
    wire [ 4:0] rf_waddr;
    wire [31:0] rf_wdata;

    wire [31:0] alu_src1;
    wire [31:0] alu_src2;
    wire [31:0] alu_result;

    wire [31:0] mem_result;
    wire [31:0] final_result;

    // ------------------------------------------------------------------
    // Program counter
    // ------------------------------------------------------------------
    assign seq_pc = pc + 32'h4;
    assign nextpc = br_taken ? br_target : seq_pc;

    always @(posedge clk) begin
        if (reset) begin
            pc <= 32'h1bfffffc;  // trick: to make nextpc be 0x1c000000 during reset
        end
        else begin
            pc <= nextpc;
        end
    end

    // ------------------------------------------------------------------
    // Instruction fetch
    // ------------------------------------------------------------------
    assign inst_sram_we    = 1'b0;
    assign inst_sram_addr  = pc;
    assign inst_sram_wdata = 32'b0;
    assign inst            = inst_sram_rdata;

    // ------------------------------------------------------------------
    // Field extraction
    // ------------------------------------------------------------------
    assign op_31_26 = inst[31:26];
    assign op_25_22 = inst[25:22];
    assign op_21_20 = inst[21:20];
    assign op_19_15 = inst[19:15];

    assign rd = inst[ 4: 0];
    assign rj = inst[ 9: 5];
    assign rk = inst[14:10];

    assign i12 = inst[21:10];
    assign i20 = inst[24: 5];
    assign i16 = inst[25:10];
    assign i26 = {inst[ 9: 0], inst[25:10]};

    // One-hot decode of each opcode field.
    decoder_6_64 u_dec0(.in(op_31_26), .out(op_31_26_d));
    decoder_4_16 u_dec1(.in(op_25_22), .out(op_25_22_d));
    decoder_2_4  u_dec2(.in(op_21_20), .out(op_21_20_d));
    decoder_5_32 u_dec3(.in(op_19_15), .out(op_19_15_d));

    // ------------------------------------------------------------------
    // Instruction identification
    // ------------------------------------------------------------------
    assign inst_add_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
    assign inst_sub_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
    assign inst_slt     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
    assign inst_sltu    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
    assign inst_nor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
    assign inst_and     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
    assign inst_or      = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
    assign inst_xor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
    assign inst_slli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
    assign inst_srli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
    assign inst_srai_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
    assign inst_addi_w  = op_31_26_d[6'h00] & op_25_22_d[4'ha];
    assign inst_ld_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
    assign inst_st_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
    assign inst_jirl    = op_31_26_d[6'h13];
    assign inst_b       = op_31_26_d[6'h14];
    assign inst_bl      = op_31_26_d[6'h15];
    assign inst_beq     = op_31_26_d[6'h16];
    assign inst_bne     = op_31_26_d[6'h17];
    assign inst_lu12i_w = op_31_26_d[6'h05] & ~inst[25];

    // ------------------------------------------------------------------
    // ALU operation select (one-hot)
    // ------------------------------------------------------------------
    // jirl/bl use the adder to form the link value pc + 4.
    assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                      | inst_jirl  | inst_bl;
    assign alu_op[ 1] = inst_sub_w;
    assign alu_op[ 2] = inst_slt;
    assign alu_op[ 3] = inst_sltu;
    assign alu_op[ 4] = inst_and;
    assign alu_op[ 5] = inst_nor;
    assign alu_op[ 6] = inst_or;
    assign alu_op[ 7] = inst_xor;
    assign alu_op[ 8] = inst_slli_w;
    assign alu_op[ 9] = inst_srli_w;
    assign alu_op[10] = inst_srai_w;
    assign alu_op[11] = inst_lu12i_w;

    // ------------------------------------------------------------------
    // Immediate generation
    // ------------------------------------------------------------------
    assign need_si20 = inst_lu12i_w;
    assign need_si26 = inst_b | inst_bl;
    assign src2_is_4 = inst_jirl | inst_bl;

    // Shift instructions also take the sign-extended i12 path; the ALU only
    // consumes bits [4:0] of the operand as the shift amount.
    assign imm = src2_is_4 ? 32'h4                      :
                 need_si20 ? {i20[19:0], 12'b0}         :
    /*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;

    assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                                 {{14{i16[15]}}, i16[15:0], 2'b0} ;

    assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

    // ------------------------------------------------------------------
    // Control signals
    // ------------------------------------------------------------------
    // beq/bne/st.w read rd as their second source register instead of rk.
    assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

    assign src1_is_pc    = inst_jirl | inst_bl;

    assign src2_is_imm   = inst_slli_w  |
                           inst_srli_w  |
                           inst_srai_w  |
                           inst_addi_w  |
                           inst_ld_w    |
                           inst_st_w    |
                           inst_lu12i_w |
                           inst_jirl    |
                           inst_bl      ;

    assign res_from_mem  = inst_ld_w;
    assign dst_is_r1     = inst_bl;
    // Stores and non-linking branches are the only instructions that do not
    // write a general-purpose register.
    assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
    assign mem_we        = inst_st_w;
    assign dest          = dst_is_r1 ? 5'd1 : rd;

    // ------------------------------------------------------------------
    // Register file
    // ------------------------------------------------------------------
    assign rf_raddr1 = rj;
    assign rf_raddr2 = src_reg_is_rd ? rd : rk;

    regfile u_regfile(
        .clk    (clk      ),
        .raddr1 (rf_raddr1),
        .rdata1 (rf_rdata1),
        .raddr2 (rf_raddr2),
        .rdata2 (rf_rdata2),
        .we     (rf_we    ),
        .waddr  (rf_waddr ),
        .wdata  (rf_wdata )
    );

    assign rj_value  = rf_rdata1;
    assign rkd_value = rf_rdata2;

    // ------------------------------------------------------------------
    // Branch resolution
    // ------------------------------------------------------------------
    assign rj_eq_rd = (rj_value == rkd_value);

    assign br_taken = (   inst_beq  &&  rj_eq_rd
                       || inst_bne  && !rj_eq_rd
                       || inst_jirl
                       || inst_bl
                       || inst_b
                      ) && valid;

    assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

    // ------------------------------------------------------------------
    // Execute
    // ------------------------------------------------------------------
    assign alu_src1 = src1_is_pc  ? pc[31:0] : rj_value;
    // src2_is_imm already covers inst_bl (imm == 4 in that case), so no
    // separate bl special case is needed on the register path.
    assign alu_src2 = src2_is_imm ? imm : rkd_value;

    alu u_alu(
        .alu_op     (alu_op    ),
        .alu_src1   (alu_src1  ),
        .alu_src2   (alu_src2  ),
        .alu_result (alu_result)
    );

    // ------------------------------------------------------------------
    // Memory access
    // ------------------------------------------------------------------
    assign data_sram_we    = mem_we && valid;
    assign data_sram_addr  = alu_result;
    assign data_sram_wdata = rkd_value;

    assign mem_result   = data_sram_rdata;
    assign final_result = res_from_mem ? mem_result : alu_result;

    // ------------------------------------------------------------------
    // Write-back
    // ------------------------------------------------------------------
    assign rf_we    = gr_we && valid;
    assign rf_waddr = dest;
    assign rf_wdata = final_result;

    // ------------------------------------------------------------------
    // Debug trace
    // ------------------------------------------------------------------
    // Plain pass-through. The previous form `assign x = rf_we ? v : x;` fed
    // each output back into itself, creating combinational loops (inferred
    // latches). Consumers are expected to qualify these values with
    // debug_wb_rf_we -- NOTE(review): confirm the trace checker only samples
    // pc/wnum/wdata while debug_wb_rf_we is non-zero.
    assign debug_wb_pc       = pc;
    assign debug_wb_rf_we    = {4{rf_we}};
    assign debug_wb_rf_wnum  = dest;
    assign debug_wb_rf_wdata = final_result;

endmodule

alu
// alu: purely combinational 32-bit ALU driven by a 12-bit one-hot opcode.
//
//   alu_op bit : operation
//        0     : add                (src1 + src2)
//        1     : sub                (src1 - src2)
//        2     : slt                (signed   src1 < src2 ? 1 : 0)
//        3     : sltu               (unsigned src1 < src2 ? 1 : 0)
//        4     : and
//        5     : nor
//        6     : or
//        7     : xor
//        8     : sll                (src1 << src2[4:0])
//        9     : srl                (src1 >> src2[4:0], zero fill)
//       10     : sra                (src1 >> src2[4:0], sign fill)
//       11     : lui                (result = src2)
//
// The result is the bitwise OR of every selected operation's output, so an
// all-zero alu_op yields 0.
module alu(
    input  wire [11:0] alu_op,
    input  wire [31:0] alu_src1,
    input  wire [31:0] alu_src2,
    output wire [31:0] alu_result
);

    // ---- one-hot select lines ---------------------------------------
    wire sel_add  = alu_op[ 0];
    wire sel_sub  = alu_op[ 1];
    wire sel_slt  = alu_op[ 2];
    wire sel_sltu = alu_op[ 3];
    wire sel_and  = alu_op[ 4];
    wire sel_nor  = alu_op[ 5];
    wire sel_or   = alu_op[ 6];
    wire sel_xor  = alu_op[ 7];
    wire sel_sll  = alu_op[ 8];
    wire sel_srl  = alu_op[ 9];
    wire sel_sra  = alu_op[10];
    wire sel_lui  = alu_op[11];

    // ---- shared adder -----------------------------------------------
    // Subtraction and both comparisons compute src1 + ~src2 + 1.
    wire        subtract = sel_sub | sel_slt | sel_sltu;
    wire [31:0] addend   = alu_src2 ^ {32{subtract}};   // invert when subtracting
    wire [32:0] sum_wide = alu_src1 + addend + subtract; // bit 32 is the carry out
    wire [31:0] sum      = sum_wide[31:0];
    wire        carry    = sum_wide[32];

    // ---- comparisons ------------------------------------------------
    // Signed: if the operand signs differ, src1 is smaller exactly when it is
    // the negative one; otherwise the sign of the difference decides.
    wire        signs_differ = alu_src1[31] ^ alu_src2[31];
    wire        less_signed  = signs_differ ? alu_src1[31] : sum[31];
    // Unsigned: no carry out of src1 + ~src2 + 1 means a borrow occurred.
    wire        less_unsigned = ~carry;

    wire [31:0] slt_out  = {31'b0, less_signed};
    wire [31:0] sltu_out = {31'b0, less_unsigned};

    // ---- bitwise logic ----------------------------------------------
    wire [31:0] and_out = alu_src1 & alu_src2;
    wire [31:0] or_out  = alu_src1 | alu_src2;
    wire [31:0] nor_out = ~or_out;
    wire [31:0] xor_out = alu_src1 ^ alu_src2;

    // ---- shifts -----------------------------------------------------
    wire [ 4:0] shamt    = alu_src2[4:0];
    wire [31:0] sll_out  = alu_src1 << shamt;
    // Right shifts share one 64-bit shifter: the upper half is filled with
    // the sign bit for sra and with zeros otherwise.
    wire        fill_bit = sel_sra & alu_src1[31];
    wire [63:0] sr_wide  = {{32{fill_bit}}, alu_src1} >> shamt;
    wire [31:0] sr_out   = sr_wide[31:0];

    // ---- pass-through for lu12i.w -----------------------------------
    wire [31:0] lui_out = alu_src2;

    // ---- AND-OR result mux ------------------------------------------
    assign alu_result =
          ({32{sel_add | sel_sub}} & sum     )
        | ({32{sel_slt          }} & slt_out )
        | ({32{sel_sltu         }} & sltu_out)
        | ({32{sel_and          }} & and_out )
        | ({32{sel_nor          }} & nor_out )
        | ({32{sel_or           }} & or_out  )
        | ({32{sel_xor          }} & xor_out )
        | ({32{sel_lui          }} & lui_out )
        | ({32{sel_sll          }} & sll_out )
        | ({32{sel_srl | sel_sra}} & sr_out  );

endmodule