Bootstrap

[CA] 理解 core.cpp

#include "core.h"

#include "common/logging.h"
#include "execute/alu.h"
#include "utils.h"

#include <cinttypes>

LOG_CATEGORY("machine.core");

using namespace machine;

/**
 * Builds the mask of instruction flags that the decoder has to verify for the
 * given machine configuration.
 *
 * The mask always contains IMF_SUPPORTED. IMF_RV64 is added when the core is
 * 32-bit, IMF_AMO when the ISA word lacks 'A' and IMF_MUL when it lacks 'M'.
 * The result is stored in `check_inst_flags_mask` and compared against
 * `check_inst_flags_val` in Core::decode().
 *
 * @param xlen      register width of the simulated core
 * @param isa_word  set of enabled ISA extension letters
 * @return flags that must be checked on every decoded instruction
 */
static InstructionFlags unsupported_inst_flags_to_check(Xlen xlen,
                            ConfigIsaWord isa_word) {
    unsigned mask = IMF_SUPPORTED;

    // 64-bit only instructions have to be rejected on a 32-bit core.
    if (xlen == Xlen::_32) { mask |= IMF_RV64; }

    // Atomic instructions are checked when the 'A' extension is disabled.
    if (!isa_word.contains('A')) { mask |= IMF_AMO; }

    // Multiply/divide instructions are checked when 'M' is disabled.
    if (!isa_word.contains('M')) { mask |= IMF_MUL; }

    return InstructionFlags(mask);
}

/**
 * Constructs the common part of a simulated core.
 *
 * The interstage register references (`pc_if` ... `mem_wb`) are bound to the
 * `final` members of the corresponding pipeline stages inside `state`.
 *
 * @param regs           register file used by the core
 * @param predictor      branch predictor queried in fetch and updated in memory
 * @param mem_program    memory frontend used for instruction fetch
 * @param mem_data       memory frontend used for data accesses
 * @param control_state  CSR state; other methods check it against nullptr
 * @param xlen           register width of the core
 * @param isa_word       enabled ISA extensions, used to build the decode check mask
 */
Core::Core(
    Registers *regs,
    BranchPredictor *predictor,
    FrontendMemory *mem_program,
    FrontendMemory *mem_data,
    CSR::ControlState *control_state,
    Xlen xlen,
    ConfigIsaWord isa_word)
    : pc_if(state.pipeline.pc.final)
    , if_id(state.pipeline.fetch.final)
    , id_ex(state.pipeline.decode.final)
    , ex_mem(state.pipeline.execute.final)
    , mem_wb(state.pipeline.memory.final)
    , xlen(xlen)
    , check_inst_flags_val(IMF_SUPPORTED)
    , check_inst_flags_mask(unsupported_inst_flags_to_check(xlen, isa_word))
    , regs(regs)
    , control_state(control_state)
    , predictor(predictor)
    , mem_data(mem_data)
    , mem_program(mem_program)
    , ex_handlers()
    , ex_default_handler(new StopExceptionHandler()) {
    // By default every exception stops the simulation and is stepped over ...
    stop_on_exception.fill(true);
    step_over_exception.fill(true);
    // ... except interrupts, which are not stepped over.
    step_over_exception[EXCAUSE_INT] = false;
}

/**
 * Advances the simulation by one cycle.
 *
 * Emits `step_started`, increments the cycle counter, delegates the actual work
 * to do_step() and finally emits `step_done` with the updated core state.
 *
 * @param skip_break  forwarded to do_step(); when true, fetch() ignores
 *                    hardware breakpoints
 */
void Core::step(bool skip_break) {
    emit step_started();
    state.cycle_count++;
    do_step(skip_break);
    emit step_done(state);
}

/**
 * Resets the cycle and stall counters and lets the concrete core reset its own
 * state through do_reset().
 */
void Core::reset() {
    state.cycle_count = 0;
    state.stall_count = 0;
    do_reset();
}

/** Returns the number of cycles counted so far (incremented once per step()). */
unsigned Core::get_cycle_count() const {
    return state.cycle_count;
}

/** Returns the number of stalls counted so far (incremented in CorePipelined::handle_stall()). */
unsigned Core::get_stall_count() const {
    return state.stall_count;
}

/** Returns the register file pointer passed to the constructor. */
Registers *Core::get_regs() const {
    return regs;
}

/** Returns the CSR control state pointer passed to the constructor (may be nullptr). */
CSR::ControlState *Core::get_control_state() const {
    return control_state;
}

/** Returns the memory frontend used for data accesses. */
FrontendMemory *Core::get_mem_data() const {
    return mem_data;
}

/** Returns the memory frontend used for instruction fetch. */
FrontendMemory *Core::get_mem_program() const {
    return mem_program;
}

/** Returns the branch predictor pointer passed to the constructor. */
BranchPredictor *Core::get_predictor() const {
    return predictor;
}

/** Returns a read-only reference to the core state (pipeline contents and counters). */
const CoreState &Core::get_state() const {
    return state;
}

/**
 * Installs a hardware breakpoint at `address`.
 *
 * A breakpoint already registered for the same address is released first.
 * Inserting over an existing key would otherwise replace the stored pointer and
 * leak the previous `hwBreak` object.
 *
 * @param address  instruction address on which fetch() raises EXCAUSE_HWBREAK
 */
void Core::insert_hwbreak(Address address) {
    // take() yields nullptr when nothing is registered; deleting nullptr is a no-op.
    delete hw_breaks.take(address);
    hw_breaks.insert(address, new hwBreak(address));
}

/**
 * Removes the hardware breakpoint registered for `address` (if any) and frees it.
 *
 * @param address  address of the breakpoint to remove
 */
void Core::remove_hwbreak(Address address) {
    // The entry is detached from the container before the object is destroyed.
    delete hw_breaks.take(address);
}

/**
 * Tells whether a hardware breakpoint object is stored for `address`.
 *
 * @param address  instruction address to query
 * @return true when the lookup yields a non-null breakpoint
 */
bool Core::is_hwbreak(Address address) const {
    return hw_breaks.value(address) != nullptr;
}

/**
 * Configures whether handle_exception() emits `stop_on_exception_reached` for
 * the given cause.
 */
void Core::set_stop_on_exception(enum ExceptionCause excause, bool value) {
    stop_on_exception[excause] = value;
}

/** Returns the stop-on-exception setting stored for the given cause. */
bool Core::get_stop_on_exception(enum ExceptionCause excause) const {
    return stop_on_exception[excause];
}

/**
 * Configures whether the given cause is "stepped over". When it is,
 * handle_exception() does not redirect the PC to the trap vector.
 */
void Core::set_step_over_exception(enum ExceptionCause excause, bool value) {
    step_over_exception[excause] = value;
}

/** Returns the step-over setting stored for the given cause. */
bool Core::get_step_over_exception(enum ExceptionCause excause) const {
    return step_over_exception[excause];
}
/** Returns the register width the core was constructed with. */
Xlen Core::get_xlen() const {
    return xlen;
}

/**
 * Registers the handler invoked by handle_exception() for a given cause.
 *
 * Passing EXCAUSE_NONE replaces the default handler, which is used for every
 * cause without a dedicated entry. For any other cause the previously
 * registered handler is destroyed and replaced.
 *
 * @param excause    cause to handle, or EXCAUSE_NONE for the default handler
 * @param exhandler  handler object; the core deletes it when it is replaced
 */
void Core::register_exception_handler(ExceptionCause excause, ExceptionHandler *exhandler) {
    if (excause == EXCAUSE_NONE) {
        ex_default_handler.reset(exhandler);
    } else {
        ExceptionHandler *old = ex_handlers.take(excause);
        // Re-registering the very same object must not destroy it, otherwise a
        // dangling pointer would be stored back into the table.
        if (old != exhandler) { delete old; }
        ex_handlers.insert(excause, exhandler);
    }
}

/**
 * Processes an exception raised by an instruction.
 *
 * @param excause         cause of the exception
 * @param inst            instruction that raised it
 * @param inst_addr       address of that instruction
 * @param next_addr       address passed on to the handler as the next PC
 * @param jump_branch_pc  address passed on to the handler as the jump/branch PC
 * @param mem_ref_addr    memory address associated with the instruction
 * @return result of the invoked handler, false when no handler is available
 * @throws SIMULATOR_EXCEPTION(UnsupportedInstruction) for EXCAUSE_INSN_ILLEGAL
 */
bool Core::handle_exception(
    ExceptionCause excause,
    const Instruction& inst,
    Address inst_addr,
    Address next_addr,
    Address jump_branch_pc,
    Address mem_ref_addr) {
    // Illegal instructions are reported to the simulator instead of being handled.
    if (excause == EXCAUSE_INSN_ILLEGAL) {
        throw SIMULATOR_EXCEPTION(
            UnsupportedInstruction, "Instruction with following encoding is not supported",
            QString::number(inst.data(), 16));
    }

    // A hardware breakpoint sets the PC back to the instruction that triggered it.
    if (excause == EXCAUSE_HWBREAK) { regs->write_pc(inst_addr); }

    if (control_state != nullptr) {
        // Record the faulting address and cause in the CSRs.
        control_state->write_internal(CSR::Id::MEPC, inst_addr.get_raw());
        control_state->update_exception_cause(excause);
        // Enter the trap vector only when one is configured (MTVEC != 0) and the
        // cause is not configured to be stepped over.
        if (control_state->read_internal(CSR::Id::MTVEC) != 0
            && !get_step_over_exception(excause)) {
            control_state->exception_initiate(CSR::PrivilegeLevel::MACHINE, CSR::PrivilegeLevel::MACHINE);
            regs->write_pc(control_state->exception_pc_address());
        }
    }

    bool ret = false;
    // Use the handler registered for this cause, otherwise the default handler.
    ExceptionHandler *exhandler = ex_handlers.value(excause, ex_default_handler.data());
    if (exhandler != nullptr) {
        ret = exhandler->handle_exception(
            this, regs, excause, inst_addr, next_addr, jump_branch_pc, mem_ref_addr);
    }

    // Notify listeners when this cause is configured to stop the simulation.
    if (get_stop_on_exception(excause)) { emit stop_on_exception_reached(); }

    return ret;
}

/**
 * Computes the value a 32-bit atomic memory operation stores back to memory.
 *
 * @param memctl  one of the AC_AMO*32 access-control codes
 * @param a       value loaded from memory
 * @param b       register operand
 * @return the new memory value; 0 for any code not handled here
 */
static int32_t amo32_operations(enum AccessControl memctl, int32_t a, int32_t b) {
    switch(memctl) {
    case AC_AMOSWAP32: return b;
    // The addition is done on unsigned operands so that it wraps around instead
    // of invoking undefined behaviour on signed overflow.
    case AC_AMOADD32:  return static_cast<int32_t>(static_cast<uint32_t>(a) + static_cast<uint32_t>(b));
    case AC_AMOXOR32:  return a ^ b;
    case AC_AMOAND32:  return a & b;
    case AC_AMOOR32:   return a | b;
    case AC_AMOMIN32:  return a < b? a: b;
    case AC_AMOMAX32:  return a < b? b: a;
    // Unsigned variants compare the raw bit patterns.
    case AC_AMOMINU32: return (uint32_t)a < (uint32_t)b? a: b;
    case AC_AMOMAXU32: return (uint32_t)a < (uint32_t)b? b: a;
    default: break;
    }
    return 0;
}

/**
 * Computes the value a 64-bit atomic memory operation stores back to memory.
 *
 * @param memctl  one of the AC_AMO*64 access-control codes
 * @param a       value loaded from memory
 * @param b       register operand
 * @return the new memory value; 0 for any code not handled here
 */
static int64_t amo64_operations(enum AccessControl memctl, int64_t a, int64_t b) {
    switch(memctl) {
    case AC_AMOSWAP64: return b;
    // The addition is done on unsigned operands so that it wraps around instead
    // of invoking undefined behaviour on signed overflow.
    case AC_AMOADD64:  return static_cast<int64_t>(static_cast<uint64_t>(a) + static_cast<uint64_t>(b));
    case AC_AMOXOR64:  return a ^ b;
    case AC_AMOAND64:  return a & b;
    case AC_AMOOR64:   return a | b;
    case AC_AMOMIN64:  return a < b? a: b;
    case AC_AMOMAX64:  return a < b? b: a;
    // Unsigned variants compare the raw bit patterns.
    case AC_AMOMINU64: return (uint64_t)a < (uint64_t)b? a: b;
    case AC_AMOMAXU64: return (uint64_t)a < (uint64_t)b? b: a;
    default: break;
    }
    return 0;
}

/**
 * Performs memory accesses that need more than a plain load or store: the
 * cache operation, load-reserved / store-conditional and atomic
 * read-modify-write operations.
 *
 * @param memctl       access-control code selecting the operation
 * @param mode         unused
 * @param memread      whether the instruction reads memory
 * @param memwrite     whether the instruction writes memory
 * @param towrite_val  in/out: receives the value destined for the register file
 *                     (loaded value, or the store-conditional status 0/1)
 * @param rt_value     register operand (store data / AMO operand)
 * @param mem_addr     address of the access
 * @return always EXCAUSE_NONE
 */
enum ExceptionCause Core::memory_special(
    enum AccessControl memctl,
    int mode,
    bool memread,
    bool memwrite,
    RegisterValue &towrite_val,
    RegisterValue rt_value,
    Address mem_addr) {
    Q_UNUSED(mode)

    switch (memctl) {
    case AC_CACHE_OP:
        mem_data->sync();
        mem_program->sync();
        predictor->flush();
        break;
    case AC_LR32:
        if (!memread) { break; }
        // Remember the reserved word and load it.
        state.LoadReservedRange = AddressRange(mem_addr, mem_addr + 3);
        towrite_val = (int32_t)(mem_data->read_u32(mem_addr));
        break;
    case AC_SC32:
        if (!memwrite) { break; }
        // The store happens only when the word lies within the reservation;
        // the destination register receives 0 on success and 1 on failure.
        if (state.LoadReservedRange.contains(AddressRange(mem_addr, mem_addr + 3))) {
            mem_data->write_u32(mem_addr, rt_value.as_u32());
            towrite_val = 0;
        } else {
            towrite_val = 1;
        }
        // A store-conditional consumes the reservation whether or not it succeeded.
        state.LoadReservedRange.reset();
        break;
    case AC_LR64:
        if (!memread) { break; }
        state.LoadReservedRange = AddressRange(mem_addr, mem_addr + 7);
        towrite_val = mem_data->read_u64(mem_addr);
        break;
    case AC_SC64:
        if (!memwrite) { break; }
        if (state.LoadReservedRange.contains(AddressRange(mem_addr, mem_addr + 7))) {
            mem_data->write_u64(mem_addr, rt_value.as_u64());
            towrite_val = 0;
        } else {
            towrite_val = 1;
        }
        // Same as for AC_SC32: without this a second SC.D to the same address
        // would succeed again without a new load-reserved.
        state.LoadReservedRange.reset();
        break;
    case AC_FISRT_AMO_MODIFY32 ... AC_LAST_AMO_MODIFY32:
    {
        if (!memread || !memwrite) { break; }
        int32_t fetched_value;
        fetched_value = (int32_t)(mem_data->read_u32(mem_addr));
        // towrite_val temporarily carries the new memory value ...
        towrite_val = amo32_operations(memctl, fetched_value, rt_value.as_u32());
        mem_data->write_u32(mem_addr, towrite_val.as_u32());
        // ... and finally the original memory value, which goes to the register.
        towrite_val = fetched_value;
        break;
    }
    case AC_FISRT_AMO_MODIFY64 ... AC_LAST_AMO_MODIFY64:
    {
        if (!memread || !memwrite) { break; }
        int64_t fetched_value;
        fetched_value = (int64_t)(mem_data->read_u64(mem_addr));
        towrite_val = (uint64_t)amo64_operations(memctl, fetched_value, rt_value.as_u64());
        mem_data->write_u64(mem_addr, towrite_val.as_u64());
        towrite_val = fetched_value;
        break;
    }
    default: break;
    }

    return EXCAUSE_NONE;
}

/**
 * Fetch stage: reads the instruction at the current PC.
 *
 * @param pc          PC interstage register; when `stop_if` is set nothing is
 *                    fetched and an empty state is returned
 * @param skip_break  when true, hardware breakpoints are ignored
 * @return the fetched instruction together with its address, the sequential
 *         next address, the predicted next address and a possible exception
 *         cause (hardware breakpoint or interrupt)
 */
FetchState Core::fetch(PCInterstage pc, bool skip_break) {
    if (pc.stop_if) { return {}; }

    const Address fetch_addr = Address(regs->read_pc());
    const Instruction fetched(mem_program->read_u32(fetch_addr));

    // A hardware breakpoint takes precedence over a pending interrupt.
    ExceptionCause cause = EXCAUSE_NONE;
    if (!skip_break && hw_breaks.contains(fetch_addr)) { cause = EXCAUSE_HWBREAK; }

    if (control_state != nullptr) {
        control_state->increment_internal(CSR::Id::MCYCLE, 1);
        // The interrupt request is only queried when nothing was raised so far.
        if (cause == EXCAUSE_NONE && control_state->core_interrupt_request()) {
            cause = EXCAUSE_INT;
        }
    }

    return { FetchInternalState { .fetched_value = fetched.data() },
             FetchInterstage {
                 .inst = fetched,
                 .inst_addr = fetch_addr,
                 .next_inst_addr = fetch_addr + fetched.size(),
                 .predicted_next_inst_addr = predictor->predict_next_pc_address(fetched, fetch_addr),
                 .excause = cause,
                 .is_valid = true,
             } };
}

/**
 * Decode stage: derives control signals from the fetched instruction and reads
 * its register and CSR operands.
 *
 * The instruction is marked EXCAUSE_INSN_ILLEGAL when its flags do not match
 * the check value/mask built in the constructor. Register numbers the
 * instruction does not use are set to 0.
 *
 * @param dt  output of the fetch stage
 * @return decoded control signals and operand values for the execute stage
 */
DecodeState Core::decode(const FetchInterstage &dt) {
    printf("[Debug] Entering Core::decode...\n");

    InstructionFlags flags;
    bool w_operation = this->xlen != Xlen::_64;
    AluCombinedOp alu_op {};
    AccessControl mem_ctl;
    ExceptionCause excause = dt.excause;
    printf("[Debug] Instruction data: 0x%08x\n", dt.inst.data());
    // The address is a 64-bit value; PRIx64 selects the matching conversion on
    // every platform ("%lx" is wrong where uint64_t is unsigned long long).
    printf("[Debug] Instruction address: 0x%016" PRIx64 "\n", dt.inst_addr.get_raw());

    dt.inst.flags_alu_op_mem_ctl(flags, alu_op, mem_ctl);

    // Any masked flag that differs from the expected value makes the instruction illegal.
    if ((flags ^ check_inst_flags_val) & check_inst_flags_mask) {
        excause = EXCAUSE_INSN_ILLEGAL;
    }

    RegisterId num_rs = (flags & (IMF_ALU_REQ_RS | IMF_ALU_RS_ID)) ? dt.inst.rs() : 0;
    RegisterId num_rt = (flags & IMF_ALU_REQ_RT) ? dt.inst.rt() : 0;
    RegisterId num_rd = (flags & IMF_REGWRITE) ? dt.inst.rd() : 0;
    // When instruction does not specify register, it is set to x0 as operations on x0 have no
    // side effects (not even visualization).
    // With IMF_ALU_RS_ID the register number itself is used as the operand value.
    RegisterValue val_rs
        = (flags & IMF_ALU_RS_ID) ? uint64_t(size_t(num_rs)) : regs->read_gp(num_rs);
    RegisterValue val_rt = regs->read_gp(num_rt);
    RegisterValue immediate_val = dt.inst.immediate();
    const bool regwrite = flags & IMF_REGWRITE;

    CSR::Address csr_address = (flags & IMF_CSR) ? dt.inst.csr_address() : CSR::Address(0);
    RegisterValue csr_read_val
        = ((control_state != nullptr && (flags & IMF_CSR))) ? control_state->read(csr_address) : 0;
    // A CSR instruction that routes the CSR value to the ALU writes the CSR only
    // when its rs field is not register 0.
    bool csr_write = (flags & IMF_CSR) && (!(flags & IMF_CSR_TO_ALU) || (num_rs != 0));

    // An exception carried over from fetch takes precedence over EBREAK/ECALL.
    if ((flags & IMF_EXCEPTION) && (excause == EXCAUSE_NONE)) {
        if (flags & IMF_EBREAK) {
            excause = EXCAUSE_BREAK;
        } else if (flags & IMF_ECALL) {
            excause = EXCAUSE_ECALL_M;
            // TODO: EXCAUSE_ECALL_S, EXCAUSE_ECALL_U
        }
    }
    if (flags & IMF_FORCE_W_OP)
        w_operation = true;

    return { DecodeInternalState {
                 .alu_op_num = static_cast<unsigned>(alu_op.alu_op),
                 .excause_num = static_cast<unsigned>(excause),
                 .inst_bus = dt.inst.data(),
                 .alu_mul = bool(flags & IMF_MUL),
             },
             DecodeInterstage { .inst = dt.inst,
                                .inst_addr = dt.inst_addr,
                                .next_inst_addr = dt.next_inst_addr,
                                .predicted_next_inst_addr = dt.predicted_next_inst_addr,
                                .val_rs = val_rs,
                                .val_rs_orig = val_rs,
                                .val_rt = val_rt,
                                .val_rt_orig = val_rt,
                                .immediate_val = immediate_val,
                                .csr_read_val = csr_read_val,
                                .csr_address = csr_address,
                                .excause = excause,
                                .ff_rs = FORWARD_NONE,
                                .ff_rt = FORWARD_NONE,
                                .alu_component = (flags & IMF_AMO) ? AluComponent::PASS :
                                                 (flags & IMF_MUL) ? AluComponent::MUL : AluComponent::ALU,
                                .aluop = alu_op,
                                .memctl = mem_ctl,
                                .num_rs = num_rs,
                                .num_rt = num_rt,
                                .num_rd = num_rd,
                                .memread = bool(flags & IMF_MEMREAD),
                                .memwrite = bool(flags & IMF_MEMWRITE),
                                .alusrc = bool(flags & IMF_ALUSRC),
                                .regwrite = regwrite,
                                .alu_req_rs = bool(flags & IMF_ALU_REQ_RS),
                                .alu_req_rt = bool(flags & IMF_ALU_REQ_RT),
                                .branch_bxx = bool(flags & IMF_BRANCH),
                                .branch_jal = bool(flags & IMF_JUMP),
                                .branch_val = bool(flags & IMF_BJ_NOT),
                                .branch_jalr = bool(flags & IMF_BRANCH_JALR),
                                .stall = false,
                                .is_valid = dt.is_valid,
                                .w_operation = w_operation,
                                .alu_mod = bool(flags & IMF_ALU_MOD),
                                .alu_pc = bool(flags & IMF_PC_TO_ALU),
                                .csr = bool(flags & IMF_CSR),
                                .csr_to_alu = bool(flags & IMF_CSR_TO_ALU),
                                .csr_write = csr_write,
                                .xret = bool(flags & IMF_XRET),
                                .insert_stall_before = bool(flags & IMF_CSR) } };
}

/**
 * Execute stage: selects the ALU operands, computes the ALU result and the
 * jump/branch target, and emulates a simplified `vsetvl` for instructions with
 * the vector opcode (0x57).
 *
 * Simplified vsetvl model implemented here:
 *   - rs2 holds the element width (8, 16 or 32), which is encoded into `vtype`;
 *   - the result `vl = min(rs1, 32)` is delivered to rd through the regular
 *     write-back path (it is placed on the ALU result);
 *   - `vtype` is written directly into the rs2 register.
 *
 * The instruction always continues down the pipeline with its own addresses.
 * Returning an empty state would hand a default-constructed next address to the
 * following stages, and the single-cycle core writes that address into the PC.
 * For the same reason an instruction that already carries an exception is not
 * emulated, so the exception is not lost.
 *
 * @param dt  output of the decode stage (after hazard handling in the pipelined core)
 * @return ALU result and control signals for the memory stage
 */
ExecuteState Core::execute(const DecodeInterstage &dt) {
    printf("[Debug] Entering Core::execute...\n");
    // PRIx64 matches the 64-bit raw address on every platform.
    printf(
        "[Debug] Instruction: 0x%08x, Address: 0x%016" PRIx64 "\n", dt.inst.data(),
        dt.inst_addr.get_raw());

    const enum ExceptionCause excause = dt.excause;
    // Cleared when the vsetvl emulation rejects its operands, so that the
    // instruction retires without touching rd.
    bool regwrite = dt.regwrite;

    // TODO refactor to produce multiplexor index and multiplex function
    const RegisterValue alu_fst = [=] {
        if (dt.alu_pc) return RegisterValue(dt.inst_addr.get_raw());
        return dt.val_rs;
    }();
    const RegisterValue alu_sec = [=] {
        if (dt.csr_to_alu) return dt.csr_read_val;
        if (dt.alusrc) return dt.immediate_val;
        return dt.val_rt;
    }();
    const Address branch_jal_target = dt.inst_addr + dt.immediate_val.as_i64();

    const unsigned stall_status = [=] {
        if (dt.stall) return 1;
        if (dt.ff_rs != FORWARD_NONE || dt.ff_rt != FORWARD_NONE) return 2;
        return 0;
    }();

    // The result stays 0 whenever the instruction carries an exception.
    RegisterValue alu_val = RegisterValue(0);

    if ((dt.inst.opcode() & 0x7f) == 0x57) {
        // Vector opcode; every such instruction is treated as vsetvl.
        // NOTE(review): other instructions sharing opcode 0x57 are not told apart here.
        printf("Detected RVV instruction (opcode: 0x%02x)\n", dt.inst.opcode());

        if (excause == EXCAUSE_NONE) {
            // Use the operands delivered by decode, which include forwarded values.
            // Re-reading the register file here would miss results still in flight.
            const uint32_t rs1_val = dt.val_rs.as_u32();
            const uint32_t rs2_val = dt.val_rt.as_u32();

            uint32_t vtype = 0;
            bool operands_ok = true;
            if (dt.num_rd == 0 || rs2_val == 0) {
                printf("Error: Invalid rd or rs2 value for vsetvl\n");
                operands_ok = false;
            } else if (rs2_val == 8) {
                vtype = 0b000; // SEW = 8
            } else if (rs2_val == 16) {
                vtype = 0b001; // SEW = 16
            } else if (rs2_val == 32) {
                vtype = 0b010; // SEW = 32
            } else {
                printf("Error: Unsupported vtype value (rs2_val = %u)\n", rs2_val);
                operands_ok = false;
            }

            if (operands_ok) {
                // VL = min(rs1, MAX_VECTOR_SIZE)
                const uint32_t MAX_VECTOR_SIZE = 32;
                const uint32_t vl = (rs1_val < MAX_VECTOR_SIZE) ? rs1_val : MAX_VECTOR_SIZE;

                // rd receives VL through the normal write-back path.
                alu_val = vl;
                // NOTE(review): vtype overwrites the rs2 source register right
                // away, outside the write-back stage, so it is not undone if the
                // pipeline is flushed afterwards. Confirm this is the intended model.
                regs->write_gp(dt.num_rt, vtype);

                printf("vsetvl executed: rs1 = %u, rs2 = %u, vl = %u, vtype = %u\n",
                    rs1_val, rs2_val, vl, vtype);
            } else {
                // Rejected operands: retire without writing any register.
                regwrite = false;
            }
        }
    } else {
        printf("Non-RVV instruction detected (opcode: 0x%02x)\n", dt.inst.opcode());
        if (excause == EXCAUSE_NONE) {
            alu_val = alu_combined_operate(
                dt.aluop, dt.alu_component, dt.w_operation, dt.alu_mod, alu_fst, alu_sec);
        }
    }

    return { ExecuteInternalState {
                 .alu_src1 = dt.val_rs,
                 .alu_src2 = alu_sec,
                 .immediate = dt.immediate_val,
                 .rs = dt.val_rs_orig,
                 .rt = dt.val_rt_orig,
                 .stall_status = stall_status,
                 .alu_op_num = static_cast<unsigned>(dt.aluop.alu_op),
                 .forward_from_rs1_num = static_cast<unsigned>(dt.ff_rs),
                 .forward_from_rs2_num = static_cast<unsigned>(dt.ff_rt),
                 .excause_num = static_cast<unsigned>(dt.excause),
                 .alu_src = dt.alusrc,
                 .alu_mul = dt.alu_component == AluComponent::MUL,
                 .branch_bxx = dt.branch_bxx,
                 .alu_pc = dt.alu_pc,
             },
             ExecuteInterstage {
                 .inst = dt.inst,
                 .inst_addr = dt.inst_addr,
                 .next_inst_addr = dt.next_inst_addr,
                 .predicted_next_inst_addr = dt.predicted_next_inst_addr,
                 .branch_jal_target = branch_jal_target,
                 .val_rt = dt.val_rt,
                 .alu_val = alu_val,
                 .immediate_val = dt.immediate_val,
                 .csr_read_val = dt.csr_read_val,
                 .csr_address = dt.csr_address,
                 .excause = excause,
                 .memctl = dt.memctl,
                 .num_rd = dt.num_rd,
                 .memread = dt.memread,
                 .memwrite = dt.memwrite,
                 .regwrite = regwrite,
                 .is_valid = dt.is_valid,
                 .branch_bxx = dt.branch_bxx,
                 .branch_jal = dt.branch_jal,
                 .branch_val = dt.branch_val,
                 .branch_jalr = dt.branch_jalr,
                 .alu_zero = alu_val == 0,
                 .csr = dt.csr,
                 .csr_write = dt.csr_write,
                 .xret = dt.xret,
             } };
}








/**
 * Memory stage: performs the data memory access, resolves branches and jumps,
 * updates the branch predictor and applies CSR side effects.
 *
 * @param dt  output of the execute stage
 * @return value destined for the register file, the computed next instruction
 *         address and the control signals for the write-back stage
 */
MemoryState Core::memory(const ExecuteInterstage &dt) {
    RegisterValue towrite_val = dt.alu_val;
    // The ALU result serves as the memory address, truncated to the core's XLEN.
    auto mem_addr = Address(get_xlen_from_reg(dt.alu_val));
    bool memread = dt.memread;
    bool memwrite = dt.memwrite;
    bool regwrite = dt.regwrite;
    Address computed_next_inst_addr;

    enum ExceptionCause excause = dt.excause;
    // Memory is only accessed when the instruction carries no exception.
    if (excause == EXCAUSE_NONE) {
        if (is_special_access(dt.memctl)) {
            excause = memory_special(
                dt.memctl, dt.inst.rt(), memread, memwrite, towrite_val, dt.val_rt, mem_addr);
        } else if (is_regular_access(dt.memctl)) {
            if (memwrite) { mem_data->write_ctl(dt.memctl, mem_addr, dt.val_rt); }
            if (memread) { towrite_val = mem_data->read_ctl(dt.memctl, mem_addr); }
        } else {
            Q_ASSERT(dt.memctl == AC_NONE);
            // AC_NONE is memory NOP
        }
    }

    // An instruction that arrived with an exception must not change any state.
    if (dt.excause != EXCAUSE_NONE) {
        memread = false;
        memwrite = false;
        regwrite = false;
    }

    // Conditional branch (BXX = BEQ | BNE...) is executed and should be taken.
    const bool branch_bxx_taken = dt.branch_bxx && (!dt.branch_val ^ !dt.alu_zero);
    // Unconditional jump should be taken (JALX = JAL | JALR).
    const bool branch_jalx = dt.branch_jalr || dt.branch_jal;

    computed_next_inst_addr = compute_next_inst_addr(dt, branch_bxx_taken);

    // Predictor update
    if (dt.branch_jal) {
        // JAL Jump instruction (J-type (alternative to U-type with different immediate bit order))
        predictor->update(dt.inst, dt.inst_addr, dt.branch_jal_target, BranchType::JUMP, BranchResult::TAKEN);
    } else if (dt.branch_jalr) {
        // JALR Jump register instruction (I-type)
        predictor->update(
            dt.inst, dt.inst_addr, Address(get_xlen_from_reg(dt.alu_val)), BranchType::JUMP, BranchResult::TAKEN);
    } else if (dt.branch_bxx) {
        // BXX Conditional branch instruction (B-type (alternative to S-type with different
        // immediate bit order))
        predictor->update(
            dt.inst, dt.inst_addr, dt.branch_jal_target, BranchType::BRANCH,
            branch_bxx_taken ? BranchResult::TAKEN : BranchResult::NOT_TAKEN);
    }

    // CSR side effects apply only to valid instructions without an exception.
    bool csr_written = false;
    if (control_state != nullptr && dt.is_valid && dt.excause == EXCAUSE_NONE) {
        control_state->increment_internal(CSR::Id::MINSTRET, 1);
        if (dt.csr_write) {
            control_state->write(dt.csr_address, dt.alu_val);
            csr_written = true;
        }
        if (dt.xret) {
            // Return from exception: execution continues at the address stored in MEPC.
            control_state->exception_return(CSR::PrivilegeLevel::MACHINE);
            if (this->xlen == Xlen::_32)
                computed_next_inst_addr = Address(control_state->read_internal(CSR::Id::MEPC).as_u32());
            else
                computed_next_inst_addr = Address(control_state->read_internal(CSR::Id::MEPC).as_u64());
            csr_written = true;
        }
    }

    // Predictor statistics update
    if (computed_next_inst_addr != dt.predicted_next_inst_addr) {
        predictor->increment_mispredictions();
    }

    return { MemoryInternalState {
                 .mem_read_val = towrite_val,
                 .mem_write_val = dt.val_rt,
                 .mem_addr = dt.alu_val,
                 .excause_num = static_cast<unsigned>(excause),
                 .memwrite = memwrite,
                 .memread = memread,
                 .branch_bxx = dt.branch_bxx,
                 .branch_jal = dt.branch_jal,
                 // PC should be modified by branch/jump instruction.
                 .branch_outcome = branch_bxx_taken || branch_jalx,
                 .branch_jalx = branch_jalx,
                 .branch_jalr = dt.branch_jalr,
                 .xret = dt.xret,
             },
             MemoryInterstage {
                 .inst = dt.inst,
                 .inst_addr = dt.inst_addr,
                 .next_inst_addr = dt.next_inst_addr,
                 .predicted_next_inst_addr = dt.predicted_next_inst_addr,
                 .computed_next_inst_addr = computed_next_inst_addr,
                 .mem_addr = mem_addr,
                 // Value for rd: the CSR value for CSR instructions, the return
                 // address for JAL/JALR, otherwise the ALU or loaded value.
                 .towrite_val = [=]() -> RegisterValue {
                     if (dt.csr) return dt.csr_read_val;
                     if (dt.branch_jalr || dt.branch_jal) return dt.next_inst_addr.get_raw();
                     return towrite_val;
                 }(),
                 .excause = dt.excause,
                 .num_rd = dt.num_rd,
                 .memtoreg = memread,
                 .regwrite = regwrite,
                 .is_valid = dt.is_valid,
                 .csr_written = csr_written,
             } };
}

/**
 * Write-back stage: stores the result into the destination register when the
 * instruction requests it.
 *
 * @param dt  output of the memory stage
 * @return state describing the write; an instruction that raised an exception
 *         is reported as a NOP
 */
WritebackState Core::writeback(const MemoryInterstage &dt) {
    if (dt.regwrite) { regs->write_gp(dt.num_rd, dt.towrite_val); }

    const bool completed_normally = (dt.excause == EXCAUSE_NONE);

    return WritebackState { WritebackInternalState {
        .inst = completed_normally ? dt.inst : Instruction::NOP,
        .inst_addr = dt.inst_addr,
        .value = dt.towrite_val,
        .num_rd = dt.num_rd,
        .regwrite = dt.regwrite,
        .memtoreg = dt.memtoreg,
    } };
}

/**
 * Determines the address of the instruction that follows `exec`.
 *
 * @param exec          output of the execute stage
 * @param branch_taken  whether a conditional branch is taken
 * @return the immediate-based target for taken branches and JAL, the ALU
 *         result (truncated to XLEN) for JALR, otherwise the sequential address
 */
Address Core::compute_next_inst_addr(const ExecuteInterstage &exec, bool branch_taken) const {
    const bool uses_immediate_target = branch_taken || exec.branch_jal;
    if (uses_immediate_target) { return exec.branch_jal_target; }

    return exec.branch_jalr ? Address(get_xlen_from_reg(exec.alu_val)) : exec.next_inst_addr;
}

/**
 * Reads a register value with the width of the simulated core: the 32-bit view
 * on a 32-bit core, the full 64-bit value on a 64-bit core.
 */
uint64_t Core::get_xlen_from_reg(RegisterValue reg) const {
    switch (this->xlen) {
    case Xlen::_32: return reg.as_u32();
    case Xlen::_64: return reg.as_u64();
    default: UNREACHABLE
    }
}

/**
 * Constructs the single-cycle core. All arguments are forwarded to Core, after
 * which the core is reset.
 */
CoreSingle::CoreSingle(
    Registers *regs,
    BranchPredictor *predictor,
    FrontendMemory *mem_program,
    FrontendMemory *mem_data,
    CSR::ControlState *control_state,
    Xlen xlen,
    ConfigIsaWord isa_word)
    : Core(regs, predictor, mem_program, mem_data, control_state, xlen, isa_word) {
    reset();
}

/**
 * Executes one whole instruction: all five stages run back to back within a
 * single step, each consuming the output of the previous one.
 *
 * @param skip_break  when true, fetch ignores hardware breakpoints
 */
void CoreSingle::do_step(bool skip_break) {
    Pipeline &p = state.pipeline;

    p.fetch = fetch(pc_if, skip_break);
    p.decode = decode(p.fetch.final);
    p.execute = execute(p.decode.final);
    p.memory = memory(p.execute.final);
    p.writeback = writeback(p.memory.final);

    // Continue at the address computed by the memory stage.
    regs->write_pc(mem_wb.computed_next_inst_addr);

    if (mem_wb.excause != EXCAUSE_NONE) {
        // The PC written above is passed as the next address; the address of the
        // previously completed instruction is passed as the jump/branch PC.
        handle_exception(
            mem_wb.excause, mem_wb.inst, mem_wb.inst_addr, regs->read_pc(), prev_inst_addr,
            mem_wb.mem_addr);
        // prev_inst_addr is intentionally left untouched on the exception path.
        return;
    }
    prev_inst_addr = mem_wb.inst_addr;
}

/** Clears the pipeline state and forgets the previously executed instruction address. */
void CoreSingle::do_reset() {
    state.pipeline = {};
    prev_inst_addr = Address::null();
}

/**
 * Constructs the pipelined core. The common arguments are forwarded to Core;
 * `hazard_unit` selects how data hazards are handled (none, stall, or stall
 * with forwarding). The core is reset afterwards.
 */
CorePipelined::CorePipelined(
    Registers *regs,
    BranchPredictor *predictor,
    FrontendMemory *mem_program,
    FrontendMemory *mem_data,
    CSR::ControlState *control_state,
    Xlen xlen,
    ConfigIsaWord isa_word,
    MachineConfig::HazardUnit hazard_unit)
    : Core(regs, predictor, mem_program, mem_data, control_state, xlen, isa_word) {
    this->hazard_unit = hazard_unit;
    reset();
}

/**
 * Advances the five-stage pipeline by one cycle.
 *
 * The stages are evaluated from the last to the first so that every stage
 * consumes the interstage register contents produced in the previous cycle.
 * Afterwards exceptions, data hazards, mispredictions and stalls are resolved
 * and the next PC is selected.
 *
 * @param skip_break  when true, fetch ignores hardware breakpoints
 */
void CorePipelined::do_step(bool skip_break) {
    Pipeline &p = state.pipeline;

    // Values from the previous cycle that are needed after the stages overwrite them.
    const Address jump_branch_pc = mem_wb.inst_addr;
    const FetchInterstage saved_if_id = if_id;

    p.writeback = writeback(mem_wb);
    p.memory = memory(ex_mem);
    p.execute = execute(id_ex);
    p.decode = decode(if_id);
    p.fetch = fetch(pc_if, skip_break);

    // An exception in a later stage flushes every younger instruction behind it.
    bool exception_in_progress = mem_wb.excause != EXCAUSE_NONE;
    if (exception_in_progress) { ex_mem.flush(); }
    exception_in_progress |= ex_mem.excause != EXCAUSE_NONE;
    if (exception_in_progress) { id_ex.flush(); }
    exception_in_progress |= id_ex.excause != EXCAUSE_NONE;
    if (exception_in_progress) { if_id.flush(); }

    bool stall = false;
    if (hazard_unit != MachineConfig::HU_NONE) { stall |= handle_data_hazards(); }

    /* PC and exception pseudo stage
     * ============================== */
    pc_if = {};
    if (mem_wb.excause != EXCAUSE_NONE) {
        /* By default, execution continues with the next instruction after exception. */
        regs->write_pc(mem_wb.computed_next_inst_addr);
        /* Exception handler may override this behavior and change the PC (e.g. hwbreak). */
        handle_exception(
            mem_wb.excause, mem_wb.inst, mem_wb.inst_addr, mem_wb.computed_next_inst_addr,
            jump_branch_pc, mem_wb.mem_addr);
    } else if (detect_mispredicted_jump() || mem_wb.csr_written) {
        /* If the jump was predicted incorrectly or csr register was written, we need to flush the
         * pipeline. */
        flush_and_continue_from_address(mem_wb.computed_next_inst_addr);
    } else if (exception_in_progress) {
        /* An exception is in progress which caused the pipeline before the exception to be flushed.
         * Therefore, next pc cannot be determined from if_id (now NOP).
         * To make the visualization cleaner we stop fetching (and PC update) until the exception
         * is handled. */
        pc_if.stop_if = true;
    } else if (stall || is_stall_requested()) {
        /* Fetch from the same PC is repeated due to stall in the pipeline. */
        handle_stall(saved_if_id);
    } else {
        /* Normal execution. */
        regs->write_pc(if_id.predicted_next_inst_addr);
    }
}

/**
 * Redirects execution to `next_pc` and discards the instructions held in the
 * IF/ID, ID/EX and EX/MEM interstage registers.
 */
void CorePipelined::flush_and_continue_from_address(Address next_pc) {
    regs->write_pc(next_pc);
    if_id.flush();
    id_ex.flush();
    ex_mem.flush();
}

/**
 * Inserts one stall cycle into the pipeline and counts it.
 *
 * @param saved_if_id  IF/ID contents captured before the stages ran this cycle
 */
void CorePipelined::handle_stall(const FetchInterstage &saved_if_id) {
    /*
     * Stall handing:
     * - IF fetches new instruction, but it is not allowed to save into IF/ID register. This is
     * simulated by restoring the `if_id` to its original value.
     * - ID continues normally. On next cycle, perform the same as before as IF/ID will be
     * unchanged.
     * - EX is where stall is inserted by flush. The flushed instruction will be re-executed
     * as ID repeats its execution.
     */
    if_id = saved_if_id;
    id_ex.flush();
    id_ex.stall = true; // for visualization
    state.stall_count++;
}

/**
 * Returns true when the next address computed for the instruction in MEM/WB
 * differs from the address that was predicted for it.
 */
bool CorePipelined::detect_mispredicted_jump() const {
    return mem_wb.computed_next_inst_addr != mem_wb.predicted_next_inst_addr;
}

/**
 * Returns true when the instruction in ID/EX asks for a stall before it
 * (decode sets this for CSR instructions) and EX/MEM still holds a valid
 * instruction.
 */
bool CorePipelined::is_stall_requested() const {
    return id_ex.insert_stall_before && ex_mem.is_valid;
}

/**
 * Checks whether the instruction held in `interstage` writes a register that
 * the instruction in `id_ex` needs as an ALU source.
 *
 * Register 0 never counts as a hazard: writing it has no effect and it is used
 * by the nop instruction.
 *
 * @param interstage  later interstage register (EX/MEM or MEM/WB)
 * @param id_ex       ID/EX interstage register of the dependent instruction
 * @return true when a read-after-write dependency exists
 */
template<typename InterstageReg>
bool is_hazard_in_stage(const InterstageReg &interstage, const DecodeInterstage &id_ex) {
    const bool writes_real_register = interstage.regwrite && interstage.num_rd != 0;
    if (!writes_real_register) { return false; }

    const bool rs_depends = id_ex.alu_req_rs && interstage.num_rd == id_ex.num_rs;
    const bool rt_depends = id_ex.alu_req_rt && interstage.num_rd == id_ex.num_rt;
    return rs_depends || rt_depends;
}

/**
 * Resolves data hazards between the instruction in ID/EX and the instructions
 * in MEM/WB and EX/MEM.
 *
 * With HU_STALL_FORWARD the pending results are forwarded into the ID/EX
 * operands; a stall is requested only when the instruction in EX/MEM reads
 * memory. With any other hazard unit every hazard requests a stall.
 *
 * @return true when the pipeline has to stall this cycle
 */
bool CorePipelined::handle_data_hazards() {
    // Note: We make exception with $0 as that has no effect when
    // written and is used in nop instruction
    bool stall = false;

    // MEM/WB is handled first so that a newer result from EX/MEM (below)
    // overrides the older forwarded value.
    if (is_hazard_in_stage(mem_wb, id_ex)) {
        if (hazard_unit == MachineConfig::HU_STALL_FORWARD) {
            // Forward result value
            if (id_ex.alu_req_rs && mem_wb.num_rd == id_ex.num_rs) {
                id_ex.val_rs = mem_wb.towrite_val;
                id_ex.ff_rs = FORWARD_FROM_W;
            }
            if (id_ex.alu_req_rt && mem_wb.num_rd == id_ex.num_rt) {
                id_ex.val_rt = mem_wb.towrite_val;
                id_ex.ff_rt = FORWARD_FROM_W;
            }
        } else {
            stall = true;
        }
    }
    if (is_hazard_in_stage(ex_mem, id_ex)) {
        if (hazard_unit == MachineConfig::HU_STALL_FORWARD) {
            if (ex_mem.memread) {
                // The loaded value is not available yet, so forwarding is not possible.
                stall = true;
            } else {
                // Forward result value
                if (id_ex.alu_req_rs && ex_mem.num_rd == id_ex.num_rs) {
                    id_ex.val_rs = ex_mem.alu_val;
                    id_ex.ff_rs = FORWARD_FROM_M;
                }
                if (id_ex.alu_req_rt && ex_mem.num_rd == id_ex.num_rt) {
                    id_ex.val_rt = ex_mem.alu_val;
                    id_ex.ff_rt = FORWARD_FROM_M;
                }
            }
        } else {
            stall = true;
        }
    }
    return stall;
}

/** Clears the contents of all pipeline stages. */
void CorePipelined::do_reset() {
    state.pipeline = {};
}

/**
 * Exception handler that only reports the exception through the DEBUG log.
 *
 * @param core            core that raised the exception (unused)
 * @param regs            register file; its current PC is included in the log message
 * @param excause         cause of the exception
 * @param inst_addr       address of the instruction that caused the exception
 * @param next_addr       address of the following instruction
 * @param jump_branch_pc  jump/branch PC associated with the instruction
 * @param mem_ref_addr    memory address referenced by the instruction
 * @return                always true
 */
bool StopExceptionHandler::handle_exception(
    Core *core,
    Registers *regs,
    ExceptionCause excause,
    Address inst_addr,
    Address next_addr,
    Address jump_branch_pc,
    Address mem_ref_addr) {
    Q_UNUSED(core)
    // Every adjacent literal starts with a space so that each printed address is
    // separated from the label that follows it.
    DEBUG(
        "Exception cause %d instruction PC 0x%08" PRIx64 " next PC 0x%08" PRIx64
        " jump branch PC 0x%08" PRIx64 " registers PC 0x%08" PRIx64 " mem ref 0x%08" PRIx64,
        excause, inst_addr.get_raw(), next_addr.get_raw(), jump_branch_pc.get_raw(),
        regs->read_pc().get_raw(), mem_ref_addr.get_raw());
    return true;
}

这段代码是关于 RISC-V 核心模拟的实现,涉及了指令的获取(fetch)、解码(decode)、执行(execute)、内存访问(memory)、写回(writeback)以及异常处理等多个阶段。每个函数的作用都在一定程度上模拟了处理器的流水线和指令执行的流程。以下是各个函数的中文解析:

核心函数

  1. unsupported_inst_flags_to_check

    • 功能:该函数根据架构的 xlen(32 位或 64 位)和 ISA 字段(是否支持 A、M 扩展)来构建需要检查的指令标志位,用于检查指令是否被支持。
    • 主要操作:根据 xlen(32 或 64 位)以及是否包含 A(原子指令)和 M(乘法指令)扩展,设置标志位,标记指令是否支持。
  2. Core::step

    • 功能:执行一个步骤,即执行一条指令的所有流水线阶段(fetch, decode, execute, memory, writeback)。
    • 主要操作:调用 do_step(skip_break) 执行步骤,并更新执行周期计数。
  3. Core::reset

    • 功能:重置核心状态。
    • 主要操作:重置周期计数、停顿计数,并调用 do_reset() 重置其他状态。
  4. Core::get_cycle_count / Core::get_stall_count

    • 功能:返回当前的周期计数和停顿计数。
    • 主要操作:分别返回当前的 cycle_count 和 stall_count。
  5. Core::insert_hwbreak / Core::remove_hwbreak

    • 功能:插入或移除硬件断点。
    • 主要操作:在给定地址插入或删除硬件断点,用于调试。
  6. Core::set_stop_on_exception / Core::get_stop_on_exception

    • 功能:设置或获取异常时是否停止执行。
    • 主要操作:更新或检查是否在特定的异常情况下停止执行。
  7. Core::register_exception_handler

    • 功能:注册异常处理程序。
    • 主要操作:根据异常类型注册不同的异常处理器。如果没有为异常类型提供处理器,则使用默认处理器。
  8. Core::handle_exception

    • 功能:处理发生的异常。
    • 主要操作:根据异常类型调用相应的异常处理程序,并更新控制状态。如果异常是硬件断点,还会更新 PC

内存操作相关

  1. memory_special

    • 功能:处理特殊的内存操作,如原子操作(AMO)和加载/存储保留(LR/SC)。
    • 主要操作:根据内存操作类型(如 AMO、LR32、SC32)执行相应的读写操作,更新 towrite_val(待写值)。
  2. Core::fetch

    • 功能:从程序内存中提取指令。
    • 主要操作:获取当前 PC 地址的指令,并返回提取的指令数据。如果有硬件断点或者中断请求,适当处理。
  3. Core::decode

    • 功能:解码指令。
    • 主要操作:根据指令的数据解码出操作类型,设置必要的控制信号(如 ALU 操作、内存访问类型等),并检查指令是否有效。
  4. Core::execute

    • 功能:执行指令。
    • 主要操作:根据解码信息执行 ALU 操作或内存访问操作,计算指令的执行结果(如 ALU 计算结果或内存地址)。
  5. Core::memory

    • 功能:处理内存访问(读写)。
    • 主要操作:根据指令是否为内存访问类型(如 memread、memwrite),读取或写入数据到内存,并处理一些特殊内存操作(如 AMO 操作)。
  6. Core::writeback

    • 功能:将计算或内存操作的结果写回寄存器。
    • 主要操作:如果指令有 regwrite 标志,写回结果到指定的寄存器。
  7. Core::compute_next_inst_addr

    • 功能:计算下一个指令地址。
    • 主要操作:根据是否有分支或跳转,计算跳转目标地址或正常的下一条指令地址。
  8. Core::get_xlen_from_reg

    • 功能:获取寄存器值的位宽。
    • 主要操作:根据当前 xlen(32位或64位)选择将寄存器值转换为相应的位宽。

流水线阶段与跳转预测

  1. CorePipelined::do_step

    • 功能:执行一个周期的所有流水线阶段,并处理异常和跳转。
    • 主要操作:更新流水线的每个阶段(fetch, decode, execute, memory, writeback),处理跳转预测和异常。
  2. CorePipelined::flush_and_continue_from_address

    • 功能:刷新流水线并继续执行。
    • 主要操作:根据计算出的地址刷新流水线,确保指令正确执行。
  3. CorePipelined::handle_stall

    • 功能:处理流水线停顿。
    • 主要操作:如果发生停顿,恢复先前保存的 if_id 阶段并执行相关操作。
  4. CorePipelined::detect_mispredicted_jump

    • 功能:检测跳转预测是否错误。
    • 主要操作:检查是否发生了跳转预测错误,如果错误,更新 misprediction 统计信息。
  5. CorePipelined::is_stall_requested

    • 功能:检查是否需要插入停顿。
    • 主要操作:检查当前阶段是否有插入停顿的要求。

数据冒险处理

  1. handle_data_hazards
    • 功能:处理数据冒险。
    • 主要操作:检查是否存在数据冒险,并根据配置选择是否插入停顿或进行数据转发。

异常处理

  1. StopExceptionHandler::handle_exception
    • 功能:处理模拟器中的停止异常。
    • 主要操作:打印调试信息,表示异常发生,并返回 true,以继续执行。
/**
 * Fetch stage: read the instruction at the current program counter and attach
 * any hardware-breakpoint or interrupt exception to it.
 *
 * @param pc          PC interstage record; when its stop_if flag is set nothing is fetched
 * @param skip_break  when true, a hardware breakpoint registered at the fetched
 *                    address is ignored
 * @return            an empty FetchState when fetching is stopped, otherwise the
 *                    fetched instruction with its address, the following and the
 *                    predicted next address, and the exception cause
 */
FetchState Core::fetch(PCInterstage pc, bool skip_break) {
    if (pc.stop_if) { return {}; }

    // The instruction address is the PC currently stored in the register file.
    const Address inst_addr = Address(regs->read_pc());
    const Instruction inst(mem_program->read_u32(inst_addr));

    // A hardware breakpoint on this address is reported in preference to an interrupt.
    const bool hit_hw_break = !skip_break && hw_breaks.contains(inst_addr);
    ExceptionCause excause = hit_hw_break ? EXCAUSE_HWBREAK : EXCAUSE_NONE;

    if (control_state != nullptr) {
        // The cycle counter CSR is advanced on every fetch, before interrupts are queried.
        control_state->increment_internal(CSR::Id::MCYCLE, 1);
        if (excause == EXCAUSE_NONE && control_state->core_interrupt_request()) {
            excause = EXCAUSE_INT;
        }
    }

    return {
        FetchInternalState { .fetched_value = inst.data() },
        FetchInterstage {
            .inst = inst,
            .inst_addr = inst_addr,
            .next_inst_addr = inst_addr + inst.size(),
            .predicted_next_inst_addr = predictor->predict_next_pc_address(inst, inst_addr),
            .excause = excause,
            .is_valid = true,
        }
    };
}

Fetch:

注释解释:

  1. 停止标志判断

    • if (pc.stop_if):如果设置了停止标志,则返回空的 FetchState,表示不再进行指令获取。
  2. 读取指令地址和指令

    • regs->read_pc():读取当前程序计数器(PC)的值,用作当前指令的地址。
    • mem_program->read_u32(inst_addr):根据指令地址,从程序内存中读取指令的二进制数据。
  3. 异常检查

    • 如果当前指令的地址匹配硬件断点地址,则设置 excause 为 EXCAUSE_HWBREAK,表示触发了硬件断点。
    • 然后判断是否存在中断请求,如果有,则设置 excause 为 EXCAUSE_INT,表示中断异常。
  4. 更新控制状态

    • 如果 control_state 不为空,则更新内部计数器 MCYCLE,表示指令周期数。
  5. 返回 FetchState

    • 通过 FetchInternalState 和 FetchInterstage 返回当前阶段的指令信息,包括指令的二进制数据、指令地址、异常类型等。

比喻:

这段代码的作用就像是一个自动驾驶汽车的导航系统,它从“当前位置”出发(当前指令地址),计算出下一站的目的地(下一条指令地址),并根据当前的“路况”决定是否需要停下来(是否有异常或硬件断点)。如果途中遇到问题(比如中断请求),它会调整路线,最终将最终路线信息和相关状态返回给系统。

Decode:

/**
 * Decode stage: derive control signals, register numbers and operand values
 * from a fetched instruction.
 *
 * @param dt  output of the fetch stage (instruction, its addresses, the exception
 *            cause raised so far and the validity flag)
 * @return    DecodeState consisting of a DecodeInternalState and the
 *            DecodeInterstage record assembled at the end of this function
 */
DecodeState Core::decode(const FetchInterstage &dt) {
    InstructionFlags flags;
    // The W-operation flag starts set whenever the core is not configured as
    // 64-bit; IMF_FORCE_W_OP (checked below) can set it as well.
    bool w_operation = this->xlen != Xlen::_64;
    AluCombinedOp alu_op {};
    AccessControl mem_ctl;
    // Start from the exception cause already reported by the fetch stage.
    ExceptionCause excause = dt.excause;

    // Presumably fills flags, alu_op and mem_ctl as output arguments - they are
    // not assigned anywhere else before being used.
    dt.inst.flags_alu_op_mem_ctl(flags, alu_op, mem_ctl);

    // Any flag bit selected by check_inst_flags_mask that differs from
    // check_inst_flags_val marks the instruction as illegal. This replaces an
    // exception cause carried over from fetch.
    if ((flags ^ check_inst_flags_val) & check_inst_flags_mask) {
        excause = EXCAUSE_INSN_ILLEGAL;
    }

    RegisterId num_rs = (flags & (IMF_ALU_REQ_RS | IMF_ALU_RS_ID)) ? dt.inst.rs() : 0;
    RegisterId num_rt = (flags & IMF_ALU_REQ_RT) ? dt.inst.rt() : 0;
    RegisterId num_rd = (flags & IMF_REGWRITE) ? dt.inst.rd() : 0;
    // When instruction does not specify register, it is set to x0 as operations on x0 have no
    // side effects (not even visualization).
    // With IMF_ALU_RS_ID the rs field itself (the register number) is used as the
    // operand value instead of the register content.
    RegisterValue val_rs
        = (flags & IMF_ALU_RS_ID) ? uint64_t(size_t(num_rs)) : regs->read_gp(num_rs);
    RegisterValue val_rt = regs->read_gp(num_rt);
    RegisterValue immediate_val = dt.inst.immediate();
    const bool regwrite = flags & IMF_REGWRITE;

    // CSR address and value are only obtained for CSR instructions; the value is
    // read only when a control state object is attached, otherwise it is 0.
    CSR::Address csr_address = (flags & IMF_CSR) ? dt.inst.csr_address() : CSR::Address(0);
    RegisterValue csr_read_val
        = ((control_state != nullptr && (flags & IMF_CSR))) ? control_state->read(csr_address) : 0;
    // A CSR instruction requests a CSR write unless it has IMF_CSR_TO_ALU set and
    // its rs register number is 0.
    bool csr_write = (flags & IMF_CSR) && (!(flags & IMF_CSR_TO_ALU) || (num_rs != 0));

    // Instruction-generated exceptions are recorded only if no exception has been
    // raised for this instruction so far.
    if ((flags & IMF_EXCEPTION) && (excause == EXCAUSE_NONE)) {
        if (flags & IMF_EBREAK) {
            excause = EXCAUSE_BREAK;
        } else if (flags & IMF_ECALL) {
            excause = EXCAUSE_ECALL_M;
            // TODO: EXCAUSE_ECALL_S, EXCAUSE_ECALL_U
        }
    }
    if (flags & IMF_FORCE_W_OP)
        w_operation = true;

    return { DecodeInternalState {
                 .alu_op_num = static_cast<unsigned>(alu_op.alu_op),
                 .excause_num = static_cast<unsigned>(excause),
                 .inst_bus = dt.inst.data(),
                 .alu_mul = bool(flags & IMF_MUL),
             },
             DecodeInterstage { .inst = dt.inst,
                                .inst_addr = dt.inst_addr,
                                .next_inst_addr = dt.next_inst_addr,
                                .predicted_next_inst_addr = dt.predicted_next_inst_addr,
                                // The *_orig fields start with the same values as
                                // val_rs / val_rt.
                                .val_rs = val_rs,
                                .val_rs_orig = val_rs,
                                .val_rt = val_rt,
                                .val_rt_orig = val_rt,
                                .immediate_val = immediate_val,
                                .csr_read_val = csr_read_val,
                                .csr_address = csr_address,
                                .excause = excause,
                                // No forwarding is recorded at decode time.
                                .ff_rs = FORWARD_NONE,
                                .ff_rt = FORWARD_NONE,
                                // Component selection: AMO -> PASS, otherwise MUL
                                // for IMF_MUL instructions, otherwise ALU.
                                .alu_component = (flags & IMF_AMO) ? AluComponent::PASS :
                                                 (flags & IMF_MUL) ? AluComponent::MUL : AluComponent::ALU,
                                .aluop = alu_op,
                                .memctl = mem_ctl,
                                .num_rs = num_rs,
                                .num_rt = num_rt,
                                .num_rd = num_rd,
                                .memread = bool(flags & IMF_MEMREAD),
                                .memwrite = bool(flags & IMF_MEMWRITE),
                                .alusrc = bool(flags & IMF_ALUSRC),
                                .regwrite = regwrite,
                                .alu_req_rs = bool(flags & IMF_ALU_REQ_RS),
                                .alu_req_rt = bool(flags & IMF_ALU_REQ_RT),
                                .branch_bxx = bool(flags & IMF_BRANCH),
                                .branch_jal = bool(flags & IMF_JUMP),
                                .branch_val = bool(flags & IMF_BJ_NOT),
                                .branch_jalr = bool(flags & IMF_BRANCH_JALR),
                                .stall = false,
                                .is_valid = dt.is_valid,
                                .w_operation = w_operation,
                                .alu_mod = bool(flags & IMF_ALU_MOD),
                                .alu_pc = bool(flags & IMF_PC_TO_ALU),
                                .csr = bool(flags & IMF_CSR),
                                .csr_to_alu = bool(flags & IMF_CSR_TO_ALU),
                                .csr_write = csr_write,
                                .xret = bool(flags & IMF_XRET),
                                // CSR instructions set insert_stall_before.
                                .insert_stall_before = bool(flags & IMF_CSR) } };
}

中文注释说明:

  1. 初始化变量

    • InstructionFlags flags:存储指令标志,用来表示指令的不同特性(如是否涉及 ALU 操作、是否需要写寄存器等)。
    • w_operation:决定是否按 W 操作(RISC-V 中的 32 位字运算)执行,初值由 xlen 决定:xlen 不是 64 位(即 32 位)时为 true,否则为 false;之后若指令带有 IMF_FORCE_W_OP 标志,也会被置为 true。
    • AluCombinedOp alu_op:ALU 操作结构体,用来保存 ALU 操作的类型。
    • AccessControl mem_ctl:内存控制结构体,用来控制内存读写。
    • ExceptionCause excause:异常类型,初始值为 dt.excause,表示当前的异常状态。
  2. 读取指令并解析标志

    • 调用 dt.inst.flags_alu_op_mem_ctl 来解析当前指令的标志、ALU 操作类型和内存控制方式。
  3. 检查指令是否合法

    • 如果指令的标志与预期的标志不匹配,则设置 excause 为 EXCAUSE_INSN_ILLEGAL,表示指令非法。
  4. 寄存器的处理

    • 根据指令的标志,决定是否需要读取源寄存器(如 rs、rt),如果需要,则从寄存器中读取相应的值。
    • 还要处理 CSR(控制和状态寄存器)的地址和读取值。
  5. 处理异常

    • 如果标志位指示指令是 EBREAK(调试断点)或者 ECALL(系统调用),则设置相应的异常。
  6. 返回解码后的状态

    • DecodeInternalState 和 DecodeInterstage 包含了解码后的指令信息、寄存器值、异常状态等,用于后续执行阶段的操作。

比喻说明:

这段代码就像是为一个工厂流水线设置操作条件,首先检查每个操作是否符合标准(通过 flags),然后根据操作的要求选择不同的机器和工具(如 ALU 操作、内存读写等),并处理异常(比如调试断点或系统调用)。最后,把所有的信息整理好,传递给流水线的下一个阶段(执行阶段)。

 所以,为什么不是decode之后它就自然而然的execute了呢,vsetvl举例,decode之后获取了什么(当前代码实现)还缺什么必要的?为什么execute当前不会自己跑?非向量的add啥的不也不在这个execute里面吗?

;