系列文章目录
第一篇:初级顺序流水线
第二篇:分支预测流水线
文章目录
- 系列文章目录
- 一、控制逻辑
- 二、具体操作
- 1.判断暂停
- 2.控制冒险
- 3.跳转问题
- 4.实现
- 代码
一、控制逻辑
通过暂停和插入气泡来动态调整流水线的状态
二、具体操作
1.判断暂停
-
识别:
指令在译码阶段读取寄存器时,将源寄存器的 id 分别与执行阶段、访存阶段以及写回阶段中指令的目的寄存器 id 进行对比。如果存在 id 相等的情况,就说明指令之间存在数据相关,那么该指令就要在译码阶段等待。
-
方式:
通过插入气泡来代替被暂停的指令。气泡不会改变寄存器、内存、条件码以及程序状态。
-
用途:
解决数据冒险
基于这种机制实现的流水线性能并不高,频繁暂停,降低流水线的吞吐量
-
优化:
直接将运算结果传递给后续指令(数据转发);但对于访存指令,数据产生得比较靠后,必须先暂停再转发
使得流水线不用暂停就可以处理大多数情况的数据冒险
2.控制冒险
-
问题:
每一次取指操作后,必须马上确定下一条指令的地址。当取出的指令为 ret 时,下一条指令的地址需要从栈中读出,因此必须等到访存操作结束后才能确定。当取到的指令是条件分支指令时,流水线无法立即判断是否进行跳转,要通过执行阶段后才能确定。
-
方式:
暂停执行新指令
3.跳转问题
-
策略:
预测分支总是跳转或者总是不跳转
-
解决方式:
对于需要冲刷的第一条指令,在执行阶段插入气泡;对于第二条,在译码阶段插入气泡;同时还要取出跳转指令后面的指令
4.实现
-
为每个流水线寄存器引入两个控制信号,分别为暂停信号和气泡信号
-
当需要暂停时,将暂停信号设为 1,寄存器就会保持以前的状态,就实现指令阻塞在流水线的某个阶段中
-
当需要插入气泡时,寄存器的状态会设置成某个固定的复位配置,复位配置等效于指令nop的状态
代码
#include<stdio.h>
#include<stdint.h>
#include<string.h>
/*
寄存器编号
*/
/*
 * Register identifiers as they appear in the 4-bit register fields of an
 * instruction. The value 0xF is used throughout this file to mean
 * "no register".
 */
enum Reg {
    rax = 0,
    rcx = 1,
    rdx = 2,
    rbx = 3,
    rsp = 4,
    rbp = 5,
    rsi = 6,
    rdi = 7,
    r8  = 8,
    r9  = 9,
    r10 = 0xA,
    r11 = 0xB,
    r12 = 0xC,
    r13 = 0xD,
    r14 = 0xE,
    No_regisEer = 0xF, /* original (misspelled) name, kept for existing users */
    No_register = 0xF  /* correctly spelled alias for the same value */
};
/*
 * Instruction memory image holding the test program. Each row is one
 * encoded instruction; the address in the comment is its byte offset.
 * Bytes not listed are zero-initialized.
 */
const uint8_t PC_memory[32768] = {
    /*  1  0x000: irmovq $9, %rdx          */
    0x30, 0xf2, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /*  2  0x00a: irmovq $21, %rbx         */
    0x30, 0xf3, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /*  3  0x014: subq %rdx, %rbx          */
    0x61, 0x23,
    /*  4  0x016: irmovq $128, %rsp        */
    0x30, 0xf4, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /*  5  0x020: rmmovq %rsp, 100(%rbx)   */
    0x40, 0x43, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /*  6  0x02a: pushq %rdx               */
    0xa0, 0x2f,
    /*  7  0x02c: popq %rax                */
    0xb0, 0x0f,
    /*  8  0x02e: je done
     * NOTE(review): the encoded target byte is 0x37 (the call below), while
     * the listing places `done` at 0x040 -- confirm which one is intended. */
    0x73, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /*  9  0x037: call proc                */
    0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 10  0x040: done: halt               */
    0x00,
    /* 11  0x041: proc: ret                */
    0x90
};
/* Fetch-stage register: holds the predicted address of the next instruction. */
struct FReg {
    int predPC; /* predicted PC produced by the fetch stage */
};
/*
 * Inputs and output of the PC selection logic (prediction plus correction).
 */
struct SelectPC {
    int predpc;   /* predicted PC: valC for jxx/call, otherwise valP */
    int cnd;      /* condition outcome of a jxx; 0 means the jump was not
                     taken and the PC must be corrected */
    int M_valA;   /* valA carried in the E/M register (fall-through address
                     used to correct a mispredicted jxx) */
    int M_icode;  /* icode of the instruction leaving the execute stage */
    int W_valM;   /* value read from data memory; the return address for ret */
    int W_icode;  /* icode of the instruction in the write-back stage */
};
/* Pipeline register between the fetch and decode stages. */
struct F_DReg {
    int bubble; /* non-zero: treat the contents as an inserted bubble */
    int stall;  /* non-zero: hold the current contents */
    int stat;   /* status code carried with the instruction */
    int icode;  /* instruction code (high nibble of the first byte) */
    int ifuc;   /* function code (low nibble of the first byte) */
    int rA;     /* register A id, 0xF when absent */
    int rB;     /* register B id, 0xF when absent */
    int valC;   /* immediate constant */
    int valP;   /* address of the following instruction */
};
/* Pipeline register between the decode and execute stages. */
struct D_EReg {
    int bubble; /* non-zero: treat the contents as an inserted bubble */
    int stall;  /* non-zero: hold the current contents */
    int stat;   /* status code carried with the instruction */
    int icode;  /* instruction code */
    int ifuc;   /* function code */
    int valC;   /* immediate constant */
    int valA;   /* operand A after selection/forwarding */
    int valB;   /* operand B after forwarding */
    int dstE;   /* destination register for the ALU result */
    int dstM;   /* destination register for the memory result */
    int srcA;   /* source register id for operand A */
    int srcB;   /* source register id for operand B */
};
/* One cache line holding a single word; `Set` is the 16-entry cache itself. */
struct Cache {
    int valid;       /* non-zero once the line holds data */
    int Tag;         /* tag of the address cached in this line */
    int Cache_block; /* cached data word */
} Set[16];
/* Decomposition of a memory address for cache lookup; `Address` is the
 * scratch instance filled in on every data-memory access. */
struct Address {
    int Tag;       /* tag part of the address */
    int Set_index; /* index of the cache set selected by the address */
} Address;
/* Inputs and output of the operand-B forwarding logic. */
struct FwdB {
    int register_B; /* value read from the register file */
    int srcB;       /* source register id being resolved */
    int E_dstE;     /* execute stage: destination register of the ALU result */
    int E_valE;     /* execute stage: ALU result */
    int M_dstE;     /* memory stage: destination register of valE */
    int M_valE;     /* memory stage: valE */
    int M_dstM;     /* memory stage: destination register of valM */
    int M_valM;     /* memory stage: value read from memory */
    int W_dstM;     /* write-back stage: destination register of valM */
    int W_valM;     /* write-back stage: valM */
    int W_dstE;     /* write-back stage: destination register of valE */
    int W_valE;     /* write-back stage: valE */
    int valB;       /* selected operand value (output) */
};
/* Inputs and output of the operand-A selection and forwarding logic. */
struct SelectA_and_FwdA {
    int register_A; /* value read from the register file */
    int srcA;       /* source register id being resolved, 0xF for none */
    int valP;       /* next-instruction address, chosen when srcA is 0xF */
    int E_dstE;     /* execute stage: destination register of the ALU result */
    int E_valE;     /* execute stage: ALU result */
    int M_dstE;     /* memory stage: destination register of valE */
    int M_valE;     /* memory stage: valE */
    int M_dstM;     /* memory stage: destination register of valM */
    int M_valM;     /* memory stage: value read from memory */
    int W_dstM;     /* write-back stage: destination register of valM */
    int W_valM;     /* write-back stage: valM */
    int W_dstE;     /* write-back stage: destination register of valE */
    int W_valE;     /* write-back stage: valE */
    int valA;       /* selected operand value (output) */
};
/* Pipeline register between the execute and memory stages. */
struct E_MReg {
    int bubble;  /* non-zero: treat the contents as an inserted bubble */
    int stall;   /* non-zero: hold the current contents */
    int stat;    /* status code carried with the instruction */
    char icode;  /* instruction code (stored as a char in this register) */
    int Cnd;     /* condition outcome computed in the execute stage */
    int valE;    /* ALU result */
    int valA;    /* operand A passed through */
    int dstE;    /* destination register for valE */
    int dstM;    /* destination register for the memory result */
};
/* Condition codes updated by arithmetic/logic instructions. */
struct CC {
    int ZF; /* zero flag */
    int SF; /* sign flag */
    int OF; /* overflow flag */
};
/* Pipeline register between the memory and write-back stages. */
struct M_WReg {
    int bubble; /* non-zero: treat the contents as an inserted bubble */
    int stall;  /* non-zero: hold the current contents */
    int stat;   /* status code carried with the instruction */
    int icode;  /* instruction code */
    int valE;   /* ALU result */
    int valM;   /* value read from data memory */
    int dstE;   /* destination register for valE, 0xF for none */
    int dstM;   /* destination register for valM, 0xF for none */
};
/* Fields of the instruction currently being fetched. */
struct Inst {
    int icode;  /* instruction code (high nibble of the first byte) */
    int ifunct; /* function code (low nibble of the first byte) */
    int rA_id;  /* register A id, 0xF when absent */
    int rB_id;  /* register B id, 0xF when absent */
    int valC;   /* immediate constant */
    int valP;   /* address of the following instruction */
    int len;    /* encoded length of the instruction in bytes */
};
struct Inst inst = { 0,0,0,0,0,0,0 };
struct F_DReg f_dReg = { 0,0,1,0,0,0,0,0,0 };
struct D_EReg d_eReg = { 0,0,1,0,0,0,0,0,0,0,0,0 };
struct E_MReg e_mReg = { 0,0,1,0,0,0,0,0,0 };
struct M_WReg m_wReg = { 0,0,1,0,0,0,0xF,0xF };
struct F_DReg f_dRegNew = { 0,0,1,0,0,0,0,0,0 };
struct D_EReg d_eRegNew = { 0,0,1,0,0,0,0,0,0,0,0,0 };
struct E_MReg e_mRegNew = { 0,0,1,0,0,0,0,0,0 };
struct M_WReg m_wRegNew = { 0,0,1,0,0,0,0xF,0xF };
int Register_file[16] = { 0 };
int Datamemory[32768] = { 0 };
struct CC cc = { 0 };
struct FReg fReg = { 0 };
struct SelectPC selectpc = { 0,1,0,0,0,0 };
struct FwdB fwdb = { 0 };
struct SelectA_and_FwdA sel_fwda = { 0 };
/*
1. 用暂停来避免冒险
   只要一条指令的源操作数会被流水线后面某个阶段中的指令产生,处理器就会通过将指令阻塞在解码阶段来避免数据冒险。
2. 用转发来避免冒险 -----> 不需要暂停;但与存储器读有关的指令,其数据是在流水线较后面才产生的,仍需暂停。
   如 mr 0(%edx),%eax 指令后紧接着 add %ebx,%eax:add 的 %eax 需要在译码阶段得到值,而 mr 最早到访存阶段才能转发,
   显然已经来不及,所以不得不先暂停后转发。
3. 暂停 + 转发
   将解码阶段中的指令暂停一个周期,导致执行阶段中插入一个气泡,同时暂停,使得 F 和 D 状态不变。

需要解决的控制逻辑(插入气泡):
1. 加载/使用冒险:1 个气泡。在一条从存储器中读出一个值的指令和一条使用该值的指令之间,流水线必须暂停一个周期。
2. ret:3 个气泡。流水线必须暂停,直到 ret 指令到达写回阶段。
3. 预测错误的分支:2 个气泡。在分支逻辑发现不应该选择分支之前(执行阶段才发现),分支目标处的几条指令已经进入流水线了,
   必须从流水线中去掉这些指令。此时第一条指令已经执行到译码阶段,第二条指令刚开始取指。

具体实现:
定义 F_DReg 和 F_DRegNew、D_EReg 和 D_ERegNew、E_MReg 和 E_MRegNew、M_WReg 和 M_WRegNew。
执行过程中:
  F: 输入 pc,输出 F_DRegNew;正常情况下需要更新,将 F_DRegNew ----> F_DReg,否则不变
  D: 输入 F_DReg,输出 D_ERegNew
  E: 输入 D_EReg,输出 E_MRegNew
  M: 输入 E_MReg,输出 M_WRegNew
  W: 输入 M_WReg
每次执行完需要判断:需要暂停,就使用原状态 F_DReg;正常情况,每次执行完一个周期需要更新 F_DReg = F_DRegNew
*/
//分支预测
/*
 * PC selection: overrides the predicted PC in selectpc.predpc when the
 * instruction recorded in the write-back slot shows the prediction is wrong.
 *
 *  - ret (icode 9): the next PC is the value read from memory (W_valM).
 *  - jxx (icode 7) whose condition evaluated to 0: the next PC is M_valA.
 *
 * In every other case selectpc.predpc is left untouched.
 */
void Select_PC() {
    if (selectpc.W_icode == 9) {
        /* ret takes effect: continue at the return address */
        printf("ret生效\n");
        selectpc.predpc = selectpc.W_valM;
        printf("selectpc.predpc =%d \n", selectpc.predpc);
        return;
    }

    if (selectpc.W_icode == 7 && selectpc.cnd == 0) {
        /* the jump was predicted taken but the condition failed */
        selectpc.predpc = selectpc.M_valA;
    }
}
//取指阶段
/*
 * Predicts the address of the next instruction.
 *
 * icode: instruction code of the instruction just fetched
 * valc:  its constant field (the target for jxx/call)
 * valp:  address of the instruction that follows it
 *
 * Returns valc for jxx (7) and call (8), i.e. jumps are predicted taken,
 * and valp for every other instruction.
 */
int Predict_PC(int icode, int valc, int valp) {
    switch (icode) {
    case 7:
    case 8:
        return valc;
    default:
        return valp;
    }
}
/*
 * Computes the address of the next sequential instruction.
 *
 * len: length in bytes of the current instruction
 * pc:  address of the current instruction
 *
 * Returns pc advanced by len.
 */
int PC_inc(int len, int pc) {
    int next_pc = len;
    next_pc += pc;
    return next_pc;
}
/*
 * Assembles the little-endian constant stored in instruction memory at byte
 * offsets valC_start..valC_end (inclusive).
 *
 * Byte k of the field contributes bits 8*k..8*k+7. (The previous version
 * shifted by 2*k, which is only correct when every byte after the first is
 * zero.) The value is accumulated in 64 bits so that shifting the upper bytes
 * of an 8-byte field is well defined, then narrowed to the int used by the
 * rest of the simulator.
 *
 * At most 8 bytes are consumed; offsets outside PC_memory are treated as the
 * end of the field.
 */
int iValC(int valC_start, int valC_end) {
    uint64_t value = 0;
    unsigned shift = 0;

    for (int i = valC_start; i <= valC_end && shift < 64; i++, shift += 8) {
        if (i < 0 || i >= (int)sizeof PC_memory) {
            break;
        }
        value |= (uint64_t)PC_memory[i] << shift;
    }

    /* Keep the low 32 bits. The unsigned-to-int conversion is
     * implementation-defined for values above INT_MAX; on two's-complement
     * targets it yields the expected negative number. */
    return (int)(uint32_t)value;
}
void Align(int pc,int Need_regids, int Need_valC) {int valC_start;int valC_end;if (Need_regids == 0) {inst.rA_id = 0xF;inst.rB_id = 0xF;if (Need_valC) { //无寄存器、有常数 如jxx、callvalC_start = pc + 1;valC_end = pc + 8;inst.len = 1 + (valC_end - pc);inst.valC = iValC(valC_start, valC_end);}else { //无寄存器、无常数 如halt、nop、retinst.len = 1;}}if (Need_regids == 1) {inst.rA_id = 0xF;inst.rB_id = PC_memory[pc + 1] & 0x0F;if (Need_valC) { //只有寄存器rB、有常数 如irmovqvalC_start = pc + 2;valC_end = pc + 9;inst.len = 1 + (valC_end - pc);inst.valC = iValC(valC_start, valC_end);}//只有寄存器rB、无常数 此情况无}if (Need_regids == 2) {inst.rA_id = PC_memory[pc + 1] >> 4;inst.rB_id = 0xF;//只有寄存器rA、有常数 此情况无if (!Need_valC) { //有寄存器rA、无常数 如pushq、popqinst.len = 2;}}if (Need_regids == 3) {inst.rA_id = PC_memory[pc + 1] >> 4;inst.rB_id = PC_memory[pc + 1] & 0x0F;if (Need_valC) { //都有寄存器、有常数 如rrmovq、rmmovq、mrmovqvalC_start = pc + 2;valC_end = pc + 9;inst.len = 1 + (valC_end - pc);inst.valC = iValC(valC_start, valC_end);}else {// 都有寄存器、无常数 如addq、subq、andq、xorq、rrmovq、rrmovq、cmovle、cmovl、cmove、cmovne、cmovge、cmovginst.len = 2;}}}
void Split(int pc) {int Need_valC;int Need_regids; //0:都为空;1:rA为空;2:rB为空;3:都不为空uint8_t code = PC_memory[pc];uint8_t reg = PC_memory[pc + 1];inst.icode = (uint8_t)(code >> 4);inst.ifunct = code & 0x0F;if (inst.icode == 0 && inst.ifunct == 0) {Need_regids = 0;Need_valC = 0;Align(pc,Need_regids, Need_valC);}if (inst.icode == 1 && inst.ifunct == 0) {Need_valC = 0;Need_regids = 0;Align(pc,Need_regids, Need_valC);}if (inst.icode == 2) {Need_valC = 0;Need_regids = 3;Align(pc,Need_regids, Need_valC);}if (inst.icode == 3 && inst.ifunct == 0) {Need_valC = 1;Need_regids = 1;Align(pc, Need_regids, Need_valC);}if (inst.icode == 4 && inst.ifunct == 0) {Need_valC = 1;Need_regids = 3;Align(pc,Need_regids, Need_valC);}if (inst.icode == 5 && inst.ifunct == 0) {Need_valC = 1;Need_regids = 3;Align(pc, Need_regids, Need_valC);}if (inst.icode == 6) {Need_valC = 0;Need_regids = 3;Align(pc, Need_regids, Need_valC);}if (inst.icode == 7) {Need_valC = 1;Need_regids = 0;Align(pc,Need_regids, Need_valC);}if (inst.icode == 8 && inst.ifunct == 0) {Need_valC = 1;Need_regids = 0;Align(pc,Need_regids, Need_valC);}if (inst.icode == 9 && inst.ifunct == 0) {Need_valC = 0;Need_regids = 0;Align(pc,Need_regids, Need_valC);}if (inst.icode == 0xA && inst.ifunct == 0) {Need_valC = 0;Need_regids = 2;Align(pc, Need_regids, Need_valC);}if (inst.icode == 0xB && inst.ifunct == 0) {Need_valC = 0;Need_regids = 2;Align(pc, Need_regids, Need_valC);}if (!(inst.icode >= 0 && inst.icode <= 11)) {f_dReg.stat = 0;}
}
/*
 * Fetch stage: decodes the instruction at `pc`, predicts the next PC and
 * fills the next-state F/D register.
 *
 * The prediction is stored both in fReg.predPC and in selectpc.predpc.
 * f_dRegNew receives the instruction fields with bubble and stall cleared;
 * its stat field is not written here.
 */
void Fetch(int pc) {
    int predicted;

    Split(pc);
    inst.valP = PC_inc(inst.len, pc);

    predicted = Predict_PC(inst.icode, inst.valC, inst.valP);
    fReg.predPC = predicted;
    selectpc.predpc = predicted;

    /* control bits */
    f_dRegNew.bubble = 0;
    f_dRegNew.stall = 0;

    /* instruction fields */
    f_dRegNew.icode = inst.icode;
    f_dRegNew.ifuc = inst.ifunct;
    f_dRegNew.rA = inst.rA_id;
    f_dRegNew.rB = inst.rB_id;
    f_dRegNew.valC = inst.valC;
    f_dRegNew.valP = inst.valP;
}
/* Decode stage */
/*
1. 不是所有指令都需要数据转发,而且数据转发的阶段也不同(不需要回写寄存器的指令,不需要转发):
   执行阶段(ALU 计算结果)valE: 3 irmov、6 opq、2 rrmov(有条件)、A push、B pop、8 call、9 ret(注意:回写 valE 到 SP 寄存器)
   访存前(对寄存器写入端口 E 还没有进行写入的数据)valE: 就是执行阶段的指令
   访存后(内存的输出数据)valM: 5 mrmov(有点晚)、B pop(从 Datamemory 中读出数据后才回写)
   写回阶段(对寄存器写入端口 E 还没进行写入的数据)valE: 就是执行阶段的指令;
           (对寄存器写入端口 M 还没有进行写入的数据)valM: 就是访存后指令
2. rB 寄存器作为源寄存器时,需要与 rB 作为目的寄存器匹配验证的指令很少。有指令:
   opq:   rA,rB->rB
   mrmov: D(rB),rA
3. rA 寄存器作为源寄存器时,需要与 rA 作为目的寄存器匹配验证。rA 源寄存器有指令:
   特别注意:jxx 和 call 指令要经过 selectA,选出的是 valP,不是来自寄存器的 rA 值,所以不需要匹配验证;mr 指令也不需要
*/
//数据转发
/*
 * Selects operand A for the decode stage and stores it in sel_fwda.valA.
 *
 * When sel_fwda.srcA is 0xF (no source register) the result is sel_fwda.valP.
 * Otherwise the first pending destination that matches srcA supplies the
 * value, checked in this order: E_dstE, M_dstE, M_dstM, W_dstE, W_dstM.
 * If none matches, the register-file value sel_fwda.register_A is used.
 *
 * A match on M_dstM forwards the value read from memory (M_valM); the
 * previous version forwarded M_valE there.
 *
 * NOTE(review): when one instruction names the same register as both dstE
 * and dstM, the dstE source is preferred here -- confirm that this is the
 * intended priority.
 */
void SelectA_and_FwdA() {
    const int src = sel_fwda.srcA;

    if (src == 0xF) {
        sel_fwda.valA = sel_fwda.valP;
        return;
    }

    if (src == sel_fwda.E_dstE) {
        sel_fwda.valA = sel_fwda.E_valE;
    } else if (src == sel_fwda.M_dstE) {
        sel_fwda.valA = sel_fwda.M_valE;
    } else if (src == sel_fwda.M_dstM) {
        sel_fwda.valA = sel_fwda.M_valM;
    } else if (src == sel_fwda.W_dstE) {
        sel_fwda.valA = sel_fwda.W_valE;
    } else if (src == sel_fwda.W_dstM) {
        sel_fwda.valA = sel_fwda.W_valM;
    } else {
        sel_fwda.valA = sel_fwda.register_A;
    }
}
/*
 * Selects operand B for the decode stage and stores it in fwdb.valB.
 *
 * The first pending destination that matches fwdb.srcB supplies the value,
 * checked in this order: E_dstE, M_dstE, M_dstM, W_dstE, W_dstM. If none
 * matches, the register-file value fwdb.register_B is used.
 *
 * A match on M_dstM forwards the value read from memory (M_valM); the
 * previous version forwarded M_valE there.
 */
void FwdB() {
    const int src = fwdb.srcB;

    if (src == fwdb.E_dstE) {
        fwdb.valB = fwdb.E_valE;
    } else if (src == fwdb.M_dstE) {
        fwdb.valB = fwdb.M_valE;
    } else if (src == fwdb.M_dstM) {
        fwdb.valB = fwdb.M_valM;
    } else if (src == fwdb.W_dstE) {
        fwdb.valB = fwdb.W_valE;
    } else if (src == fwdb.W_dstM) {
        fwdb.valB = fwdb.W_valM;
    } else {
        fwdb.valB = fwdb.register_B;
    }
}
void Decode(int Register_file[]) {if (f_dReg.stall) {printf("Decode: Stall\n");//调整pc=pc-inst.leninst.valP = inst.valP - inst.len;printf("跳出译码阶段\n");return;}if (f_dReg.bubble) {printf("Decode: Bubble\n");d_eRegNew.bubble = 1;}printf("进入译码阶段\n");d_eRegNew.stall = 0;d_eRegNew.stat = f_dReg.stat;d_eRegNew.icode = f_dReg.icode;d_eRegNew.valC = f_dReg.valC;d_eRegNew.ifuc = f_dReg.ifuc;if (f_dReg.icode == 0 || f_dReg.icode == 1) {;}if (f_dReg.icode == 2) { //rrd_eRegNew.srcA = f_dReg.rA;sel_fwda.srcA = f_dReg.rA;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA(&sel_fwda);d_eRegNew.valA = sel_fwda.valA;d_eRegNew.srcB = f_dReg.rB;d_eRegNew.valB = 0;d_eRegNew.dstE = f_dReg.rB; //将结果valE写回寄存器rB}if (f_dReg.icode == 3) { //ird_eRegNew.srcA = f_dReg.rA;d_eRegNew.srcB = f_dReg.rB;d_eRegNew.valB = 0;d_eRegNew.dstE = f_dReg.rB; //将ALU结果valE写回寄存器rB}if (f_dReg.icode == 4) { //rm rA,D(rB) 不需要写回寄存器d_eRegNew.srcA = f_dReg.rA;sel_fwda.srcA = f_dReg.rA;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA();d_eRegNew.valA = sel_fwda.valA;d_eRegNew.srcB = f_dReg.rB; //对rB去寄存器取基地址fwdb.srcB = f_dReg.rB;fwdb.register_B = Register_file[f_dReg.rB];FwdB();d_eRegNew.valB = fwdb.valB;}if (f_dReg.icode == 5) { //mr D(rB),rA 需要验证d_eRegNew.srcB = f_dReg.rB; //对rB去寄存器取基地址fwdb.srcB = f_dReg.rB;fwdb.register_B = Register_file[f_dReg.rB];FwdB(&fwdb);d_eRegNew.valB = fwdb.valB;d_eRegNew.dstM = f_dReg.rA; //将Datamemory结果valM写回寄存器rA}if (f_dReg.icode == 6) { //OPq rA,rB 需要验证sel_fwda.srcA = f_dReg.rA;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA(&sel_fwda);d_eRegNew.valA = sel_fwda.valA;fwdb.srcB = f_dReg.rB;fwdb.register_B = Register_file[f_dReg.rB];FwdB(&fwdb);d_eRegNew.valB = fwdb.valB;d_eRegNew.dstE = f_dReg.rB; //将ALU结果valE写回寄存器rB}if (f_dReg.icode == 7) { //jxx sel_fwda.srcA = f_dReg.rA;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA();d_eRegNew.valA = sel_fwda.valA;}if (f_dReg.icode == 8) { //calld_eRegNew.srcA = 4;sel_fwda.srcA = 
4;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA();d_eRegNew.valA = sel_fwda.valA;fwdb.srcB = 4;fwdb.register_B = Register_file[f_dReg.rB];FwdB();d_eRegNew.valB = fwdb.valB;d_eRegNew.dstE = 4; //将ALU结果valE写回寄存器sp}if (f_dReg.icode == 9) { //retd_eRegNew.srcA = 4; //对sp去寄存器取数据 sel_fwda.srcA = 4;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA();d_eRegNew.valA = sel_fwda.valA;d_eRegNew.srcB = 4;fwdb.srcB = 4;fwdb.register_B = Register_file[f_dReg.rB];FwdB();d_eRegNew.valB = fwdb.valB;d_eRegNew.dstE = 4; //将ALU结果valE写回寄存器sp}if (f_dReg.icode == 0xA) {//pushd_eRegNew.srcA = f_dReg.rA;sel_fwda.srcA = f_dReg.rA; //对rA去寄存器取数据sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA();d_eRegNew.valA = sel_fwda.valA;d_eRegNew.srcB = 4;fwdb.srcB = 4;fwdb.register_B = Register_file[f_dReg.rB];FwdB();d_eRegNew.valB = fwdb.valB;d_eRegNew.dstE = 4; //将ALU结果valE写回寄存器sp}if (f_dReg.icode == 0xB) {//popqd_eRegNew.srcA = 4; //对sp去寄存器取数据 sel_fwda.srcA = 4;sel_fwda.register_A = Register_file[f_dReg.rA];SelectA_and_FwdA();d_eRegNew.valA = sel_fwda.valA;d_eRegNew.srcB = 4; //对sp去寄存器取数据fwdb.srcB = 4;fwdb.register_B = Register_file[f_dReg.rB];FwdB();d_eRegNew.valB = fwdb.valB;d_eRegNew.dstE = 4; //将ALU结果valE写回寄存器spd_eRegNew.dstM = f_dReg.rA; //将Datamemory结果valM写回寄存器rA}
}
//执行阶段
void setCC(int a, int b, int result, int op) {if (op == 1) { //OF=1 两个同符号数相加(正数+正数 或 负数+负数),结果符号与其相反。if ((a >= 0 && b >= 0) && result < 0) {cc.OF = 1;}else if ((a < 0 && b < 0) && result >= 0) {cc.OF = 1;}else {cc.OF = 0;}}if (op == 2) { //OF=1 两数符号相反(正数-负数,或负数-正数),而结果符号与减数相同。if ((a >= 0 && b < 0) && result >= 0) {cc.OF = 1;}else if ((a < 0 && b >= 0) && result < 0) {cc.OF = 1;}else {cc.OF = 0;}}if (op == 3 || op == 4) {cc.OF = 0;}if (result < 0) {cc.SF = 1;}else {cc.SF = 0;}if (result) {cc.ZF = 1;}else {cc.ZF = 0;}
}
/*
 * Arithmetic/logic unit.
 *
 * alu_a, alu_b: operands (the execute stage passes valA/valC as alu_a and
 *               valB as alu_b)
 * op:           1 = add, 2 = sub, 3 = and, anything else = xor
 *
 * Returns the result of the operation.
 *
 * Fixes relative to the previous version:
 *  - op 3 now selects AND; the second branch re-tested op == 2, so AND was
 *    unreachable and op 3 fell through to XOR.
 *  - Subtraction computes alu_b - alu_a, i.e. `subq rA, rB` yields rB - rA,
 *    which is also the order the overflow rule in setCC() assumes.
 *  - Add and sub are carried out on unsigned values so that overflow wraps
 *    around instead of being undefined behaviour.
 */
int ALU(int alu_a, int alu_b, int op) {
    const unsigned ua = (unsigned)alu_a;
    const unsigned ub = (unsigned)alu_b;

    switch (op) {
    case 1:
        return (int)(ua + ub);
    case 2:
        return (int)(ub - ua);
    case 3:
        return alu_a & alu_b;
    default:
        return alu_a ^ alu_b;
    }
}
/*
 * Evaluates the branch/move condition selected by `ifunc` against the
 * current condition codes in `cc`.
 *
 *   1 le: (SF ^ OF) | ZF      4 ne: !ZF
 *   2 l:   SF ^ OF            5 ge: !(SF ^ OF)
 *   3 e:   ZF                 6 g:  !(SF ^ OF) && !ZF
 *
 * Any other ifunc is unconditional and returns 1.
 *
 * The negated conditions use logical NOT. The previous version used bitwise
 * `~`, which turns both 0 and 1 into non-zero values, so ne/ge/g were always
 * treated as true.
 */
int Cond(int ifunc) {
    const int less = cc.SF ^ cc.OF;

    switch (ifunc) {
    case 1:
        return less | cc.ZF;
    case 2:
        return less;
    case 3:
        return cc.ZF;
    case 4:
        return !cc.ZF;
    case 5:
        return !less;
    case 6:
        return !less && !cc.ZF;
    default:
        return 1;
    }
}
void Excecute() {int ALU_A;int ALU_B;if (d_eReg.stall) {printf("Execute: Stall\n");e_mRegNew.bubble = 1; \return;}if (d_eReg.bubble) {printf("Execute: Bubble\n");e_mRegNew.bubble = 1;}e_mRegNew.stat = d_eReg.stat;e_mRegNew.icode = d_eReg.icode;e_mRegNew.valA = d_eReg.valA;e_mRegNew.dstM = d_eReg.dstM;int op = 1;//1:add,2:sub,3:and,4:xorif (d_eReg.icode == 0) {// 不提供数据转发e_mRegNew.dstE = 0xF;}if (d_eReg.icode == 1) {// 不提供数据转发e_mRegNew.dstE = 0xF;}if (d_eReg.icode == 2) {//rr 如果条件满足才提供数据转发ALU_A = d_eReg.valA;ALU_B = d_eReg.valB;op = 1;e_mRegNew.valE = ALU(ALU_A, ALU_B, op);fwdb.E_valE = ALU(ALU_A, ALU_B, op);sel_fwda.E_valE = ALU(ALU_A, ALU_B, op);if (d_eReg.ifuc == 0) {e_mRegNew.Cnd = 1;}if (d_eReg.ifuc == 1) {e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 2) {e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 3) {e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 4) {e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 5) {e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 6) {e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (e_mRegNew.Cnd) {e_mRegNew.dstE = d_eReg.dstE;fwdb.E_dstE = d_eReg.dstE;sel_fwda.E_dstE = d_eReg.dstE;}else {e_mRegNew.dstE = 0xF;fwdb.E_dstE = 0xF;sel_fwda.E_dstE = 0xF;}}if (d_eReg.icode == 3) {//ir 提供数据转发op = 1;ALU_A = d_eReg.valC;ALU_B = d_eReg.valB;printf("执行立即数加法\n");e_mRegNew.valE = ALU(ALU_A, ALU_B, op);fwdb.E_valE = ALU(ALU_A, ALU_B, op);sel_fwda.E_valE = ALU(ALU_A, ALU_B, op);fwdb.E_dstE = d_eReg.dstE;sel_fwda.E_dstE = d_eReg.dstE;e_mRegNew.dstE = d_eReg.dstE;}if (d_eReg.icode == 4) {//rm 不提供数据转发op = 1;ALU_A = d_eReg.valC;ALU_B = d_eReg.valB;e_mRegNew.valE = ALU(ALU_A, ALU_B, op);e_mRegNew.dstE = 0xF;}if (d_eReg.icode == 5) {//mr 只有访存后才数据转发op = 1;ALU_A = d_eReg.valC;ALU_B = d_eReg.valB;e_mRegNew.valE = ALU(ALU_A, ALU_B, op);e_mRegNew.dstE = d_eReg.dstE;f_dRegNew.stall = 2;d_eRegNew.stall = 2;e_mRegNew.bubble = 1;}if (d_eReg.icode == 6) {//Opq rr 提供数据转发ALU_A = d_eReg.valA;ALU_B = d_eReg.valB;e_mRegNew.dstE = d_eReg.dstE;if 
(d_eReg.ifuc == 0) {op = 1;}if (d_eReg.ifuc == 1) {op = 2;}if (d_eReg.ifuc == 2) {op = 3;}if (d_eReg.ifuc == 3) {op = 4;}e_mRegNew.valE = ALU(ALU_A, ALU_B, op);fwdb.E_valE = ALU(ALU_A, ALU_B, op);fwdb.E_dstE = d_eReg.dstE;fwdb.E_valE = ALU(ALU_A, ALU_B, op);fwdb.E_dstE = d_eReg.dstE;sel_fwda.E_valE = ALU(ALU_A, ALU_B, op);sel_fwda.E_dstE = d_eReg.dstE;setCC(ALU_A, ALU_B, e_mRegNew.valE, op);}if (d_eReg.icode == 7) {//jxx 不提供数据转发if (d_eReg.ifuc == 0) { //jmpe_mRegNew.Cnd = 1;}if (d_eReg.ifuc == 1) { //jle (SF ^ OF) | ZFe_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 2) { //jl SF ^ OFe_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 3) { //je ZFe_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 4) { //jne ~ZFe_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 5) { //jge ~(SF ^ OF)e_mRegNew.Cnd = Cond(d_eReg.ifuc);}if (d_eReg.ifuc == 6) { //jg ~(SF ^ OF) & ~ZFe_mRegNew.Cnd = Cond(d_eReg.ifuc);}f_dRegNew.bubble = 1;d_eRegNew.bubble = 1;}if (d_eReg.icode == 8 || d_eReg.icode == 0xA) { //call push valB=R[%rsp] 提供数组转发ALU_B = d_eReg.valB;ALU_A = -4; //-8op = 1;e_mRegNew.valE = ALU(ALU_A, ALU_B, op);e_mRegNew.dstE = d_eReg.dstE;fwdb.E_valE = ALU(ALU_A, ALU_B, op);fwdb.E_dstE = d_eReg.dstE;sel_fwda.E_valE = ALU(ALU_A, ALU_B, op);sel_fwda.E_dstE = d_eReg.dstE;}if (d_eReg.icode == 9) { //ret valB=R[%rsp] 提供数组转发给fwdaALU_B = d_eReg.valB;ALU_A = 4;op = 1;e_mRegNew.valE = ALU(ALU_A, ALU_B, op);e_mRegNew.dstE = d_eReg.dstE;fwdb.E_valE = ALU(ALU_A, ALU_B, op);fwdb.E_dstE = d_eReg.dstE;sel_fwda.E_valE = ALU(ALU_A, ALU_B, op);sel_fwda.E_dstE = d_eReg.dstE;f_dRegNew.bubble = 1;d_eRegNew.bubble = 1;e_mRegNew.bubble = 1;}if (d_eReg.icode == 0xB) { //pop valB=R[%rsp] 提供数组转发 1.执行阶段valB=R[%rsp] 2.访存后 valMALU_B = d_eReg.valB;ALU_A = 4;op = 1;e_mRegNew.valE = ALU(ALU_A, ALU_B, op);e_mRegNew.dstE = d_eReg.dstE;fwdb.E_valE = ALU(ALU_A, ALU_B, op);fwdb.E_dstE = d_eReg.dstE;sel_fwda.E_valE = ALU(ALU_A, ALU_B, op);sel_fwda.E_dstE = d_eReg.dstE;}selectpc.cnd = 
e_mRegNew.Cnd;selectpc.M_icode = e_mRegNew.icode;e_mRegNew.bubble = 0;e_mRegNew.stall = 0;
}
//访存阶段
void DataMemory(int addr, int Data, int* Datamemory, int MemoryControl) {if (MemoryControl == 1) {Address.Set_index = addr &0x0F;Address.Tag = addr & 0xFF;if (Set[Address.Set_index].valid && Set[Address.Set_index].Tag == Address.Tag) {m_wRegNew.valM = Set[Address.Set_index].Cache_block;}else {Set[Address.Set_index].valid = 1;Set[Address.Set_index].Tag = Address.Tag;Set[Address.Set_index].Cache_block = Datamemory[addr];m_wRegNew.valM = Set[Address.Set_index].Cache_block;}}else if (MemoryControl == 2) { //写穿透Address.Set_index = addr &0x0F;Address.Tag = addr &0xFF;Datamemory[addr] = Data;Set[Address.Set_index].valid = 1;Set[Address.Set_index].Tag = Address.Tag;Set[Address.Set_index].Cache_block = Data;}
}
void MemoryAccess() {int MemoryControl = -1; //0:不读不写;1:读有效,输入地址输出valM;2:写有效,输入地址数据int Addr = -1;int Data = -1;if (e_mReg.stall) {printf("Execute: Stall\n");return;}if (e_mReg.bubble) {printf("Execute: Bubble\n");m_wRegNew.bubble = 1;}m_wRegNew.icode = e_mReg.icode;m_wRegNew.stat = e_mReg.stat;m_wRegNew.valE = e_mReg.valE;m_wRegNew.dstE = e_mReg.dstE;m_wRegNew.dstM = e_mReg.dstM;if (e_mReg.icode == 0 || e_mReg.icode == 1 || e_mReg.icode == 2 || e_mReg.icode == 3) {MemoryControl = 0;}if (e_mReg.icode == 4 || e_mReg.icode == 0xA) { //rm push M[valE] = valAMemoryControl = 2;Addr = e_mReg.valE;Data = e_mReg.valA;}if (e_mReg.icode == 8) { //call M[valE] = valPMemoryControl = 2;Addr = e_mReg.valE;// Data = e_mReg.valA;Data = f_dReg.valP;}if (e_mReg.icode == 5) { //mrMemoryControl = 1;Addr = e_mReg.valE;}if (e_mReg.icode == 0xB) { //pop M[valE] = valAMemoryControl = 1;Addr = e_mReg.valA;}if (e_mReg.icode == 9 ) { //ret valP = M[valE]MemoryControl = 1;Addr = e_mReg.valA;}//数据传递if (e_mReg.icode == 2 || e_mReg.icode == 3 || e_mReg.icode == 6 || e_mReg.icode == 8 || e_mReg.icode == 9|| e_mReg.icode == 0xA || e_mReg.icode == 0xB) {fwdb.M_valE = e_mReg.valE;fwdb.M_dstE = e_mReg.dstE;sel_fwda.M_valE = e_mReg.valE;sel_fwda.M_dstE = e_mReg.dstE;}DataMemory(Addr, Data, Datamemory, MemoryControl);if (e_mReg.icode == 5 || e_mReg.icode == 0xB) {fwdb.M_valM = m_wRegNew.valM;fwdb.M_dstM = e_mReg.dstM;sel_fwda.M_valM = m_wRegNew.valM;sel_fwda.M_dstM = e_mReg.dstM;}selectpc.M_valA = e_mReg.valA;m_wRegNew.bubble = 0;m_wRegNew.stall = 0;
}
//写回阶段
/*
 * Write-back stage: commits the results held in m_wReg to the register file.
 *
 * When m_wReg.stall or m_wReg.bubble is set, a message is printed and
 * nothing else happens. Otherwise valE is written to dstE and valM to dstM
 * (each skipped when the id is 0xF; dstM is written last), the write-back
 * forwarding slots are refreshed and selectpc.W_valM / selectpc.W_icode are
 * updated for the PC selection logic.
 *
 * The forwarding data is stored in the W_* slots. The previous version wrote
 * it into the M_* slots, which belong to the memory stage, and left the W_*
 * slots permanently unset.
 */
void writeBack() {
    if (m_wReg.stall) {
        printf("WriteBack: stall\n");
        return;
    }
    if (m_wReg.bubble) {
        printf("WriteBack: Bubble\n");
        return;
    }

    if (m_wReg.dstE != 0xF) {
        Register_file[m_wReg.dstE] = m_wReg.valE;
    }
    if (m_wReg.dstM != 0xF) {
        Register_file[m_wReg.dstM] = m_wReg.valM;
    }

    switch (m_wReg.icode) {
    case 0x2:
    case 0x3:
    case 0x6:
    case 0x8:
    case 0x9:
    case 0xA:
    case 0xB:
        fwdb.W_valE = m_wReg.valE;
        fwdb.W_dstE = m_wReg.dstE;
        sel_fwda.W_valE = m_wReg.valE;
        sel_fwda.W_dstE = m_wReg.dstE;
        break;
    default:
        break;
    }

    if (m_wReg.icode == 5 || m_wReg.icode == 0xB) {
        fwdb.W_valM = m_wReg.valM;
        fwdb.W_dstM = m_wReg.dstM;
        sel_fwda.W_valM = m_wReg.valM;
        sel_fwda.W_dstM = m_wReg.dstM;
    }

    selectpc.W_valM = m_wReg.valM;
    printf("selectpc.W_valM=%d\n", selectpc.W_valM);
    selectpc.W_icode = m_wReg.icode;
    printf("selectpc.W_icode=%d\n", selectpc.W_icode);
}
/* Marks every one of the 16 cache lines as invalid; tags and data are left
 * as they are. */
void initCache() {
    int line = 0;

    while (line < 16) {
        Set[line].valid = 0;
        line++;
    }
}
/* Debug helper: prints the data word of every valid cache line, in index
 * order. */
void test_cache() {
    int line;

    for (line = 0; line < 16; line++) {
        if (!Set[line].valid) {
            continue;
        }
        printf("Set[%d].Cache_block:%d\n", line, Set[line].Cache_block);
    }
}
//主函数
int main() {int i = 11;initCache();while (i--) {printf("第%d条指令开始执行", 11 - i);printf("开始预测\n");Select_PC();printf("PC = %d\n", selectpc.predpc);Fetch(selectpc.predpc);if (!f_dReg.stall) {f_dReg = f_dRegNew;}else {f_dReg.stall--;}Decode(&Register_file);if (!d_eReg.stall) {d_eReg = d_eRegNew;}else {d_eReg.stall--;}Excecute();e_mReg = e_mRegNew;MemoryAccess();m_wReg = m_wRegNew;writeBack();printf("Datamemory[%d]=%d\n", Register_file[4], Datamemory[Register_file[4]]);memset(&f_dRegNew, 0, sizeof(f_dRegNew));memset(&d_eRegNew, 0, sizeof(d_eRegNew));memset(&e_mRegNew, 0, sizeof(e_mRegNew));memset(&m_wRegNew, 0, sizeof(m_wRegNew));}printf("Datamemory[124]=%d", Datamemory[124]);//test_cache();return 0;
}