SM4算法是一个分组加密算法,消息分组和密钥长度均为128 bit,主要包括密钥扩展算法、加密算法和解密算法。算法采用32轮非线性迭代结构,加密和解密算法结构相同,只是轮密钥使用顺序相反。针对SM4密码算法的硬件设计,分为循环架构和流水线架构两种。循环架构面向资源节约优化,使SM4密码算法能够部署在资源受限的硬件设备上;流水线架构面向加密性能优化,使SM4密码算法能够部署在对吞吐量要求较高的场景中。这里采用循环架构,即不对32轮迭代进行展开,每个时钟周期只计算一轮,因此每个消息分组的32轮迭代需要32个时钟周期;运算结果再用4个时钟周期按32 bit逐字输出。
具体的 Verilog 代码如下:
// SM4 block cipher, iterative architecture: one round is computed per clock.
//
// Ports
//   clk      : clock; all registers update on the rising edge.
//   rst_n    : asynchronous reset, active low.
//   cmd      : command, acted upon only while the FSM is in IDLE
//              2'b00 pause, 2'b01 key expansion, 2'b10 encrypt, 2'b11 decrypt.
//   sm4_din  : 128-bit input. It is the master key for cmd 01 and the data
//              block for cmd 10 / 11; it is captured on the clock edge that
//              accepts the command.
//   sm4_dout : 32-bit slice of the result register, most significant word first.
//   res_vld  : high for one cycle when key expansion completes, and high for
//              the four cycles in which a result is shifted out on sm4_dout.
module sm4_top(
    input          clk,
    input          rst_n,
    input  [1:0]   cmd,       // 00:pause 01:key_exp 10:encrypt 11:decrypt
    input  [127:0] sm4_din,
    output [31:0]  sm4_dout,
    output reg     res_vld
);
    localparam IDLE    = 3'd0;
    localparam KEYEXP  = 3'd1;
    localparam ENCRYPT = 3'd2;
    localparam DECRYPT = 3'd3;
    localparam STRES   = 3'd4;  // last round + word reversal into the result register
    localparam OUTPUT  = 3'd5;  // shift the result out, 32 bits per cycle

    reg  [2:0]   state_r;
    reg  [1:0]   counter;          // counts the four output words
    reg  [4:0]   ikey_en, ikey_de; // round-key read index for encrypt (up) / decrypt (down)
    reg  [127:0] sm4_round_din;    // working state; doubles as the output shift register
    wire [127:0] sm4_round_dout;
    wire [31:0]  ikey;
    wire         key_exp_done;

    wire state_is_idle    = (state_r == IDLE);
    wire state_is_decrypt = (state_r == DECRYPT);

    wire key_exp_start = state_is_idle & (cmd == 2'b01);

    // Round-key index presented to the key RAM (the RAM registers it, so the
    // key for a round must be requested one cycle ahead).
    // cmd is only meaningful while idle: without the state_is_idle qualifier a
    // stray cmd==2'b11 during an encryption would switch the index to ikey_de
    // and corrupt the ciphertext.
    wire [4:0] ikey_n = (state_is_decrypt | (state_is_idle & (cmd == 2'b11)))
                        ? ikey_de : ikey_en;

    assign sm4_dout = sm4_round_din[127:96];

    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            state_r       <= IDLE;
            counter       <= 2'd0;
            ikey_en       <= 5'd0;
            ikey_de       <= 5'd31;
            sm4_round_din <= 128'd0;
            res_vld       <= 1'b0;
        end
        else begin
            case (state_r)
                IDLE: begin
                    res_vld       <= 1'b0;
                    sm4_round_din <= sm4_din;
                    case (cmd)
                        2'b00: state_r <= IDLE;
                        2'b01: state_r <= KEYEXP;
                        2'b10: begin
                            // index 0 is being requested now; advance to 1
                            ikey_en <= ikey_en + 1'b1;
                            state_r <= ENCRYPT;
                        end
                        2'b11: begin
                            // index 31 is being requested now; step down to 30
                            ikey_de <= ikey_de - 1'b1;
                            state_r <= DECRYPT;
                        end
                        default: state_r <= IDLE;
                    endcase
                end

                KEYEXP: begin
                    if (key_exp_done) begin
                        res_vld <= 1'b1;
                        state_r <= IDLE;
                    end
                end

                ENCRYPT: begin
                    sm4_round_din <= sm4_round_dout;
                    ikey_en       <= ikey_en + 1'b1;   // wraps 31 -> 0, ready for the next block
                    if (ikey_en == 5'd31)
                        state_r <= STRES;
                end

                DECRYPT: begin
                    sm4_round_din <= sm4_round_dout;
                    ikey_de       <= ikey_de - 1'b1;   // wraps 0 -> 31, ready for the next block
                    if (ikey_de == 5'd0)
                        state_r <= STRES;
                end

                STRES: begin
                    // 32nd round result with the four words in reverse order
                    sm4_round_din <= {sm4_round_dout[31:0],
                                      sm4_round_dout[63:32],
                                      sm4_round_dout[95:64],
                                      sm4_round_dout[127:96]};
                    res_vld       <= 1'b1;
                    state_r       <= OUTPUT;
                end

                OUTPUT: begin
                    counter       <= counter + 1'b1;   // wraps 3 -> 0
                    sm4_round_din <= {sm4_round_din[95:0], 32'd0};
                    if (counter == 2'd3) begin
                        res_vld <= 1'b0;
                        state_r <= IDLE;
                    end
                end

                default: state_r <= IDLE;
            endcase
        end
    end

    sm4_encdec_round u_sm4_round(
        .round_din  (sm4_round_din),
        .round_key  (ikey),
        .round_dout (sm4_round_dout)
    );

    key_expansion u_key_exp(
        .clk           (clk),
        .rst_n         (rst_n),
        .mkey          (sm4_din),
        .key_exp_start (key_exp_start),
        .ikey_n        (ikey_n),
        .ikey          (ikey),
        .key_exp_done  (key_exp_done)
    );
endmodule
// SM4 key schedule.
//
// On key_exp_start (while idle) the master key is XORed with FK0..FK3 and
// then run through sm4_key_round once per clock for 32 clocks. The low word of
// each round output is the round key and is written to the key RAM at address
// exp_counter (0..31).
//
// Ports
//   clk, rst_n    : clock and asynchronous active-low reset.
//   mkey          : 128-bit master key, captured on the start edge.
//   key_exp_start : start request; honoured only while this module is idle.
//   ikey_n        : round-key read index, used as the RAM address while idle.
//   ikey          : round key read from the RAM. The RAM registers its read
//                   address, so ikey corresponds to the ikey_n presented on
//                   the previous clock edge.
//   key_exp_done  : high during the cycle in which round key 31 is written.
module key_expansion(
    input          clk,
    input          rst_n,
    input  [127:0] mkey,
    input          key_exp_start,
    input  [4:0]   ikey_n,
    output [31:0]  ikey,
    output         key_exp_done
);
    localparam FK0 = 32'ha3b1bac6;
    localparam FK1 = 32'h56aa3350;
    localparam FK2 = 32'h677d9197;
    localparam FK3 = 32'hb27022dc;

    reg          state_is_idle;
    reg          ram_ikey_wea;
    reg  [4:0]   exp_counter;   // round being computed; also the RAM write address
    reg  [127:0] round_din_r;   // key-schedule working state
    wire [31:0]  round_key_r;   // CK constant for the current round
    wire [4:0]   ram_ikey_addr;
    wire [127:0] round_dout;

    wire key_exp_trigger = state_is_idle & key_exp_start;

    assign key_exp_done  = ~state_is_idle & (exp_counter == 5'd31);

    // Idle: the RAM is read by the cipher datapath. Busy: it is written here.
    assign ram_ikey_addr = state_is_idle ? ikey_n : exp_counter;

    always @(posedge clk, negedge rst_n) begin
        if (~rst_n)
            state_is_idle <= 1'b1;
        else if (key_exp_trigger)
            state_is_idle <= 1'b0;
        else if (key_exp_done)
            state_is_idle <= 1'b1;
    end

    // Write enable is high for exactly the 32 busy cycles.
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n)
            ram_ikey_wea <= 1'b0;
        else if (key_exp_trigger)
            ram_ikey_wea <= 1'b1;
        else if (key_exp_done)
            ram_ikey_wea <= 1'b0;
    end

    // Counts 0..31 while busy and wraps back to 0 for the next expansion.
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n)
            exp_counter <= 5'd0;
        else if (~state_is_idle)
            exp_counter <= exp_counter + 1'b1;
    end

    always @(posedge clk, negedge rst_n) begin
        if (~rst_n)
            round_din_r <= 128'd0;
        else if (key_exp_trigger)
            round_din_r <= mkey ^ {FK0, FK1, FK2, FK3};
        else if (~state_is_idle)
            round_din_r <= round_dout;
    end

    get_cki u_cki(
        .round_cnt (exp_counter),
        .cki       (round_key_r)
    );

    sm4_key_round u_key_round(
        .round_din  (round_din_r),
        .round_ckey (round_key_r),
        .round_dout (round_dout)
    );

    ram_ikey #(
        .DP (32),
        .AW (5),
        .DW (32)
    ) u_ram_ikey(
        .clk  (clk),
        .din  (round_dout[31:0]),
        .addr (ram_ikey_addr),
        .wea  (ram_ikey_wea),
        .dout (ikey)
    );
endmodule
// One SM4 encryption/decryption round (purely combinational).
//
//   round_din  : {X0, X1, X2, X3}, X0 in the most significant word.
//   round_key  : 32-bit round key.
//   round_dout : {X1, X2, X3, X0 ^ L(S(X1 ^ X2 ^ X3 ^ round_key))}
//                where S applies s_box to each byte and L XORs the word with
//                its left rotations by 2, 10, 18 and 24 bits.
module sm4_encdec_round(
    input  [127:0] round_din,
    input  [31:0]  round_key,
    output [127:0] round_dout
);
    wire [31:0] x0 = round_din[127:96];
    wire [31:0] x1 = round_din[95:64];
    wire [31:0] x2 = round_din[63:32];
    wire [31:0] x3 = round_din[31:0];

    // Byte-wise substitution layer
    wire [31:0] sub_in = x1 ^ x2 ^ x3 ^ round_key;
    wire [31:0] sub_out;

    s_box sbox0(.s_in(sub_in[31:24]), .s_out(sub_out[31:24]));
    s_box sbox1(.s_in(sub_in[23:16]), .s_out(sub_out[23:16]));
    s_box sbox2(.s_in(sub_in[15:8]),  .s_out(sub_out[15:8]));
    s_box sbox3(.s_in(sub_in[7:0]),   .s_out(sub_out[7:0]));

    // Linear diffusion layer: XOR of fixed left rotations
    wire [31:0] rotl2  = {sub_out[29:0], sub_out[31:30]};
    wire [31:0] rotl10 = {sub_out[21:0], sub_out[31:22]};
    wire [31:0] rotl18 = {sub_out[13:0], sub_out[31:14]};
    wire [31:0] rotl24 = {sub_out[7:0],  sub_out[31:8]};
    wire [31:0] mixed  = sub_out ^ rotl2 ^ rotl10 ^ rotl18 ^ rotl24;

    assign round_dout = {x1, x2, x3, mixed ^ x0};
endmodule
// One SM4 key-schedule round (purely combinational).
//
//   round_din  : {K0, K1, K2, K3}, K0 in the most significant word.
//   round_ckey : 32-bit round constant.
//   round_dout : {K1, K2, K3, K0 ^ L'(S(K1 ^ K2 ^ K3 ^ round_ckey))}
//                where S applies s_box to each byte and L' XORs the word with
//                its left rotations by 13 and 23 bits.
module sm4_key_round(
    input  [127:0] round_din,
    input  [31:0]  round_ckey,
    output [127:0] round_dout
);
    wire [31:0] k0 = round_din[127:96];
    wire [31:0] k1 = round_din[95:64];
    wire [31:0] k2 = round_din[63:32];
    wire [31:0] k3 = round_din[31:0];

    // Byte-wise substitution layer
    wire [31:0] sub_in = k1 ^ k2 ^ k3 ^ round_ckey;
    wire [31:0] sub_out;

    s_box sbox0(.s_in(sub_in[31:24]), .s_out(sub_out[31:24]));
    s_box sbox1(.s_in(sub_in[23:16]), .s_out(sub_out[23:16]));
    s_box sbox2(.s_in(sub_in[15:8]),  .s_out(sub_out[15:8]));
    s_box sbox3(.s_in(sub_in[7:0]),   .s_out(sub_out[7:0]));

    // Linear layer of the key schedule
    wire [31:0] rotl13 = {sub_out[18:0], sub_out[31:19]};
    wire [31:0] rotl23 = {sub_out[8:0],  sub_out[31:9]};
    wire [31:0] mixed  = sub_out ^ rotl13 ^ rotl23;

    assign round_dout = {k1, k2, k3, mixed ^ k0};
endmodule
// Combinational lookup of the SM4 key-schedule round constant.
//
//   round_cnt : round index, 0..31.
//   cki       : 32-bit constant for that round.
//
// Blocking assignments are used because this is combinational logic.
module get_cki(
    input      [4:0]  round_cnt,
    output reg [31:0] cki
);
    always @(*) begin
        case (round_cnt)
            5'h00: cki = 32'h00070e15;  5'h01: cki = 32'h1c232a31;
            5'h02: cki = 32'h383f464d;  5'h03: cki = 32'h545b6269;
            5'h04: cki = 32'h70777e85;  5'h05: cki = 32'h8c939aa1;
            5'h06: cki = 32'ha8afb6bd;  5'h07: cki = 32'hc4cbd2d9;
            5'h08: cki = 32'he0e7eef5;  5'h09: cki = 32'hfc030a11;
            5'h0a: cki = 32'h181f262d;  5'h0b: cki = 32'h343b4249;
            5'h0c: cki = 32'h50575e65;  5'h0d: cki = 32'h6c737a81;
            5'h0e: cki = 32'h888f969d;  5'h0f: cki = 32'ha4abb2b9;
            5'h10: cki = 32'hc0c7ced5;  5'h11: cki = 32'hdce3eaf1;
            5'h12: cki = 32'hf8ff060d;  5'h13: cki = 32'h141b2229;
            5'h14: cki = 32'h30373e45;  5'h15: cki = 32'h4c535a61;
            5'h16: cki = 32'h686f767d;  5'h17: cki = 32'h848b9299;
            5'h18: cki = 32'ha0a7aeb5;  5'h19: cki = 32'hbcc3cad1;
            5'h1a: cki = 32'hd8dfe6ed;  5'h1b: cki = 32'hf4fb0209;
            5'h1c: cki = 32'h10171e25;  5'h1d: cki = 32'h2c333a41;
            5'h1e: cki = 32'h484f565d;  5'h1f: cki = 32'h646b7279;
            // Only reachable in simulation when round_cnt contains X/Z bits.
            default: cki = 32'h0;
        endcase
    end
endmodule
// SM4 8-bit substitution box, implemented as a combinational lookup table.
//
//   s_in  : input byte.
//   s_out : substituted byte.
//
// Blocking assignments are used because this is combinational logic.
module s_box(
    input      [7:0] s_in,
    output reg [7:0] s_out
);
    always @(*) begin
        case (s_in)
            8'h00: s_out = 8'hd6;  8'h01: s_out = 8'h90;  8'h02: s_out = 8'he9;  8'h03: s_out = 8'hfe;
            8'h04: s_out = 8'hcc;  8'h05: s_out = 8'he1;  8'h06: s_out = 8'h3d;  8'h07: s_out = 8'hb7;
            8'h08: s_out = 8'h16;  8'h09: s_out = 8'hb6;  8'h0a: s_out = 8'h14;  8'h0b: s_out = 8'hc2;
            8'h0c: s_out = 8'h28;  8'h0d: s_out = 8'hfb;  8'h0e: s_out = 8'h2c;  8'h0f: s_out = 8'h05;

            8'h10: s_out = 8'h2b;  8'h11: s_out = 8'h67;  8'h12: s_out = 8'h9a;  8'h13: s_out = 8'h76;
            8'h14: s_out = 8'h2a;  8'h15: s_out = 8'hbe;  8'h16: s_out = 8'h04;  8'h17: s_out = 8'hc3;
            8'h18: s_out = 8'haa;  8'h19: s_out = 8'h44;  8'h1a: s_out = 8'h13;  8'h1b: s_out = 8'h26;
            8'h1c: s_out = 8'h49;  8'h1d: s_out = 8'h86;  8'h1e: s_out = 8'h06;  8'h1f: s_out = 8'h99;

            8'h20: s_out = 8'h9c;  8'h21: s_out = 8'h42;  8'h22: s_out = 8'h50;  8'h23: s_out = 8'hf4;
            8'h24: s_out = 8'h91;  8'h25: s_out = 8'hef;  8'h26: s_out = 8'h98;  8'h27: s_out = 8'h7a;
            8'h28: s_out = 8'h33;  8'h29: s_out = 8'h54;  8'h2a: s_out = 8'h0b;  8'h2b: s_out = 8'h43;
            8'h2c: s_out = 8'hed;  8'h2d: s_out = 8'hcf;  8'h2e: s_out = 8'hac;  8'h2f: s_out = 8'h62;

            8'h30: s_out = 8'he4;  8'h31: s_out = 8'hb3;  8'h32: s_out = 8'h1c;  8'h33: s_out = 8'ha9;
            8'h34: s_out = 8'hc9;  8'h35: s_out = 8'h08;  8'h36: s_out = 8'he8;  8'h37: s_out = 8'h95;
            8'h38: s_out = 8'h80;  8'h39: s_out = 8'hdf;  8'h3a: s_out = 8'h94;  8'h3b: s_out = 8'hfa;
            8'h3c: s_out = 8'h75;  8'h3d: s_out = 8'h8f;  8'h3e: s_out = 8'h3f;  8'h3f: s_out = 8'ha6;

            8'h40: s_out = 8'h47;  8'h41: s_out = 8'h07;  8'h42: s_out = 8'ha7;  8'h43: s_out = 8'hfc;
            8'h44: s_out = 8'hf3;  8'h45: s_out = 8'h73;  8'h46: s_out = 8'h17;  8'h47: s_out = 8'hba;
            8'h48: s_out = 8'h83;  8'h49: s_out = 8'h59;  8'h4a: s_out = 8'h3c;  8'h4b: s_out = 8'h19;
            8'h4c: s_out = 8'he6;  8'h4d: s_out = 8'h85;  8'h4e: s_out = 8'h4f;  8'h4f: s_out = 8'ha8;

            8'h50: s_out = 8'h68;  8'h51: s_out = 8'h6b;  8'h52: s_out = 8'h81;  8'h53: s_out = 8'hb2;
            8'h54: s_out = 8'h71;  8'h55: s_out = 8'h64;  8'h56: s_out = 8'hda;  8'h57: s_out = 8'h8b;
            8'h58: s_out = 8'hf8;  8'h59: s_out = 8'heb;  8'h5a: s_out = 8'h0f;  8'h5b: s_out = 8'h4b;
            8'h5c: s_out = 8'h70;  8'h5d: s_out = 8'h56;  8'h5e: s_out = 8'h9d;  8'h5f: s_out = 8'h35;

            8'h60: s_out = 8'h1e;  8'h61: s_out = 8'h24;  8'h62: s_out = 8'h0e;  8'h63: s_out = 8'h5e;
            8'h64: s_out = 8'h63;  8'h65: s_out = 8'h58;  8'h66: s_out = 8'hd1;  8'h67: s_out = 8'ha2;
            8'h68: s_out = 8'h25;  8'h69: s_out = 8'h22;  8'h6a: s_out = 8'h7c;  8'h6b: s_out = 8'h3b;
            8'h6c: s_out = 8'h01;  8'h6d: s_out = 8'h21;  8'h6e: s_out = 8'h78;  8'h6f: s_out = 8'h87;

            8'h70: s_out = 8'hd4;  8'h71: s_out = 8'h00;  8'h72: s_out = 8'h46;  8'h73: s_out = 8'h57;
            8'h74: s_out = 8'h9f;  8'h75: s_out = 8'hd3;  8'h76: s_out = 8'h27;  8'h77: s_out = 8'h52;
            8'h78: s_out = 8'h4c;  8'h79: s_out = 8'h36;  8'h7a: s_out = 8'h02;  8'h7b: s_out = 8'he7;
            8'h7c: s_out = 8'ha0;  8'h7d: s_out = 8'hc4;  8'h7e: s_out = 8'hc8;  8'h7f: s_out = 8'h9e;

            8'h80: s_out = 8'hea;  8'h81: s_out = 8'hbf;  8'h82: s_out = 8'h8a;  8'h83: s_out = 8'hd2;
            8'h84: s_out = 8'h40;  8'h85: s_out = 8'hc7;  8'h86: s_out = 8'h38;  8'h87: s_out = 8'hb5;
            8'h88: s_out = 8'ha3;  8'h89: s_out = 8'hf7;  8'h8a: s_out = 8'hf2;  8'h8b: s_out = 8'hce;
            8'h8c: s_out = 8'hf9;  8'h8d: s_out = 8'h61;  8'h8e: s_out = 8'h15;  8'h8f: s_out = 8'ha1;

            8'h90: s_out = 8'he0;  8'h91: s_out = 8'hae;  8'h92: s_out = 8'h5d;  8'h93: s_out = 8'ha4;
            8'h94: s_out = 8'h9b;  8'h95: s_out = 8'h34;  8'h96: s_out = 8'h1a;  8'h97: s_out = 8'h55;
            8'h98: s_out = 8'had;  8'h99: s_out = 8'h93;  8'h9a: s_out = 8'h32;  8'h9b: s_out = 8'h30;
            8'h9c: s_out = 8'hf5;  8'h9d: s_out = 8'h8c;  8'h9e: s_out = 8'hb1;  8'h9f: s_out = 8'he3;

            8'ha0: s_out = 8'h1d;  8'ha1: s_out = 8'hf6;  8'ha2: s_out = 8'he2;  8'ha3: s_out = 8'h2e;
            8'ha4: s_out = 8'h82;  8'ha5: s_out = 8'h66;  8'ha6: s_out = 8'hca;  8'ha7: s_out = 8'h60;
            8'ha8: s_out = 8'hc0;  8'ha9: s_out = 8'h29;  8'haa: s_out = 8'h23;  8'hab: s_out = 8'hab;
            8'hac: s_out = 8'h0d;  8'had: s_out = 8'h53;  8'hae: s_out = 8'h4e;  8'haf: s_out = 8'h6f;

            8'hb0: s_out = 8'hd5;  8'hb1: s_out = 8'hdb;  8'hb2: s_out = 8'h37;  8'hb3: s_out = 8'h45;
            8'hb4: s_out = 8'hde;  8'hb5: s_out = 8'hfd;  8'hb6: s_out = 8'h8e;  8'hb7: s_out = 8'h2f;
            8'hb8: s_out = 8'h03;  8'hb9: s_out = 8'hff;  8'hba: s_out = 8'h6a;  8'hbb: s_out = 8'h72;
            8'hbc: s_out = 8'h6d;  8'hbd: s_out = 8'h6c;  8'hbe: s_out = 8'h5b;  8'hbf: s_out = 8'h51;

            8'hc0: s_out = 8'h8d;  8'hc1: s_out = 8'h1b;  8'hc2: s_out = 8'haf;  8'hc3: s_out = 8'h92;
            8'hc4: s_out = 8'hbb;  8'hc5: s_out = 8'hdd;  8'hc6: s_out = 8'hbc;  8'hc7: s_out = 8'h7f;
            8'hc8: s_out = 8'h11;  8'hc9: s_out = 8'hd9;  8'hca: s_out = 8'h5c;  8'hcb: s_out = 8'h41;
            8'hcc: s_out = 8'h1f;  8'hcd: s_out = 8'h10;  8'hce: s_out = 8'h5a;  8'hcf: s_out = 8'hd8;

            8'hd0: s_out = 8'h0a;  8'hd1: s_out = 8'hc1;  8'hd2: s_out = 8'h31;  8'hd3: s_out = 8'h88;
            8'hd4: s_out = 8'ha5;  8'hd5: s_out = 8'hcd;  8'hd6: s_out = 8'h7b;  8'hd7: s_out = 8'hbd;
            8'hd8: s_out = 8'h2d;  8'hd9: s_out = 8'h74;  8'hda: s_out = 8'hd0;  8'hdb: s_out = 8'h12;
            8'hdc: s_out = 8'hb8;  8'hdd: s_out = 8'he5;  8'hde: s_out = 8'hb4;  8'hdf: s_out = 8'hb0;

            8'he0: s_out = 8'h89;  8'he1: s_out = 8'h69;  8'he2: s_out = 8'h97;  8'he3: s_out = 8'h4a;
            8'he4: s_out = 8'h0c;  8'he5: s_out = 8'h96;  8'he6: s_out = 8'h77;  8'he7: s_out = 8'h7e;
            8'he8: s_out = 8'h65;  8'he9: s_out = 8'hb9;  8'hea: s_out = 8'hf1;  8'heb: s_out = 8'h09;
            8'hec: s_out = 8'hc5;  8'hed: s_out = 8'h6e;  8'hee: s_out = 8'hc6;  8'hef: s_out = 8'h84;

            8'hf0: s_out = 8'h18;  8'hf1: s_out = 8'hf0;  8'hf2: s_out = 8'h7d;  8'hf3: s_out = 8'hec;
            8'hf4: s_out = 8'h3a;  8'hf5: s_out = 8'hdc;  8'hf6: s_out = 8'h4d;  8'hf7: s_out = 8'h20;
            8'hf8: s_out = 8'h79;  8'hf9: s_out = 8'hee;  8'hfa: s_out = 8'h5f;  8'hfb: s_out = 8'h3e;
            8'hfc: s_out = 8'hd7;  8'hfd: s_out = 8'hcb;  8'hfe: s_out = 8'h39;  8'hff: s_out = 8'h48;

            // Only reachable in simulation when s_in contains X/Z bits.
            default: s_out = 8'h00;
        endcase
    end
endmodule
// Single-port round-key memory with a registered read address.
//
// Parameters
//   DP : number of words.
//   AW : address width.
//   DW : data width.
//
// Ports
//   clk  : clock.
//   din  : write data.
//   addr : shared address. It is the write address when wea is high;
//          otherwise it is captured as the read address.
//   wea  : write enable.
//   dout : word at the most recently captured read address.
module ram_ikey #(
    parameter DP = 32,
    parameter AW = 8,
    parameter DW = 32
)(
    input           clk,
    input  [DW-1:0] din,
    input  [AW-1:0] addr,
    input           wea,
    output [DW-1:0] dout
);
    reg [DW-1:0] storage [0:DP-1];
    reg [AW-1:0] rd_addr;

    always @(posedge clk) begin
        // Write port: store din at addr.
        if (wea)
            storage[addr] <= din;
        // Read port: the read address only follows addr while not writing,
        // so dout holds its previous selection during a write burst.
        if (~wea)
            rd_addr <= addr;
    end

    assign dout = storage[rd_addr];
endmodule
// Self-checking testbench for sm4_top.
//
// Sequence: reset -> key expansion -> encrypt one block -> decrypt the
// ciphertext. The key and the plaintext are both 0123456789abcdeffedcba9876543210;
// the expected ciphertext is 681edf34d206965e86b3e94f536e4246 (the standard
// SM4 example vector).
//
// All stimulus changes on the falling clock edge so that it never races with
// the DUT, which samples on the rising edge.
module tb;
    reg          clk, rst_n;
    reg  [127:0] sm4_din;
    reg  [1:0]   cmd;
    wire [31:0]  sm4_dout;
    wire         res_vld;

    localparam [127:0] KEY_AND_PLAIN = 128'h0123456789abcdeffedcba9876543210;
    localparam [127:0] CIPHER        = 128'h681edf34d206965e86b3e94f536e4246;

    reg [127:0] result;   // result words collected from sm4_dout, first word in [127:96]
    integer     errors;

    // Present a command with its data for exactly one rising clock edge.
    task send_cmd(input [1:0] c, input [127:0] d);
        begin
            @(negedge clk);
            sm4_din = d;
            cmd     = c;
            @(negedge clk);
            cmd     = 2'b00;
        end
    endtask

    // Wait for res_vld, then sample the four 32-bit result words mid-cycle.
    task collect_result;
        integer w;
        begin
            wait (res_vld);
            for (w = 0; w < 4; w = w + 1) begin
                @(negedge clk);
                result = {result[95:0], sm4_dout};
            end
        end
    endtask

    initial begin
        clk     = 0;
        rst_n   = 0;
        cmd     = 2'b00;
        sm4_din = KEY_AND_PLAIN;
        result  = 128'd0;
        errors  = 0;

        repeat (2) @(negedge clk);
        rst_n = 1;

        // Key expansion: res_vld pulses for one cycle when it completes.
        send_cmd(2'b01, KEY_AND_PLAIN);
        wait (res_vld);

        // Encrypt
        send_cmd(2'b10, KEY_AND_PLAIN);
        collect_result;
        if (result !== CIPHER) begin
            errors = errors + 1;
            $display("FAIL: encrypt returned %h, expected %h", result, CIPHER);
        end

        // Decrypt
        send_cmd(2'b11, CIPHER);
        collect_result;
        if (result !== KEY_AND_PLAIN) begin
            errors = errors + 1;
            $display("FAIL: decrypt returned %h, expected %h", result, KEY_AND_PLAIN);
        end

        if (errors == 0)
            $display("PASS: SM4 encrypt/decrypt match the reference vector");
        else
            $display("FAIL: %0d mismatch(es)", errors);

        // Let the DUT return to IDLE before ending the simulation.
        repeat (2) @(negedge clk);
        $finish;
    end

    // Watchdog: end the simulation if res_vld never arrives.
    initial begin
        #100000;
        $display("FAIL: timeout waiting for res_vld");
        $finish;
    end

    always #5 clk = ~clk;

    sm4_top u_sm4_top(
        .clk      (clk),
        .rst_n    (rst_n),
        .cmd      (cmd),        // 00:pause 01:key_exp 10:encrypt 11:decrypt
        .sm4_din  (sm4_din),
        .sm4_dout (sm4_dout),
        .res_vld  (res_vld)
    );
endmodule