cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
kwjcoo
Visitor
Visitor
455 Views
Registered: ‎07-22-2019

Abnormal program termination (EXCEPTION_ACCESS_VIOLATION) happening

Hello,

I've been struggling with an "Abnormal program termination (EXCEPTION_ACCESS_VIOLATION)" error for hours.

I was able to generate a bitstream before, but after I changed the behavior of the "pool" registers, synthesis suddenly fails with an EXCEPTION_ACCESS_VIOLATION error.

Attached are my whole project code, the block design, and the settings for the BRAM used by conv_layer.

Thanks in advance

// start_finish
//
// Bus-master FSM on the AHB_INTERFACE_0 port group. After reset it repeatedly
// reads the word at START_CODE_ADDR until the value equals START_CODE, then
// raises `start`. It waits for `finish` from the processing module, writes
// FINISH_CODE to FINISH_CODE_ADDR, and finally parks with all bus outputs at
// zero. `start` stays high once set; only reset clears it.
//
// Reset is synchronous and active-low (the block is held in reset while
// `reset` is 0).
module start_finish(
  output reg [31:0]AHB_INTERFACE_0_haddr,
  output reg [2:0]AHB_INTERFACE_0_hburst,
  output reg [3:0]AHB_INTERFACE_0_hprot,
  input [31:0]AHB_INTERFACE_0_hrdata,
  output reg AHB_INTERFACE_0_hready_in,
  input AHB_INTERFACE_0_hready_out,
  input AHB_INTERFACE_0_hresp,
  output reg [2:0]AHB_INTERFACE_0_hsize,
  output reg [1:0]AHB_INTERFACE_0_htrans,
  output reg [31:0]AHB_INTERFACE_0_hwdata,
  output reg AHB_INTERFACE_0_hwrite,
  output reg AHB_INTERFACE_0_sel,

  output reg start,
  input finish,

  input clk,
  input reset
);

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
localparam [31:0] START_CODE       = 32'h0102_0304;  // value that triggers `start`
localparam [31:0] FINISH_CODE      = 32'h0403_0201;  // value written once `finish` is seen
localparam [31:0] START_CODE_ADDR  = 32'h5000_0000;
localparam [31:0] FINISH_CODE_ADDR = 32'h5000_0004;

// State encoding. S_DONE (and every unlisted value) is served by the case
// default, which just keeps the bus outputs cleared.
localparam [3:0] S_READ_REQ    = 4'd0,
                 S_READ_DATA   = 4'd1,
                 S_COMPARE     = 4'd2,
                 S_WAIT_FINISH = 4'd3,
                 S_WRITE_REQ   = 4'd4,
                 S_WRITE_HOLD  = 4'd5,
                 S_WRITE_END   = 4'd6,
                 S_DONE        = 4'd7;

reg [3:0]  state;
reg [31:0] read_data_reg;   // last word captured from hrdata (not reset)

// Short aliases for the two slave-side handshake inputs.
wire slave_ready = AHB_INTERFACE_0_hready_out;
wire slave_error = AHB_INTERFACE_0_hresp;

// Drive every bus output to zero. Shared by reset, the wait-for-finish state
// and the parked (default) state.
task clear_bus_outputs;
  begin
    AHB_INTERFACE_0_haddr     <= 32'd0;
    AHB_INTERFACE_0_hburst    <= 3'd0;
    AHB_INTERFACE_0_hprot     <= 4'd0;
    AHB_INTERFACE_0_hready_in <= 1'b0;
    AHB_INTERFACE_0_hsize     <= 3'd0;
    AHB_INTERFACE_0_htrans    <= 2'd0;
    AHB_INTERFACE_0_hwdata    <= 32'd0;
    AHB_INTERFACE_0_hwrite    <= 1'b0;
    AHB_INTERFACE_0_sel       <= 1'b0;
  end
endtask

always @(posedge clk) begin
  if (!reset) begin
    clear_bus_outputs;
    start <= 1'b0;
    state <= S_READ_REQ;
  end
  else begin
    case (state)
      // Issue a read of the start-code location once the slave is ready.
      S_READ_REQ: begin
        if (slave_ready) begin
          AHB_INTERFACE_0_haddr     <= START_CODE_ADDR;
          AHB_INTERFACE_0_htrans    <= 2'b10;
          AHB_INTERFACE_0_hburst    <= 3'd0;
          AHB_INTERFACE_0_hsize     <= 3'b010;
          AHB_INTERFACE_0_hprot     <= 4'd1;
          AHB_INTERFACE_0_sel       <= 1'b1;
          AHB_INTERFACE_0_hready_in <= 1'b1;
          state                     <= S_READ_DATA;
        end
      end

      // Drop htrans unconditionally; capture hrdata when the slave is ready,
      // otherwise hold hready_in low and wait here.
      S_READ_DATA: begin
        AHB_INTERFACE_0_htrans <= 2'd0;
        if (slave_ready) begin
          read_data_reg             <= AHB_INTERFACE_0_hrdata;
          AHB_INTERFACE_0_sel       <= 1'b0;
          AHB_INTERFACE_0_hprot     <= 4'd0;
          AHB_INTERFACE_0_hready_in <= 1'b1;
          state                     <= S_COMPARE;
        end
        else begin
          AHB_INTERFACE_0_hready_in <= 1'b0;
        end
      end

      // Start code seen -> raise `start`; anything else -> poll again.
      S_COMPARE: begin
        if (read_data_reg == START_CODE) begin
          start <= 1'b1;
          state <= S_WAIT_FINISH;
        end
        else begin
          state <= S_READ_REQ;
        end
      end

      // Keep the bus outputs at zero while the processing module runs.
      S_WAIT_FINISH: begin
        clear_bus_outputs;
        if (finish)
          state <= S_WRITE_REQ;
      end

      // Issue the finish-code write once the slave is ready and not in error.
      S_WRITE_REQ: begin
        if (slave_ready && !slave_error) begin
          AHB_INTERFACE_0_haddr     <= FINISH_CODE_ADDR;
          AHB_INTERFACE_0_hwdata    <= FINISH_CODE;
          AHB_INTERFACE_0_hwrite    <= 1'b1;
          AHB_INTERFACE_0_htrans    <= 2'b10;
          AHB_INTERFACE_0_hburst    <= 3'd0;
          // Same size code as the read request above.
          // NOTE(review): the previous comment here said "Byte", but in the
          // AMBA AHB HSIZE encoding 3'b010 denotes a 32-bit word - confirm.
          AHB_INTERFACE_0_hsize     <= 3'b010;
          AHB_INTERFACE_0_hprot     <= 4'd9;
          AHB_INTERFACE_0_sel       <= 1'b1;
          AHB_INTERFACE_0_hready_in <= 1'b1;
          state                     <= S_WRITE_HOLD;
        end
      end

      // One cycle with htrans and hready_in dropped; write data stays driven.
      S_WRITE_HOLD: begin
        AHB_INTERFACE_0_htrans    <= 2'd0;
        AHB_INTERFACE_0_hready_in <= 1'b0;
        state                     <= S_WRITE_END;
      end

      // Release the write signals once hready_out is low with no error
      // response, then move to the parked state.
      S_WRITE_END: begin
        if (!slave_ready && !slave_error) begin
          AHB_INTERFACE_0_sel       <= 1'b0;
          AHB_INTERFACE_0_hprot     <= 4'd0;
          AHB_INTERFACE_0_hwdata    <= 32'd0;
          AHB_INTERFACE_0_hwrite    <= 1'b0;
          AHB_INTERFACE_0_hready_in <= 1'b1;
          state                     <= S_DONE;
        end
      end

      // S_DONE and any unused encoding: stay put with the bus cleared.
      default: begin
        clear_bus_outputs;
      end
    endcase
  end
end
endmodule

whole_bram.png portA.png portB.png

//Copyright 1986-2018 Xilinx, Inc. All Rights Reserved.
//--------------------------------------------------------------------------------
//Tool Version: Vivado v.2018.2 (win64) Build 2258646 Thu Jun 14 20:03:12 MDT 2018
//Date        : Sat Nov  9 17:20:01 2019
//Host        : DESKTOP-A26RU88 running 64-bit major release  (build 9200)
//Command     : generate_target design_1_wrapper.bd
//Design      : design_1_wrapper
//Purpose     : IP block netlist
//--------------------------------------------------------------------------------
`timescale 1 ps / 1 ps
(* dont_touch = "true" *)

// top
//
// Hand-edited wrapper around the design_1 instance. Two bus masters share the
// single AHB_INTERFACE_0 port group of design_1:
//   * start_finish_i - drives the bus whenever conv_layer does not own it
//   * conv_layer_i   - drives the bus while `start` is high and `finish` is low
// Slave-to-master signals (hrdata, hready_out, hresp) fan out to both masters.
module top
   (
    inout  wire [14:0] DDR_0_addr,
    inout  wire [2:0]  DDR_0_ba,
    inout  wire        DDR_0_cas_n,
    inout  wire        DDR_0_ck_n,
    inout  wire        DDR_0_ck_p,
    inout  wire        DDR_0_cke,
    inout  wire        DDR_0_cs_n,
    inout  wire [3:0]  DDR_0_dm,
    inout  wire [31:0] DDR_0_dq,
    inout  wire [3:0]  DDR_0_dqs_n,
    inout  wire [3:0]  DDR_0_dqs_p,
    inout  wire        DDR_0_odt,
    inout  wire        DDR_0_ras_n,
    inout  wire        DDR_0_reset_n,
    inout  wire        DDR_0_we_n,
    inout  wire        FIXED_IO_0_ddr_vrn,
    inout  wire        FIXED_IO_0_ddr_vrp,
    inout  wire [53:0] FIXED_IO_0_mio,
    inout  wire        FIXED_IO_0_ps_clk,
    inout  wire        FIXED_IO_0_ps_porb,
    inout  wire        FIXED_IO_0_ps_srstb,
    input  wire        UART0_rxd,
    output wire        UART0_txd
    );

  // Clock and reset, connected to the clk/reset ports of design_1_i.
  wire       clk;
  wire [0:0] reset;

  // Handshake between the two masters.
  wire start;    // driven by start_finish_i
  wire finish;   // driven by conv_layer_i

  // Shared bus, as seen by design_1.
  wire [31:0] AHB_INTERFACE_0_haddr;
  wire [2:0]  AHB_INTERFACE_0_hburst;
  wire [3:0]  AHB_INTERFACE_0_hprot;
  wire [31:0] AHB_INTERFACE_0_hrdata;
  wire        AHB_INTERFACE_0_hready_in;
  wire        AHB_INTERFACE_0_hready_out;
  wire        AHB_INTERFACE_0_hresp;
  wire [2:0]  AHB_INTERFACE_0_hsize;
  wire [1:0]  AHB_INTERFACE_0_htrans;
  wire [31:0] AHB_INTERFACE_0_hwdata;
  wire        AHB_INTERFACE_0_hwrite;
  wire        AHB_INTERFACE_0_sel;

  // Master-side outputs of start_finish_i.
  wire [31:0] sf_haddr;
  wire [2:0]  sf_hburst;
  wire [3:0]  sf_hprot;
  wire        sf_hready_in;
  wire [2:0]  sf_hsize;
  wire [1:0]  sf_htrans;
  wire [31:0] sf_hwdata;
  wire        sf_hwrite;
  wire        sf_sel;

  // Master-side outputs of conv_layer_i.
  wire [31:0] cl_haddr;
  wire [2:0]  cl_hburst;
  wire [3:0]  cl_hprot;
  wire        cl_hready_in;
  wire [2:0]  cl_hsize;
  wire [1:0]  cl_htrans;
  wire [31:0] cl_hwdata;
  wire        cl_hwrite;
  wire        cl_sel;

  // Bus ownership: conv_layer owns the bus from `start` until `finish`.
  wire conv_owns_bus = start && !finish;

  assign AHB_INTERFACE_0_haddr     = conv_owns_bus ? cl_haddr     : sf_haddr;
  assign AHB_INTERFACE_0_hburst    = conv_owns_bus ? cl_hburst    : sf_hburst;
  assign AHB_INTERFACE_0_hprot     = conv_owns_bus ? cl_hprot     : sf_hprot;
  assign AHB_INTERFACE_0_hready_in = conv_owns_bus ? cl_hready_in : sf_hready_in;
  assign AHB_INTERFACE_0_hsize     = conv_owns_bus ? cl_hsize     : sf_hsize;
  assign AHB_INTERFACE_0_htrans    = conv_owns_bus ? cl_htrans    : sf_htrans;
  assign AHB_INTERFACE_0_hwdata    = conv_owns_bus ? cl_hwdata    : sf_hwdata;
  assign AHB_INTERFACE_0_hwrite    = conv_owns_bus ? cl_hwrite    : sf_hwrite;
  assign AHB_INTERFACE_0_sel       = conv_owns_bus ? cl_sel       : sf_sel;

  // Start/finish sequencer.
  start_finish start_finish_i (
    .clk                        (clk),
    .reset                      (reset),
    .start                      (start),
    .finish                     (finish),
    .AHB_INTERFACE_0_haddr      (sf_haddr),
    .AHB_INTERFACE_0_hburst     (sf_hburst),
    .AHB_INTERFACE_0_hprot      (sf_hprot),
    .AHB_INTERFACE_0_hready_in  (sf_hready_in),
    .AHB_INTERFACE_0_hsize      (sf_hsize),
    .AHB_INTERFACE_0_htrans     (sf_htrans),
    .AHB_INTERFACE_0_hwdata     (sf_hwdata),
    .AHB_INTERFACE_0_hwrite     (sf_hwrite),
    .AHB_INTERFACE_0_sel        (sf_sel),
    .AHB_INTERFACE_0_hrdata     (AHB_INTERFACE_0_hrdata),
    .AHB_INTERFACE_0_hready_out (AHB_INTERFACE_0_hready_out),
    .AHB_INTERFACE_0_hresp      (AHB_INTERFACE_0_hresp)
    );

  // Processing module; it has no reset port and is gated by `start` instead.
  conv_layer conv_layer_i (
    .clk                        (clk),
    .start                      (start),
    .finish                     (finish),
    .AHB_INTERFACE_0_haddr      (cl_haddr),
    .AHB_INTERFACE_0_hburst     (cl_hburst),
    .AHB_INTERFACE_0_hprot      (cl_hprot),
    .AHB_INTERFACE_0_hready_in  (cl_hready_in),
    .AHB_INTERFACE_0_hsize      (cl_hsize),
    .AHB_INTERFACE_0_htrans     (cl_htrans),
    .AHB_INTERFACE_0_hwdata     (cl_hwdata),
    .AHB_INTERFACE_0_hwrite     (cl_hwrite),
    .AHB_INTERFACE_0_sel        (cl_sel),
    .AHB_INTERFACE_0_hrdata     (AHB_INTERFACE_0_hrdata),
    .AHB_INTERFACE_0_hready_out (AHB_INTERFACE_0_hready_out),
    .AHB_INTERFACE_0_hresp      (AHB_INTERFACE_0_hresp)
    );

  // design_1 instance: receives the muxed bus and all DDR / FIXED_IO / UART
  // pins, and is where `clk` and `reset` connect.
  design_1 design_1_i (
    .AHB_INTERFACE_0_haddr      (AHB_INTERFACE_0_haddr),
    .AHB_INTERFACE_0_hburst     (AHB_INTERFACE_0_hburst),
    .AHB_INTERFACE_0_hprot      (AHB_INTERFACE_0_hprot),
    .AHB_INTERFACE_0_hrdata     (AHB_INTERFACE_0_hrdata),
    .AHB_INTERFACE_0_hready_in  (AHB_INTERFACE_0_hready_in),
    .AHB_INTERFACE_0_hready_out (AHB_INTERFACE_0_hready_out),
    .AHB_INTERFACE_0_hresp      (AHB_INTERFACE_0_hresp),
    .AHB_INTERFACE_0_hsize      (AHB_INTERFACE_0_hsize),
    .AHB_INTERFACE_0_htrans     (AHB_INTERFACE_0_htrans),
    .AHB_INTERFACE_0_hwdata     (AHB_INTERFACE_0_hwdata),
    .AHB_INTERFACE_0_hwrite     (AHB_INTERFACE_0_hwrite),
    .AHB_INTERFACE_0_sel        (AHB_INTERFACE_0_sel),
    .DDR_0_addr                 (DDR_0_addr),
    .DDR_0_ba                   (DDR_0_ba),
    .DDR_0_cas_n                (DDR_0_cas_n),
    .DDR_0_ck_n                 (DDR_0_ck_n),
    .DDR_0_ck_p                 (DDR_0_ck_p),
    .DDR_0_cke                  (DDR_0_cke),
    .DDR_0_cs_n                 (DDR_0_cs_n),
    .DDR_0_dm                   (DDR_0_dm),
    .DDR_0_dq                   (DDR_0_dq),
    .DDR_0_dqs_n                (DDR_0_dqs_n),
    .DDR_0_dqs_p                (DDR_0_dqs_p),
    .DDR_0_odt                  (DDR_0_odt),
    .DDR_0_ras_n                (DDR_0_ras_n),
    .DDR_0_reset_n              (DDR_0_reset_n),
    .DDR_0_we_n                 (DDR_0_we_n),
    .FIXED_IO_0_ddr_vrn         (FIXED_IO_0_ddr_vrn),
    .FIXED_IO_0_ddr_vrp         (FIXED_IO_0_ddr_vrp),
    .FIXED_IO_0_mio             (FIXED_IO_0_mio),
    .FIXED_IO_0_ps_clk          (FIXED_IO_0_ps_clk),
    .FIXED_IO_0_ps_porb         (FIXED_IO_0_ps_porb),
    .FIXED_IO_0_ps_srstb        (FIXED_IO_0_ps_srstb),
    .UART0_rxd                  (UART0_rxd),
    .UART0_txd                  (UART0_txd),
    .clk                        (clk),
    .reset                      (reset)
    );

endmodule
`timescale 1 ps / 1 ps

module conv_layer (
    input clk,
    input start,
    output reg finish,
    output reg [31:0] AHB_INTERFACE_0_haddr,
    output reg [2:0] AHB_INTERFACE_0_hburst,
    output reg [3:0] AHB_INTERFACE_0_hprot,
    input [31:0] AHB_INTERFACE_0_hrdata,
    output reg AHB_INTERFACE_0_hready_in,
    input AHB_INTERFACE_0_hready_out,
    input AHB_INTERFACE_0_hresp,
    output reg [2:0] AHB_INTERFACE_0_hsize,
    output reg [1:0] AHB_INTERFACE_0_htrans,
    output reg [31:0] AHB_INTERFACE_0_hwdata,
    output reg AHB_INTERFACE_0_hwrite,
    output reg AHB_INTERFACE_0_sel
    );
    
/******* Register declaration START *******/     
    /* "count" registers */
    reg [12:0] cntTotal; // count the number of Pool calculation
    reg [12:0] cntConv; // count the number of convolution calculation
    reg [1:0] cntDataPool;  // count the number of data read for Pool
    reg [3:0] cntDataImage; // count the number of data read for Image
    reg [1:0] cntDataImageWarm; // count the number of warm data read for Image
    reg [3:0] cntDataWeight;    // count the number of data read for Weight
    
    /* FSM registers */
    reg [1:0] state;    // Overall FSM
    parameter COLD_WEIGHT = 2'b00;  // obtaining weight matrix 
    parameter COLD_FETCH = 2'b01;   // obtaining whole(3*3) matrix
    parameter WARM_FETCH = 2'b10;   // obtaining partial(3*1) matrix
    parameter CALCULATE = 2'b11;    // do convolution
    
    reg read;   // read FSM
    parameter READ_REQ = 1'b0;  // reqeusting read
    parameter READ_READ = 1'b1; // obatining read data
    
    reg [1:0] warmRead;   // read FSM specially designed for WARM_FETCH
    parameter WARM_UPDATE = 2'b00;  // updating 6 elements
    parameter WARM_REQ = 2'b01;     // requesting read
    parameter WARM_READ = 2'b10;    // obtaining read data
    
    reg [2:0] calc;     // calculating FSM
    parameter NEG = 3'b000;  // 2's complementing data
    parameter MUL_PREPARE = 3'b001; // absoluting data and sign-extending to 16 bits
    parameter MUL = 3'b010; // multiplying data considering original signs
    parameter ADD = 3'b011;  // adding 9 components
    parameter RESULT = 3'b100;    // reduce 16 bits into 8 bits
    parameter LOAD = 3'b101; // loading to array C
    parameter WRAP_UP = 3'b110; // wrapping up write
    
    reg [2:0] poolState;    // pooling FSM
    parameter POOL_WAIT = 3'b000;   // buffering state between wrap_up and req
    parameter POOL_REQ = 3'b001; // requesting for read from BRAM
    parameter POOL_READ = 3'b010;   // reading data and ReLU
    parameter POOL_CALC = 3'b011;   // requesting for write to BRAM (PS)
    parameter POOL_DONE = 3'b100;   // finishing write request
    parameter POOL_WRAP_UP = 3'b101;    // wrapping up write and determine current position
          
    /* registers for memory data */
    reg [71:0] dataImage;
    reg [71:0] dataWeight;
    reg [7:0] dataOut;
    reg [31:0] addrImage;
    reg [31:0] nextAddrImage;   // for next calculation
    reg [31:0] addrWeight;
    reg [31:0] addrOut;
    // parameters for BRAM communicating with PS
    parameter startAddrImage = 32'h4000_0000;
    parameter startAddrWeight = 32'h4001_0000;
    parameter startAddrOut = 32'h4002_0000;
    
    /* BRAM IN/OUT */
    // IN (Port A)
    reg [7:0] dataTemp;
    reg [12:0] addrTemp;
    reg ena;
    reg wea;
    // OUT (Port B)
    wire [7:0] dataPool;
    reg [12:0] addrPool;
    reg [12:0] nextAddrPool;    // for next calculation
    reg enb;
    
    /* registers for convolution calculation */
    reg [8:0] signWeight;   // saving sign information for 9 elements
    reg [8:0] signImage;
    reg [71:0] negDataWeight;   // saving negative values for 9 elements
    reg [71:0] negDataImage;
    reg [15:0] dataTempImage1;  // temporary values for calculation 
    reg [15:0] dataTempWeight1;
    reg [15:0] dataTempImage2;
    reg [15:0] dataTempWeight2;    
    reg [15:0] dataTempImage3;
    reg [15:0] dataTempWeight3;
    reg [15:0] dataTempImage4;
    reg [15:0] dataTempWeight4;
    reg [15:0] dataTempImage5;
    reg [15:0] dataTempWeight5;
    reg [15:0] dataTempImage6;
    reg [15:0] dataTempWeight6;
    reg [15:0] dataTempImage7;
    reg [15:0] dataTempWeight7;
    reg [15:0] dataTempImage8;
    reg [15:0] dataTempWeight8;
    reg [15:0] dataTempImage9;
    reg [15:0] dataTempWeight9;
   
    /* registers for pooling */
    reg pool;   // determines pooling is ready or not
    reg poolReg;    // to get rising edge of pool
    reg startPool;
    reg finishPool;
/******* Register declaration END *******/    
    
    
/******* BRAM START *******/
    blk_mem_gen_0 blk_mem_gen_0_i (
        // Port A
        .addra(addrTemp),
        .clka(clk),
        .dina(dataTemp),
        .ena(ena),
        .wea(wea),
        // Port B
        .addrb(addrPool),
        .clkb(clk),
        .doutb(dataPool),
        .enb(enb),
        .web(0)
    );
/******* BRAM END *******/


/******* FSM START *******/
    always @(posedge clk)
    begin
        if(!start)
        begin
            state <= COLD_WEIGHT;
            read <= READ_REQ;
            warmRead <= WARM_UPDATE;
            calc <= MUL_PREPARE;
            poolState <= POOL_REQ;
        end
        else
        begin
            // Calculation and Pooling work concurrently
            /* FSM for calculating part (1/2 of whole) */
            case(state)
                COLD_WEIGHT: if(cntDataWeight == 4'b1000) state <= COLD_FETCH;  // if read all 9 elements of Weight matrix
                COLD_FETCH: if(cntDataImage == 4'b1000) state <= CALCULATE; // if read all 9 elements from Image matrix
                WARM_FETCH: if(cntDataImageWarm == 2'b10) state <= CALCULATE;   // if read 3 elements from Image matrix
                CALCULATE: if(cntConv == 81) state <= COLD_FETCH;
                           else if(!((cntConv+1) % 82)) state <= COLD_FETCH; // if one row of result is calculated
                           else state <= WARM_FETCH;
                default: state <= state;
            endcase
            
            /* FSM for pooling part (2/2 of whole) */
            case(poolState)
                POOL_WAIT: poolState <= POOL_REQ;
                POOL_REQ: poolState <= POOL_READ;
                POOL_READ: if(cntDataPool % 4 == 3) poolState <= POOL_CALC; // if fetched 2 x 2 data for pooling
                           else poolState <= POOL_REQ;
                POOL_CALC: if(AHB_INTERFACE_0_hready_out == 1 && AHB_INTERFACE_0_hresp == 0) 
                            poolState <= POOL_DONE; // if requesting write is possible
                           else poolState <= poolState;
                POOL_DONE: poolState <= POOL_WRAP_UP;
                POOL_WRAP_UP: if(AHB_INTERFACE_0_hready_out == 0 && AHB_INTERFACE_0_hresp == 0)
                                poolState <= POOL_WRAP_UP;  // if write transcation is not done
                              else if(cntTotal == 6724) poolState <= 3'b101;    // going to default case if done (6724 = 82 x 82)
                              else poolState <= POOL_WAIT;   // continue pooling for next elemets
                default: poolState <= poolState;
            endcase
                           
            /* FSM for read */
            case(read)
                READ_REQ: if(AHB_INTERFACE_0_hready_out == 1)   read <= READ_READ;  // if reading is possible
                          else read <= read;
                READ_READ: if(AHB_INTERFACE_0_hready_out == 1) read <= READ_REQ;    // if data ready, go to the initial state
                           else read <= read;
            endcase
            
            /* FSM for warm read */
            case(warmRead)
                WARM_UPDATE: warmRead <= WARM_REQ;
                WARM_REQ: if(AHB_INTERFACE_0_hready_out == 1) warmRead <= WARM_READ;    // if reading is possible
                          else warmRead <= warmRead;
                WARM_READ: if(AHB_INTERFACE_0_hready_out == 1)  // if read data is retrieved
                           begin
                                if(cntDataImageWarm == 2'b10) warmRead <= WARM_REQ; // if done, initialzing for future use
                                else warmRead <= WARM_REQ;  // if not, keep on reading
                           end
                           else warmRead <= warmRead;
                default: warmRead <= warmRead;
            endcase
            
            /* FSM for calculation */
            case(calc)
                NEG: calc <= MUL_PREPARE;
                MUL_PREPARE: calc <= MUL;
                MUL: calc <= ADD;
                ADD: calc <= RESULT;
                RESULT: calc <= LOAD;
                LOAD: calc <= WRAP_UP;
                WRAP_UP: calc <= NEG;
            endcase    
       end
    end
/******* FSM END *******/


/******* Data update START *******/
    /* Action FSM for calculating part (1/2 of whole) */
    always @(posedge clk)
    begin
        if(!start)
        begin
            cntConv <= 0;
            cntDataImage <= 0;
            cntDataImageWarm <= 0;
            cntDataWeight <= 0;
            
            dataImage <= 0;
            dataWeight <= 0;
            dataOut <= 0;
            addrImage <= startAddrImage;
            nextAddrImage <= 0;
            addrWeight <= startAddrWeight;
            
            wea <= 0;   // This part controls port A
            ena <= 0;
            
            pool <= 0;
        end
        else
        begin
            /* FSM for calculating part */
            case(state)
                COLD_WEIGHT:
                    begin
                        case(read)
                            READ_REQ:
                                begin
                                    if(AHB_INTERFACE_0_hready_out == 1) // if ready to read
                                    begin
                                        AHB_INTERFACE_0_htrans <= 2'b10;
                                        AHB_INTERFACE_0_haddr <= addrWeight;
                                        AHB_INTERFACE_0_hburst <= 0;
                                        AHB_INTERFACE_0_hsize <= 3'b000;    // byte-sized transfer
                                        AHB_INTERFACE_0_hready_in <= 1;
                                        AHB_INTERFACE_0_sel <= 1;
                                        AHB_INTERFACE_0_hprot <= 0;
                                    end
                                end
                            READ_READ:
                                begin
                                    if(AHB_INTERFACE_0_hready_out == 1) // if requested data return
                                    begin
                                        case(cntDataWeight)
                                            4'b0000: dataWeight[7:0] <= AHB_INTERFACE_0_hrdata;
                                            4'b0001: dataWeight[15:8] <= AHB_INTERFACE_0_hrdata;
                                            4'b0010: dataWeight[23:16] <= AHB_INTERFACE_0_hrdata;
                                            4'b0011: dataWeight[31:24] <= AHB_INTERFACE_0_hrdata;
                                            4'b0100: dataWeight[39:32] <= AHB_INTERFACE_0_hrdata;
                                            4'b0101: dataWeight[47:40] <= AHB_INTERFACE_0_hrdata;
                                            4'b0110: dataWeight[55:48] <= AHB_INTERFACE_0_hrdata;
                                            4'b0111: dataWeight[63:56] <= AHB_INTERFACE_0_hrdata;
                                            4'b1000: dataWeight[71:64] <= AHB_INTERFACE_0_hrdata;
                                        endcase
                                    
                                        AHB_INTERFACE_0_sel <= 0;
                                        AHB_INTERFACE_0_hprot <= 0;
                                        AHB_INTERFACE_0_hready_in <= 1;
                                    
                                        addrWeight <= addrWeight + 1;
                                        if(cntDataWeight != 4'b1000) cntDataWeight <= cntDataWeight + 1;
                                        else cntDataWeight <= 0;
                                    end
                                    else    AHB_INTERFACE_0_hready_in <= 0;
                                
                                    AHB_INTERFACE_0_htrans <= 2'b00;
                                end
                        endcase
                    end
                COLD_FETCH:
                    begin
                        case(read)
                            READ_REQ:
                                begin
                                    if(AHB_INTERFACE_0_hready_out == 1) // reqeusting read for image
                                    begin
                                        AHB_INTERFACE_0_htrans <= 2'b10;
                                        AHB_INTERFACE_0_haddr <= addrImage;
                                        AHB_INTERFACE_0_hburst <= 0;
                                        AHB_INTERFACE_0_hsize <= 3'b000;
                                        AHB_INTERFACE_0_hready_in <= 1;
                                        AHB_INTERFACE_0_sel <= 1;
                                        AHB_INTERFACE_0_hprot <= 1;
                                    end    
                                end
                            READ_READ:
                                begin
                                    if(AHB_INTERFACE_0_hready_out == 1) // if request data return
                                    begin
                                        case(cntDataImage)
                                            4'b0000: dataImage[7:0] <= AHB_INTERFACE_0_hrdata;
                                            4'b0001: dataImage[15:8] <= AHB_INTERFACE_0_hrdata;
                                            4'b0010: dataImage[23:16] <= AHB_INTERFACE_0_hrdata;
                                            4'b0011: dataImage[31:24] <= AHB_INTERFACE_0_hrdata;
                                            4'b0100: dataImage[39:32] <= AHB_INTERFACE_0_hrdata;
                                            4'b0101: dataImage[47:40] <= AHB_INTERFACE_0_hrdata;
                                            4'b0110: dataImage[55:48] <= AHB_INTERFACE_0_hrdata;
                                            4'b0111: dataImage[63:56] <= AHB_INTERFACE_0_hrdata;
                                            4'b1000: dataImage[71:64] <= AHB_INTERFACE_0_hrdata;
                                        endcase
                                
                                        AHB_INTERFACE_0_sel <= 0;
                                        AHB_INTERFACE_0_hprot <= 0;
                                        AHB_INTERFACE_0_hready_in <= 1;
                                
                                        // [0 1 2] => address from element 2 to 3 must jump by 84-2
                                        // [3 4 5] => address from element 5 to 6 must jump by 84-2
                                        // [6 7 8]
                                        if(cntDataImage == 4'b0010 || cntDataImage == 5'b0101) addrImage <= addrImage + 32'd82;
                                        else if(cntDataImage == 4'b0000) begin nextAddrImage <= addrImage + 1; addrImage <= addrImage + 1; end
                                        else if(cntDataImage != 4'b1000) addrImage <= addrImage + 1;
                                        else if(cntDataImage == 4'b1000) addrImage <= nextAddrImage;
                                        else addrImage <= addrImage;
                                
                                        if(cntDataImage != 4'b1000) cntDataImage <= cntDataImage + 1;
                                        else cntDataImage <= 0;
                                    end
                                    else AHB_INTERFACE_0_hready_in <= 0;
                            
                                    AHB_INTERFACE_0_htrans <= 2'b00;                                    
                                end
                        endcase        
                    end
                WARM_FETCH:
                    begin
                        case(warmRead)
                            WARM_UPDATE:
                                begin
                                    dataImage[63:56] <= dataImage[71:64];
                                    dataImage[55:48] <= dataImage[63:56];
                                    dataImage[39:32] <= dataImage[47:40];
                                    dataImage [31:24] <= dataImage[39:32];
                                    dataImage[15:8] <= dataImage[23:16];
                                    dataImage[7:0] <= dataImage[15:8];
                                end
                            WARM_REQ:   
                                begin
                                    if(AHB_INTERFACE_0_hready_out == 1) // requesting read for image
                                    begin
                                        AHB_INTERFACE_0_htrans <= 2'b10;
                                        AHB_INTERFACE_0_haddr <= addrImage;
                                        AHB_INTERFACE_0_hburst <= 0;
                                        AHB_INTERFACE_0_hsize <= 3'b000;
                                        AHB_INTERFACE_0_hready_in <= 1;
                                        AHB_INTERFACE_0_sel <= 1;
                                        AHB_INTERFACE_0_hprot = 1;
                                    end
                                end
                            WARM_READ:
                                begin
                                    if(AHB_INTERFACE_0_hready_out == 1) // if request data return
                                    begin
                                        case(cntDataImageWarm)
                                            2'b00: dataImage[23:16] <= AHB_INTERFACE_0_hrdata;
                                            2'b01: dataImage[47:40] <= AHB_INTERFACE_0_hrdata;
                                            2'b10: dataImage[71:64] <= AHB_INTERFACE_0_hrdata;
                                        endcase
                                        
                                        AHB_INTERFACE_0_sel <= 0;
                                        AHB_INTERFACE_0_hprot <= 0;
                                        AHB_INTERFACE_0_hready_in <= 1;
                                        
                                        if(cntDataImageWarm == 2'b00) begin nextAddrImage <= addrImage + 1; addrImage <= addrImage + 32'd84; end    // next image is one coloumn far
                                        else if(cntDataImageWarm == 2'b01) addrImage <= addrImage + 32'd84;
                                        else if(cntDataImageWarm == 2'b10) addrImage <= nextAddrImage;
                                        else addrImage <= addrImage;
                                        
                                        if(cntDataImageWarm != 2'b10) cntDataImage <= cntDataImage + 1;
                                        else cntDataImageWarm <= 0;
                                    end
                                    else AHB_INTERFACE_0_hready_in <= 0;
                                    
                                    AHB_INTERFACE_0_htrans <= 2'b00;
                                end
                        endcase         
                    end
                CALCULATE:
                    begin
                        case(calc)
                            NEG:
                                begin
                                    negDataImage <= ~dataImage;
                                    negDataWeight <= ~dataWeight;
                                end
                            MUL_PREPARE:
                                begin
                                    // sign-extended abs
                                    dataTempImage1 <= (negDataImage[7]) ? {4'b0000, dataImage[6:0], 5'b00000} : {4'b0000, negDataImage[6:0], 5'b00000} + 6'b100000;
                                    dataTempWeight1 <= (negDataWeight[7]) ? {9'b000000000, dataWeight[6:0]} : {9'b000000000, negDataWeight[6:0]} + 1;
                                    dataTempImage2 <= (negDataImage[15]) ? {4'b0000, dataImage[14:8], 5'b00000} : {4'b0000, negDataImage[14:8], 5'b00000} + 6'b100000;
                                    dataTempWeight2 <= (negDataWeight[15]) ? {9'b000000000, dataWeight[14:8]} : {9'b000000000, negDataWeight[14:8]} + 1;             
                                    dataTempImage3 <= (negDataImage[23]) ? {4'b0000, dataImage[22:16], 5'b00000} : {4'b0000, negDataImage[22:16], 5'b00000} + 6'b100000;
                                    dataTempWeight3 <= (negDataWeight[23]) ? {9'b000000000, dataWeight[22:16]} : {9'b000000000, negDataWeight[22:16]} + 1;
                                    dataTempImage4 <= (negDataImage[31]) ? {4'b0000, dataImage[30:24], 5'b00000} : {4'b0000, negDataImage[30:24], 5'b00000} + 6'b100000;
                                    dataTempWeight4 <= (negDataWeight[31]) ? {9'b000000000, dataWeight[30:24]} : {9'b000000000, negDataWeight[30:24]} + 1;                                                                                               
                                    dataTempImage5 <= (negDataImage[39]) ? {4'b0000, dataImage[38:32], 5'b00000} : {4'b0000, negDataImage[38:32], 5'b00000} + 6'b100000;
                                    dataTempWeight5 <= (negDataWeight[39]) ? {9'b000000000, dataWeight[38:32]} : {9'b000000000, negDataWeight[38:32]} + 1;
                                    dataTempImage6 <= (negDataImage[47]) ? {4'b0000, dataImage[46:40], 5'b00000} : {4'b0000, negDataImage[46:40], 5'b00000} + 6'b100000;
                                    dataTempWeight6 <= (negDataWeight[47]) ? {9'b000000000, dataWeight[46:40]} : {9'b000000000, negDataWeight[46:40]} + 1;
                                    dataTempImage7 <= (negDataImage[55]) ? {4'b0000, dataImage[54:48], 5'b00000} : {4'b0000, negDataImage[54:48], 5'b00000} + 6'b100000;
                                    dataTempWeight7 <= (negDataWeight[55]) ? {9'b000000000, dataWeight[54:48]} : {9'b000000000, negDataWeight[54:48]} + 1;                 
                                    dataTempImage8 <= (negDataImage[63]) ? {4'b0000, dataImage[62:56], 5'b00000} : {4'b0000, negDataImage[62:56], 5'b00000} + 6'b100000;
                                    dataTempWeight8 <= (negDataWeight[63]) ? {9'b000000000, dataWeight[62:56]} : {9'b000000000, negDataWeight[62:56]} + 1;
                                    dataTempImage9 <= (negDataImage[71]) ? {4'b0000, dataImage[70:64], 5'b00000} : {4'b0000, negDataImage[70:64], 5'b00000} + 6'b100000;
                                    dataTempWeight9 <= (negDataWeight[71]) ? {9'b000000000, dataWeight[70:64]} : {9'b000000000, negDataWeight[70:64]} + 1;                                                                                                                                                               
                                    
                                    // saving sign
                                    signImage <= {dataImage[71], dataImage[63], dataImage[55], dataImage[47], dataImage[39], dataImage[31], dataImage[23], dataImage[15], dataImage[7]};
                                    signWeight <= {dataWeight[71], dataWeight[63], dataWeight[55], dataWeight[47], dataWeight[39], dataWeight[31], dataWeight[23], dataWeight[15], dataWeight[7]};
                                end
                            MUL:
                                begin
                                    // multiplying with sign
                                    dataTempImage1 <= ((signImage[0] ^ signWeight[0]) ? ~(dataTempImage1 * dataTempWeight1) + 1 : dataTempImage1 * dataTempWeight1) >>> 12;
                                    dataTempImage2 <= ((signImage[1] ^ signWeight[1]) ? ~(dataTempImage2 * dataTempWeight2) + 1 : dataTempImage2 * dataTempWeight2) >>> 12;
                                    dataTempImage3 <= ((signImage[2] ^ signWeight[2]) ? ~(dataTempImage3 * dataTempWeight3) + 1 : dataTempImage3 * dataTempWeight3) >>> 12;
                                    dataTempImage4 <= ((signImage[3] ^ signWeight[3]) ? ~(dataTempImage4 * dataTempWeight4) + 1 : dataTempImage4 * dataTempWeight4) >>> 12;
                                    dataTempImage5 <= ((signImage[4] ^ signWeight[4]) ? ~(dataTempImage5 * dataTempWeight5) + 1 : dataTempImage5 * dataTempWeight5) >>> 12;
                                    dataTempImage6 <= ((signImage[5] ^ signWeight[5]) ? ~(dataTempImage6 * dataTempWeight6) + 1 : dataTempImage6 * dataTempWeight6) >>> 12;
                                    dataTempImage7 <= ((signImage[6] ^ signWeight[6]) ? ~(dataTempImage7 * dataTempWeight7) + 1 : dataTempImage7 * dataTempWeight7) >>> 12;
                                    dataTempImage8 <= ((signImage[7] ^ signWeight[7]) ? ~(dataTempImage8 * dataTempWeight8) + 1 : dataTempImage8 * dataTempWeight8) >>> 12;
                                    dataTempImage9 <= ((signImage[8] ^ signWeight[8]) ? ~(dataTempImage9 * dataTempWeight9) + 1 : dataTempImage9 * dataTempWeight9) >>> 12;                                    
                                end
                            ADD:
                                begin
                                    // fixed-point calculation only requires simple addition
                                    dataTempImage1 <= dataTempImage1 + dataTempImage2 + dataTempImage3 + dataTempImage4 + dataTempImage5 + dataTempImage6 + dataTempImage7 + dataTempImage8 + dataTempImage9;
                                end
                            RESULT:
                                begin
                                    if(dataTempImage1[15])  // if data is negative
                                        dataTemp <= 8'b0000_0000;   // ReLU
                                    else if(dataTempImage1[14:7] > 8'b0001_1111)    // overflow
                                        dataTemp <= 8'b0001_1111;
                                    else if((dataTempImage1[14:7] == 8'b0000_0000) && (7'b001_1111 >= dataTempImage1[6:0]))  // underflow
                                        dataTemp <= 8'b0000_0000;
                                    else dataTemp <= {dataTempImage1[15], dataTempImage1[6:0]};
                                end
                            LOAD:
                                begin
                                    ena <= 1;
                                    wea <= 1;
                                    addrTemp <= addrTemp + 1;
                                end
                            WRAP_UP:
                                begin
                                    ena <= 0;
                                    wea <= 0;
                                    
                                    cntConv <= cntConv + 1;
                                    
                                    if(cntConv == 81) pool <= 0;
                                    else if(!((cntConv+1) % 82)) pool <= 1;
                                    else pool <= 0;
                                end
                        endcase
                    end
            endcase    
        end                            
    end

    /*
     * Pooling action block (2/2 of the whole design).
     *
     * Clocked on posedge clk; everything is held in its idle value while
     * `start` is low. While `start` is high:
     *   - a rising edge on `pool` raises `startPool`,
     *   - `startPool` drops again once `finishPool` is set,
     *   - while `startPool` is high the outputs follow `poolState`
     *     (`poolState` itself is not assigned in this block).
     *
     * NOTE(review): the AHB_INTERFACE_0_* registers written below are also
     * written by the preceding always block (e.g. in its WARM_READ state).
     * Registers with more than one procedural driver are normally rejected
     * or flagged by synthesis tools - consider merging both writers into a
     * single always block or arbitrating through separate request signals.
     *
     * NOTE(review): `finishPool` is only cleared in the `!start` branch, so
     * after it has been set once every later `startPool` pulse would be
     * cancelled one clock after it rises - confirm this is intended.
     */
    always @(posedge clk) begin
        if (!start) begin
            // Idle: clear the handshake flags and both counters.
            finishPool  <= 0;
            startPool   <= 0;
            poolReg     <= 0;
            cntDataPool <= 0;
            cntTotal    <= 0;
        end
        else begin
            // poolReg is `pool` delayed by one clock (edge detector).
            poolReg <= pool;

            // Set on rising edge of pool, clear when pooling is done,
            // otherwise keep the current value.
            if (pool && !poolReg)
                startPool <= 1;
            else if (finishPool)
                startPool <= 0;

            // Pooling work is only performed while startPool is asserted.
            if (startPool) begin
                case (poolState)
                    POOL_WAIT: begin
                        // Intentionally empty: wait for the pooling process
                        // to be completely done.
                    end

                    POOL_REQ: begin
                        // Assert the port-B enable (read request).
                        enb <= 1;
                    end

                    POOL_READ: begin
                        cntDataPool <= cntDataPool + 1;
                        enb         <= 0;

                        // Four consecutive reads form one window at offsets
                        // 0, +1, +82, +83 from the window's first address;
                        // dataOut keeps the running maximum.
                        case (cntDataPool % 4)
                            2'b00: begin
                                // First sample seeds the maximum; remember
                                // where the next window starts.
                                nextAddrPool <= addrPool + 1;
                                addrPool     <= addrPool + 1;
                                dataOut      <= dataPool;
                            end
                            2'b01: begin
                                addrPool <= addrPool + 12'd81;    // 81 = 82 - 1
                                if (dataPool > dataOut)
                                    dataOut <= dataPool;
                            end
                            2'b10: begin
                                addrPool <= addrPool + 1;
                                if (dataPool > dataOut)
                                    dataOut <= dataPool;
                            end
                            2'b11: begin
                                // Last sample: jump to the next window.
                                addrPool <= nextAddrPool;
                                if (dataPool > dataOut)
                                    dataOut <= dataPool;
                            end
                        endcase
                    end

                    POOL_CALC: begin
                        // Issue a single AHB write once the slave reports
                        // ready with no error response.
                        if (AHB_INTERFACE_0_hready_out == 1 && AHB_INTERFACE_0_hresp == 0) begin
                            AHB_INTERFACE_0_sel       <= 1;
                            AHB_INTERFACE_0_hwrite    <= 1;
                            AHB_INTERFACE_0_htrans    <= 2'b10;
                            AHB_INTERFACE_0_hburst    <= 0;
                            AHB_INTERFACE_0_hsize     <= 3'b000;
                            AHB_INTERFACE_0_hprot     <= 0;
                            AHB_INTERFACE_0_hready_in <= 1;
                            // NOTE(review): the write uses addrPool/dataPool
                            // while dataOut (running maximum) and addrOut
                            // (incremented below) are not used here -
                            // confirm addrOut/dataOut were not intended.
                            AHB_INTERFACE_0_haddr     <= addrPool;
                            AHB_INTERFACE_0_hwdata    <= dataPool;

                            cntTotal <= cntTotal + 1;
                            addrOut  <= addrOut + 1;
                        end
                    end

                    POOL_DONE: begin
                        AHB_INTERFACE_0_hready_in <= 0;
                        AHB_INTERFACE_0_htrans    <= 0;
                    end

                    POOL_WRAP_UP: begin
                        if (AHB_INTERFACE_0_hready_out == 0 && AHB_INTERFACE_0_hresp == 0) begin
                            // Release the write controls.
                            AHB_INTERFACE_0_hready_in <= 1;
                            AHB_INTERFACE_0_hwrite    <= 0;
                            AHB_INTERFACE_0_hwdata    <= 0;
                            AHB_INTERFACE_0_hprot     <= 0;
                            AHB_INTERFACE_0_sel       <= 0;
                        end
                        else if (cntTotal == 13'd6724) begin   // 6724 = 82 * 82
                            finish <= 1;
                        end
                        else if (((cntTotal + 1) % 82) == 0) begin
                            // 82 elements in one row
                            finishPool <= 1;
                        end
                    end

                    default: begin
                        // Unknown state: drive every AHB output to zero.
                        AHB_INTERFACE_0_sel       <= 0;
                        AHB_INTERFACE_0_hwrite    <= 0;
                        AHB_INTERFACE_0_hwdata    <= 0;
                        AHB_INTERFACE_0_htrans    <= 0;
                        AHB_INTERFACE_0_hsize     <= 0;
                        AHB_INTERFACE_0_hready_in <= 0;
                        AHB_INTERFACE_0_hprot     <= 0;
                        AHB_INTERFACE_0_hburst    <= 0;
                        AHB_INTERFACE_0_haddr     <= 0;
                    end
                endcase
            end
        end
    end
/******* Data update END *******/
    

endmodule
  

 

 

0 Kudos
1 Reply
kwjcoo
Visitor
Visitor
454 Views
Registered: ‎07-22-2019

BTW, I am using Vivado 2018.2 on Windows 10.

0 Kudos