修改时间 04-07-2021 08:48 PM
需要对一张 1000×1024 的图像进行两次卷积操作。我通过手动展开循环和数组分割的方法降低了一些 II,但综合报告仍然提示存在数据依赖以及存储器端口数受限的问题;更改了几次数组分割的因子和方式也没有效果,II 最低只能到 3,不知道接下来还可以怎样优化。
#include <iostream>
using namespace std;
/**
 * 2x2 binary erosion over a ROW x COL image stored as a flat array.
 *
 * For every flat index k inside the image, output[k] is true only when
 * input[k], input[k + 1], input[k + COL] and input[k + COL + 1] are all true.
 * Six neighbouring outputs are produced per pipelined iteration.
 *
 * Border handling:
 *  - Any neighbour whose flat index falls outside [0, ROW*COL) is treated as
 *    false. The previous code read past the end of the buffer for the last
 *    image row.
 *  - Outputs are only stored for columns < COL, so a COL that is not a
 *    multiple of six no longer spills writes past the end of the last row.
 *  - NOTE(review): the "+1" neighbour of the last column is still the first
 *    pixel of the next row (flat indexing), exactly as before. Confirm whether
 *    a true 2-D border policy is wanted instead.
 *
 * @param input  source image, ROW*COL elements
 * @param output destination image, ROW*COL elements
 */
void erode(bool input[],bool output[])
{
    // Pixels produced per pipelined iteration (same factor as the former
    // hand-unrolled code).
    const int kUnroll = 6;
    const int kPixels = ROW * COL;

    for (int row = 0; row < ROW; row++) {
        for (int col = 0; col < COL; col += kUnroll) {
#pragma HLS PIPELINE
            const int base = row * COL + col;

            // Neighbouring 2x2 windows overlap, so the six outputs only need a
            // 2 x 7 patch of the input. Load each pixel once instead of twice.
            bool top[kUnroll + 1];
            bool bottom[kUnroll + 1];
#pragma HLS ARRAY_PARTITION variable=top complete dim=1
#pragma HLS ARRAY_PARTITION variable=bottom complete dim=1
            for (int t = 0; t <= kUnroll; t++) {
                const int upper = base + t;
                const int lower = upper + COL;
                // Out-of-image samples count as background (false).
                top[t] = (upper < kPixels) ? input[upper] : false;
                bottom[t] = (lower < kPixels) ? input[lower] : false;
            }

            for (int t = 0; t < kUnroll; t++) {
                // Skip lanes beyond the row end when COL % kUnroll != 0.
                if (col + t < COL) {
                    // Equivalent to the former !(sum - 4): all four pixels set.
                    output[base + t] =
                        top[t] && top[t + 1] && bottom[t] && bottom[t + 1];
                }
            }
        }
    }
}
/**
 * 2x2 binary dilation over a ROW x COL image stored as a flat array.
 *
 * For every flat index k inside the image, output[k] is true when at least one
 * of input[k], input[k + 1], input[k + COL] and input[k + COL + 1] is true.
 * Six neighbouring outputs are produced per pipelined iteration.
 *
 * Border handling:
 *  - Any neighbour whose flat index falls outside [0, ROW*COL) is treated as
 *    false. The previous code read past the end of the buffer for the last
 *    image row.
 *  - Outputs are only stored for columns < COL, so a COL that is not a
 *    multiple of six no longer spills writes past the end of the last row.
 *  - NOTE(review): the "+1" neighbour of the last column is still the first
 *    pixel of the next row (flat indexing), exactly as before. Confirm whether
 *    a true 2-D border policy is wanted instead.
 *
 * @param input  source image, ROW*COL elements
 * @param output destination image, ROW*COL elements
 */
void dilate(bool input[],bool output[])
{
    // Pixels produced per pipelined iteration (same factor as the former
    // hand-unrolled code).
    const int kUnroll = 6;
    const int kPixels = ROW * COL;

    for (int row = 0; row < ROW; row++) {
        for (int col = 0; col < COL; col += kUnroll) {
#pragma HLS PIPELINE
            const int base = row * COL + col;

            // Neighbouring 2x2 windows overlap, so the six outputs only need a
            // 2 x 7 patch of the input. Load each pixel once instead of twice.
            bool top[kUnroll + 1];
            bool bottom[kUnroll + 1];
#pragma HLS ARRAY_PARTITION variable=top complete dim=1
#pragma HLS ARRAY_PARTITION variable=bottom complete dim=1
            for (int t = 0; t <= kUnroll; t++) {
                const int upper = base + t;
                const int lower = upper + COL;
                // Out-of-image samples count as background (false).
                top[t] = (upper < kPixels) ? input[upper] : false;
                bottom[t] = (lower < kPixels) ? input[lower] : false;
            }

            for (int t = 0; t < kUnroll; t++) {
                // Skip lanes beyond the row end when COL % kUnroll != 0.
                if (col + t < COL) {
                    // Equivalent to the former max(sum, 0) stored into a bool:
                    // the result is true whenever any of the four pixels is set.
                    output[base + t] =
                        top[t] || top[t + 1] || bottom[t] || bottom[t + 1];
                }
            }
        }
    }
}
/**
 * Top-level function: runs dilate() and then erode() on on-chip image buffers.
 *
 * Data flow in the visible code: image_c -> dilate -> image_e -> erode -> image_d.
 *
 * NOTE(review): img_input and end_1_out are not referenced in this body, and
 * image_c is never written here, so the stage that loads the input stream and
 * the stage that produces the output appear to be missing from this excerpt.
 *
 * @param img_input  input pixels, ROW*COL elements (unused in the visible code)
 * @param end_1_out  result buffer, select_size elements (unused in the visible code)
 */
void detect_position(AXI_VAL img_input[ROW*COL],AXI_VAL end_1_out[select_size] )
{
    // Source image for dilate().
    static bool image_c[ROW*COL] = {0};
#pragma HLS ARRAY_PARTITION variable=image_c cyclic factor=9 dim=1
    // Result of erode(). erode() stores several output pixels per pipelined
    // iteration, so this buffer needs the same cyclic partitioning as the other
    // two; left as a single memory, its limited number of ports bounds the
    // achievable II of the erode loop.
    static bool image_d[ROW*COL] = { 0 };
#pragma HLS ARRAY_PARTITION variable=image_d cyclic factor=9 dim=1
    // Intermediate image: written by dilate(), read by erode().
    static bool image_e[ROW*COL] = { 0 };
#pragma HLS ARRAY_PARTITION variable=image_e cyclic factor=9 dim=1
    dilate(image_c, image_e);
    erode(image_e, image_d);
}