UPGRADE YOUR BROWSER

We have detected your current browser version is not the latest one. Xilinx.com uses the latest web technologies to bring you the best online experience possible. Please upgrade to a Xilinx.com supported browser:Chrome, Firefox, Internet Explorer 11, Safari. Thank you!

cancel
Showing results for 
Search instead for 
Did you mean: 
Visitor dprze
Visitor
436 Views
Registered: ‎05-17-2018

XfOpencv - Sobel Live I/O 8NPPC problem

Hi,

I am trying to perform Sobel (from xfOpencv) on live input stream, processing 8 pixels per clock cycle, as it is possible according to documentation. Earlier I have succesfully ran Sobel with 1NPPC on live I/O.

I have modified code for reading input and writing output as follows:

 

#ifndef NUM_OF_PIXEL_PER_CLOCK
#define NUM_OF_PIXEL_PER_CLOCK XF_NPPC8
#endif
#pragma SDS data mem_attribute("inoutUV.data":NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data mem_attribute(frm_data_in:NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data copy("inoutUV.data"[0:"inoutUV.size"])
#if (NUM_OF_PIXEL_PER_CLOCK == XF_NPPC1)
#pragma SDS data copy(frm_data_in[0:pcnt])
#else //NUM_OF_PIXEL_PER_CLOCK == XF_NPPC8
#pragma SDS data copy(frm_data_in[0:pcnt>>3])
#endif
#pragma SDS data access_pattern("inoutUV.data":SEQUENTIAL)
#pragma SDS data access_pattern(frm_data_in:SEQUENTIAL)
#pragma SDS data mem_attribute("inLuma.data":NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data copy("inLuma.data"[0:"inLuma.size"])
#pragma SDS data access_pattern("inLuma.data":SEQUENTIAL)
void read_input(XF_TNAME(XF_16UC1, NUM_OF_PIXEL_PER_CLOCK) *frm_data_in,
		    xf::Mat<XF_8UC1, HEIGHT, WIDTH, NUM_OF_PIXEL_PER_CLOCK> &inLuma,
		    xf::Mat<XF_8UC1, HEIGHT, WIDTH, NUM_OF_PIXEL_PER_CLOCK> &inoutUV,
		    uint32_t in_fourcc, int pcnt)
{
#if (NUM_OF_PIXEL_PER_CLOCK == XF_NPPC1)
	unsigned short lumamask    = (V4L2_PIX_FMT_YUYV==in_fourcc)? 0x00FF : 0xFF00;
	unsigned short lumashift   = (V4L2_PIX_FMT_YUYV==in_fourcc)? 0      : 8;

	unsigned short chromamask  = (V4L2_PIX_FMT_YUYV==in_fourcc)? 0xFF00 : 0x00FF;
	unsigned short chromashift = (V4L2_PIX_FMT_YUYV==in_fourcc)? 8      : 0;

	for(int i=0; i<pcnt; i++){
#pragma HLS pipeline II=1
		unsigned short yuvpix = frm_data_in[i];
		ap_uint<8> ypix =  (ap_uint<8>)((yuvpix & lumamask)>>lumashift);
		ap_uint<8> uvpix = (ap_uint<8>)((yuvpix & chromamask)>>chromashift);
		inLuma.data[i] = ypix;
		inoutUV.data[i] = uvpix;
	}
#else //NUM_OF_PIXEL_PER_CLOCK == XF_NPPC8
	ap_uint<128> lumamask_8 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x00FF0000000000000000000000000000 : 0xFF000000000000000000000000000000;
	ap_uint<16>	lumashift_8 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 56 : 64;

	ap_uint<128> lumamask_7 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x000000FF000000000000000000000000 : 0x0000FF00000000000000000000000000;
	ap_uint<16>	lumashift_7 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 48 : 56;

	ap_uint<128> lumamask_6 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x0000000000FF00000000000000000000 : 0x00000000FF0000000000000000000000;
	ap_uint<16>	lumashift_6 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 40 : 48;

	ap_uint<128> lumamask_5 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x00000000000000FF0000000000000000 : 0x000000000000FF000000000000000000;
	ap_uint<16>	lumashift_5 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 32 : 40;

	ap_uint<128> lumamask_4 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x000000000000000000FF000000000000 : 0x0000000000000000FF00000000000000;
	ap_uint<16>	lumashift_4 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 24 : 32;

	ap_uint<128> lumamask_3 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x0000000000000000000000FF00000000 : 0x00000000000000000000FF0000000000;
	ap_uint<16>	lumashift_3 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 16 : 24;

	ap_uint<128> lumamask_2 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x00000000000000000000000000FF0000 : 0x000000000000000000000000FF000000;
	ap_uint<16>	lumashift_2 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 8 : 16;

	ap_uint<128> lumamask_1 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x000000000000000000000000000000FF : 0x0000000000000000000000000000FF00;
	ap_uint<16>	lumashift_1 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0 : 8;


	ap_uint<128> chromamask_8 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0xFF000000000000000000000000000000 : 0x00FF0000000000000000000000000000;
	ap_uint<16>	chromashift_8 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 64 : 56;

	ap_uint<128> chromamask_7 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x0000FF00000000000000000000000000 : 0x000000FF000000000000000000000000;
	ap_uint<16>	chromashift_7 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 56 : 48;

	ap_uint<128> chromamask_6 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x00000000FF0000000000000000000000 : 0x0000000000FF00000000000000000000;
	ap_uint<16>	chromashift_6 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 48 : 40;

	ap_uint<128> chromamask_5 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x000000000000FF000000000000000000 : 0x00000000000000FF0000000000000000;
	ap_uint<16>	chromashift_5 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 40 : 32;

	ap_uint<128> chromamask_4 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x0000000000000000FF00000000000000 : 0x000000000000000000FF000000000000;
	ap_uint<16>	chromashift_4 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 32 : 24;

	ap_uint<128> chromamask_3 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x00000000000000000000FF0000000000 : 0x0000000000000000000000FF00000000;
	ap_uint<16>	chromashift_3 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 24 : 16;

	ap_uint<128> chromamask_2 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x000000000000000000000000FF000000 : 0x00000000000000000000000000FF0000;
	ap_uint<16>	chromashift_2 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 16 : 8;

	ap_uint<128> chromamask_1 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 0x0000000000000000000000000000FF00 : 0x000000000000000000000000000000FF;
	ap_uint<16>	chromashift_1 = (V4L2_PIX_FMT_YUYV==in_fourcc) ? 8 : 0;


	for(int i=0; i<pcnt>>3; i++){
	#pragma HLS pipeline II=1
			ap_uint<128> yuvpix = frm_data_in[i];

			ap_uint<64> ypix =  (ap_uint<64>)((yuvpix & lumamask_8)>>lumashift_8) |
								(ap_uint<64>)((yuvpix & lumamask_7)>>lumashift_7) |
								(ap_uint<64>)((yuvpix & lumamask_6)>>lumashift_6) |
								(ap_uint<64>)((yuvpix & lumamask_5)>>lumashift_5) |
								(ap_uint<64>)((yuvpix & lumamask_4)>>lumashift_4) |
								(ap_uint<64>)((yuvpix & lumamask_3)>>lumashift_3) |
								(ap_uint<64>)((yuvpix & lumamask_2)>>lumashift_2) |
								(ap_uint<64>)((yuvpix & lumamask_1)>>lumashift_1);

			ap_uint<64> uvpix =	(ap_uint<64>)((yuvpix & chromamask_8)>>chromashift_8) |
								(ap_uint<64>)((yuvpix & chromamask_7)>>chromashift_7) |
								(ap_uint<64>)((yuvpix & chromamask_6)>>chromashift_6) |
								(ap_uint<64>)((yuvpix & chromamask_5)>>chromashift_5) |
								(ap_uint<64>)((yuvpix & chromamask_4)>>chromashift_4) |
								(ap_uint<64>)((yuvpix & chromamask_3)>>chromashift_3) |
								(ap_uint<64>)((yuvpix & chromamask_2)>>chromashift_2) |
								(ap_uint<64>)((yuvpix & chromamask_1)>>chromashift_1);

			inLuma.data[i] = ypix;
			inoutUV.data[i] = uvpix;
		}


#endif
}
#pragma SDS data mem_attribute("inoutUV.data":NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data copy("inoutUV.data"[0:"inoutUV.size"])
#pragma SDS data access_pattern("inoutUV.data":SEQUENTIAL)
#pragma SDS data mem_attribute("outLuma.data":NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data copy("outLuma.data"[0:"outLuma.size"])
#pragma SDS data access_pattern("outLuma.data":SEQUENTIAL)
#pragma SDS data mem_attribute(frm_data_out:NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#if (NUM_OF_PIXEL_PER_CLOCK == XF_NPPC1)
#pragma SDS data copy(frm_data_out[0:pcnt])
#else //NUM_OF_PIXEL_PER_CLOCK == XF_NPPC8
#pragma SDS data copy(frm_data_out[0:pcnt>>3])
#endif
#pragma SDS data access_pattern(frm_data_out:SEQUENTIAL)
void write_output(xf::Mat<XF_8UC1, HEIGHT, WIDTH, NUM_OF_PIXEL_PER_CLOCK> &outLuma,
		      xf::Mat<XF_8UC1, HEIGHT, WIDTH, NUM_OF_PIXEL_PER_CLOCK> &inoutUV,
			  XF_TNAME(XF_16UC1, NUM_OF_PIXEL_PER_CLOCK) *frm_data_out, uint32_t out_fourcc, int pcnt)
{
#if (NUM_OF_PIXEL_PER_CLOCK == XF_NPPC1)
	unsigned short lumashift = (V4L2_PIX_FMT_YUYV==out_fourcc)? 0 : 8;
	unsigned short chromashift = (V4L2_PIX_FMT_YUYV==out_fourcc)? 8 : 0;

	for(int i=0; i<pcnt; i++){
	#pragma HLS pipeline II=1
		ap_uint<8> ypix = outLuma.data[i];
		ap_uint<8> uvpix = inoutUV.data[i];
		unsigned short yuvpix = ((unsigned short) uvpix << chromashift) | ((unsigned short) ypix << lumashift);
		frm_data_out[i] = yuvpix;
	}
#else //NUM_OF_PIXEL_PER_CLOCK == XF_NPPC8

	unsigned short lumashift_8 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 56 : 64;
	unsigned short lumashift_7 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 48 : 56;
	unsigned short lumashift_6 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 40 : 48;
	unsigned short lumashift_5 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 32 : 40;
	unsigned short lumashift_4 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 24 : 32;
	unsigned short lumashift_3 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 16 : 24;
	unsigned short lumashift_2 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 8 : 16;
	unsigned short lumashift_1 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 0 : 8;

	unsigned short chromashift_8 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 64 : 56;
	unsigned short chromashift_7 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 56 : 48;
	unsigned short chromashift_6 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 48 : 40;
	unsigned short chromashift_5 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 40 : 32;
	unsigned short chromashift_4 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 32 : 24;
	unsigned short chromashift_3 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 24 : 16;
	unsigned short chromashift_2 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 16 : 8;
	unsigned short chromashift_1 = (V4L2_PIX_FMT_YUYV==out_fourcc)? 8 : 0;

	for(int i=0; i<pcnt>>3; i++){
	#pragma HLS pipeline II=1
			ap_uint<64> ypix = outLuma.data[i];
			ap_uint<64> uvpix = inoutUV.data[i];

			ap_uint<64> yuvpix_8 = ((ap_uint<64>) uvpix << chromashift_8) | ((ap_uint<64>) ypix << lumashift_8);
			ap_uint<64> yuvpix_7 = ((ap_uint<64>) uvpix << chromashift_7) | ((ap_uint<64>) ypix << lumashift_7);
			ap_uint<64> yuvpix_6 = ((ap_uint<64>) uvpix << chromashift_6) | ((ap_uint<64>) ypix << lumashift_6);
			ap_uint<64> yuvpix_5 = ((ap_uint<64>) uvpix << chromashift_5) | ((ap_uint<64>) ypix << lumashift_5);
			ap_uint<64> yuvpix_4 = ((ap_uint<64>) uvpix << chromashift_4) | ((ap_uint<64>) ypix << lumashift_4);
			ap_uint<64> yuvpix_3 = ((ap_uint<64>) uvpix << chromashift_3) | ((ap_uint<64>) ypix << lumashift_8);
			ap_uint<64> yuvpix_2 = ((ap_uint<64>) uvpix << chromashift_2) | ((ap_uint<64>) ypix << lumashift_2);
			ap_uint<64> yuvpix_1 = ((ap_uint<64>) uvpix << chromashift_1) | ((ap_uint<64>) ypix << lumashift_1);
			frm_data_out[i] = 	yuvpix_8 | yuvpix_7 | yuvpix_6 | yuvpix_5 |
								yuvpix_4 | yuvpix_3 | yuvpix_2 | yuvpix_1;
		}

#endif
}

I am calling Sobel as follows:

xf::Sobel<XF_BORDER_CONSTANT, 3, XF_8UC1, XF_8UC1,HEIGHT, WIDTH, NUM_OF_PIXEL_PER_CLOCK>(*image->inLuma, *image->SobelX, *image->SobelY);

These are all code changes in comparision to 1NPPC processing and this solution does not work, and frankly I have no idea why - the image from before starting Sobel is frozen on output screen.
If it makes any difference, I am processing 4K image.
I am working on ZCU102 platform, SDx 2017.4.


I would appreciate your help,
best regards!

0 Kudos
2 Replies
Visitor vandenplas
Visitor
375 Views
Registered: ‎11-02-2017

Re: XfOpencv - Sobel Live I/O 8NPPC problem

Hi

 

Could you help me get the same think working on my ZCU102 board

 

I've tried to compile the revision example, but it just comes up with a  M_AXI_GP port error.

 

My plan is to be able to grab image frames from a camera and put a rectangle around any object

 

0 Kudos
Contributor
Contributor
127 Views
Registered: ‎05-02-2011

Re: XfOpencv - Sobel Live I/O 8NPPC problem

hi @dprze :

xf::Mat<XF_8UC1, HEIGHT, WIDTH, NUM_OF_PIXEL_PER_CLOCK> &inoutUV,
XF_8UC1 : the bit number of pixel is 8
so
for(int i=0; i<pcnt>>3; i++){
#pragma HLS pipeline II=1
ap_uint<64> ypix = outLuma.data[i];
ap_uint<64> uvpix = inoutUV.data[i];
is not correct





by miter
0 Kudos