cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
aripod
Explorer
Explorer
1,029 Views
Registered: ‎04-18-2017

IP with large loop not responding

Hello,

 

I am testing a very trivial IP in HLS that works well, but when I increase the number of iterations of the for loop, it no longer responds.

 

Here is the IP in HLS:

 

#include "hls_stream.h"
#include "ap_axi_sdata.h"
#include "ap_int.h"

// AXI4-Stream beat type used on both stream ports of doGain: a 32-bit data
// word plus the side-channel fields (keep, strb, user, last, id, dest) that
// doGain forwards unchanged.
// NOTE(review): the template arguments are presumably <data, user, id, dest>
// widths in bits, as declared in ap_axi_sdata.h -- confirm against the header.
typedef ap_axis<32,2,5,6> intSdCh;

// Streaming gain stage.
//
// Reads a fixed number of beats (50000) from inStream, multiplies the data
// word of each beat by `gain`, and writes the result to outStream. Every
// side-channel field (keep, strb, user, last, id, dest) is forwarded as-is.
//
//   inStream  - AXI4-Stream input port
//   outStream - AXI4-Stream output port
//   gain      - multiplier, set through the AXI4-Lite CTRL_BUS bundle
void doGain(hls::stream<intSdCh> &inStream, hls::stream<intSdCh> &outStream, int gain)
{
#pragma HLS INTERFACE axis port=outStream
#pragma HLS INTERFACE axis port=inStream
#pragma HLS INTERFACE s_axilite port=gain bundle=CTRL_BUS
#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS

	// The trip count is a compile-time constant, so the loop bound is known
	// without any extra hints.
	for (int i = 0; i < 50000; ++i)
	{
#pragma HLS PIPELINE
		// Fetch the next beat (blocks here while the sender FIFO is empty).
		intSdCh valIn = inStream.read();

		// Start from a copy of the input so that keep, strb, user, last, id
		// and dest pass through untouched; only the data word is replaced.
		intSdCh valOut = valIn;
		valOut.data = valIn.data * gain;

		// Emit the scaled beat (blocks while the receiver FIFO is full).
		outStream.write(valOut);
	}
}

It worked well with 5000 iterations, but with 50000 it did not work... and I need it to be 256*256. In that case, should I also switch i from int to long?

 

Just in case, the architecture is this:

architecture.png

 

And the code in SDK:

#include <stdio.h>
#include <xparameters.h>
#include "xdogain.h"
#include "xaxidma.h"

// Number of int words sent through the core per run; same value as the
// fixed loop bound inside the doGain HLS function.
#define SIZE_ARR 50000

// UART (these four macros are not referenced by the code shown here)
#define UART_DEVICE_ID		XPAR_XUARTPS_0_DEVICE_ID
#define INTC_DEVICE_ID		XPAR_SCUGIC_SINGLE_DEVICE_ID
#define UART_INT_IRQ_ID		XPAR_XUARTPS_1_INTR
#define RECV_BUFFER_SIZE	256

// DMA buffer addresses: fixed locations chosen by hand, not reserved through
// the linker. TX is MEM_BASE_ADDR + 1 MiB, RX is MEM_BASE_ADDR + 3 MiB.
// NOTE(review): nothing here guarantees these ranges are free of program
// code/data -- verify against the linker script.
#define MEM_BASE_ADDR 0x01000000
#define TX_BUFFER_BASE (MEM_BASE_ADDR + 0x00100000)
#define RX_BUFFER_BASE (MEM_BASE_ADDR + 0x00300000)

// Driver instances and their configuration pointers; filled in by
// initPeripherals().
XDogain doGain;
XDogain_Config *doGain_cfg;
XAxiDma axiDMA;
XAxiDma_Config *axiDMA_cfg;

// Source buffer for the memory-to-device DMA transfer (filled with 1s in main).
int inStreamData[SIZE_ARR];

// Looks up and initializes the doGain core and the AXI DMA driver instances
// (globals doGain / axiDMA), then disables all DMA interrupts on both
// channels, since main() polls XAxiDma_Busy instead of using interrupts.
//
// Errors are reported with printf only; the function returns void, so the
// caller is not told about a failure.
void initPeripherals()
{
	int status;

	printf("Initializing doGain\n");
	doGain_cfg = XDogain_LookupConfig(XPAR_DOGAIN_0_DEVICE_ID);
	if (!doGain_cfg)
	{
		// A missing configuration used to be skipped without any message.
		printf("Error: no configuration found for doGain core\n");
	}
	else
	{
		status = XDogain_CfgInitialize(&doGain, doGain_cfg);
		if (status != XST_SUCCESS)
		{
			printf("Error Initializing doGain core\n");
		}
	}

	printf("Initializing AxiDMA\n");
	axiDMA_cfg = XAxiDma_LookupConfig(XPAR_AXI_DMA_0_DEVICE_ID);
	if (!axiDMA_cfg)
	{
		// Without a configuration the axiDMA instance was never populated,
		// so its registers must not be touched below.
		printf("Error: no configuration found for AXI DMA core\n");
		return;
	}

	status = XAxiDma_CfgInitialize(&axiDMA, axiDMA_cfg);
	if (status != XST_SUCCESS)
	{
		printf("Error Initializing AXI DMA core\n");
		return;
	}

	// Disable interrupts on both directions; transfers are polled.
	XAxiDma_IntrDisable(&axiDMA, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA);
	XAxiDma_IntrDisable(&axiDMA, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DMA_TO_DEVICE);
}


// Test program: streams SIZE_ARR ints (all 1) through the doGain core via
// the AXI DMA and prints the first 10 received values.
//
// Returns 0 on success, 1 if the DMA driver refuses to start a transfer.
int main()
{
	int gain = 5;
	int status;
	// Receive buffer at a fixed, hand-picked address (see RX_BUFFER_BASE).
	int *m_dma_buffer_RX = (int*) RX_BUFFER_BASE;

	initPeripherals();

	// Fill the source buffer with a known pattern.
	for (int idx = 0; idx < SIZE_ARR; idx++)
	{
		inStreamData[idx] = 1;
	}

	// Program the gain and start the core before any data is sent to it.
	XDogain_Set_gain(&doGain,gain);
	XDogain_Start(&doGain);

	// Flush the cache of the buffers so the DMA sees the data written above
	// and no dirty line is later written back over the received data.
	Xil_DCacheFlushRange((u32)inStreamData, SIZE_ARR*sizeof(int));
	Xil_DCacheFlushRange((u32)m_dma_buffer_RX, SIZE_ARR*sizeof(int));

	// The status of each transfer request is checked: XST_SUCCESS is 0, any
	// other value means the transfer was NOT started, in which case the
	// busy-wait below would never terminate.
	// NOTE(review): the DMA IP's buffer length register width limits the
	// number of bytes per simple transfer -- confirm SIZE_ARR*sizeof(int)
	// fits the configured width.
	printf("Sending Data to IP core slave\n");
	status = XAxiDma_SimpleTransfer(&axiDMA,(u32)inStreamData,SIZE_ARR*sizeof(int),XAXIDMA_DMA_TO_DEVICE);
	if (status != 0)
	{
		printf("Error: DMA_TO_DEVICE transfer not started (status %d)\n", status);
		return 1;
	}

	printf("Get The Data\n");
	status = XAxiDma_SimpleTransfer(&axiDMA,(u32)m_dma_buffer_RX,SIZE_ARR*sizeof(int),XAXIDMA_DEVICE_TO_DMA);
	if (status != 0)
	{
		printf("Error: DEVICE_TO_DMA transfer not started (status %d)\n", status);
		return 1;
	}
	// Poll until the receive channel has finished.
	while(XAxiDma_Busy(&axiDMA,XAXIDMA_DEVICE_TO_DMA));

	// Invalidate so the CPU reads what the DMA wrote, not stale cache lines.
	Xil_DCacheInvalidateRange((u32)m_dma_buffer_RX,SIZE_ARR*sizeof(int));

	// Wait for the core itself to report completion.
	while(!XDogain_IsDone(&doGain));
	printf("Calculation is Complete\n");

	// Display Data
	for (int idx = 0; idx < 10; idx++)
	{
		printf("Recv[%d]=%d\n", idx,m_dma_buffer_RX[idx]);
	}

	return 0;
}

Do I maybe need to add a pragma? Or should I first save the entire array in a local variable and then do the processing (in this case, multiply by the gain)?

 

Thanks for the help.

 

0 Kudos
2 Replies
hbucher
Scholar
Scholar
1,006 Views
Registered: ‎03-22-2016

@aripod I see you have been watching Leonardo's tutorials on YouTube. Very good.

At first I thought the problem was the DMA engine.  There is a hardware limit for single transfers. It is somewhere in a header but I believe it is 8MB-1byte. But you are allocating only 200KB. 

I would think the problem is with your memory allocation (MEM_BASE_ADDR). As the array (inStreamData) grows, and since you are allocating it inside the executable, the binary grows too and is probably reaching the address that MEM_BASE_ADDR points to.

I would recommend moving MEM_BASE_ADDR higher in memory. Another way to do it is to allocate an array the same way you do with inStreamData and let the linker find space for it.

vitorian.com --- We do this for fun. Always give kudos. Accept as solution if your question was answered.
I will not answer to personal messages - use the forums instead.
0 Kudos
aripod
Explorer
Explorer
996 Views
Registered: ‎04-18-2017

@hbucher I took it from his tutorial...he helped me several times already.

 

I tried declaring the array like inStreamData, but it still gets stuck at while(XAxiDma_Busy(&axiDMA,XAXIDMA_DEVICE_TO_DMA));

 

#include <stdio.h>
#include <xparameters.h>
#include "xdogain.h"
#include "xaxidma.h"

// Number of int words sent through the core per run; same value as the
// fixed loop bound inside the doGain HLS function.
#define SIZE_ARR 50000

// UART (these four macros are not referenced by the code shown here)
#define UART_DEVICE_ID		XPAR_XUARTPS_0_DEVICE_ID
#define INTC_DEVICE_ID		XPAR_SCUGIC_SINGLE_DEVICE_ID
#define UART_INT_IRQ_ID		XPAR_XUARTPS_1_INTR
#define RECV_BUFFER_SIZE	256

// DMA buffer addresses: fixed locations chosen by hand, not reserved through
// the linker. TX is MEM_BASE_ADDR + 1 MiB, RX is MEM_BASE_ADDR + 3 MiB.
#define MEM_BASE_ADDR 0x01000000
#define TX_BUFFER_BASE (MEM_BASE_ADDR + 0x00100000)
#define RX_BUFFER_BASE (MEM_BASE_ADDR + 0x00300000)

// Driver instances and their configuration pointers; filled in by
// initPeripherals().
XDogain doGain;
XDogain_Config *doGain_cfg;
XAxiDma axiDMA;
XAxiDma_Config *axiDMA_cfg;

// Source buffer for the memory-to-device DMA transfer (filled with 1s in main).
int inStreamData[SIZE_ARR];

// Looks up and initializes the doGain core and the AXI DMA driver instances
// (globals doGain / axiDMA), then disables all DMA interrupts on both
// channels, since main() polls XAxiDma_Busy instead of using interrupts.
//
// Errors are reported with printf only; the function returns void, so the
// caller is not told about a failure.
void initPeripherals()
{
	int status;

	printf("Initializing doGain\n");
	doGain_cfg = XDogain_LookupConfig(XPAR_DOGAIN_0_DEVICE_ID);
	if (!doGain_cfg)
	{
		// A missing configuration used to be skipped without any message.
		printf("Error: no configuration found for doGain core\n");
	}
	else
	{
		status = XDogain_CfgInitialize(&doGain, doGain_cfg);
		if (status != XST_SUCCESS)
		{
			printf("Error Initializing doGain core\n");
		}
	}

	printf("Initializing AxiDMA\n");
	axiDMA_cfg = XAxiDma_LookupConfig(XPAR_AXI_DMA_0_DEVICE_ID);
	if (!axiDMA_cfg)
	{
		// Without a configuration the axiDMA instance was never populated,
		// so its registers must not be touched below.
		printf("Error: no configuration found for AXI DMA core\n");
		return;
	}

	status = XAxiDma_CfgInitialize(&axiDMA, axiDMA_cfg);
	if (status != XST_SUCCESS)
	{
		printf("Error Initializing AXI DMA core\n");
		return;
	}

	// Disable interrupts on both directions; transfers are polled.
	XAxiDma_IntrDisable(&axiDMA, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA);
	XAxiDma_IntrDisable(&axiDMA, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DMA_TO_DEVICE);
}


// Test program: streams SIZE_ARR ints (all 1) through the doGain core via
// the AXI DMA into a linker-placed receive array and prints the first 10
// received values.
//
// Returns 0 on success, 1 if the DMA driver refuses to start a transfer.
int main()
{
	int gain = 5;
	int status;
	// Receive buffer. Declared static so it is placed by the linker (and
	// zero-initialized) like inStreamData instead of living on the stack:
	// SIZE_ARR ints is roughly 200 KB.
	// NOTE(review): presumably larger than the stack configured in the
	// linker script -- verify.
	static int RX[SIZE_ARR];

	initPeripherals();

	// Fill the source buffer with a known pattern.
	for (int idx = 0; idx < SIZE_ARR; idx++)
	{
		inStreamData[idx] = 1;
	}

	// Program the gain and start the core before any data is sent to it.
	XDogain_Set_gain(&doGain,gain);
	XDogain_Start(&doGain);

	// Flush the cache of the buffers so the DMA sees the data written above
	// and no dirty line is later written back over the received data.
	Xil_DCacheFlushRange((u32)inStreamData, SIZE_ARR*sizeof(int));
	Xil_DCacheFlushRange((u32)RX, SIZE_ARR*sizeof(int));

	// The status of each transfer request is checked: XST_SUCCESS is 0, any
	// other value means the transfer was NOT started, in which case the
	// busy-wait below would never terminate.
	// NOTE(review): the DMA IP's buffer length register width limits the
	// number of bytes per simple transfer -- confirm SIZE_ARR*sizeof(int)
	// fits the configured width.
	printf("Sending Data to IP core slave\n");
	status = XAxiDma_SimpleTransfer(&axiDMA,(u32)inStreamData,SIZE_ARR*sizeof(int),XAXIDMA_DMA_TO_DEVICE);
	if (status != 0)
	{
		printf("Error: DMA_TO_DEVICE transfer not started (status %d)\n", status);
		return 1;
	}

	printf("Get The Data\n");
	status = XAxiDma_SimpleTransfer(&axiDMA,(u32)RX,SIZE_ARR*sizeof(int),XAXIDMA_DEVICE_TO_DMA);
	if (status != 0)
	{
		printf("Error: DEVICE_TO_DMA transfer not started (status %d)\n", status);
		return 1;
	}
	// Poll until the receive channel has finished.
	while(XAxiDma_Busy(&axiDMA,XAXIDMA_DEVICE_TO_DMA));

	// Invalidate so the CPU reads what the DMA wrote, not stale cache lines.
	Xil_DCacheInvalidateRange((u32)RX,SIZE_ARR*sizeof(int));

	// Wait for the core itself to report completion.
	while(!XDogain_IsDone(&doGain));
	printf("Calculation is Complete\n");

	// Display Data (read from RX; the old m_dma_buffer_RX pointer no longer
	// exists in this version).
	for (int idx = 0; idx < 10; idx++)
	{
		printf("Recv[%d]=%d\n", idx,RX[idx]);
	}

	return 0;
}

To also try the other approach, how should I know where to move MEM_BASE_ADDR?

 

Thanks for the help.

0 Kudos