cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
sharpmddr
Contributor
Contributor
845 Views
Registered: ‎05-10-2018

What is the reason of turning longer time for executing my program?

Jump to solution

I made a program for predicting stock-index by using the logit algorithm. However, I only used the A53 ARM processor of the ZCU102 development board to run this program for 46 seconds, and I had used the toggle SW/HW to switch to hardware execution, and the execution time should be decreased. Instead, it has grown a lot. I put the source code of the entire program in the attachment. Please help me to check the HLS language what I used is inappropriate. How should I fix it to decrease execution time? Any suggestions are very grateful.

 

=============main.cpp==========================================================

#include "sds_lib.h"
#include "hw.h"

int count_all = 0;
int use_rows = 0;

float data_all[rows][columns] ;
float data_x[rows][columns - 1];
float data_y[rows]; /

 

int read_data(string file,float data[rows][columns],string header[columns])
{

ifstream inFile(file, ios::in);
string lineStr;
string str;

for(int j = 0;j < rows;j ++ )
{
// read data
getline(inFile, lineStr);
stringstream ss(lineStr);

if (j==0)
{
for(int i = 0; i < columns ;i ++ )
{
// analysis data
getline(ss, str, ',') ;
header[i] = str;
}
}
else
{
for(int i = 0; i < columns ;i ++ )
{
// analysis data
getline(ss, str, ',') ;
if (inFile.eof())
{
return 0;
}
data[j][i] = stof(str);
}
count_all += 1;
}
}
return 0;
}

int read_config(string config_file,int *start_date, int *end_date)
{

ifstream inFile(config_file, ios::in);
string lineStr;
string str;
getline(inFile, lineStr);
stringstream ss(lineStr);
getline(ss, str, ',');
str.replace(4,1,"");
str.replace(6,3,"");
*start_date = stoi(str);
getline(ss, str, ',');
str.replace(4,1,"");
str.replace(6,3,"");
*end_date = stoi(str);
return 0;
}

int process_data(float data_all[rows][columns],float data_x[rows][columns - 1],float data_y[rows],int start_date,int end_date)
{

for (int i = 0; i < count_all; i++)
{
if ( data_all[i][0] < start_date)
{
continue;
}
if ( data_all[i][0] > end_date)
{
break;
}
for (int j = 0; j < columns - 1; j++)
{
if (j == columns - 2)
{
data_x[use_rows][j] = 1.0;
data_y[use_rows] = data_all[i][j + 1];
}
else
{
data_x[use_rows][j] = data_all[i][j + 1] / 300;
}
}
use_rows += 1;
}
return 0;
}


int main()
{
string header[columns];
float w[columns - 1] = {0};

int start_date = 0;
int end_date = 0;
string config_file = "/mnt/work3/config/config.txt";// read config data from a file
read_config(config_file,&start_date,&end_date);

string file = "/mnt/work3/data/data_rank.csv";// read data from a file
read_data(file,data_all,header);

process_data(data_all,data_x,data_y,start_date,end_date);
cout << use_rows << endl;

cout << hypothesis(data_x[1],w) << endl;
unsigned long long s = sds_clock_counter();
for (int now_time = 0; now_time < train_time ;now_time ++)
{
logic_regression(data_x,data_y,w,learn_rate,use_rows);
}
unsigned long long e = sds_clock_counter();
printf("\t Clock cycle: %llu\n", e - s);
printf("\t Time: %.4f [sec]\n", (e - s)*1.0/sds_clock_frequency());

string out_file = "/mnt/work3/result/result.txt";
ofstream outFile;
outFile.open(out_file, ios::out);
outFile << start_date << "," << end_date << endl;
for (int i = 0; i < columns - 1; i ++)
{
outFile << w[i] << "," ;
}
outFile << endl;
return 0;
}

=============================================================================

 

===================hw.h=======================================================

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <algorithm>
#include <math.h>

using namespace std;

const int columns = 38;
const int rows = 20000;
const float learn_rate = 0.5;//learning rate
const int train_time = 1000;//train time
const int feature_num = columns - 1; //feature

float hypothesis(float data_sample[columns - 1],float w[columns - 1]);
float cost_function(float data_x[rows][columns - 1],float data_y[rows],float w[columns - 1], int use_rows);
void logic_regression(float data_x[rows][columns - 1],float data_y[rows],float w[columns -1],float a, int use_rows);

=============================================================================

 

 

=================hw.cpp_Toggle  ALL Function to  HW========================================================

#include "hw.h"

// define logit algorithm
float hypothesis(float data_sample[columns - 1],float w[columns - 1])
{
#pragma HLS inline
float sum=0.0;
for(int i=0;i<columns - 1;i++)
{
#pragma HLS PIPELINE II=1
sum+=w[i]*data_sample[i];
}
return 1/(1+exp(-sum));
}

#pragma SDS data access_pattern(data_x:SEQUENTIAL,data_y:SEQUENTIAL)
float cost_function(float data_x[rows][columns - 1],float data_y[rows],float w[columns - 1], int use_rows)
{
#pragma HLS inline
float sum=0.0;
for(int i=0;i<use_rows;i++)
{
#pragma HLS PIPELINE II=1
sum+=-data_y[i]*log(hypothesis(data_x[i],w))-(1-data_y[i])*log(1-hypothesis(data_x[i],w));
}
return sum/use_rows;
}

#pragma SDS data access_pattern(data_x:SEQUENTIAL,data_y:SEQUENTIAL)
void logic_regression(float data_x[rows][columns - 1],float data_y[rows],float w[columns -1], float a, int use_rows)
{
float delta_w[columns - 1];
float wout[columns - 1];

#pragma HLS ARRAY_PARTITION variable=delta_w block factor= 37
#pragma HLS ARRAY_PARTITION variable=wout block factor= 37
for(int j=0;j<columns - 1;j++)
{
float sum=0.0;
for(int i=0;i<use_rows;i++)
{
#pragma HLS PIPELINE II=1
sum+=(hypothesis(data_x[i],w)-data_y[i])*data_x[i][j];
}
delta_w[j] = sum/rows * a;
}
for(int i=0;i<columns - 1;i++)
{
#pragma HLS unroll
#pragma HLS PIPELINE II=1
wout[i]=w[i]-delta_w[i];
}
cost_function(data_x,data_y,wout,use_rows);
}

===========================================================================

 

Tags (3)
0 Kudos
1 Solution

Accepted Solutions
stephenm
Xilinx Employee
Xilinx Employee
895 Views
Registered: ‎09-12-2007

It shouldnt be assumed that moving a function to HW will make it perform quicker. 

 

For example, when you mark a function to be accellerated the SDSoC tools will create a datamover interface

using the information at its dispossal. ie, the interfaces available in the platform, and the pragmas set by the user.

In an ideal world all the data to be sent from the accell HW to the processor will be doing in a sequential manner via a DMA

over a High Speed interface. There are pragmas available to steer to tools to do this. 

 

I would suggest that you do a performance estimation to see if there is an improvement, tnen see where the bottlenecks are in the code, and how this could be improved by wither changing the code, or using pragmas to better steer the tools to create a better datamover

https://www.xilinx.com/support/documentation/sw_manuals/xilinx2018_1/ug1253-sdx-pragma-reference.pdf

View solution in original post

0 Kudos
1 Reply
stephenm
Xilinx Employee
Xilinx Employee
896 Views
Registered: ‎09-12-2007

It shouldnt be assumed that moving a function to HW will make it perform quicker. 

 

For example, when you mark a function to be accellerated the SDSoC tools will create a datamover interface

using the information at its dispossal. ie, the interfaces available in the platform, and the pragmas set by the user.

In an ideal world all the data to be sent from the accell HW to the processor will be doing in a sequential manner via a DMA

over a High Speed interface. There are pragmas available to steer to tools to do this. 

 

I would suggest that you do a performance estimation to see if there is an improvement, tnen see where the bottlenecks are in the code, and how this could be improved by wither changing the code, or using pragmas to better steer the tools to create a better datamover

https://www.xilinx.com/support/documentation/sw_manuals/xilinx2018_1/ug1253-sdx-pragma-reference.pdf

View solution in original post

0 Kudos