背景:很多时候需要PS与PL共享DDR作为global memory,例如卷积之中,PS将weight in与feature写入DDR,然后PL调用DDR进行运算,再将结果写入DDR进行下一次迭代。
目的:1. PS与PL共享DDR,既能读也能写,并且像卷积一样需要三个指针。2. 在IPcore中设置状态变量,使PS能够查看IPcore当前运行到的位置。3. 运用BRAM实现一定的数据搬运。
一、IPcore编写
1.1 一种错误的接口
int share_dram_core(int write_nums,int read_nums,
					volatile float * write_ptr,volatile float *read_ptr,
					int location_idx,int write_loop_idx,int read_loop_idx,
					int read_sum){
#pragma HLS INTERFACE m_axi depth=4096 port=write_ptr offset=slave
#pragma HLS INTERFACE m_axi depth=4096 port=read_ptr offset=slave
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=write_nums
#pragma HLS INTERFACE s_axilite port=read_nums
#pragma HLS INTERFACE s_axilite port=location_idx
#pragma HLS INTERFACE s_axilite port=write_loop_idx
#pragma HLS INTERFACE s_axilite port=read_loop_idx
#pragma HLS INTERFACE s_axilite port=read_sum
DRAM上不能有两个m_axi类型的指针,否则可能会遇到重叠等问题。
1.2 IPcore代码
// HLS top function: reads from, then writes to, a single shared DRAM buffer.
//
// write_nums / read_nums : element counts passed in by the PS.
// data_ptr               : the only m_axi pointer; the write region starts at
//                          data_ptr[read_nums] (see write_ptr below).
// location_idx, write_loop_idx, read_loop_idx, read_sum :
//                          progress/status values updated inside the function.
// Returns 1 when the function has run to the end.
//
// NOTE(review): the four status arguments are by-value ints, so in plain C the
// assignments below are local to this function and are not visible to the
// caller. Whether the generated s_axilite registers reflect them must be
// verified against the HLS documentation -- TODO confirm.
int share_dram_core(int write_nums,int read_nums,
					volatile float * data_ptr,
					int location_idx,int write_loop_idx,int read_loop_idx,
					int read_sum){
// Single AXI master port to DDR; its base address is set through the slave interface.
#pragma HLS INTERFACE m_axi depth=4096 port=data_ptr offset=slave
// All scalar arguments and the return value are exposed over AXI4-Lite.
#pragma HLS INTERFACE s_axilite port=return register
#pragma HLS INTERFACE s_axilite port=write_nums register
#pragma HLS INTERFACE s_axilite port=read_nums register
#pragma HLS INTERFACE s_axilite port=location_idx register
#pragma HLS INTERFACE s_axilite port=write_loop_idx register
#pragma HLS INTERFACE s_axilite port=read_loop_idx register
#pragma HLS INTERFACE s_axilite port=read_sum register
	// Reset all status values at start.
	location_idx=0;
	write_loop_idx=0;
	read_loop_idx=0;
	read_sum=0;
	// NOTE(review): the loop header (and possibly part of the body) is truncated
	// in this listing; the text after "read_loc" was lost. Presumably it iterates
	// read_nums times over data_ptr -- TODO restore from the original source.
	for(int read_loc=0;read_loc
		read_loop_idx++;
	}
	location_idx=1;//Done read process
// The write region begins directly after the read_nums elements of the read region.
volatile float *write_ptr=&data_ptr[read_nums];
	// NOTE(review): loop header truncated in this listing as well; presumably it
	// iterates write_nums times over write_ptr -- TODO restore.
	for(int write_loc=0;write_loc
		write_loop_idx++;
	}
	location_idx=2;//done write process
	return 1; //return=1 means done
}
只要一个指针指向DRAM。
1.3 位置信息
location_idx表示IPcore当前位置:0表示刚开始,1表示完成读操作(IPcore读DRAM),2表示完成写操作(IPcore写DRAM),与1.2节代码中的注释一致。
read_loop_idx表示当前IPcore读出DRAM的次数
write_loop_idx表示当前IPcore写入DRAM的次数
return 1表示程序运行完成且成功。
1.4 接口
  s_axilite
运用带return的s_axilite接口来设置IPcore的参数、启动IPcore并查询其是否完成,同时通过它传输IPcore的位置信息。
  m_axi
运用主axi协议运用IPcore对DDR进行读写。只能有一个
Depth的设置问题:depth主要用于C/RTL协同仿真,表示testbench中该指针最多访问的元素个数(以元素为单位,而不是字节)。我们设为4096,即4096个4字节的浮点数。
二、testBench
2.1 程序编写
#include
#include
int share_dram_core(int write_nums,int read_nums,
					volatile float * data_ptr,
					int location_idx,int write_loop_idx,int read_loop_idx,
					int read_sum);
// C-simulation testbench for share_dram_core.
// Allocates one buffer holding the PL read region followed by the PL write
// region, calls the core once, then checks the write region and the return value.
int main(){
	int PL_write_nums=50;
	int PL_read_nums=50;
	volatile float * PL_write_ptr;
	volatile float * PL_read_ptr;
	// One allocation for both regions, matching the core's single data pointer.
	// NOTE(review): the malloc result is not checked for NULL.
	PL_read_ptr=(volatile float *)malloc(sizeof(float)*(PL_read_nums+PL_write_nums));
	//PL_write_ptr=(volatile float *)malloc(sizeof(float)*PL_write_nums);
	//PL_read_ptr=(volatile float *)0x00ac1680;
	// The write region starts right after the PL_read_nums read elements.
	PL_write_ptr=&PL_read_ptr[PL_read_nums];
	printf("Initilize SUCCESS!PL_write_num is %d,PL_read_num is %d\n",PL_write_nums,PL_read_nums);
	// NOTE(review): the arguments are passed as (PL_write_ptr, PL_read_ptr), the
	// opposite order to the labels in the format string, so the two printed
	// addresses are swapped. %8x is also not a pointer conversion (%p is).
	printf("PL_read_ptr is %8x, PL_write_ptr is %8x \n",PL_write_ptr,PL_read_ptr);
	// NOTE(review): loop header and body are truncated in this listing; presumably
	// the PS fills the PL read region here -- TODO restore from the original source.
	for(int cur_PL_read_loc=0;cur_PL_read_loc
	}
	printf("PS write on PL read loc SUCCESS!\n");
	// Run the core; the four status arguments are passed as 0.
	int result=share_dram_core(PL_write_nums,PL_read_nums,
					PL_read_ptr,
					0,0,0,0);
	// NOTE(review): loop header and the condition guarding the printf are truncated
	// in this listing. The printf below has two conversions (%d, %8x) but only one
	// argument is supplied -- TODO fix when restoring the loop.
	for(int cur_PL_write_loc=0;cur_PL_write_loc
			printf("PL write ERROR!loc is %d, prt loc is %8x \n",&PL_write_ptr[cur_PL_write_loc]);
		}
	}
	printf("Check PL write done!\n");
	// The core returns 1 when it has run to completion.
	if(result==1){
		printf("IPcore result SUCCESS!\n");
	}
	return 0;
}
2.2 PS与PL的交互
PS传出数据很简单,但是PL传出数据不易。所以尽量以PS多输出信息来验证PL的正确性。
更多信息通过一些参数传出来。例如location_idx, write_loop_idx; read_loop_idx; read_sum;
INFO: [SIM 4] CSIM will launch GCC as the compiler.
   Compiling ../../../../src/share_dram_HLS_test.cpp in debug mode
   Generating csim.exe
Initilize SUCCESS!PL_write_num is 50,PL_read_num is 50
PL_read_ptr is   a21748, PL_write_ptr is   a21680
PS write on PL read loc SUCCESS!
Check PL write done!
IPcore result SUCCESS!
synthesis,然后export RTL
三、系统搭建与hdf生成
运用已有的样板文件,hello world。加入HLS的IP。搭建系统。

使能GP与HP0,自动连接,create HDL wrapper,生成比特流,export到 local include bitstream
四、SDK
//created by Xing Xiangrui on 2018.12.25
//This is the SDK code to test share DRAM
//Write through PS to DDR
//Run PL : read from DDR to PL and write from PL to DDR
//Then read from DDR to PS
#include 
#include 
//#include 
//#include "platform.h"
//#include 
#include "xshare_dram_core.h"
XShare_dram_core XShare_dram_core_instance;
// SDK test program: prepares a buffer in DDR, configures and starts the
// share_dram_core IP through its generated driver, polls until it is done, and
// prints the core's status registers at each stage.
int main()
{
	printf("\n --------------program start------------- \n");
	//read and write param
	int ps_wirte_size=5; int ps_read_size=5;
	// Status values start at 100 so that a value read back from the core is distinguishable.
	int core_location_idx=100;int core_write_loop_idx=100;int core_read_loop_idx=100;int core_read_sum=100;
	int core_return_value=100;
	volatile float * ps_write_ptr;
	volatile float * ps_read_ptr;
	//pointer intialize
	// One allocation: the PS write region followed by the PS read region.
	ps_write_ptr=(volatile float *)malloc((ps_wirte_size+ps_read_size)*sizeof(float));
	//ps_write_ptr= 0x10000000;
	ps_read_ptr=&ps_write_ptr[ps_wirte_size];
	// NOTE(review): ps_read_ptr is derived from ps_write_ptr by an offset, so the
	// second NULL check cannot detect a failed malloc; execution also continues
	// after a failure message.
	if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
	if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
	memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
	memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));
	printf("Initialize ps_read_ptr and ps_write_ptr SUCCESS!\n");
	printf("ps_read_ptr is %8x \n",ps_read_ptr);
	printf("ps_write_ptr is %8x \n",ps_write_ptr);
	// NOTE(review): loop header and body are truncated in this listing; judging by
	// the program output it prints "location N, value V" per element -- TODO restore.
	for(int cur_print_loc=0;cur_print_loc
	}
	//initialize IPcore
	// NOTE(review): the return status of the initialize call is not checked.
	XShare_dram_core_Initialize(&XShare_dram_core_instance, XPAR_SHARE_DRAM_CORE_0_DEVICE_ID);
	printf("XShare_dram_core_Initialize SUCCESS!\n");
	//get and printf values
	// Snapshot 1: registers right after driver initialization.
	core_location_idx=XShare_dram_core_Get_location_idx(&XShare_dram_core_instance);
	core_write_loop_idx=XShare_dram_core_Get_write_loop_idx(&XShare_dram_core_instance);
	core_read_loop_idx=XShare_dram_core_Get_read_loop_idx(&XShare_dram_core_instance);
	core_read_sum=XShare_dram_core_Get_read_sum(&XShare_dram_core_instance);
	core_return_value=XShare_dram_core_Get_return(&XShare_dram_core_instance);
	printf("core_location_idx=%d \n",core_location_idx);
	printf("core_write_loop_idx=%d \n",core_write_loop_idx);
	printf("core_read_loop_idx=%d \n",core_read_loop_idx);
	printf("core_read_sum=%d \n",core_read_sum);
	printf("core_return_value=%d \n",core_return_value);
	//initialize IPcore value
	// Sizes are crossed on purpose: the core writes what the PS reads, and reads what the PS wrote.
	XShare_dram_core_Set_write_nums(&XShare_dram_core_instance, ps_read_size);
	XShare_dram_core_Set_read_nums(&XShare_dram_core_instance, ps_wirte_size);
	// NOTE(review): a pointer is passed where the driver presumably expects an
	// integer address -- confirm the parameter type in xshare_dram_core.h.
	XShare_dram_core_Set_data_ptr(&XShare_dram_core_instance, ps_write_ptr);
	printf("-------------Core value set SUCCESS! \n");
	//get and printf values
	// Snapshot 2: registers after the parameters were written.
	core_location_idx=XShare_dram_core_Get_location_idx(&XShare_dram_core_instance);
	core_write_loop_idx=XShare_dram_core_Get_write_loop_idx(&XShare_dram_core_instance);
	core_read_loop_idx=XShare_dram_core_Get_read_loop_idx(&XShare_dram_core_instance);
	core_read_sum=XShare_dram_core_Get_read_sum(&XShare_dram_core_instance);
	core_return_value=XShare_dram_core_Get_return(&XShare_dram_core_instance);
	printf("core_location_idx=%d \n",core_location_idx);
	printf("core_write_loop_idx=%d \n",core_write_loop_idx);
	printf("core_read_loop_idx=%d \n",core_read_loop_idx);
	printf("core_read_sum=%d \n",core_read_sum);
	printf("core_return_value=%d \n",core_return_value);
	//IPcore start
	// NOTE(review): no cache maintenance call is visible between the memset above
	// and this start; if the data cache is enabled, the PL may not observe the PS
	// writes (and the PS may not observe PL writes) -- TODO confirm.
	XShare_dram_core_Start(&XShare_dram_core_instance);
	printf("-------------IPCore start SUCCESS! \n");
	//get and printf values
	// Snapshot 3: registers immediately after start.
	core_location_idx=XShare_dram_core_Get_location_idx(&XShare_dram_core_instance);
	core_write_loop_idx=XShare_dram_core_Get_write_loop_idx(&XShare_dram_core_instance);
	core_read_loop_idx=XShare_dram_core_Get_read_loop_idx(&XShare_dram_core_instance);
	core_read_sum=XShare_dram_core_Get_read_sum(&XShare_dram_core_instance);
	core_return_value=XShare_dram_core_Get_return(&XShare_dram_core_instance);
	printf("core_location_idx=%d \n",core_location_idx);
	printf("core_write_loop_idx=%d \n",core_write_loop_idx);
	printf("core_read_loop_idx=%d \n",core_read_loop_idx);
	printf("core_read_sum=%d \n",core_read_sum);
	printf("core_return_value=%d \n",core_return_value);
	// Busy-wait until the core reports done; there is no timeout, so this loops
	// forever if the core never finishes.
	while(!XShare_dram_core_IsDone(&XShare_dram_core_instance)){
		printf("Calculating...\n");
	}
	printf("IsDone done SUCCESS!\n");
	//get and printf values
	// Snapshot 4: registers after the core reported done.
	core_location_idx=XShare_dram_core_Get_location_idx(&XShare_dram_core_instance);
	core_write_loop_idx=XShare_dram_core_Get_write_loop_idx(&XShare_dram_core_instance);
	core_read_loop_idx=XShare_dram_core_Get_read_loop_idx(&XShare_dram_core_instance);
	core_read_sum=XShare_dram_core_Get_read_sum(&XShare_dram_core_instance);
	core_return_value=XShare_dram_core_Get_return(&XShare_dram_core_instance);
	printf("core_location_idx=%d \n",core_location_idx);
	printf("core_write_loop_idx=%d \n",core_write_loop_idx);
	printf("core_read_loop_idx=%d \n",core_read_loop_idx);
	printf("core_read_sum=%d \n",core_read_sum);
	printf("core_return_value=%d \n",core_return_value);
	// NOTE(review): loop header and body are truncated in this listing; presumably
	// it prints the buffer contents again after the core has run -- TODO restore.
	for(int cur_print_loc=0;cur_print_loc
	}
	printf("-----------Program end SUCCESS!- \n\n");
	return 0;
}
用SDK打开vivado生成的文件夹下的 .sdk文件夹然后加载相应的hdf,生成bsp,创建c程序,hello world。build它。
启动FPGA,program FPGA将比特流烧录进去,然后运行程序。
4.1 用malloc的方式开辟内存
//pointer intialize
	ps_write_ptr=(volatile float *)malloc((ps_wirte_size+ps_read_size)*sizeof(float));
	//ps_write_ptr= 0x10000000;
	ps_read_ptr=&ps_write_ptr[ps_wirte_size];
	if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
	if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
	memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
	memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));
FPGA始终输出0,即IPcore并未有正确的动作。
--------------program start-------------
Initialize ps_read_ptr and ps_write_ptr SUCCESS!
ps_read_ptr is   114764
ps_write_ptr is   114750
location   0, value 0.000000
location   1, value 0.000000
location   2, value 0.000000
location   3, value 0.000000
location   4, value 0.000000
XShare_dram_core_Initialize SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
-------------Core value set SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
-------------IPCore start SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
Calculating...
Calculating...
。。。
IPcore会一直不结束。
4.2 指定指针位置
ps_write_ptr= 0x10000000;
ps_read_ptr=&ps_write_ptr[ps_wirte_size];
if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));
依然无法用IPcore写入值。
 --------------program start-------------
Initialize ps_read_ptr and ps_write_ptr SUCCESS!
ps_read_ptr is 10000014
ps_write_ptr is 10000000
location   0, value 0.000000
location   1, value 0.000000
location   2, value 0.000000
location   3, value 0.000000
location   4, value 0.000000
XShare_dram_core_Initialize SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
-------------Core value set SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
-------------IPCore start SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
IsDone done SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
location   0, value 0.000000
location   1, value 0.000000
location   2, value 0.000000
location   3, value 0.000000
location   4, value 0.000000
-----------Program end SUCCESS!-
五、SoC
SDK实现过程中会出现地址冲突的问题,难以实现共享DDR,我们用SoC的方法共享DDR。
5.1 交叉编译
MIZ7035交叉编译单片机程序运行  https://blog.csdn.net/weixin_36474809/article/details/86487043
5.2 驱动
驱动由HLS和vivado生成,相应的地址在vivado中可查。在zynqNet基础上更改:
#ifndef SHARED_DRAM_H_9B5B43B5
#define SHARED_DRAM_H_9B5B43B5
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include "xfpga_hw.hpp" // Register addresses
// Short aliases for the fixed-width unsigned integer types.
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
// Location + Size of SHARED DRAM segment:
// - from Vivado Block Designer (Address Editor):
//     AXI M memory bus starts at 0x00000000 - 0xFFFFFFFF, SIZE: 4GB
// - from information by Simon Wright:
//     top 128MB of 1GB system memory are not OS-managed
// - from "free -m" on Zynq:
//     total mem 882MB -> 118MB not OS-managed
//     -> place SHARED_DRAM at 896MB (-> max. activations ~100MB)
//     -> 896MB = 896*1024*1024 = 0x3800'0000 bytes
//     -> 96MB = 96*1024*1024 = 0x600'0000 bytes
// NOTE(review): the derivation above yields a base of 0x38000000, but the
// constant below is 0x20000000 (512MB). Confirm that the 96MB window starting
// at 0x20000000 is not managed by the OS on the target board.
const off_t SHARED_DRAM_BASE_ADDR = 0x20000000;  // physical base address of the window
const size_t SHARED_DRAM_MEM_SIZE = 0x06000000;  // window size in bytes (96MB)
// File descriptor for /dev/mem; -1 while the window is closed.
extern int SHARED_DRAM_FD;
// Pointer to the mapped window in this process; NULL before it is mapped.
extern volatile u32* SHARED_DRAM_PTR;
// External Interface
bool SHARED_DRAM_open();
bool SHARED_DRAM_close();
volatile u32* SHARED_DRAM_virtual();
volatile u32* SHARED_DRAM_physical();
// Internal Functions
volatile u32* map_SHARED_DRAM(off_t base_addr);
void release_SHARED_DRAM(volatile u32* axilite);
// unused:
// 32-bit word read + write (other sizes not supported!)
/* void shared_DRAM_write(u32 byte_addr, u32 value);
u32 shared_DRAM_read(u32 byte_addr); */
#endif /* end of include guard: SHARED_DRAM_H_9B5B43B5 */
#include "shared_dram.hpp"
// Descriptor of the opened /dev/mem file; -1 means "not open".
int SHARED_DRAM_FD = -1;
// Pointer to the mapped shared DRAM window; NULL until map_SHARED_DRAM() has run.
volatile u32* SHARED_DRAM_PTR = NULL;
/**
 * Open the shared DRAM window.
 *
 * Maps SHARED_DRAM_BASE_ADDR via map_SHARED_DRAM() and stores the resulting
 * pointer in SHARED_DRAM_PTR.
 *
 * @return true if the /dev/mem descriptor is set after mapping; false if the
 *         window was already open (nothing is remapped in that case).
 */
bool SHARED_DRAM_open() {
  printf("XFPGA Driver: open /dev/mem handle\n");
  // Check that it's not yet open
  if (SHARED_DRAM_FD > -1) {
    printf("SHARED_DRAM already open!\n");
    return false;
  }
  // Memory Map SHARED_DRAM
  SHARED_DRAM_PTR = map_SHARED_DRAM(SHARED_DRAM_BASE_ADDR);
  // %lX matches the unsigned long argument; plain %X expects unsigned int and
  // is a format/argument mismatch wherever long is wider than int.
  printf("SHARED_DRAM_PTR=%lX\n", (unsigned long)SHARED_DRAM_PTR);
  // Make sure the file handle is really set
  return (SHARED_DRAM_FD > -1);
}
/**
 * Close the shared DRAM window.
 *
 * Unmaps the window and closes the /dev/mem descriptor via
 * release_SHARED_DRAM(), then clears SHARED_DRAM_PTR.
 *
 * @return true if the descriptor is marked closed afterwards; false if the
 *         window was not open.
 */
bool SHARED_DRAM_close() {
  printf("XFPGA Driver: close /dev/mem handle\n");
  // Check that memory file is really open
  if (SHARED_DRAM_FD == -1) {
    printf("SHARED_DRAM bus not open!\n");
    return false;
  }
  // Release Memory Region and File handle
  release_SHARED_DRAM(SHARED_DRAM_PTR);
  // The mapping no longer exists; clear the global so SHARED_DRAM_virtual()
  // cannot hand out a dangling pointer after close.
  SHARED_DRAM_PTR = NULL;
  // Make sure file was correctly released
  return (SHARED_DRAM_FD == -1);
}
/**
 * Return the address of the shared DRAM window as seen by this process,
 * i.e. the current value of SHARED_DRAM_PTR.
 */
volatile u32* SHARED_DRAM_virtual() {
  volatile u32* const mapped_window = SHARED_DRAM_PTR;
  return mapped_window;
}
/**
 * Return the physical base address of the shared DRAM window
 * (SHARED_DRAM_BASE_ADDR) converted to a pointer value.
 *
 * NOTE(review): presumably this value is meant to be passed to the hardware
 * rather than dereferenced by the process -- confirm against callers.
 */
volatile u32* SHARED_DRAM_physical() {
  const off_t physical_base = SHARED_DRAM_BASE_ADDR;
  return (volatile u32*) physical_base;
}
////////////////////////////////////////////////////
////////////////// Helper Functions ////////////////
/**
 * Map the shared DRAM window into this process through /dev/mem.
 *
 * Opens /dev/mem (storing the descriptor in SHARED_DRAM_FD) and maps
 * SHARED_DRAM_MEM_SIZE bytes starting at the page-aligned base address.
 * On any failure the process is terminated via err().
 *
 * @param base_addr physical base address; rounded down to a page boundary.
 * @return pointer to the start of the mapped window.
 */
volatile u32* map_SHARED_DRAM(off_t base_addr) {
  // %lX matches the unsigned long argument (plain %X expects unsigned int).
  printf("XFPGA Driver: map shared DRAM at base address %lX\n", (unsigned long)base_addr);
  // make sure that base addr is aligned to memory pages...
  // (mask computed in off_t so no width conversion is involved)
  base_addr &= ~((off_t)getpagesize() - 1);
  // Open /dev/mem file (need root privileges or setuid!)
  SHARED_DRAM_FD = open("/dev/mem", O_RDWR);
  if (SHARED_DRAM_FD < 0) err(errno, "could not open /dev/mem. need to be root");
  // Map SHARED_DRAM memory region; test the raw mmap result against MAP_FAILED
  // before converting it to the typed pointer.
  void* mapping = mmap(NULL, SHARED_DRAM_MEM_SIZE, PROT_READ | PROT_WRITE,
                       MAP_SHARED, SHARED_DRAM_FD, base_addr);
  if (mapping == MAP_FAILED) err(errno, "could not map memory for SHARED_DRAM bus");
  return (volatile u32*)mapping;
}
/**
 * Undo map_SHARED_DRAM(): unmap the window and close the /dev/mem descriptor.
 *
 * Terminates the process via err() if either step fails. On success
 * SHARED_DRAM_FD is reset to -1 to mark the window as closed.
 *
 * @param pointer start of the mapping to release (SHARED_DRAM_MEM_SIZE bytes).
 */
void release_SHARED_DRAM(volatile u32* pointer) {
  printf("XFPGA Driver: unmap shared DRAM\n");

  // Step 1: drop the memory mapping.
  if (munmap((void*)pointer, SHARED_DRAM_MEM_SIZE) < 0) {
    err(errno, "could not unmap memory region for SHARED_DRAM bus");
  }

  // Step 2: close the /dev/mem descriptor.
  if (close(SHARED_DRAM_FD) < 0) {
    err(errno, "could not release /dev/mem file handle");
  }

  // Step 3: record that nothing is open any more.
  SHARED_DRAM_FD = -1;
}
5.3 运行
交叉编译,挂载,运行
---------------------
作者:邢翔瑞
来源:CSDN
原文:https://blog.csdn.net/weixin_36474809/article/details/85111550