如何解决使用递归指针的OpenACC并行FOR循环
我在OpenMP中有一个并行的FOR循环:
// Excerpt of the cell-update loop, parallelised with OpenMP.
// Iterations of the outer loop are distributed over threads; `k` and `cell`
// are per-thread scratch variables, `sim` and `mesh` are shared.
#pragma omp parallel for default(none) private(k,cell) shared(sim,mesh)
for(i=0;i<mesh->ncells;i++){
// Work on the i-th entry of the mesh's cell array.
cell=&(mesh->cell[i]);
for(k=0;k<sim->nvar;k++){
// Save the current value of U[k] before it is overwritten.
cell->U_aux[k]=cell->U[k];
// Subtract dt times the sum of the per-direction flux differences
// (walls w2/w4 over dx, w3/w1 over dy, w6/w5 over dz).
cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy + (cell->w6->fL_star[k]-cell->w5->fR_star[k])/cell->dz);
}
}
// NOTE(review): this brace has no matching opener in the excerpt; presumably
// it closes the enclosing function, whose header is not shown.
}
其中cell
是指向结构mesh
中单元格数组元素的指针。我想用OpenACC将这个循环并行化。
结构sim
仅包含标量,但结构mesh
包含cell
和wall
的一维数组
/* Abridged mesh container: only the members touched by the update loop. */
struct t_mesh_ {
    int     ncells;  /* upper bound of the loop over mesh->cell[i] */
    t_cell *cell;    /* 1-D array of cells */
    t_wall *wall;    /* 1-D array of walls */
    /* ...further members, not used in this function, are omitted... */
};
其中结构cell
定义为:
/* Abridged cell record: only the members touched by the update loop. */
struct t_cell_ {
    double  dx;      /* divisor of the w2/w4 flux difference */
    double  dy;      /* divisor of the w3/w1 flux difference */
    double  dz;      /* divisor of the w6/w5 flux difference */
    double *U;       /* small 1-D array, indexed by k < sim->nvar */
    double *U_aux;   /* small 1-D array; receives a copy of U before the update */
    t_wall *w1;      /* w1..w6 point at entries of the mesh's wall array */
    t_wall *w2;
    t_wall *w3;
    t_wall *w4;
    t_wall *w5;
    t_wall *w6;
    /* ...further members, not used in this function, are omitted... */
};
请注意,*U,*U_aux
是小的一维数组,而*w1,…,*w6
是指向数组wall
中元素的指针:
/* Abridged wall record: only the members touched by the update loop. */
struct t_wall_ {
    double *fR_star;  /* small 1-D array, indexed by k < sim->nvar */
    double *fL_star;  /* small 1-D array, indexed by k < sim->nvar */
    /* ...further members, not used in this function, are omitted... */
};
其中fR_star
和fL_star
是小的一维数组。
我应该如何定义OpenACC编译指示并管理内存?
谢谢。
[已编辑]
在下面的脚本中,显示了结构的创建和分配以及在感兴趣的循环中使用的指针。大部分代码已被省略,只保留了问题中提到的内容:
//macros omitted
////////////////////////////////////////////////////
////////////// S T R U C T U R E S ///////////////
////////////////////////////////////////////////////
/* Forward typedefs so the structures below can refer to one another by
 * pointer before their full definitions appear. */
typedef struct t_sim_  t_sim;
typedef struct t_mesh_ t_mesh;
typedef struct t_wall_ t_wall;
typedef struct t_cell_ t_cell;
typedef struct t_node_ t_node;
/* Mesh node: an integer identifier plus a pair of doubles x, y
 * (presumably planar coordinates -- confirm against the omitted code). */
struct t_node_ {
    int    id;
    double x;
    double y;
};
/*
 * Cell record.
 *
 * Fix: the wall-pointer list previously declared only w1 and w4, while
 * create_mesh() assigns w1..w4 and update_cellK1() dereferences w2 and w3.
 * All four pointers are declared here so those functions compile.
 */
struct t_cell_{
    int id;                       //index of the cell in mesh->cell (set in create_mesh)
    int l,m;                      //loop indices used in create_mesh: id = xcells*m + l
    double *U;                    //array of cell-valued variables
    double dx,dy;                 //divisors of the flux differences in update_cellK1
    double xc,yc;                 //presumably the cell centre -- not used in the code shown
    int n1,n2,n3,n4;              //presumably node indices -- not used in the code shown
    int w1_id,w2_id,w3_id,w4_id;  //indices into mesh->wall of the cell's four walls
    t_wall *w1,*w2,*w3,*w4;       //pointers to mesh->wall
};
/* Wall record: per-wall flux arrays plus links to the two adjacent cells. */
struct t_wall_ {
    int     id;        /* index of the wall in mesh->wall (set in create_mesh) */
    double *fR_star;   /* right fluxes at the wall; same dimension as U */
    double *fL_star;   /* left fluxes at the wall; same dimension as U */
    int     cellR_id;  /* id of the right cell */
    int     cellL_id;  /* id of the left cell */
    t_cell *cellR;     /* right cell of the wall (points into mesh->cell) */
    t_cell *cellL;     /* left cell of the wall (points into mesh->cell) */
    double  nx;        /* presumably the wall normal -- not used in the code shown */
    double  ny;
};
/* Mesh container: grid dimensions, element counts and the element arrays. */
struct t_mesh_ {
    int     xcells;  /* cells along the inner (l) loop of create_mesh */
    int     ycells;  /* cells along the outer (m) loop of create_mesh */
    double  dx;
    double  dy;
    int     ncells;  /* number of cells (xcells*ycells) */
    int     nwalls;  /* number of walls */
    int     nnodes;
    t_cell *cell;    /* array of cell structures */
    t_wall *wall;    /* array of wall structures */
    t_node *node;    /* array of node structures */
    t_sim  *sim;     /* back-pointer set by create_mesh */
};
/* Simulation parameters; all members are scalars. */
struct t_sim_ {
    double dt;        /* time step, multiplies the flux balance in update_cellK1 */
    double t;         /* current time, mirrored from main's local t */
    double CFL;
    double tf;        /* final time: the main loop runs while t < tf */
    double tVolc;
    int    rk_steps;
    int    order;
    int    nvar;      /* number of variables (dimension of U, fR_star, fL_star...) */
};
////////////////////////////////////////////////////
////// F U N C T I O N D E F I N I T I O N //////
////////////////////////////////////////////////////
/* Allocates and links the cells and walls of `mesh`; returns 1. */
int  create_mesh(t_mesh *mesh, t_sim *sim);

/* Updates U of every cell in `mesh` from the wall fluxes and sim->dt. */
void update_cellK1(t_mesh *mesh, t_sim *sim);
////////////////////////////////////////////////////
////// P R E - P R O C. F U N C T I O N S ///////
////////////////////////////////////////////////////
/*
 * create_mesh: allocate and wire up the cell and wall arrays of `mesh`.
 *
 * Reads mesh->xcells and mesh->ycells (which must be set by the caller) and
 * sim->nvar. Stores `sim` in mesh->sim, allocates mesh->cell and mesh->wall,
 * allocates the per-wall flux arrays and the per-cell U array (sim->nvar
 * doubles each), and links cells to walls and walls to cells.
 *
 * Always returns 1. None of the malloc() results are checked.
 * Parts of the original function are elided ("// ...") in this excerpt.
 */
int create_mesh(t_mesh *mesh,t_sim *sim){
int i,l,m,k,aux,p;
int xcells,ycells;
t_cell *cell;
t_wall *wall;
t_node *node;
int semiSt;
// Keep a back-pointer to the simulation parameters inside the mesh.
mesh->sim=sim;
//Cells
xcells=mesh->xcells;
ycells=mesh->ycells;
mesh->ncells=xcells*ycells;
mesh->cell=(t_cell*)malloc(mesh->ncells*sizeof(t_cell));
cell=mesh->cell;
//Walls
mesh->nwalls=2*mesh->ncells+xcells+mesh->ycells;
mesh->wall=(t_wall*)malloc(mesh->nwalls*sizeof(t_wall));
wall=mesh->wall;
// Give every wall its id and its two flux arrays (uninitialised contents).
for(k=0;k<mesh->nwalls;k++){
wall[k].id=k;
wall[k].fR_star=(double*)malloc(sim->nvar*sizeof(double));
wall[k].fL_star=(double*)malloc(sim->nvar*sizeof(double));
}
//Walls and nodes of the cells
// NOTE(review): this loop stops at ycells-1 / xcells-1, whereas the next one
// visits every cell and dereferences cell[k].w1..w4. In the code shown,
// w2_id..w4_id are never assigned either. Presumably the elided lines and the
// omitted "special cases" cover both -- confirm against the full source.
for(m=0;m<ycells-1;m++){
for(l=0;l<xcells-1;l++){
// Row-major cell index.
k=xcells*m+l;
cell[k].id=k;
cell[k].l=l;
cell[k].m=m;
cell[k].w1_id=2*(k)+m;
// ...
cell[k].w1=&(mesh->wall[cell[k].w1_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w2=&(mesh->wall[cell[k].w2_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w3=&(mesh->wall[cell[k].w3_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w4=&(mesh->wall[cell[k].w4_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
// ...
// ...
}
}
//Assignment of wall's neighbour cells
for(m=0;m<ycells;m++){
for(l=0;l<xcells;l++){
k=xcells*m+l;
cell[k].w1->cellR_id=cell[k].id;
// ...
// The cell is the right neighbour of its w1 and w4 walls and the left
// neighbour of its w2 and w3 walls.
cell[k].w1->cellR=&(cell[k]);
cell[k].w4->cellR=&(cell[k]);
cell[k].w2->cellL=&(cell[k]);
cell[k].w3->cellL=&(cell[k]);
//...
//other special cases omitted
//...
}
}
//Allocation of arrays of variables "U" in cells and walls
for(k=0;k<mesh->ncells;k++){
mesh->cell[k].U =(double*)malloc(sim->nvar*sizeof(double));
}
return 1;
}
/*
 * update_cellK1: advance the variables U of every cell by one time step.
 *
 * For each cell and each variable k < sim->nvar, subtracts sim->dt times the
 * flux balance across the cell's four walls (w2/w4 divided by dx, w3/w1
 * divided by dy) from U[k]. The wall flux arrays are only read.
 *
 * mesh: mesh whose cell[0..ncells-1] entries are updated in place.
 * sim:  supplies nvar and dt; not modified.
 *
 * Fix: the OpenMP clause list was `private(k,mesh)`. Under default(none)
 * that leaves `cell` and `sim` without a data-sharing attribute, and it makes
 * `mesh` an uninitialised per-thread copy. `cell` must be private (each
 * thread works on its own cell), and `mesh` and `sim` must be shared.
 */
void update_cellK1(t_mesh *mesh,t_sim *sim){
    int i,k;
    t_cell *cell;
    #pragma omp parallel for default(none) private(k,cell) shared(sim,mesh)
    for(i=0;i<mesh->ncells;i++){
        cell=&(mesh->cell[i]);
        for(k=0;k<sim->nvar;k++){
            cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy);
        }
    }
}
////////////////////////////////////////////////////
//////////////////// M A I N ///////////////////////
////////////////////////////////////////////////////
/*
 * Program entry point: allocates the mesh and simulation records, builds the
 * mesh, then advances the solution until the final time sim->tf.
 *
 * Returns 0 on success and 1 if the initial allocations fail.
 *
 * Fixes relative to the original:
 *  - success previously returned 1, which a shell reads as failure;
 *  - the two malloc() results are now checked before use.
 * The memory owned by the mesh is not released before exit (no teardown
 * routine is visible in this excerpt).
 */
int main(int argc,char * argv[]){
    int i,j,p;
    t_mesh *mesh;
    t_sim *sim;
    double tf,t;
    int nIt;
    double timeac;

    omp_set_num_threads(NTHREADS);

    //Mesh and sim allocation
    mesh=malloc(sizeof *mesh);
    sim =malloc(sizeof *sim);
    if(mesh==NULL || sim==NULL){
        free(mesh);
        free(sim);
        return 1;
    }

    ////////////////////////////////////////////////////
    ////////////// P R E - P R O C E S S ///////////////
    ////////////////////////////////////////////////////
    //...
    //variable initialization and file reading omitted
    //cell->dx= ...
    //cell->dy= ...
    //...
    create_mesh(mesh,sim);
    update_initial(mesh); //this function (omitted) assigns the initial values of cell[k].U[0] ... cell[k].U[4]

    ////////////////////////////////////////////////////
    ////////////// C A L C U L A T I O N ///////////////
    ////////////////////////////////////////////////////
    tf=sim->tf;
    sim->t=0.0;
    t=0.0;
    while(t<tf){
        compute_fluxes(mesh,sim); //this function (omitted) computes fR_star/fL_star of the walls, which are then used in "update_cellK1()"
        update_dt(mesh,sim); //this function (omitted) computes sim->dt
        update_cellK1(mesh,sim);
        t+=sim->dt; //Time updated
        sim->t=t;
    }
    return 0;
}
解决方法
最简单的方法是使用CUDA统一内存进行编译,前提是您使用的是PGI编译器(-ta=tesla:managed)或NVIDIA HPC编译器(-acc -gpu=managed)。动态分配的内存将被放入统一内存中,CUDA运行时会为您处理所有数据管理。
不过,如果您确实需要手动管理数据,在这种情况下会有些棘手,但并不困难。如果您可以提供示例代码来显示完整结构是如何创建的,那么我可以帮助您管理数据。很可能我们需要使用OpenACC API调用,以便直接管理设备指针,并使用“acc_attach”或“acc_map_data”调用让各个“w”变量指向正确的“wall”。
[编辑]
下面是我第一次尝试添加手动数据移动。我选择了使用指令的简单方法,然后附加已分配的设备数据。问题在于,由于代码不完整(最好有用于初始化的虚拟值),所以我实际上无法测试代码是否按预期工作。有时,在这些类型的代码中,我们需要选择仅使用API调用,以便可以直接控制指针。
//macros omitted
#include <stdlib.h>
#include <stdio.h>
#ifdef _OPENACC
#include <openacc.h>
#endif
////////////////////////////////////////////////////
////////////// S T R U C T U R E S ///////////////
////////////////////////////////////////////////////
/* Forward typedefs: the structures below reference each other through
 * pointers, so the short names must exist before the definitions. */
typedef struct t_mesh_ t_mesh;
typedef struct t_sim_  t_sim;
typedef struct t_cell_ t_cell;
typedef struct t_wall_ t_wall;
typedef struct t_node_ t_node;
/* Mesh node: identifier and two doubles x, y
 * (presumably coordinates -- not used in the code shown). */
struct t_node_ {
    int    id;
    double x;
    double y;
};
/* Cell record: per-cell variables plus links to its four walls. */
struct t_cell_ {
    int     id;      /* index of the cell in mesh->cell (set in create_mesh) */
    int     l;       /* inner loop index in create_mesh: id = xcells*m + l */
    int     m;       /* outer loop index in create_mesh */
    double *U;       /* array of cell-valued variables */
    double  dx;      /* divisor of the w2/w4 flux difference */
    double  dy;      /* divisor of the w3/w1 flux difference */
    double  xc;      /* presumably the cell centre -- not used in the code shown */
    double  yc;
    int     n1;      /* presumably node indices -- not used in the code shown */
    int     n2;
    int     n3;
    int     n4;
    int     w1_id;   /* indices into mesh->wall of the cell's walls */
    int     w2_id;
    int     w3_id;
    int     w4_id;
    t_wall *w1;      /* pointers to mesh->wall */
    t_wall *w2;
    t_wall *w3;
    t_wall *w4;
};
/* Wall record: flux arrays at the wall and links to the adjacent cells. */
struct t_wall_ {
    int     id;        /* index of the wall in mesh->wall (set in create_mesh) */
    double *fR_star;   /* right fluxes at the wall; same dimension as U */
    double *fL_star;   /* left fluxes at the wall; same dimension as U */
    int     cellR_id;  /* id of the right cell */
    int     cellL_id;  /* id of the left cell */
    t_cell *cellR;     /* right cell of the wall (points into mesh->cell) */
    t_cell *cellL;     /* left cell of the wall (points into mesh->cell) */
    double  nx;        /* presumably the wall normal -- not used in the code shown */
    double  ny;
};
/* Mesh container: grid dimensions, element counts and the element arrays. */
struct t_mesh_ {
    int     xcells;  /* cells along the inner (l) loop of create_mesh */
    int     ycells;  /* cells along the outer (m) loop of create_mesh */
    double  dx;
    double  dy;
    int     ncells;  /* number of cells (xcells*ycells) */
    int     nwalls;  /* number of walls */
    int     nnodes;
    t_cell *cell;    /* array of cell structures */
    t_wall *wall;    /* array of wall structures */
    t_node *node;    /* array of node structures */
    t_sim  *sim;     /* back-pointer set by create_mesh */
};
/* Simulation parameters; every member is a scalar. */
struct t_sim_ {
    double dt;        /* time step used by update_cellK1 */
    double t;         /* current time */
    double CFL;
    double tf;        /* final time read by main */
    double tVolc;
    int    rk_steps;
    int    order;
    int    nvar;      /* number of variables (dimension of U, fR_star, fL_star...) */
};
////////////////////////////////////////////////////
////// F U N C T I O N D E F I N I T I O N //////
////////////////////////////////////////////////////
/* Allocates and links the cells and walls of `mesh` on the host and mirrors
 * them on the accelerator when built with OpenACC; returns 1. */
int  create_mesh(t_mesh *mesh, t_sim *sim);

/* Updates U of every cell in `mesh` from the wall fluxes and sim->dt. */
void update_cellK1(t_mesh *mesh, t_sim *sim);
////////////////////////////////////////////////////
////// P R E - P R O C. F U N C T I O N S ///////
////////////////////////////////////////////////////
/*
 * create_mesh (OpenACC variant): allocate and wire up the cell and wall
 * arrays of `mesh` on the host and, when compiled with OpenACC, create the
 * matching device copies and re-point the device-side pointers.
 *
 * Expects mesh[:1] and sim[:1] to already be present on the device (main
 * issues the corresponding `enter data` before calling this function).
 * Reads mesh->xcells, mesh->ycells and sim->nvar. Always returns 1; malloc()
 * results are not checked. Parts of the function are elided ("// ...").
 *
 * NOTE(review): the accompanying text states this version could not be
 * tested because the posted code is incomplete.
 */
int create_mesh(t_mesh *mesh,t_sim *sim){
int i,l,m,k,aux,p;
int xcells,ycells;
t_cell *cell;
t_wall *wall;
t_node *node;
int semiSt;
// Keep a back-pointer to the simulation parameters inside the mesh.
mesh->sim=sim;
//Cells
xcells=mesh->xcells;
ycells=mesh->ycells;
mesh->ncells=xcells*ycells;
mesh->cell=(t_cell*)malloc(mesh->ncells*sizeof(t_cell));
cell=mesh->cell;
//Walls
mesh->nwalls=2*mesh->ncells+xcells+mesh->ycells;
mesh->wall=(t_wall*)malloc(mesh->nwalls*sizeof(t_wall));
#ifdef _OPENACC
// Make the device copy of mesh->sim refer to the device copy of *sim.
acc_attach((void**)&mesh->sim);
// Push the counts just computed on the host to the device copy of *mesh.
#pragma acc update device(mesh->ncells,mesh->nwalls)
// Allocate (without copying) device storage for the cell and wall arrays.
#pragma acc enter data create(mesh->cell[:mesh->ncells],mesh->wall[:mesh->nwalls])
#endif
wall=mesh->wall;
for(k=0;k<mesh->nwalls;k++){
wall[k].id=k;
wall[k].fR_star=(double*)malloc(sim->nvar*sizeof(double));
wall[k].fL_star=(double*)malloc(sim->nvar*sizeof(double));
// Device storage for this wall's flux arrays; contents are not copied.
#pragma acc enter data create(wall[k].fR_star[:sim->nvar],wall[k].fL_star[:sim->nvar])
}
//Walls and nodes of the cells
// NOTE(review): this loop stops at ycells-1 / xcells-1, so the device update
// and attaches below only run for those cells. In the code shown, w2_id..w4_id
// are never assigned. Presumably the elided lines cover the remaining cells
// and ids -- confirm against the full source.
for(m=0;m<ycells-1;m++){
for(l=0;l<xcells-1;l++){
// Row-major cell index.
k=xcells*m+l;
cell[k].id=k;
cell[k].l=l;
cell[k].m=m;
cell[k].w1_id=2*(k)+m;
// ...
cell[k].w1=&(mesh->wall[cell[k].w1_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w2=&(mesh->wall[cell[k].w2_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w3=&(mesh->wall[cell[k].w3_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w4=&(mesh->wall[cell[k].w4_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
// ...
// ...
#ifdef _OPENACC
// Copy the host struct for this cell to the device (this carries host
// pointer values), then re-point w1..w4 at the device copies of the walls.
#pragma acc update device(cell[k:1])
acc_attach((void**)&cell[k].w1);
acc_attach((void**)&cell[k].w2);
acc_attach((void**)&cell[k].w3);
acc_attach((void**)&cell[k].w4);
#endif
}
}
//Assignment of wall's neighbour cells
for(m=0;m<ycells;m++){
for(l=0;l<xcells;l++){
k=xcells*m+l;
cell[k].w1->cellR_id=cell[k].id;
// ...
// The cell is the right neighbour of its w1 and w4 walls and the left
// neighbour of its w2 and w3 walls.
cell[k].w1->cellR=&(cell[k]);
cell[k].w4->cellR=&(cell[k]);
cell[k].w2->cellL=&(cell[k]);
cell[k].w3->cellL=&(cell[k]);
//...
//other special cases omitted
//...
#ifdef _OPENACC
// Mirror the neighbour id on the device and re-point the walls' cell
// pointers at the device copies of the cells.
#pragma acc update device(cell[k].w1->cellR_id)
acc_attach((void**)&cell[k].w1->cellR);
acc_attach((void**)&cell[k].w4->cellR);
acc_attach((void**)&cell[k].w2->cellL);
acc_attach((void**)&cell[k].w3->cellL);
#endif
}
}
//Allocation of arrays of variables "U" in cells and walls
for(k=0;k<mesh->ncells;k++){
mesh->cell[k].U =(double*)malloc(sim->nvar*sizeof(double));
// Device storage for this cell's U array; contents are not copied.
#pragma acc enter data create(mesh->cell[k].U[:sim->nvar])
}
return 1;
}
void update_cellK1(t_mesh *mesh,k;
t_cell *cell;
#pragma acc parallel loop present(mesh,sim) private(cell)
for(i=0;i<mesh->ncells;i++){
cell=&(mesh->cell[i]);
for(k=0;k<sim->nvar;k++){
cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy);
}
}
}
////////////////////////////////////////////////////
//////////////////// M A I N ///////////////////////
////////////////////////////////////////////////////
/*
 * Program entry point (OpenACC variant): allocates the mesh and simulation
 * records, places them on the device, builds the mesh and runs a single
 * update_cellK1() call. The time loop is left commented out because the
 * helper functions it needs are not part of the posted code.
 *
 * Returns 0 on success and 1 if the initial allocations fail.
 *
 * Fixes relative to the original:
 *  - `sim` was placed on the device with create(sim[:1]), which allocates
 *    without copying, yet the kernel reads sim->nvar and sim->dt there. It is
 *    now copied in, so values set during pre-processing reach the device;
 *  - success previously returned 1, which a shell reads as failure;
 *  - the two malloc() results are now checked before use.
 */
int main(int argc,char * argv[]){
    int i,j,p;
    t_mesh *mesh;
    t_sim *sim;
    double tf,t;
    int nIt;
    double timeac;

    //Mesh and sim allocation
    mesh=malloc(sizeof *mesh);
    sim =malloc(sizeof *sim);
    if(mesh==NULL || sim==NULL){
        free(mesh);
        free(sim);
        return 1;
    }

    ////////////////////////////////////////////////////
    ////////////// P R E - P R O C E S S ///////////////
    ////////////////////////////////////////////////////
    //...
    //variable initialization and file reading omitted
    //cell->dx= ...
    //cell->dy= ...
    //...
    // Both records are copied to the device before create_mesh() attaches to them.
    #pragma acc enter data copyin(mesh[:1]) copyin(sim[:1])
    create_mesh(mesh,sim);
    // update_initial(mesh); //this function (omitted) assigns the initial values of cell[k].U[0] ... cell[k].U[4]

    ////////////////////////////////////////////////////
    ////////////// C A L C U L A T I O N ///////////////
    ////////////////////////////////////////////////////
    tf=sim->tf;
    sim->t=0.0;
    t=0.0;
    // NOTE(review): once the loop below is re-enabled, sim->dt is recomputed on
    // the host every step; it will presumably need `#pragma acc update
    // device(sim->dt)` before update_cellK1() -- confirm where update_dt() runs.
    // while(t<tf){
    // compute_fluxes(mesh,sim); //this function (omitted) computes fR_star/fL_star of the walls, which are then used in "update_cellK1()"
    // update_dt(mesh,sim); //this function (omitted) computes sim->dt
    update_cellK1(mesh,sim);
    // t+=sim->dt; //Time updated
    // sim->t=t;
    // }
    return 0;
}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。