使用递归指针的OpenACC并行FOR循环

如何解决使用递归指针的OpenACC并行FOR循环

我在OpenMP中有一个并行的FOR循环:

// Excerpt from the body of a function whose header is not shown here.
// Each outer iteration handles one cell: for every variable k it first saves
// U[k] into U_aux[k] and then subtracts dt times the sum of three flux
// differences (wall pairs w2/w4, w3/w1 and w6/w5), each divided by dx, dy and
// dz respectively.
// Data sharing: k and cell are per-thread, sim and mesh are shared.
#pragma omp parallel for default(none) private(k,cell) shared(sim,mesh)
    for(i=0;i<mesh->ncells;i++){
        // Work on the i-th cell through a pointer into mesh->cell.
        cell=&(mesh->cell[i]);
        for(k=0;k<sim->nvar;k++){
            // Keep the previous value before overwriting U[k].
            cell->U_aux[k]=cell->U[k];
            cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy + (cell->w6->fL_star[k]-cell->w5->fR_star[k])/cell->dz);
        }            
    }
} // closes a scope that was opened outside this excerpt

其中cell是指向结构mesh中单元格数组元素的指针。我想用OpenACC将这个循环并行化。

结构sim仅包含标量,但结构mesh包含cell和wall的一维数组:

/* Mesh container (abridged): only the members the loop above touches. */
struct t_mesh_ {
    int     ncells;   /* upper bound of the cell loop */
    t_cell *cell;     /* array of cells, indexed 0..ncells-1 by the loop */
    t_wall *wall;     /* array of walls */
    /* and other variables not used in this function... */
};

其中结构cell定义为:

/* One mesh cell (abridged): sizes, per-cell variable arrays and its six walls. */
struct t_cell_ {
    double  dx;
    double  dy;
    double  dz;
    double *U;        /* per-cell variables, indexed by k */
    double *U_aux;    /* receives a copy of U before U is updated */
    t_wall *w1;
    t_wall *w2;
    t_wall *w3;
    t_wall *w4;
    t_wall *w5;
    t_wall *w6;
    /* and other variables not used in this function... */
};

请注意,*U和*U_aux是小的一维数组,而*w1,...,*w6是指向数组wall中元素的指针:

/* One mesh wall (abridged): the two flux arrays read by the cell update. */
struct t_wall_ {
    double *fR_star;  /* indexed by k in the update loop */
    double *fL_star;  /* indexed by k in the update loop */
    /* and other variables not used in this function... */
};

其中fR_star和fL_star是小的一维数组。

我应该如何定义OpenACC编译指示并管理内存?

谢谢。

[已编辑]

在下面的脚本中,显示了结构的创建和分配以及在感兴趣的循环中使用的指针。大部分代码已被省略,只保留了问题中提到的内容:

//macros omitted

////////////////////////////////////////////////////
//////////////  S T R U C T U R E S  ///////////////
////////////////////////////////////////////////////

// Forward typedefs: the structures below point at one another (cells hold
// wall pointers, walls hold cell pointers), so the names must exist first.
typedef struct t_node_ t_node;
typedef struct t_cell_ t_cell; 
typedef struct t_wall_ t_wall;
typedef struct t_mesh_ t_mesh;
typedef struct t_sim_ t_sim;

// Mesh node: an identifier and two coordinates.
struct t_node_ {
    int    id;
    double x;
    double y;
};

// One mesh cell: indices, geometry, variable array and links to its walls.
struct t_cell_{
    int id;
    int l,m;    //column (l) and row (m) index; create_mesh() sets id=xcells*m+l
    double *U;  //array of cell-valued variables
    double dx,dy;
    double xc,yc;
    int n1,n2,n3,n4;
    int w1_id,w2_id,w3_id,w4_id; //indices into mesh->wall
    //pointers to mesh->wall; w2 and w3 were missing from this declaration even
    //though create_mesh() assigns them and update_cellK1() dereferences them
    t_wall *w1,*w2,*w3,*w4;
};

// One mesh wall: flux arrays, neighbouring cells and two nx/ny components.
struct t_wall_ {
    int     id;
    double *fR_star;   // right flux array at this wall (same dimension as U)
    double *fL_star;   // left flux array at this wall (same dimension as U)
    int     cellR_id;
    int     cellL_id;
    t_cell *cellR;     // right cell of the wall (points into mesh->cell)
    t_cell *cellL;     // left cell of the wall (points into mesh->cell)
    double  nx;
    double  ny;
};

// Mesh container: grid dimensions, entity counts and the arrays holding them.
struct t_mesh_ {
    int     xcells;
    int     ycells;
    double  dx;
    double  dy;
    int     ncells;  // number of cells
    int     nwalls;  // number of walls
    int     nnodes;
    t_cell *cell;    // array of cell structures
    t_wall *wall;    // array of wall structures
    t_node *node;    // array of node structures
    t_sim  *sim;
};

// Simulation parameters; every member is a scalar.
struct t_sim_ {
    double dt;
    double t;
    double CFL;
    double tf;
    double tVolc;
    int    rk_steps;
    int    order;
    int    nvar;   // number of variables (dimension of U,fR_star,fL_star...)
};


////////////////////////////////////////////////////
//////  F U N C T I O N   D E F I N I T I O N //////
////////////////////////////////////////////////////


// Allocates the cell and wall arrays of the mesh and links them; returns 1.
int create_mesh(t_mesh *mesh,t_sim *sim);
// Updates U in every cell from the wall fluxes, scaled by sim->dt.
void update_cellK1(t_mesh *mesh,t_sim *sim);


////////////////////////////////////////////////////
//////  P R E - P R O C.   F U N C T I O N S ///////
////////////////////////////////////////////////////

// Builds the mesh: allocates mesh->cell and mesh->wall, allocates the flux
// arrays of every wall and the U array of every cell, and links cells and
// walls to each other through pointers. mesh->xcells and mesh->ycells must be
// set by the caller before the call. Always returns 1.
// Parts of the original function are elided ("// ...") in this listing.
// NOTE(review): none of the malloc() results are checked.
int create_mesh(t_mesh *mesh,t_sim *sim){
    int i,l,m,k,aux,p;
    int xcells,ycells; 
    t_cell *cell;
    t_wall *wall;
    t_node *node;
    int semiSt;
    

    // Let the mesh reach the simulation parameters.
    mesh->sim=sim;

    //Cells
    xcells=mesh->xcells;
    ycells=mesh->ycells;
    mesh->ncells=xcells*ycells;
    mesh->cell=(t_cell*)malloc(mesh->ncells*sizeof(t_cell));
    cell=mesh->cell;    
      
    //Walls
    mesh->nwalls=2*mesh->ncells+xcells+mesh->ycells;
    mesh->wall=(t_wall*)malloc(mesh->nwalls*sizeof(t_wall));

    // Each wall gets two flux arrays of sim->nvar doubles.
    wall=mesh->wall;
    for(k=0;k<mesh->nwalls;k++){
        wall[k].id=k;
        wall[k].fR_star=(double*)malloc(sim->nvar*sizeof(double));
        wall[k].fL_star=(double*)malloc(sim->nvar*sizeof(double));
    }

    //Walls and nodes of the cells
    // NOTE(review): this loop stops at ycells-1 / xcells-1, while the next
    // loop dereferences w1..w4 for every cell; presumably the elided code
    // covers the last row and column - confirm.
    for(m=0;m<ycells-1;m++){
        for(l=0;l<xcells-1;l++){
            
            // Row-major cell index.
            k=xcells*m+l;
            cell[k].id=k;
            cell[k].l=l;
            cell[k].m=m;
            
            cell[k].w1_id=2*(k)+m;
            // ... (assignment of w2_id, w3_id, w4_id elided)

            cell[k].w1=&(mesh->wall[cell[k].w1_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall 
            cell[k].w2=&(mesh->wall[cell[k].w2_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall 
            cell[k].w3=&(mesh->wall[cell[k].w3_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall 
            cell[k].w4=&(mesh->wall[cell[k].w4_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall 
                  // ...
                    // ...

        }
    }

    //Assignment of wall's neighbour cells
    for(m=0;m<ycells;m++){
        for(l=0;l<xcells;l++){
            
            k=xcells*m+l;
            cell[k].w1->cellR_id=cell[k].id;
                  // ...

            // The cell is the right neighbour of w1 and w4 and the left
            // neighbour of w2 and w3.
            cell[k].w1->cellR=&(cell[k]);
            cell[k].w4->cellR=&(cell[k]);
            cell[k].w2->cellL=&(cell[k]);
            cell[k].w3->cellL=&(cell[k]);

            //...
                  //other special cases omitted
                  //...

        }
    }

    //Allocation of the variable array "U" in every cell
    for(k=0;k<mesh->ncells;k++){        
        mesh->cell[k].U    =(double*)malloc(sim->nvar*sizeof(double));
    }
    
    return 1;
}


// Updates U in every cell: for each variable k, subtracts sim->dt times the
// flux difference across walls w2/w4 divided by dx plus the flux difference
// across walls w3/w1 divided by dy.
//   mesh: mesh whose cells are updated in place (only read here otherwise)
//   sim:  supplies nvar (length of U and of the flux arrays) and dt
void update_cellK1(t_mesh *mesh,t_sim *sim){
    int i,k;
    t_cell *cell;
    // With default(none) every variable used in the region needs an explicit
    // data-sharing attribute. mesh and sim are only read, so they are shared;
    // cell and k are per-thread scratch and must be private. (Previously mesh
    // was private, i.e. uninitialised inside the region, and sim/cell were not
    // listed at all.) The loop index i is private automatically.
#pragma omp parallel for default(none) private(k,cell) shared(sim,mesh)
    for(i=0;i<mesh->ncells;i++){
        cell=&(mesh->cell[i]);
        for(k=0;k<sim->nvar;k++){
            cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy);
        }
    }
}


////////////////////////////////////////////////////
//////////////////// M A I N ///////////////////////
////////////////////////////////////////////////////

// Program entry point: allocates the mesh and simulation descriptors, builds
// the mesh and advances the solution until the accumulated time reaches
// sim->tf.
// Returns 0 on success (the conventional success status; it used to return 1)
// and 1 when the descriptors cannot be allocated.
int main(int argc,char * argv[]){

    int i,j,p;
    t_mesh *mesh;
    t_sim *sim;
    double tf,t;
    int nIt;
    double timeac;

    omp_set_num_threads(NTHREADS);

    //Mesh and sim allocation
    mesh=(t_mesh*)malloc(sizeof(t_mesh));
    sim =(t_sim*)malloc(sizeof(t_sim));
    if(!mesh || !sim){
        //free() accepts a null pointer, so both can be released unconditionally
        free(mesh);
        free(sim);
        return 1;
    }

    ////////////////////////////////////////////////////
    ////////////// P R E - P R O C E S S ///////////////
    ////////////////////////////////////////////////////

    //...
    //variable initialization and file reading omitted
    //cell->dx= ...
    //cell->dy= ...
    //...

    create_mesh(mesh,sim);
    update_initial(mesh); //this function (omitted) assigns the initial values of cell[k].U[0] ... cell[k].U[4]

    ////////////////////////////////////////////////////
    ////////////// C A L C U L A T I O N ///////////////
    ////////////////////////////////////////////////////
    tf=sim->tf;
    sim->t=0.0;
    t=0.0;
    while(t<tf){
        compute_fluxes(mesh,sim); //this function (omitted) computes *fR_star,*fL_star of the walls, which are then used in "update_cellK1()"
        update_dt(mesh,sim);      //this function (omitted) computes sim->dt
        update_cellK1(mesh,sim);
        t+=sim->dt; //Time updated
        sim->t=t;
    }

    return 0;

}

解决方法

最简单的方法是使用CUDA统一内存进行编译,前提是您使用的是PGI编译器(-ta=tesla:managed)或NVIDIA HPC编译器(-acc -gpu=managed)。只要内存是动态分配的,CUDA运行时就会为您处理所有数据管理。

如果您确实需要手动管理数据,在这种情况下会比较棘手,但并不太困难。如果您可以提供示例代码来显示如何创建完整结构,那么我可以帮助您管理数据。很可能我们需要使用OpenACC API调用,以便可以直接管理设备指针,并使用“acc_attach”或“acc_map_data”调用来使“w”变量指向正确的“wall”。

[编辑]

下面是我第一次尝试添加手动数据移动。我选择了使用指令的简单方法,然后附加已分配的设备数据。问题在于,由于代码不完整(最好有用于初始化的虚拟值),所以我实际上无法测试代码是否按预期工作。有时,在这些类型的代码中,我们需要选择仅使用API调用,以便可以直接控制指针。

//macros omitted
#include <stdlib.h>
#include <stdio.h>

#ifdef _OPENACC
#include <openacc.h>
#endif

////////////////////////////////////////////////////
//////////////  S T R U C T U R E S  ///////////////
////////////////////////////////////////////////////

// Forward typedefs: cells point at walls and walls point back at cells, so
// all structure names are introduced before any structure is defined.
typedef struct t_node_ t_node;
typedef struct t_cell_ t_cell;
typedef struct t_wall_ t_wall;
typedef struct t_mesh_ t_mesh;
typedef struct t_sim_ t_sim;

// Mesh node: an identifier and two coordinates.
struct t_node_ {
    int    id;
    double x;
    double y;
};

// One mesh cell: indices, geometry, variable array and links to its four walls.
struct t_cell_ {
    int     id;
    int     l;
    int     m;
    double *U;      // array of cell-valued variables
    double  dx;
    double  dy;
    double  xc;
    double  yc;
    int     n1;
    int     n2;
    int     n3;
    int     n4;
    int     w1_id;  // indices into mesh->wall
    int     w2_id;
    int     w3_id;
    int     w4_id;
    t_wall *w1;     // pointers to mesh->wall
    t_wall *w2;
    t_wall *w3;
    t_wall *w4;
};

// One mesh wall: flux arrays, neighbouring cells and two nx/ny components.
struct t_wall_ {
    int     id;
    double *fR_star;   // right flux array at this wall (same dimension as U)
    double *fL_star;   // left flux array at this wall (same dimension as U)
    int     cellR_id;
    int     cellL_id;
    t_cell *cellR;     // right cell of the wall (points into mesh->cell)
    t_cell *cellL;     // left cell of the wall (points into mesh->cell)
    double  nx;
    double  ny;
};

// Mesh container: grid dimensions, entity counts and the arrays holding them.
struct t_mesh_ {
    int     xcells;
    int     ycells;
    double  dx;
    double  dy;
    int     ncells;  // number of cells
    int     nwalls;  // number of walls
    int     nnodes;
    t_cell *cell;    // array of cell structures
    t_wall *wall;    // array of wall structures
    t_node *node;    // array of node structures
    t_sim  *sim;
};

// Simulation parameters; every member is a scalar.
struct t_sim_ {
    double dt;
    double t;
    double CFL;
    double tf;
    double tVolc;
    int    rk_steps;
    int    order;
    int    nvar;   // number of variables (dimension of U,fR_star,fL_star...)
};


////////////////////////////////////////////////////
//////  F U N C T I O N   D E F I N I T I O N //////
////////////////////////////////////////////////////


// Allocates and links the cell and wall arrays on the host and mirrors them
// on the device with OpenACC data directives; returns 1.
int create_mesh(t_mesh *mesh,t_sim *sim);
// Updates U in every cell from the wall fluxes, scaled by sim->dt.
void update_cellK1(t_mesh *mesh,t_sim *sim);


////////////////////////////////////////////////////
//////  P R E - P R O C.   F U N C T I O N S ///////
////////////////////////////////////////////////////

// Builds the mesh on the host (cell/wall arrays, per-wall flux arrays,
// per-cell U arrays, cell<->wall pointers) and, when compiled with OpenACC,
// creates matching device allocations and re-targets the device pointers.
// mesh->xcells and mesh->ycells must be set by the caller. Always returns 1.
// The acc_attach/update calls below operate on data that must already be
// present on the device; in this listing main() enters mesh[:1] and sim[:1]
// before calling this function.
// Parts of the original function are elided ("// ...") in this listing.
// NOTE(review): none of the malloc() results are checked.
int create_mesh(t_mesh *mesh,t_sim *sim){
    int i,l,m,k,aux,p;
    int xcells,ycells;
    t_cell *cell;
    t_wall *wall;
    t_node *node;
    int semiSt;

    mesh->sim=sim;

    //Cells
    xcells=mesh->xcells;
    ycells=mesh->ycells;
    mesh->ncells=xcells*ycells;
    mesh->cell=(t_cell*)malloc(mesh->ncells*sizeof(t_cell));
    cell=mesh->cell;

    //Walls
    mesh->nwalls=2*mesh->ncells+xcells+mesh->ycells;
    mesh->wall=(t_wall*)malloc(mesh->nwalls*sizeof(t_wall));

#ifdef _OPENACC
    // Point the device copy of mesh->sim at the device copy of sim.
    acc_attach((void**)&mesh->sim);
    // The counts were computed on the host after mesh was copied in.
#pragma acc update device(mesh->ncells,mesh->nwalls)
    // Device storage for the cell and wall arrays (contents not copied).
#pragma acc enter data create(mesh->cell[:mesh->ncells],mesh->wall[:mesh->nwalls])
#endif

    wall=mesh->wall;
    for(k=0;k<mesh->nwalls;k++){
        wall[k].id=k;
        wall[k].fR_star=(double*)malloc(sim->nvar*sizeof(double));
        wall[k].fL_star=(double*)malloc(sim->nvar*sizeof(double));
        // Device storage for this wall's two flux arrays.
#pragma acc enter data create(wall[k].fR_star[:sim->nvar],wall[k].fL_star[:sim->nvar])
    }

    //Walls and nodes of the cells
    // NOTE(review): this loop stops at ycells-1 / xcells-1, while the next
    // loop dereferences w1..w4 for every cell; presumably the elided code
    // covers the last row and column - confirm.
    for(m=0;m<ycells-1;m++){
        for(l=0;l<xcells-1;l++){

            // Row-major cell index.
            k=xcells*m+l;
            cell[k].id=k;
            cell[k].l=l;
            cell[k].m=m;

            cell[k].w1_id=2*(k)+m;
            // ... (assignment of w2_id, w3_id, w4_id elided)

            cell[k].w1=&(mesh->wall[cell[k].w1_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
            cell[k].w2=&(mesh->wall[cell[k].w2_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
            cell[k].w3=&(mesh->wall[cell[k].w3_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
            cell[k].w4=&(mesh->wall[cell[k].w4_id]);  // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
                  // ...
                    // ...
#ifdef _OPENACC
            // Copy the host cell to the device (this also copies host pointer
            // values), then re-target the four wall pointers on the device.
#pragma acc update device(cell[k:1])
            acc_attach((void**)&cell[k].w1);
            acc_attach((void**)&cell[k].w2);
            acc_attach((void**)&cell[k].w3);
            acc_attach((void**)&cell[k].w4);
#endif
        }
    }

    //Assignment of wall's neighbour cells
    for(m=0;m<ycells;m++){
        for(l=0;l<xcells;l++){

            k=xcells*m+l;
            cell[k].w1->cellR_id=cell[k].id;
                  // ...

            // The cell is the right neighbour of w1 and w4 and the left
            // neighbour of w2 and w3.
            cell[k].w1->cellR=&(cell[k]);
            cell[k].w4->cellR=&(cell[k]);
            cell[k].w2->cellL=&(cell[k]);
            cell[k].w3->cellL=&(cell[k]);

            //...
                  //other special cases omitted
                  //...
#ifdef _OPENACC
            // Mirror the neighbour id and re-target the wall->cell pointers.
#pragma acc update device(cell[k].w1->cellR_id)
            acc_attach((void**)&cell[k].w1->cellR);
            acc_attach((void**)&cell[k].w4->cellR);
            acc_attach((void**)&cell[k].w2->cellL);
            acc_attach((void**)&cell[k].w3->cellL);
#endif

        }
    }

    //Allocation of the variable array "U" in every cell
    for(k=0;k<mesh->ncells;k++){
        mesh->cell[k].U    =(double*)malloc(sim->nvar*sizeof(double));
        // Device storage for this cell's U array (contents not copied).
#pragma acc enter data create(mesh->cell[k].U[:sim->nvar])
    }

    return 1;
}


void update_cellK1(t_mesh *mesh,k;
    t_cell *cell;
#pragma acc parallel loop present(mesh,sim) private(cell)
    for(i=0;i<mesh->ncells;i++){
        cell=&(mesh->cell[i]);
        for(k=0;k<sim->nvar;k++){
            cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy);
        }
    }
}


////////////////////////////////////////////////////
//////////////////// M A I N ///////////////////////
////////////////////////////////////////////////////

// Program entry point of the OpenACC variant: allocates the mesh and
// simulation descriptors, places them on the device, builds the mesh and runs
// a single cell update (the time loop is disabled in this listing).
// Returns 0 on success (the conventional success status; it used to return 1)
// and 1 when the descriptors cannot be allocated.
int main(int argc,char * argv[]){

    int i,j,p;
    t_mesh *mesh;
    t_sim *sim;
    double tf,t;
    int nIt;
    double timeac;

    //Mesh and sim allocation
    mesh=(t_mesh*)malloc(sizeof(t_mesh));
    sim =(t_sim*)malloc(sizeof(t_sim));
    if(!mesh || !sim){
        //free() accepts a null pointer, so both can be released unconditionally
        free(mesh);
        free(sim);
        return 1;
    }


    ////////////////////////////////////////////////////
    ////////////// P R E - P R O C E S S ///////////////
    ////////////////////////////////////////////////////

    //...
    //variable initialization and file reading omitted
    //cell->dx= ...
    //cell->dy= ...
    //...
    //sim is copied in rather than merely created: the device kernel in
    //update_cellK1() reads sim->nvar and sim->dt, which would otherwise be
    //uninitialised on the device.
#pragma acc enter data copyin(mesh[:1],sim[:1])

    create_mesh(mesh,sim);
//    update_initial(mesh); //this function (omitted) assigns the initial values of cell[k].U[0] ... cell[k].U[4]

    ////////////////////////////////////////////////////
    ////////////// C A L C U L A T I O N ///////////////
    ////////////////////////////////////////////////////
    tf=sim->tf;
    sim->t=0.0;
    t=0.0;
    //NOTE(review): once the loop below is re-enabled, sim->dt changes on the
    //host every step and must be pushed to the device (e.g. an
    //"acc update device(sim->dt)") before update_cellK1() runs.
//    while(t<tf){
//        compute_fluxes(mesh,sim); //this function (omitted) computes *fR_star,*fL_star of the walls, which are then used in "update_cellK1()"
//        update_dt(mesh,sim);      //this function (omitted) computes sim->dt
        update_cellK1(mesh,sim);
//        t+=sim->dt; //Time updated
//        sim->t=t;
//    }

    return 0;

}

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐


依赖报错 idea导入项目后依赖报错,解决方案:https://blog.csdn.net/weixin_42420249/article/details/81191861 依赖版本报错:更换其他版本 无法下载依赖可参考:https://blog.csdn.net/weixin_42628809/a
错误1:代码生成器依赖和mybatis依赖冲突 启动项目时报错如下 2021-12-03 13:33:33.927 ERROR 7228 [ main] o.s.b.d.LoggingFailureAnalysisReporter : *************************** APPL
错误1:gradle项目控制台输出为乱码 # 解决方案:https://blog.csdn.net/weixin_43501566/article/details/112482302 # 在gradle-wrapper.properties 添加以下内容 org.gradle.jvmargs=-Df
错误还原:在查询的过程中,传入的workType为0时,该条件不起作用 &lt;select id=&quot;xxx&quot;&gt; SELECT di.id, di.name, di.work_type, di.updated... &lt;where&gt; &lt;if test=&qu
报错如下,gcc版本太低 ^ server.c:5346:31: 错误:‘struct redisServer’没有名为‘server_cpulist’的成员 redisSetCpuAffinity(server.server_cpulist); ^ server.c: 在函数‘hasActiveC
解决方案1 1、改项目中.idea/workspace.xml配置文件,增加dynamic.classpath参数 2、搜索PropertiesComponent,添加如下 &lt;property name=&quot;dynamic.classpath&quot; value=&quot;tru
删除根组件app.vue中的默认代码后报错:Module Error (from ./node_modules/eslint-loader/index.js): 解决方案:关闭ESlint代码检测,在项目根目录创建vue.config.js,在文件中添加 module.exports = { lin
查看spark默认的python版本 [root@master day27]# pyspark /home/software/spark-2.3.4-bin-hadoop2.7/conf/spark-env.sh: line 2: /usr/local/hadoop/bin/hadoop: No s
使用本地python环境可以成功执行 import pandas as pd import matplotlib.pyplot as plt # 设置字体 plt.rcParams[&#39;font.sans-serif&#39;] = [&#39;SimHei&#39;] # 能正确显示负号 p
错误1:Request method ‘DELETE‘ not supported 错误还原:controller层有一个接口,访问该接口时报错:Request method ‘DELETE‘ not supported 错误原因:没有接收到前端传入的参数,修改为如下 参考 错误2:cannot r
错误1:启动docker镜像时报错:Error response from daemon: driver failed programming external connectivity on endpoint quirky_allen 解决方法:重启docker -&gt; systemctl r
错误1:private field ‘xxx‘ is never assigned 按Altʾnter快捷键,选择第2项 参考:https://blog.csdn.net/shi_hong_fei_hei/article/details/88814070 错误2:启动时报错,不能找到主启动类 #
报错如下,通过源不能下载,最后警告pip需升级版本 Requirement already satisfied: pip in c:\users\ychen\appdata\local\programs\python\python310\lib\site-packages (22.0.4) Coll
错误1:maven打包报错 错误还原:使用maven打包项目时报错如下 [ERROR] Failed to execute goal org.apache.maven.plugins:maven-resources-plugin:3.2.0:resources (default-resources)
错误1:服务调用时报错 服务消费者模块assess通过openFeign调用服务提供者模块hires 如下为服务提供者模块hires的控制层接口 @RestController @RequestMapping(&quot;/hires&quot;) public class FeignControl
错误1:运行项目后报如下错误 解决方案 报错2:Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.8.1:compile (default-compile) on project sb 解决方案:在pom.
参考 错误原因 过滤器或拦截器在生效时,redisTemplate还没有注入 解决方案:在注入容器时就生效 @Component //项目运行时就注入Spring容器 public class RedisBean { @Resource private RedisTemplate&lt;String
使用vite构建项目报错 C:\Users\ychen\work&gt;npm init @vitejs/app @vitejs/create-app is deprecated, use npm init vite instead C:\Users\ychen\AppData\Local\npm-