2016-03-30 35 views
2

我想开始学习 MPI 的动态进程创建。我有一个父代码(main.c),它试图生成新的 worker/子进程(worker.c),并将它们合并到一个组内通信器(intracommunicator)中。父代码(main.c)如下(问题:MPI 的 spawn 与 merge):

#include<stdio.h> 
#include "mpi.h" 

/*
 * Parent-side fragment exactly as posted in the question. The statements sit
 * at file scope: no main() and no MPI_Init() call is visible in this snippet.
 */
MPI_Comm child_comm; 
int rank, size; 
MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
MPI_Comm_size(MPI_COMM_WORLD, &size); 

/* Only rank 0 spawns, merges and reports; every other rank skips this branch. */
if(rank == 0) 
{ 
    int num_processes_to_spawn = 2; 
    /* MPI_COMM_SELF is passed as the spawning communicator, so only this rank is on the parent side of child_comm. */
    MPI_Comm_spawn("worker", MPI_ARGV_NULL, num_processes_to_spawn, MPI_INFO_NULL, 0, MPI_COMM_SELF, &child_comm, MPI_ERRCODES_IGNORE); 

/* Merge the parent/child intercommunicator into intra_comm (second argument is 0 here, 1 in the worker). */
MPI_Comm intra_comm; 
MPI_Intercomm_merge(child_comm,0, &intra_comm); 
MPI_Barrier(child_comm); 


/* Size of the merged communicator. */
int tmp_size; 
MPI_Comm_size(intra_comm, &tmp_size); 
printf("size of intra comm world = %d\n", tmp_size); 

/* MPI_Comm_size() on the intercommunicator: the posted run printed 1 here, not the 2 spawned workers. */
MPI_Comm_size(child_comm, &tmp_size); 
printf("size of child comm world = %d\n", tmp_size); 

/* Size of the communicator the parent job was launched with. */
MPI_Comm_size(MPI_COMM_WORLD, &tmp_size); 
printf("size of parent comm world = %d\n", tmp_size); 

} 

MPI_Finalize(); 

worker(子进程)的代码是:

#include<stdio.h> 
    #include "mpi.h" 
    /*
     * Worker entry point exactly as posted in the question: fetches the
     * communicator to its parent, merges it into intra_comm, and lets worker
     * rank 0 print three communicator sizes.
     */
    int main(int argc, char *argv[]) 
    { 
    int numprocs, myrank; 
    MPI_Comm parentcomm; 
    MPI_Comm intra_comm; 

    MPI_Init(&argc, &argv); 
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 

    /* Communicator connecting this worker to the process(es) that spawned it. */
    MPI_Comm_get_parent(&parentcomm); 

    /* Merge with the parent side (second argument is 1 here, 0 in the parent code). */
    MPI_Intercomm_merge(parentcomm, 1, &intra_comm); 
    MPI_Barrier(parentcomm); 

    /* Only worker rank 0 reports. */
    if(myrank == 0) 
    { 
    int tmp_size; 
    /* MPI_Comm_size() on parentcomm: the posted run printed 2 here, matching the worker count rather than the parent count. */
    MPI_Comm_size(parentcomm, &tmp_size); 
    printf("child size of parent comm world = %d\n", tmp_size); 

    MPI_Comm_size(MPI_COMM_WORLD, &tmp_size); 
    printf("child size of child comm world = %d\n", tmp_size); 

    MPI_Comm_size(intra_comm, &tmp_size); 
    printf("child size of intra comm world = %d\n", tmp_size); 

    /* NOTE: MPI_Finalize() and the return are inside this branch, so ranks other than 0 reach the end of main() without calling MPI_Finalize(). */
    MPI_Finalize(); 
    return 0; 
    } 
} 

我使用以下命令运行:

mpirun -np 12 main.c 

在生成(spawn)和合并(merge)之后,我预期这段代码的输出为

size of intra comm world = 14 
size of child comm world = 2 
size of parent comm world = 12 
child size of parent comm world = 12 
child size of child comm world = 2 
child size of intra comm world = 14 

但是我得到了如下不正确的输出。

size of intra comm world = 3 
    size of child comm world = 1 
    size of parent comm world = 12 
    child size of parent comm world = 2 
    child size of child comm world = 2 
    child size of intra comm world = 3 

我不明白错误出在哪里,有人能告诉我问题在哪里吗?

感谢, 克里斯

回答

1

你的代码存在一些问题,我在这里列出:

  • 在主进程(master)部分,只有进程 0 调用了 MPI_Comm_spawn()。这本身不是错误(尤其是因为您使用 MPI_COMM_SELF 作为父通信器),但它实际上把所有其他进程都排除在了后续的合并之外。
  • 在主进程部分和 worker 部分中,您使用的是 MPI_Comm_size() 而不是 MPI_Comm_remote_size() 来获取远程组的大小。因此,您得到的只是组间通信器(intercommunicator)中本地组的大小,而不是远程组的大小。
  • 在主进程代码中,只有进程 0 调用了 MPI_Finalize()(更不用说还缺少 main() 和 MPI_Init())。

下面是你的代码修正后的版本:

master.c

#include <stdio.h> 
#include <mpi.h> 

/*
 * master.c entry point.
 *
 * Every rank of MPI_COMM_WORLD takes part in spawning the "./worker"
 * processes (rank 0 is the spawn root) and in merging the resulting
 * intercommunicator into one intracommunicator. Rank 0 then prints the size
 * of the merged communicator, the number of spawned workers (remote group of
 * the intercommunicator) and the size of the parent's MPI_COMM_WORLD.
 */
int main(int argc, char *argv[])
{
    enum { WORKER_COUNT = 2 };

    MPI_Init(&argc, &argv);

    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    /* Collective over MPI_COMM_WORLD: all parent ranks share the intercommunicator. */
    MPI_Comm workers;
    MPI_Comm_spawn("./worker", MPI_ARGV_NULL, WORKER_COUNT, MPI_INFO_NULL,
                   0, MPI_COMM_WORLD, &workers, MPI_ERRCODES_IGNORE);

    /* The parent side passes 0 as the second argument; the workers pass 1. */
    MPI_Comm merged;
    MPI_Intercomm_merge(workers, 0, &merged);

    if (world_rank == 0) {
        int merged_size;
        int worker_size;
        int world_size;

        /* Query everything first, then report in the same order as before. */
        MPI_Comm_size(merged, &merged_size);
        MPI_Comm_remote_size(workers, &worker_size);
        MPI_Comm_size(MPI_COMM_WORLD, &world_size);

        printf("size of intra comm world = %d\n", merged_size);
        printf("size of child comm world = %d\n", worker_size);
        printf("size of parent comm world = %d\n", world_size);
    }

    MPI_Finalize();

    return 0;
}

worker.c

#include <stdio.h> 
#include <mpi.h> 

/*
 * worker.c entry point.
 *
 * Each spawned worker obtains the intercommunicator to its parents, merges
 * it into one intracommunicator, and worker rank 0 prints the number of
 * parent processes (remote group), the size of the workers' own
 * MPI_COMM_WORLD and the size of the merged communicator.
 */
int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int worker_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &worker_rank);

    /* Intercommunicator linking the workers to the processes that spawned them. */
    MPI_Comm parents;
    MPI_Comm_get_parent(&parents);

    /* The worker side passes 1 as the second argument; the parents pass 0. */
    MPI_Comm merged;
    MPI_Intercomm_merge(parents, 1, &merged);

    if (worker_rank == 0) {
        int parent_size;
        int world_size;
        int merged_size;

        /* Query everything first, then report in the same order as before. */
        MPI_Comm_remote_size(parents, &parent_size);
        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
        MPI_Comm_size(merged, &merged_size);

        printf("child size of parent comm world = %d\n", parent_size);
        printf("child size of child comm world = %d\n", world_size);
        printf("child size of intra comm world = %d\n", merged_size);
    }

    MPI_Finalize();

    return 0;
}

在我的笔记本电脑上运行,输出如下:

~> mpirun -n 12 ./master 
child size of parent comm world = 12 
child size of child comm world = 2 
child size of intra comm world = 14 
size of intra comm world = 14 
size of child comm world = 2 
size of parent comm world = 12 
+0

感谢吉尔。我意识到这是远程组大小(remote size)的问题; – marc