2012-11-24 71 views
6

我试图同时使用 MPI 和 OpenMP 来实现“调和级数求和”问题的并行版本,但是各个进程输出的结果互不相同。(标题:调和级数求和——C++、MPI 与 OpenMP)

有人可以帮我解决这个问题吗?

并行程序:(MPI和OpenMP)

#include <stdio.h> 
#include <stdlib.h> 
#include <iostream> 
#include <sstream> 
#include <time.h> 
#include <omp.h> 
#include <mpi.h> 

// Compile-time problem parameters. Both are plain macros, so every later
// occurrence of the bare tokens `d` and `n` is textually replaced
// (e.g. `d + 11` becomes `10 + 11`).
#define d 10 //Numbers of Digits (Example: 5 => 0,xxxxx) 
#define n 1000 //Value of N (Example: 5 => 1/1 + 1/2 + 1/3 + 1/4 + 1/5) 

using namespace std; 

// Timing values filled in by main(): clock() before the computation,
// clock() after it, and the difference between the two.
double t_ini, t_fim, t_tot; 

/**
 * Returns the rank of the calling process within MPI_COMM_WORLD.
 */
int getProcessId(){
    int worldRank;
    MPI_Comm_rank(MPI_COMM_WORLD, &worldRank);
    return worldRank;
}

/**
 * Returns the number of processes in MPI_COMM_WORLD.
 */
int numberProcess(){
    int worldSize;
    MPI_Comm_size(MPI_COMM_WORLD, &worldSize);
    return worldSize;
}

void reduce(long unsigned int digits1 []) 
{ 
    long unsigned int digits2[d + 11]; 
    int i = 0; 
    for(i = 0; i < d + 11; i++) digits2[i] = 0; 

    MPI_Allreduce(digits1, digits2,(d+11),MPI_INT,MPI_SUM,MPI_COMM_WORLD); 

    for(i = 0; i < d + 11; i++) digits1[i] = digits2[i]; 

} 

void slave(long unsigned int *digits) 
{ 
    int idP = getProcessId(), numP = numberProcess(); 

    int i; 
    long unsigned int digit; 
    long unsigned int remainder; 

    #pragma omp parallel for private(i, remainder, digit) 
    for (i = idP+1; i <= n; i+=numP){ 
     remainder = 1; 
     for (digit = 0; digit < d + 11 && remainder; ++digit) { 
      long unsigned int div = remainder/i; 
      long unsigned int mod = remainder % i; 
      #pragma omp atomic 
      digits[digit] += div; 
      remainder = mod * 10; 
     } 
    } 
} 

void HPS(char* output) { 
    long unsigned int digits[d + 11]; 

    for (int digit = 0; digit < d + 11; ++digit) 
     digits[digit] = 0; 

    reduce(digits); 
    slave(digits); 

    for (int i = d + 11 - 1; i > 0; --i) { 
     digits[i - 1] += digits[i]/10; 
     digits[i] %= 10; 
    } 

    if (digits[d + 1] >= 5) ++digits[d]; 


    for (int i = d; i > 0; --i) { 
     digits[i - 1] += digits[i]/10; 
     digits[i] %= 10; 
    } 
    stringstream stringstreamA; 
    stringstreamA << digits[0] << ","; 


    for (int i = 1; i <= d; ++i) stringstreamA << digits[i]; 

    string stringA = stringstreamA.str(); 
    stringA.copy(output, stringA.size()); 
} 

/**
 * Entry point: initialises MPI, runs the hybrid MPI + OpenMP harmonic-sum
 * computation, prints the elapsed time and the result, then shuts MPI down.
 * Every process executes this and prints its own lines.
 */
int main(int argc, char **argv) {
    MPI_Init(&argc,&argv);

    t_ini = clock();

    //Parallel MPI com OpenMP Method
    cout << "Parallel MPI com OpenMP Method: " << endl;
    // Zero-initialised so the buffer is always a valid C string when it is
    // streamed to cout below.
    char output[d + 10] = {0};
    HPS(output);

    t_fim = clock();
    t_tot = t_fim-t_ini;

    // clock() counts in units of 1/CLOCKS_PER_SEC, which is not 1000 on
    // every platform; divide by the real constant to report seconds.
    // NOTE(review): clock() reports processor time, not wall-clock time;
    // MPI_Wtime() may be the better timer for a parallel run.
    cout << "Parallel MPI with OpenMP Method: " << (t_tot/CLOCKS_PER_SEC) << endl;
    cout << output << endl;

    MPI_Finalize();

    // Windows-only convenience that keeps the console window open.
    system("PAUSE");
    return 0;
}

实例:

输入:

#define d 10 
#define n 1000 

输出:

7,4854708606 

输入:

#define d 12 
#define n 7 

输出:

2,592857142857 

回答

4

你的代码在这里有一个错误:

void HPS(char* output) { 
    ... 
    reduce(digits); 
    slave(digits); 

    ... 
} 

您应该先完成计算,然后再执行归约(reduce),而不是反过来。请改为:

void HPS(char* output) { 
    ... 

    slave(digits); 
    reduce(digits); 
    ... 
} 

既然你想同时使用 MPI + OpenMP,你也可以保留下面这个循环:

for (i = idP+1; i <= n; i+=numP) 

用它在进程之间划分工作,而把内部循环划分给各个线程:

#pragma omp parallel for private(remainder) 
for (digit = 0; digit < d + 11 && remainder; ++digit) 

于是得到类似这样的代码:

// Outer loop: the terms i assigned to this MPI process (stride = numP).
for (i = idP+1; i <= n; i+=numP){ 
     remainder = 1; 
     // NOTE(review): private(i, remainder) gives each thread fresh,
     // uninitialised copies inside the region, so the values set above are
     // not visible there (firstprivate would be needed) - confirm.
     // NOTE(review): every iteration reads the `remainder` written by the
     // previous one (a loop-carried dependency), and the `&& remainder`
     // test is not the canonical loop form that `parallel for` requires.
     // Verify this compiles and gives correct digits before relying on it.
     #pragma omp parallel for private(i, remainder, digit) 
     for (digit = 0; digit < d + 11 && remainder; ++digit) { 
      long unsigned int div = remainder/i; 
      long unsigned int mod = remainder % i; 
      // Several threads may update the same slot.
      #pragma omp atomic 
      digits[digit] += div; 
      remainder = mod * 10; 
     } 
    } 

如果你愿意,也可以(类似于你原来的做法)把外层循环的工作划分给所有并行任务(线程/进程),例如:

// NOTE(review): these two queries appear before the parallel region below.
// Outside a parallel region omp_get_thread_num() returns 0 and
// omp_get_num_threads() returns 1, so presumably they are meant to be
// moved inside the region - confirm.
int idT = omp_get_thread_num();  // Get the thread id 
int numT = omp_get_num_threads(); // Get the number of threads. 
int numParallelTask = numT * numP; // Number of parallel task 
// First term each thread works on.
// NOTE(review): with a loop stride of numParallelTask, an offset of
// idT*numParallelTask makes the threads' sequences overlap; presumably
// idT*numP was intended - verify.
int start = (idP+1) + (idT*numParallelTask); // The first position here each thread will work 

#pragma omp parallel 
{ 

// Each task strides over the terms by the total number of parallel tasks.
for (i = start; i <= n; i+=numParallelTask) 

... 
} 

请注意,我不是说这会给你最好的性能,但它是一个开始。在MPI + OpenMP中正确使用算法后,您可以继续使用更复杂的方法。