방법: 병렬로 맵 및 축소 작업 수행

아티클
06/09/2015

이 예제는 concurrency::parallel_transform 및 concurrency::parallel_reduce 알고리즘 및 concurrency::concurrent_unordered_map 클래스를 사용하여 파일의 단어 개수를 세는 방법을 보여줍니다.

맵 작업은 시퀀스의 각 값에 함수를 적용합니다. 축소(reduce) 작업은 시퀀스의 요소들을 하나의 값으로 결합합니다. 표준 템플릿 라이브러리(STL)의 std::transform 및 std::accumulate 함수를 사용하여 맵 및 축소 작업을 수행할 수 있습니다. 그러나 많은 문제에서 성능을 향상시키려면 parallel_transform 알고리즘을 사용하여 맵 작업을 병렬로 수행하고 parallel_reduce 알고리즘을 사용하여 축소 작업을 병렬로 수행할 수 있습니다. 경우에 따라 concurrent_unordered_map을 사용하여 맵 작업과 축소 작업을 한 번의 작업으로 수행할 수도 있습니다.

예제

다음 예제에서는 파일에 있는 각 단어의 발생 횟수를 셉니다. 이 예제는 std::vector를 사용하여 두 파일의 내용을 나타냅니다. 맵 작업은 각 벡터에서 각 단어의 발생 횟수를 계산합니다. 축소 작업은 두 벡터에 걸쳐 단어 수를 누적합니다.

// parallel-map-reduce.cpp 
// compile with: /EHsc
#include <ppl.h>
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include <numeric>
#include <unordered_map>
#include <windows.h>

using namespace concurrency;
using namespace std;

// Map step: turns the words of one file into a word -> occurrence-count table.
class MapFunc
{
public:
    // Counts how many times each word appears in `elements`.
    //
    // The input is only read, so it is taken by const reference; this lets the
    // functor be applied to const containers and temporaries as well as to
    // mutable vectors.
    //
    // Returns a map whose keys are the distinct words of `elements` and whose
    // values are their occurrence counts (an empty map for an empty input).
    unordered_map<wstring, size_t> operator()(const vector<wstring>& elements) const
    {
        unordered_map<wstring, size_t> counts;
        for (const wstring& word : elements)
        {
            // operator[] value-initializes a missing entry to 0 before the increment.
            ++counts[word];
        }
        return counts;
    }
};

// Reduce step: merges two word-count tables into one.
//
// std::binary_function (deprecated in C++11, removed in C++17) is not used as a
// base class; the three member typedefs it would have supplied are declared
// directly so that code relying on them keeps working.
struct ReduceFunc
{
    typedef unordered_map<wstring, size_t> first_argument_type;
    typedef unordered_map<wstring, size_t> second_argument_type;
    typedef unordered_map<wstring, size_t> result_type;

    // Returns a table holding every word of `x` and `y`; a word present in both
    // tables gets the sum of its two counts. Neither input is modified.
    unordered_map<wstring, size_t> operator() (
        const unordered_map<wstring, size_t>& x,
        const unordered_map<wstring, size_t>& y) const
    {
        unordered_map<wstring, size_t> ret(x);
        // Bind to the map's own value_type (pair<const wstring, size_t>) by
        // reference so that no pair or key is copied per entry.
        for (const auto& entry : y)
        {
            ret[entry.first] += entry.second;
        }
        return ret;
    }
};

// Entry point: counts the words of two in-memory "files" with a parallel map
// step followed by a parallel reduce step, then prints the total for "word1".
int wmain()
{
    typedef unordered_map<wstring, size_t> WordCounts;

    // File 1: "word1" appears twice.
    const vector<wstring> firstFile = {
        L"word1", L"word1", L"word2", L"word3", L"word4"
    };

    // File 2: "word1" appears once more (third occurrence overall).
    const vector<wstring> secondFile = {
        L"word5", L"word6", L"word7", L"word8", L"word1"
    };

    // One inner vector per file; kept non-const so the map functor may take
    // its argument by non-const reference.
    vector<vector<wstring>> files;
    files.push_back(firstFile);
    files.push_back(secondFile);

    // Map: one word-count table per file, written into a pre-sized output vector.
    vector<WordCounts> perFileCounts(files.size());
    parallel_transform(files.begin(), files.end(), perFileCounts.begin(), MapFunc());

    // Reduce: fold the per-file tables into one, starting from an empty table.
    WordCounts totals = parallel_reduce(
        perFileCounts.begin(), perFileCounts.end(), WordCounts(), ReduceFunc());

    wcout << L"\"word1\" occurs " << totals.at(L"word1") << L" times. " << endl;
}
/* Output:
   "word1" occurs 3 times.
*/

코드 컴파일

코드를 컴파일하려면 코드를 복사하여 Visual Studio 프로젝트에 붙여넣거나, parallel-map-reduce.cpp라는 이름의 파일에 붙여넣은 다음 Visual Studio 명령 프롬프트 창에서 다음 명령을 실행합니다.

cl.exe /EHsc parallel-map-reduce.cpp

강력한 프로그래밍

이 예제에서는 concurrent_unordered_map.h에 정의되어 있는 concurrent_unordered_map 클래스를 사용하여 맵 작업과 축소 작업을 한 번의 작업으로 수행할 수 있습니다.

// File 1 
vector<wstring> v1;
v1.push_back(L"word1"); //1 
v1.push_back(L"word1"); //2 
v1.push_back(L"word2"); 
v1.push_back(L"word3"); 
v1.push_back(L"word4"); 

// File 2 
vector<wstring> v2; 
v2.push_back(L"word5"); 
v2.push_back(L"word6"); 
v2.push_back(L"word7"); 
v2.push_back(L"word8"); 
v2.push_back(L"word1"); //3 

vector<vector<wstring>> v;
v.push_back(v1);
v.push_back(v2);

concurrent_unordered_map<wstring, size_t> result;
for_each(begin(v), end(v), [&result](const vector<wstring>& words) {
    parallel_for_each(begin(words), end(words), [&result](const wstring& word) {
        InterlockedIncrement(&result[word]);
    });
});

wcout << L"\"word1\" occurs " << result.at(L"word1") << L" times. " << endl;

/* Output:
   "word1" occurs 3 times.
*/

일반적으로 외부 루프나 내부 루프 중 하나만 병렬화합니다. 파일 수가 상대적으로 적고 각 파일에 많은 단어가 포함되어 있는 경우 내부 루프를 병렬화합니다. 파일 수가 상대적으로 많고 각 파일에 적은 단어가 포함되어 있는 경우 외부 루프를 병렬화합니다.