ParallelSearch/ParallelSearch.hpp

132 lines
3.4 KiB
C++

#ifndef PARALLELSEARCH_HPP
#define PARALLELSEARCH_HPP
#include <string>
#include <vector>
#include <iostream>
#include <thread>
// #include "SearchJob.hpp"
// Prefix search over a range of strings, with the range split into sub-ranges
// that are searched by separate threads.
//
// Iterator must support `end - begin` and `begin + n` (both are used to cut
// the range into sub-ranges) and must dereference to an object that offers a
// std::string-style compare(pos, len, str).
// Results are iterators into the searched range, not copies of the elements,
// so the range has to stay valid for as long as the results are used.
template <typename Iterator>
class ParallelSearch
{
public:
    // Type of the search result: one iterator per matching element
    using ResultList = std::vector<Iterator>;

    // Search over one contiguous sub-range; collects its own matches.
    //
    // No destructor is declared on purpose: a user-declared destructor would
    // suppress the implicit move operations, so every SearchJob stored in a
    // vector would be copied, and a copied result list is not guaranteed to
    // keep the capacity reserved by the constructor.
    class SearchJob
    {
    public:
        // initially reserve n entries in result-list (to cut down number of reallocations)
        enum { INITIAL_RESERVE = 128 };

        // Create partial search job for the range [begin, end)
        SearchJob(Iterator begin, Iterator end, const std::string& pattern);

        // run the search job and collect results internally
        void execute();

        // return the matches collected by execute()
        const ResultList& get_result() const { return result_; }

    private:
        // pattern to search for
        std::string pattern_;
        // start of search range
        Iterator begin_;
        // end of search range (one past the last element)
        Iterator end_;
        // result of the search
        ResultList result_;
    };

    // construct parallel search by splitting the search range [begin, end)
    // across at most <workerCount> SearchJob objects
    ParallelSearch(unsigned workerCount, Iterator begin, Iterator end, const std::string& pattern);

    // run the search workers and collect results
    void run();

    // return the combined result collected by run()
    const ResultList& get_result() const { return result_; }

private:
    // jobs to execute
    std::vector<SearchJob> jobList_;
    // result of the search
    ResultList result_;
};
// Create a search job for the range [begin, end) that looks for elements
// starting with `pattern`.
//
// The initializers are listed in member declaration order (pattern_, begin_,
// end_): members are always initialized in declaration order, so listing them
// differently only hides the real order and triggers -Wreorder.
template <typename Iterator>
ParallelSearch<Iterator>::SearchJob::SearchJob(Iterator begin, Iterator end, const std::string& pattern)
    : pattern_(pattern), begin_(begin), end_(end)
{
    // pre-allocate room for the first matches to cut down reallocations
    result_.reserve(INITIAL_RESERVE);
}
// Walk the job's range [begin_, end_) and append an iterator to result_ for
// every element that starts with pattern_.
template <typename Iterator>
void ParallelSearch<Iterator>::SearchJob::execute()
{
    const std::size_t prefixLength = pattern_.size();
    Iterator current = begin_;
    while (current != end_)
    {
        // compare() looks at no more than prefixLength leading characters of
        // the element; a result of 0 means they equal the pattern
        const bool startsWithPattern = (current->compare(0, prefixLength, pattern_) == 0);
        if (startsWithPattern)
        {
            // found a match, remember the iterator to it
            result_.push_back(current);
        }
        ++current;
    }
}
// Split the range [begin, end) into consecutive sub-ranges and create one
// SearchJob per sub-range.
//
// workerCount: requested number of jobs; 0 is treated as 1 so that the
//              bucket computation never divides by zero.
// begin, end:  range to search; an empty or reversed range creates no jobs.
// pattern:     prefix every job searches for.
//
// Never more jobs than elements are created, and the sizes of any two jobs
// differ by at most one element.
template <typename Iterator>
ParallelSearch<Iterator>::ParallelSearch(unsigned workerCount, Iterator begin, Iterator end, const std::string& pattern)
{
    const auto distance = end - begin;
    if (distance <= 0)
    {
        // nothing to search
        return;
    }
    const std::size_t totalCount = static_cast<std::size_t>(distance);
    const std::size_t workers = (workerCount == 0) ? 1u : workerCount;
    const std::size_t jobCount = (totalCount < workers) ? totalCount : workers;
    const std::size_t bucketSize = totalCount / jobCount;
    const std::size_t remains = totalCount % jobCount;

    jobList_.reserve(jobCount);
    Iterator first = begin;
    for (std::size_t index = 0; index < jobCount; ++index)
    {
        // the first <remains> buckets take one extra element each, so the
        // remainder is spread out instead of being piled onto one job
        const std::size_t size = bucketSize + ((index < remains) ? 1 : 0);
        // compute [first; last) iterators for sub-range
        Iterator last = first + size;
        // add appropriate search job
        jobList_.push_back(SearchJob(first, last, pattern));
        // next sub-range starts where this one ended
        first = last;
    }
}
// Execute every search job on its own thread, wait for all of them to finish
// and append the matches of each job, in job order, to the overall result.
//
// If a thread cannot be started, the threads that are already running are
// joined and the exception is rethrown; nothing is appended to the overall
// result in that case.
//
// Note: neither this function nor SearchJob::execute() clears earlier
// matches, so calling run() more than once appends further copies.
template <typename Iterator>
void ParallelSearch<Iterator>::run()
{
    // start thread for each job
    std::vector<std::thread> threads;
    threads.reserve(jobList_.size());
    try
    {
        for (SearchJob& job : jobList_)
        {
            threads.emplace_back(&SearchJob::execute, &job);
        }
    }
    catch (...)
    {
        // destroying a std::thread that is still joinable calls
        // std::terminate, so join the started workers before propagating
        for (std::thread& th : threads)
        {
            th.join();
        }
        throw;
    }
    // wait for all threads to join
    for (std::thread& th : threads)
    {
        th.join();
    }
    // size the result once instead of growing it job by job
    std::size_t matchCount = result_.size();
    for (const SearchJob& job : jobList_)
    {
        matchCount += job.get_result().size();
    }
    result_.reserve(matchCount);
    // collect the results
    for (const SearchJob& job : jobList_)
    {
        const ResultList& partial = job.get_result();
        result_.insert(result_.end(), partial.begin(), partial.end());
    }
}
#endif