Compare commits

...

3 Commits

Author SHA1 Message Date
67cab6efd1 - remove testdata length parameter
- added result checks
2021-12-16 13:09:26 +01:00
6e5ddb3c80 Parsing command line args 2021-12-16 12:21:10 +01:00
f8729011e8 made SearchJob a sub-class 2021-12-16 11:35:38 +01:00
5 changed files with 213 additions and 122 deletions

View File

@ -6,6 +6,6 @@ cmake_minimum_required (VERSION 3.8)
project ("ParallelSearch")
# Add the source files to this project's executable.
add_executable (ParallelSearch "main.cpp" "SearchJob.hpp" "TestRunner.hpp" "ParallelSearch.hpp")
add_executable (ParallelSearch "main.cpp" "TestRunner.hpp" "ParallelSearch.hpp" "Parameters.hpp")
# TODO: Add tests and install targets if needed.

View File

@ -5,18 +5,85 @@
#include <vector>
#include <iostream>
#include <thread>
#include "SearchJob.hpp"
// #include "SearchJob.hpp"
// Searches a range of strings for elements that start with a given pattern.
// The range is divided among several SearchJob objects; run() executes each
// job on a std::thread of its own and then gathers the per-job results.
//
// Requirements on Iterator, as used by the member definitions:
//  - the constructor evaluates `end - begin` and `begin + offset`, so the
//    iterator has to support random-access arithmetic;
//  - SearchJob::execute() calls `itr->compare(pos, len, str)`, so the element
//    type has to provide a std::string-style compare().
// The results are iterators into the caller's range; they are only usable
// while that range is alive and unmodified.
template <typename Iterator>
class ParallelSearch
{
public:
// Type of the search result: one iterator per matching element
typedef std::vector<Iterator> ResultList;
// NOTE(review): the declaration below has no terminating ';' and is repeated
// (with ';') after the nested SearchJob class. It looks like a removed line
// that the compare view left in place - confirm and drop it before compiling.
// construct parallel search by splitting the search range across <workerCount> SearchJob objects
ParallelSearch(unsigned workerCount, Iterator begin, Iterator end, const std::string& pattern)
// One slice of the overall search: scans its own sub-range and keeps the
// matches it finds in a private result list.
class SearchJob
{
public:
// initially reserve n entries in result-list (to cut down number of reallocations)
enum { INITIAL_RESERVE = 128 };
// Create partial search job for the range [begin, end); the pattern is copied into the job
SearchJob(Iterator begin, Iterator end, const std::string& pattern);
// Destructor (nothing to release)
~SearchJob() {}
// run the search job and collect results internally;
// matches are appended, so results of an earlier call are kept
void execute();
// return the matches collected by execute() so far
const ResultList& get_result() const { return result_; };
private:
// pattern to search for (an element matches when it starts with this text)
std::string pattern_;
// start of search range
Iterator begin_;
// end of search range (one past the last element to inspect)
Iterator end_;
// result of the search
ResultList result_;
};
// construct parallel search by splitting the search range across <workerCount> SearchJob objects
// NOTE: workerCount must not be 0 - the constructor divides the element count by it.
ParallelSearch(unsigned workerCount, Iterator begin, Iterator end, const std::string& pattern);
// Destructor (nothing to release)
~ParallelSearch() {}
// run the search workers (one std::thread per job) and collect results;
// the per-job results are appended to the overall result in job order
void run();
// return the combined result gathered by run()
const ResultList& get_result() const { return result_; };
private:
// jobs to execute, one per slice of the search range
std::vector<SearchJob> jobList_;
// result of the search
ResultList result_;
};
// Create a partial search job for the range [begin, end).
// The pattern is copied, so the job does not depend on the lifetime of the
// caller's string.
//
// The initializers are written in the order in which the members are declared
// in the class (pattern_, begin_, end_). Members are always initialized in
// declaration order regardless of how the list is written, so listing them
// differently only misleads the reader and triggers -Wreorder.
template <typename Iterator>
ParallelSearch<Iterator>::SearchJob::SearchJob(Iterator begin, Iterator end, const std::string& pattern)
    : pattern_(pattern),
      begin_(begin),
      end_(end)
{
    // reserve up front to cut down the number of reallocations while matching
    result_.reserve(INITIAL_RESERVE);
}
// Walk the job's range front to back and remember every element that starts
// with the search pattern. Matches are stored as iterators, in the order in
// which they are encountered, and are appended to whatever the result list
// already holds.
template <typename Iterator>
void ParallelSearch<Iterator>::SearchJob::execute()
{
    Iterator current = begin_;
    while (current != end_)
    {
        // compare only the leading pattern_.size() characters of the element
        const bool startsWithPattern = (current->compare(0, pattern_.size(), pattern_) == 0);
        if (startsWithPattern)
        {
            result_.push_back(current);
        }
        ++current;
    }
}
template <typename Iterator>
ParallelSearch<Iterator>::ParallelSearch(unsigned workerCount, Iterator begin, Iterator end, const std::string& pattern)
{
std::size_t totalCount = end - begin;
std::size_t bucketSize = totalCount / workerCount;
std::size_t remains = totalCount % workerCount;
@ -30,28 +97,23 @@ public:
Iterator last = begin + offset + size;
// add appropriate search job
jobList_.push_back(SearchJob<Iterator>(first, last, pattern));
jobList_.push_back(SearchJob(first, last, pattern));
// increment offset
offset += size;
// clear remains (it has been added to first bucket)
remains = 0;
}
}
};
// Destructor
~ParallelSearch()
{
}
// run the search workers and collect results
void run()
{
template <typename Iterator>
void ParallelSearch<Iterator>::run()
{
// start thread for each job
std::vector<std::thread> threads;
for (SearchJob<Iterator>& job : jobList_)
for (SearchJob& job : jobList_)
{
threads.push_back(std::thread(&SearchJob<Iterator>::run, &job));
threads.push_back(std::thread(&SearchJob::execute, &job));
}
// wait for all threads to join
@ -61,19 +123,10 @@ public:
}
// collect the results
for (SearchJob<Iterator>& job : jobList_)
for (SearchJob& job : jobList_)
{
result_.insert(result_.end(), job.get_result().begin(), job.get_result().end());
}
}
}
// return the result
const ResultList& get_result() const { return result_; };
private:
// jobs to execute
std::vector<SearchJob<Iterator>> jobList_;
// result of the search
ResultList result_;
};
#endif

60
Parameters.hpp Normal file
View File

@ -0,0 +1,60 @@
#ifndef PARAMETERS_HPP
#define PARAMETERS_HPP
#include <string>
#include <iostream>
#include <thread>
// Command line parameters of the ParallelSearch demo.
//
// Recognised arguments:
//   -wX     number of parallel workers; X must be a positive decimal number
//   <text>  any argument without a leading dash is taken as the search
//           pattern (if several are given, the last one wins)
// Every other argument that starts with '-' is rejected.
class Parameters
{
public:
    // Start with one worker per hardware thread and an empty search pattern.
    // std::thread::hardware_concurrency() may return 0 when the value cannot
    // be determined; fall back to a single worker in that case, because a
    // worker count of 0 cannot be used to split the search range.
    Parameters() :
        workerCount_(std::thread::hardware_concurrency()),
        searchPattern_()
    {
        if (workerCount_ == 0)
        {
            workerCount_ = 1;
        }
    }

    // Parse the command line (argv[0] is skipped).
    // Returns true when every argument was understood. Returns false on the
    // first invalid argument: an unknown option, or a "-w" option whose value
    // is missing, not a plain decimal number, zero, or too large for unsigned.
    // Arguments processed before the invalid one have already been applied.
    bool parseCmdLine(int argc, char** argv)
    {
        for (int i = 1; i < argc; ++i)
        {
            const std::string arg = argv[i];

            if (0 == arg.compare(0, 2, "-w"))
            {
                // malformed numbers are reported as a usage error instead of
                // letting a conversion exception terminate the program
                if (!parseWorkerCount(arg.substr(2), workerCount_))
                {
                    return false;
                }
            }
            else if (0 == arg.compare(0, 1, "-"))
            {
                // invalid option
                return false;
            }
            else
            {
                // no leading dash ... must be pattern
                searchPattern_ = arg;
            }
        }

        return true;
    }

    // Print an error line followed by the usage text to std::cerr.
    // cmd is the program name shown in the usage line (typically argv[0]).
    void show_usage(const char* cmd) const
    {
        std::cerr << "Invalid command line args" << std::endl;
        std::cerr
            << "Usage: " << cmd << " [options] [search_pattern]" << std::endl
            << "Options" << std::endl
            << " -wX Number of parallel workers" << std::endl
            << std::endl;
    }

    // Number of parallel workers to use (always at least 1).
    unsigned WorkerCount() const { return workerCount_; }

    // Pattern to search for (empty when none was given on the command line).
    std::string SearchPattern() const { return searchPattern_; }

private:
    // Convert the text following "-w" into a worker count.
    // Accepts decimal digits only and requires a result in [1, max unsigned].
    // On success stores the value in 'out' and returns true; otherwise leaves
    // 'out' untouched and returns false.
    static bool parseWorkerCount(const std::string& digits, unsigned& out)
    {
        if (digits.empty())
        {
            return false;
        }

        const unsigned maxValue = ~0u;
        unsigned value = 0;
        for (const char c : digits)
        {
            if (c < '0' || c > '9')
            {
                return false;
            }
            const unsigned digit = static_cast<unsigned>(c - '0');
            // value * 10 + digit would exceed maxValue
            if (value > (maxValue - digit) / 10)
            {
                return false;
            }
            value = value * 10 + digit;
        }

        if (value == 0)
        {
            return false;
        }

        out = value;
        return true;
    }

    unsigned workerCount_;
    std::string searchPattern_;
};
#endif

View File

@ -1,54 +0,0 @@
#ifndef SEARCHJOB_HPP
#define SEARCHJOB_HPP
#include <string>
#include <vector>
// Searches the range [begin, end) for elements that start with a pattern and
// remembers an iterator to each match.
//
// The element type must provide a std::string-style compare(pos, len, str).
// The stored iterators refer to the caller's range and are only usable while
// that range is alive and unmodified.
//
// No destructor or copy/move members are declared: the implicitly generated
// ones are correct, and leaving the destructor undeclared keeps the implicit
// move operations available, so a job can be moved into a container instead
// of being copied.
template <typename Iterator>
class SearchJob
{
public:
    // initially reserve n entries in result-list (to cut down number of reallocations)
    enum { INITIAL_RESERVE = 128 };

    // Type of the search result: one iterator per matching element
    typedef std::vector<Iterator> ResultList;

    // Create partial search job for the range [begin, end); the pattern is
    // copied into the job. Initializers follow the declaration order of the
    // members, which is the order in which they are actually initialized.
    SearchJob(Iterator begin, Iterator end, const std::string& pattern)
        : pattern_(pattern),
          begin_(begin),
          end_(end)
    {
        result_.reserve(INITIAL_RESERVE);
    }

    // Run the search job and collect results internally.
    // Matches are appended in the order they occur in the range; calling
    // run() again appends the same matches a second time.
    void run()
    {
        const std::size_t prefixLength = pattern_.size();
        for (Iterator itr = begin_; itr != end_; ++itr)
        {
            // only the leading prefixLength characters have to agree
            if (0 == itr->compare(0, prefixLength, pattern_))
            {
                // found a match, save the iterator
                result_.push_back(itr);
            }
        }
    }

    // return the matches collected so far
    const ResultList& get_result() const { return result_; }

private:
    // pattern to search for
    std::string pattern_;
    // start of search range
    Iterator begin_;
    // end of search range (one past the last element to inspect)
    Iterator end_;
    // result of the search
    ResultList result_;
};
#endif

View File

@ -3,18 +3,14 @@
#include <vector>
#include <algorithm>
#include <random>
#include <thread>
#include "SearchJob.hpp"
#include "ParallelSearch.hpp"
#include "TestRunner.hpp"
#include "Parameters.hpp"
// Type of our hay stack: a list of strings
typedef std::vector<std::string> WordList;
typedef SearchJob<WordList::const_iterator> WordSearch;
typedef ParallelSearch<WordList::const_iterator> ParallelWordSearch;
void create_testdata(WordList& words)
@ -27,7 +23,9 @@ void create_testdata(WordList& words)
for (str[1] = start; str[1] <= end; str[1]++)
for (str[2] = start; str[2] <= end; str[2]++)
for (str[3] = start; str[3] <= end; str[3]++)
{
words.push_back(std::string(str, 4));
}
}
@ -37,41 +35,75 @@ void shuffle_testdata(WordList& words)
std::shuffle(words.begin(), words.end(), rng);
}
void run_wordsearch(WordSearch& search)
// Execute the given search by calling its run() method.
// main() hands this function to TestRunner together with the search object
// (presumably so that the call can be timed - confirm against TestRunner.hpp).
void run_wordsearch(ParallelWordSearch& search)
{
search.run();
}
void run_parallel_wordsearch(ParallelWordSearch& search)
// Check a search result for plausibility and print a short report to std::cout.
//
// Two checks are made:
//  1. the number of matches equals 26^(4 - pattern.size()). This assumes the
//     test data holds every 4-character word over a 26-symbol alphabet
//     exactly once - TODO confirm against create_testdata. A pattern longer
//     than a test word cannot match anything, so 0 matches are expected then.
//  2. every reported match really starts with the pattern; each offending
//     word is printed.
//
// haystack: the searched word list (currently not consulted)
// pattern:  the prefix that was searched for
// result:   iterators to the words reported as matches
void check_result(const WordList& haystack, const std::string& pattern, const ParallelWordSearch::ResultList& result)
{
    (void)haystack; // part of the interface, not needed by the present checks

    // check number of matches
    const std::size_t TESTPATTERN_LENGTH = 4;
    std::size_t expected_count = 0;
    if (pattern.size() <= TESTPATTERN_LENGTH)
    {
        // exact integer power: one factor of 26 per character left undetermined by the pattern
        expected_count = 1;
        for (std::size_t free_chars = TESTPATTERN_LENGTH - pattern.size(); free_chars > 0; --free_chars)
        {
            expected_count *= 26;
        }
    }
    std::cout << "Number of matches expected: " << expected_count << " got: " << result.size() << " --> " << ((result.size() == expected_count) ? "OK" : "ERROR") << std::endl;

    // verify matches contain the search-pattern
    std::size_t valid_count = 0;
    for (const WordList::const_iterator& itr : result)
    {
        // prefix comparison in place, without building a temporary substring
        if (0 == itr->compare(0, pattern.size(), pattern))
        {
            ++valid_count;
        }
        else
        {
            std::cout << "Found mismatch: " << *itr << std::endl;
        }
    }
    std::cout << "Verified " << valid_count << " matches --> " << ((result.size() == valid_count) ? "OK" : "ERROR") << std::endl;
}
int main()
int main(int argc, char** argv)
{
Parameters parameters;
double time_span;
std::string pattern = "ABC";
unsigned workerCount = std::thread::hardware_concurrency();
WordList test_data;
std::cout << "ParallelSearch Demo" << std::endl << std::endl;
if (false == parameters.parseCmdLine(argc, argv))
{
parameters.show_usage(argv[0]);
exit(1);
}
std::cout << "Workercount: " << parameters.WorkerCount() << std::endl;
std::cout << "Search pattern: " << parameters.SearchPattern() << std::endl;
std::cout << std::endl;
time_span = TestRunner(create_testdata, test_data);
std::cout << "created test_data (" << test_data.size() << " words) " << time_span << " seconds" << std::endl;
time_span = TestRunner(shuffle_testdata, test_data);
std::cout << "shuffled test_data " << time_span << " seconds" << std::endl;
WordSearch wordSearch(test_data.begin(), test_data.end(), pattern);
ParallelWordSearch wordSearch(1, test_data.begin(), test_data.end(), parameters.SearchPattern());
time_span = TestRunner(run_wordsearch, wordSearch);
std::cout << "linear search found " << wordSearch.get_result().size() << " matches: " << time_span << " seconds" << std::endl;
ParallelWordSearch parallelWordSearch(workerCount, test_data.begin(), test_data.end(), pattern);
time_span = TestRunner(run_parallel_wordsearch, parallelWordSearch);
ParallelWordSearch parallelWordSearch(parameters.WorkerCount(), test_data.begin(), test_data.end(), parameters.SearchPattern());
time_span = TestRunner(run_wordsearch, parallelWordSearch);
std::cout << "parallel search found " << parallelWordSearch.get_result().size() << " matches: " << time_span << " seconds" << std::endl;
// TODO add validation of results
// TODO brush up output
// TODO command line args for workerCount, test-pattern generation, search pattern ?
std::cout << std::endl;
std::cout << "checking result of parallel search ..." << std::endl;
check_result(test_data, parameters.SearchPattern(), parallelWordSearch.get_result());
std::cout << std::endl;
std::cout << "Done." << std::endl;
std::cout << std::endl;