diff --git a/srcbpatch/CMakeLists.txt b/srcbpatch/CMakeLists.txt index b3446cc..2ad9493 100644 --- a/srcbpatch/CMakeLists.txt +++ b/srcbpatch/CMakeLists.txt @@ -21,6 +21,7 @@ set(SOURCE_FILES stdafx.cpp streamreplacer.cpp timemeasurer.cpp + trie.cpp ) set(HEADER_FILES actionscollection.h @@ -37,6 +38,7 @@ set(HEADER_FILES stdafx.h streamreplacer.h timemeasurer.h + trie.h ) # Define the executable target diff --git a/srcbpatch/streamreplacer.cpp b/srcbpatch/streamreplacer.cpp index bb2ba01..c5c7dc5 100644 --- a/srcbpatch/streamreplacer.cpp +++ b/srcbpatch/streamreplacer.cpp @@ -2,6 +2,7 @@ #include "binarylexeme.h" #include "fileprocessing.h" #include "streamreplacer.h" +#include "trie.h" namespace bpatch { @@ -94,7 +95,7 @@ class UsualReplacer final : public ReplacerWithNext mutable size_t cachedAmount_ = 0; // we cached this amount of data - // this is used to hold temporary data while the logic is + // this is used to hold temporary data while the logic is // looking for the new beginning of the cached value mutable vector cachedData_; }; @@ -173,7 +174,7 @@ static unique_ptr CreateSimpleReplacer( /// |--SRC 1 TRG 1 | /// O - |-- ... | - o /// |--SRC N TRG N | -/// +/// class ChoiceReplacer final : public ReplacerWithNext { typedef struct @@ -300,7 +301,7 @@ class ChoiceReplacer final : public ReplacerWithNext mutable size_t cachedAmount_ = 0; // we cached this amount of data mutable size_t indexOfPartialMatch_ = 0; // this index from rpairs_ represents last partial match - // this is used to hold temporary data while the logic is + // this is used to hold temporary data while the logic is // looking for the new beginning of the cached value mutable vector cachedData_; }; @@ -359,14 +360,7 @@ class UniformLexemeReplacer final : public ReplacerWithNext { const span& src = alpair.first->access(); const span& trg = alpair.second->access(); - if (auto result = replaceOptions_.insert( - { - string_view(src.data(), src.size()), - string_view(trg.data(), trg.size()), - }); !result.second) - { - cout << coloredconsole::toconsole(warningDuplicatePattern) << endl; - } + trie_.insert(string_view(src.data(), src.size()), string_view(trg.data(), trg.size())); } } @@ -374,11 +368,10 @@ class UniformLexemeReplacer final : public ReplacerWithNext protected: // here we hold pairs of sources and targets - unordered_map replaceOptions_; - + mutable Trie trie_; mutable size_t cachedAmount_ = 0; // we cache this amount of data in the cachedData_ - // this is used to hold temporary data while the logic is + // this is used to hold temporary data while the logic is // looking for the new beginning of the cached value mutable vector cachedData_; }; @@ -405,20 +398,22 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod // set buffer of cached at once - char* const& pBuffer = cachedData_.data(); - pBuffer[cachedAmount_++] = toProcess; - if (cachedAmount_ >= cachedData_.size()) + cachedData_[cachedAmount_++] = toProcess; + if (cachedAmount_ == cachedData_.size()) { - if (const auto it = replaceOptions_.find(string_view(pBuffer, cachedAmount_)); it != replaceOptions_.cend()) - { // found - string_view trg = it->second; - for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); } + if (auto [target, fullMatch] = trie_.searchFullMatch(std::span (cachedData_.data(), cachedAmount_)); fullMatch) + { + for (char q: target) + { + pNext_->DoReplacements(q, false); + } cachedAmount_ = 0; } else - { // not found - pNext_->DoReplacements(pBuffer[0], false); // send 1 char - std::shift_left(pBuffer, pBuffer + cachedAmount_--, 1); + { + // not found + pNext_->DoReplacements(cachedData_[0], false); + std::shift_left(cachedData_.begin(), cachedData_.begin() + cachedAmount_--, 1); } } } diff --git a/srcbpatch/trie.cpp b/srcbpatch/trie.cpp new file mode 100644 index 0000000..4ca31a5 --- /dev/null +++ b/srcbpatch/trie.cpp @@ -0,0 +1,34 @@ +#include "trie.h" + +void Trie::insert(std::string_view key, std::string_view value) +{ + std::reference_wrapper node = root; + for (char character : key) + { + auto [it, inserted] = node.get().children.emplace(character, nodes.emplace_back()); + node = it->second.get(); + } + node.get().target = value; +} + +[[nodiscard]] std::pair Trie::searchFullMatch(std::span cachedData) const +{ + std::reference_wrapper node = root; + for (char c : cachedData) + { + auto res = node.get().children.find(c); + if (res == node.get().children.end()) + { + return std::make_pair(std::string_view(), false); + } + node = res->second.get(); + } + + // full match + if (node.get().target) + { + return std::make_pair(node.get().target.value(), true); + } + + return std::make_pair(std::string_view(), false); +} diff --git a/srcbpatch/trie.h b/srcbpatch/trie.h new file mode 100644 index 0000000..fde36b1 --- /dev/null +++ b/srcbpatch/trie.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +/// @brief A node of prefix Trie, each node contains current target (if exist) and child nodes in children. +/// And doesn't contain a char of current node +class TrieNode { + public: + /// a list of child nodes of current node. + std::unordered_map> children; + /// target of current node (if there are node target at this node in our lexemePairs --> target is std::nullopt) + std::optional target; +}; + +/// @brief Prefix tree class to speed up UniformLexemeReplacer::DoReplace +class Trie final{ +public: + /// + /// Adds a new key-value pair in prefix tree + /// + /// source lexeme + /// target lexeme + void insert(std::string_view key, std::string_view value); + + /// + /// Looking for a full match in prefix tree + /// + /// key to find + /// string_view: target, bool: FullMatch + [[nodiscard]] std::pair searchFullMatch(std::span cachedData) const; + +private: + /// a root node, doesn't contains target value + TrieNode root; + /// holds all the nodes of Trie. While default Trie uses pointers, we are going to use reference_wrapper to this deque elements + std::deque nodes; +}; \ No newline at end of file