Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions srcbpatch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ set(SOURCE_FILES
stdafx.cpp
streamreplacer.cpp
timemeasurer.cpp
trie.cpp
)
set(HEADER_FILES
actionscollection.h
Expand All @@ -37,6 +38,7 @@ set(HEADER_FILES
stdafx.h
streamreplacer.h
timemeasurer.h
trie.h
)

# Define the executable target
Expand Down
43 changes: 19 additions & 24 deletions srcbpatch/streamreplacer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "binarylexeme.h"
#include "fileprocessing.h"
#include "streamreplacer.h"
#include "trie.h"

namespace bpatch
{
Expand Down Expand Up @@ -94,7 +95,7 @@ class UsualReplacer final : public ReplacerWithNext

mutable size_t cachedAmount_ = 0; // we cached this amount of data

// this is used to hold temporary data while the logic is
// this is used to hold temporary data while the logic is
// looking for the new beginning of the cached value
mutable vector<char> cachedData_;
};
Expand Down Expand Up @@ -173,7 +174,7 @@ static unique_ptr<StreamReplacer> CreateSimpleReplacer(
/// |--SRC 1 TRG 1 |
/// O - |-- ... | - o
/// |--SRC N TRG N |
///
///
class ChoiceReplacer final : public ReplacerWithNext
{
typedef struct
Expand Down Expand Up @@ -300,7 +301,7 @@ class ChoiceReplacer final : public ReplacerWithNext
mutable size_t cachedAmount_ = 0; // we cached this amount of data
mutable size_t indexOfPartialMatch_ = 0; // this index from rpairs_ represents last partial match

// this is used to hold temporary data while the logic is
// this is used to hold temporary data while the logic is
// looking for the new beginning of the cached value
mutable vector<char> cachedData_;
};
Expand Down Expand Up @@ -359,26 +360,18 @@ class UniformLexemeReplacer final : public ReplacerWithNext
{
const span<const char>& src = alpair.first->access();
const span<const char>& trg = alpair.second->access();
if (auto result = replaceOptions_.insert(
{
string_view(src.data(), src.size()),
string_view(trg.data(), trg.size()),
}); !result.second)
{
cout << coloredconsole::toconsole(warningDuplicatePattern) << endl;
}
trie_.insert(string_view(src.data(), src.size()), string_view(trg.data(), trg.size()));
}
}

void DoReplacements(const char toProcess, const bool aEod) const override;

protected:
// here we hold pairs of sources and targets
unordered_map<string_view, string_view> replaceOptions_;

mutable Trie trie_;
mutable size_t cachedAmount_ = 0; // we cache this amount of data in the cachedData_

// this is used to hold temporary data while the logic is
// this is used to hold temporary data while the logic is
// looking for the new beginning of the cached value
mutable vector<char> cachedData_;
};
Expand All @@ -405,20 +398,22 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod


// set buffer of cached at once
char* const& pBuffer = cachedData_.data();
pBuffer[cachedAmount_++] = toProcess;
if (cachedAmount_ >= cachedData_.size())
cachedData_[cachedAmount_++] = toProcess;
if (cachedAmount_ == cachedData_.size())
{
if (const auto it = replaceOptions_.find(string_view(pBuffer, cachedAmount_)); it != replaceOptions_.cend())
{ // found
string_view trg = it->second;
for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); }
if (auto [target, fullMatch] = trie_.searchFullMatch(std::span<char> (cachedData_.data(), cachedAmount_)); fullMatch)
{
for (char q: target)
{
pNext_->DoReplacements(q, false);
}
cachedAmount_ = 0;
}
else
{ // not found
pNext_->DoReplacements(pBuffer[0], false); // send 1 char
std::shift_left(pBuffer, pBuffer + cachedAmount_--, 1);
{
// not found
pNext_->DoReplacements(cachedData_[0], false);
std::shift_left(cachedData_.begin(), cachedData_.begin() + cachedAmount_--, 1);
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions srcbpatch/trie.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include "trie.h"

void Trie::insert(std::string_view key, std::string_view value)
{
std::reference_wrapper<TrieNode> node = root;
for (char character : key)
{
auto [it, inserted] = node.get().children.emplace(character, nodes.emplace_back());
node = it->second.get();
}
node.get().target = value;
}

/// Follows the characters of `cachedData` from the root, one edge per character.
///
/// @return {target, true} when every character has a matching edge and the node
///         reached at the end carries a target; {empty view, false} otherwise
///         (either an edge is missing or the sequence is only a prefix of some key).
[[nodiscard]] std::pair<std::string_view, bool> Trie::searchFullMatch(std::span<const char> cachedData) const
{
    const TrieNode* current = &root;
    for (const char symbol : cachedData)
    {
        const auto& edges = current->children;
        const auto edge = edges.find(symbol);
        if (edge == edges.end())
        {
            // no key continues with this character
            return {std::string_view(), false};
        }
        current = &edge->second.get();
    }

    // All characters were consumed; it is a full match only if a key ends exactly here.
    if (!current->target.has_value())
    {
        return {std::string_view(), false};
    }
    return {*current->target, true};
}
38 changes: 38 additions & 0 deletions srcbpatch/trie.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include <deque>
#include <functional>
#include <optional>
#include <span>
#include <string_view>
#include <unordered_map>
#include <utility>

/// @brief A node of the prefix tree (Trie).
/// A node does not store its own character: the character is the key under which
/// the parent's `children` map refers to this node.
class TrieNode {
public:
/// Outgoing edges: next character -> child node.
/// The references are non-owning; Trie::insert makes them point at elements of Trie::nodes.
std::unordered_map<char, std::reference_wrapper<TrieNode>> children;
/// Target lexeme of the key that ends at this node.
/// Stays std::nullopt when no inserted key ends here (the node is only a prefix of longer keys).
/// The view is non-owning: it refers to the bytes passed to Trie::insert.
std::optional<std::string_view> target;
};

/// @brief Prefix tree class to speed up UniformLexemeReplacer::DoReplacements
///
/// Nodes refer to each other through std::reference_wrapper into `nodes`.
/// For that reason the class is move-only: a member-wise copy would keep
/// references into the source object's deque and dangle once the source dies.
class Trie final {
public:
    Trie() = default;
    ~Trie() = default;

    /// Copying is disabled: the copied nodes would still reference the nodes of the original.
    Trie(const Trie&) = delete;
    Trie& operator=(const Trie&) = delete;

    /// Moving is allowed: with the default allocator a moved std::deque hands over its
    /// element storage, so the addresses the nodes refer to stay valid.
    Trie(Trie&&) = default;
    Trie& operator=(Trie&&) = default;

    /// <summary>
    /// Adds a new key-value pair to the prefix tree.
    /// The value is stored as a non-owning view; an existing key gets its value overwritten.
    /// </summary>
    /// <param name="key"> source lexeme </param>
    /// <param name="value"> target lexeme </param>
    void insert(std::string_view key, std::string_view value);

    /// <summary>
    /// Looks for a full match in the prefix tree: the whole of cachedData must be an inserted key.
    /// </summary>
    /// <param name="cachedData"> key to find </param>
    /// <returns>string_view: target (empty when not matched), bool: true on full match</returns>
    [[nodiscard]] std::pair<std::string_view, bool> searchFullMatch(std::span<const char> cachedData) const;

private:
    /// The root node, i.e. the empty prefix. It carries a target only if an empty key was inserted.
    TrieNode root;
    /// Holds all non-root nodes of the Trie. Instead of pointers, the nodes use
    /// reference_wrapper to the elements of this deque.
    std::deque<TrieNode> nodes;
};