diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt index 62fc96c4ea..359b616a1b 100644 --- a/spelling-whitelist.txt +++ b/spelling-whitelist.txt @@ -25,4 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly -src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter +src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp diff --git a/src/llm/BUILD b/src/llm/BUILD index 92ca4d9c70..d9e6e8c334 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -172,6 +172,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w ], deps = [ "@com_github_tencent_rapidjson//:rapidjson", + "//src/port:rapidjson_document", "//src:libovmslogging", "//src:libovmsstring_utils", ":partial_json_builder", diff --git a/src/llm/io_processing/base_output_parser.hpp b/src/llm/io_processing/base_output_parser.hpp index 3a020eabf1..943ccaa331 100644 --- a/src/llm/io_processing/base_output_parser.hpp +++ b/src/llm/io_processing/base_output_parser.hpp @@ -97,15 +97,15 @@ class BaseOutputParser { // Otherwise we return a JSON object containing the delta that conforms to OpenAI API. virtual std::optional parseChunk(const std::string& chunkResponse, ov::genai::GenerationFinishReason finishReason) = 0; - // Get the tag that marks the beginning of the segment that should be processed by the parser. + // Get the tags that mark the beginning of the segment that should be processed by the parser. // This method is used in streaming mode to determine if the parser should start processing the content. // If empty string is returned, it means that the parser will never start processing the content. 
- virtual const std::string& getParsingStartTag() const = 0; + virtual const std::vector& getParsingStartTags() const = 0; // Get a vector of additional tags that mark beginning of the segment that should be processed by the parser. // These tags are considered only if they are the first output produced by the model. // In streaming mode it means that they are considered only in UNKNOWN phase. - virtual const std::unordered_set& getSpecialParsingStartTags() const = 0; + virtual const std::vector& getSpecialParsingStartTags() const = 0; // Get the tag that marks the end of the segment that should be processed by the parser. // This method is used in streaming mode to determine if the parser should stop processing the content. diff --git a/src/llm/io_processing/gptoss/reasoning_parser.cpp b/src/llm/io_processing/gptoss/reasoning_parser.cpp index c48d072694..340716211f 100644 --- a/src/llm/io_processing/gptoss/reasoning_parser.cpp +++ b/src/llm/io_processing/gptoss/reasoning_parser.cpp @@ -18,12 +18,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "../../../stringutils.hpp" @@ -56,10 +51,10 @@ std::optional GptOssReasoningParser::parseChunk(const std:: StreamState lastState = state; - if (startsWith(chunk, getParsingStartTag())) { + if (startsWith(chunk, getParsingStartTags()[0])) { // Final content state = StreamState::READING_REASONING; - chunk = chunk.substr(getParsingStartTag().size()); + chunk = chunk.substr(getParsingStartTags()[0].size()); } else if (startsWith(chunk, "<|start|>assistant<|channel|>final<|message|>")) { // Final content state = StreamState::READING_CONTENT; diff --git a/src/llm/io_processing/gptoss/reasoning_parser.hpp b/src/llm/io_processing/gptoss/reasoning_parser.hpp index 705f048ce2..83f080f083 100644 --- a/src/llm/io_processing/gptoss/reasoning_parser.hpp +++ 
b/src/llm/io_processing/gptoss/reasoning_parser.hpp @@ -17,15 +17,9 @@ #include #include -#include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../base_output_parser.hpp" @@ -58,12 +52,15 @@ class GptOssReasoningParser : public BaseOutputParser { // Streaming std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return parsingStartTag; + const std::vector& getParsingStartTags() const override { + // If you add another element you have to update the implementation as well, + // as it mostly assumes just one element + static const std::vector parsingStartTags{parsingStartTag}; + return parsingStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = { + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags = { "<|channel|>final<|message|>", "<|channel|>commentary<|message|>", // Preable to reasoning, users usually sees that "<|start|>assistant<|channel|>final<|message|>", // Final content users sees diff --git a/src/llm/io_processing/gptoss/tool_parser.cpp b/src/llm/io_processing/gptoss/tool_parser.cpp index 3497e3929e..1a2e19d45e 100644 --- a/src/llm/io_processing/gptoss/tool_parser.cpp +++ b/src/llm/io_processing/gptoss/tool_parser.cpp @@ -19,12 +19,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "../../../stringutils.hpp" @@ -91,7 +86,7 @@ std::optional GptOssToolParser::parseChunk(const std::strin std::string chunk = newChunk; std::optional result; - if (chunk.find(getParsingStartTag()) != std::string::npos) { + if 
(chunk.find(getParsingStartTags()[0]) != std::string::npos) { toolCallIndex++; // starting with -1, first call will be 0 return std::nullopt; } diff --git a/src/llm/io_processing/gptoss/tool_parser.hpp b/src/llm/io_processing/gptoss/tool_parser.hpp index 2c713be771..c7015b809d 100644 --- a/src/llm/io_processing/gptoss/tool_parser.hpp +++ b/src/llm/io_processing/gptoss/tool_parser.hpp @@ -18,15 +18,9 @@ #include #include #include -#include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../base_output_parser.hpp" @@ -63,12 +57,13 @@ class GptOssToolParser : public BaseOutputParser { // Streaming std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return parsingStartTag; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags{parsingStartTag}; + return parsingStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = {}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags = {}; return specialParsingStartTags; } diff --git a/src/llm/io_processing/hermes3/tool_parser.cpp b/src/llm/io_processing/hermes3/tool_parser.cpp index 9b1d533031..ccef2e524a 100644 --- a/src/llm/io_processing/hermes3/tool_parser.cpp +++ b/src/llm/io_processing/hermes3/tool_parser.cpp @@ -18,12 +18,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "tool_parser.hpp" diff --git a/src/llm/io_processing/hermes3/tool_parser.hpp b/src/llm/io_processing/hermes3/tool_parser.hpp index 47f5a57c7e..043096875d 
100644 --- a/src/llm/io_processing/hermes3/tool_parser.hpp +++ b/src/llm/io_processing/hermes3/tool_parser.hpp @@ -18,15 +18,9 @@ #include #include #include -#include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "src/llm/io_processing/base_output_parser.hpp" #include "src/llm/io_processing/partial_json_builder.hpp" @@ -82,11 +76,12 @@ class Hermes3ToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return parsingStartTag; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags = {parsingStartTag}; + return parsingStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set beginningOnlyTags = {}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector beginningOnlyTags = {}; return beginningOnlyTags; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. 
diff --git a/src/llm/io_processing/llama3/tool_parser.cpp b/src/llm/io_processing/llama3/tool_parser.cpp index 2ff6f6dfdd..b18d041405 100644 --- a/src/llm/io_processing/llama3/tool_parser.cpp +++ b/src/llm/io_processing/llama3/tool_parser.cpp @@ -19,12 +19,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "tool_parser.hpp" diff --git a/src/llm/io_processing/llama3/tool_parser.hpp b/src/llm/io_processing/llama3/tool_parser.hpp index 2293b14b14..3e1d0d9eae 100644 --- a/src/llm/io_processing/llama3/tool_parser.hpp +++ b/src/llm/io_processing/llama3/tool_parser.hpp @@ -18,15 +18,9 @@ #include #include #include -#include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "src/llm/io_processing/base_output_parser.hpp" #include "src/llm/io_processing/partial_json_builder.hpp" @@ -62,11 +56,12 @@ class Llama3ToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return parsingStartTag; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags = {parsingStartTag}; + return parsingStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = {"{"}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags = {"{"}; return specialParsingStartTags; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. 
diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp index 2c97980742..c6ca6b7a56 100644 --- a/src/llm/io_processing/mistral/tool_parser.cpp +++ b/src/llm/io_processing/mistral/tool_parser.cpp @@ -19,12 +19,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "tool_parser.hpp" diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp index dafbaeecec..d2e6d4a8d3 100644 --- a/src/llm/io_processing/mistral/tool_parser.hpp +++ b/src/llm/io_processing/mistral/tool_parser.hpp @@ -18,15 +18,9 @@ #include #include #include -#include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "src/llm/io_processing/base_output_parser.hpp" @@ -41,12 +35,12 @@ class MistralToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - static const std::string toolCallStartTag = "[TOOL_CALLS]"; - return toolCallStartTag; + const std::vector& getParsingStartTags() const override { + static const std::vector toolCallStartTags{"[TOOL_CALLS]"}; + return toolCallStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = {}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags{}; return specialParsingStartTags; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. 
diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp index b41ee5ed27..a774b0bf39 100644 --- a/src/llm/io_processing/output_parser.cpp +++ b/src/llm/io_processing/output_parser.cpp @@ -34,7 +34,6 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s if (tag.empty()) { return TagLookupStatus::NOT_FOUND; } - if (tag.size() > buffer.size()) { /* If the tag is longer than the buffer, we check if the buffer and tag overlap (either partially or fully for exact match) @@ -79,7 +78,7 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s } } -OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::unordered_set& tags) const { +OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::vector& tags) const { // We look for multiple tags and return the status in the following priority: FOUND COMPLETE > FOUND_INCOMPLETE > NOT_FOUND TagLookupStatus finalTagLookupStatus = TagLookupStatus::NOT_FOUND; for (const auto& tag : tags) { @@ -157,7 +156,6 @@ std::optional OutputParser::parseReasoningChunk(ov::genai:: OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string toolParserName, const std::string reasoningParserName, const ToolsSchemas_t& toolNameSchemaMap) : tokenizer(tokenizer) { - SPDLOG_TRACE("OutputParser created with toolNameSchemaMap of size: {}", toolNameSchemaMap.size()); if (toolParserName == "llama3") { toolParser = std::make_unique(tokenizer); } else if (toolParserName == "hermes3") { @@ -208,7 +206,7 @@ void OutputParser::enableImmediateToolParsing() { std::string OutputParser::getToolParserStartTag() const { if (toolParser) { - return toolParser->getParsingStartTag(); + return toolParser->getParsingStartTags()[0]; } else { throw std::runtime_error("Tool parser is not available, cannot get start tag"); } @@ -243,13 +241,13 @@ std::optional OutputParser::parseChunk(const std::string& c so 
only use those methods or return nullopt. */ - bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTag().empty() && !reasoningParser->getParsingEndTag().empty(); - bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTag().empty(); + bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTags().empty() && !reasoningParser->getParsingEndTag().empty(); + bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTags().empty(); bool applyToolParser = toolParserExistsAndSupportsStreaming && toolsAvailable; if (applyToolParser && toolParser->isImmediateParsingEnabled() && processingPhase == UNKNOWN) { // If zero trigger parsing is enabled, we assume the start tag has been injected to the prompt. - streamOutputCache.add(toolParser->getParsingStartTag()); + streamOutputCache.add(getToolParserStartTag()); } streamOutputCache.add(chunkResponse); @@ -259,7 +257,7 @@ std::optional OutputParser::parseChunk(const std::string& c TagLookupStatus anyStartTagStatus = TagLookupStatus::NOT_FOUND; if (reasoningParserExistsAndSupportsStreaming) { // Check if reasoning start tag has been received - TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTag(reasoningParser->getParsingStartTag()); + TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getParsingStartTags()); if (reasoningStartTagStatus == TagLookupStatus::NOT_FOUND) { // If reasoning start tag is not found, check if any of the special start tags are found reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getSpecialParsingStartTags()); @@ -277,7 +275,7 @@ std::optional OutputParser::parseChunk(const std::string& c return parseToolCallChunk(finishReason); } else { // Check if tool call start tag has been received - TagLookupStatus toolCallStartTagStatus = 
streamOutputCache.lookupTag(toolParser->getParsingStartTag()); + TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags()); if (toolCallStartTagStatus == TagLookupStatus::NOT_FOUND) { // If tool call start tag is not found, check if any of the special start tags are found toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getSpecialParsingStartTags()); @@ -311,7 +309,7 @@ std::optional OutputParser::parseChunk(const std::string& c // If we are in the CONTENT phase, we check if tool parser start tag is found and if so, switch to TOOL_CALLS phase. // TOOL_CALLS is the only phase that can be processed after CONTENT. if (applyToolParser) { - TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag()); + TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags()); if (toolStartTagStatus == TagLookupStatus::FOUND_COMPLETE) { return parseToolCallChunk(finishReason); } else if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) { @@ -335,7 +333,7 @@ std::optional OutputParser::parseChunk(const std::string& c } else if (processingPhase == TOOL_CALLS_WAITING_FOR_TOOL) { // In this phase we are waiting for next tool call or finish of generation. // If we get next tool call start tag, we switch to TOOL_CALLS phase, otherwise if generation finishes we switch to CONTENT phase to flush any remaining content. 
- TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag()); + TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags()); if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) { return std::nullopt; // Wait for more chunks to determine if start tag is complete } diff --git a/src/llm/io_processing/output_parser.hpp b/src/llm/io_processing/output_parser.hpp index 2fd81017b7..cb3becc41f 100644 --- a/src/llm/io_processing/output_parser.hpp +++ b/src/llm/io_processing/output_parser.hpp @@ -19,7 +19,6 @@ #include #include #include -#include #include "base_output_parser.hpp" @@ -41,7 +40,7 @@ class OutputParser { public: TagLookupStatus lookupTag(const std::string& tag) const; - TagLookupStatus lookupTags(const std::unordered_set& tags) const; + TagLookupStatus lookupTags(const std::vector& tags) const; void add(const std::string& chunk); void clear(); const std::string& getBuffer() const; diff --git a/src/llm/io_processing/phi4/tool_parser.cpp b/src/llm/io_processing/phi4/tool_parser.cpp index 1fab2a015f..51ab7d698d 100644 --- a/src/llm/io_processing/phi4/tool_parser.cpp +++ b/src/llm/io_processing/phi4/tool_parser.cpp @@ -19,12 +19,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "tool_parser.hpp" diff --git a/src/llm/io_processing/phi4/tool_parser.hpp b/src/llm/io_processing/phi4/tool_parser.hpp index 1510a2c997..aa5b0a5457 100644 --- a/src/llm/io_processing/phi4/tool_parser.hpp +++ b/src/llm/io_processing/phi4/tool_parser.hpp @@ -18,15 +18,9 @@ #include #include #include -#include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include 
"src/llm/io_processing/base_output_parser.hpp" #include "src/llm/io_processing/partial_json_builder.hpp" @@ -75,11 +69,12 @@ class Phi4ToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return parsingStartTag; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags = {this->parsingStartTag}; + return parsingStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = {}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags = {}; return specialParsingStartTags; } // Tools calls are expected to be the last part of the content, so we do not specify an end tag. 
diff --git a/src/llm/io_processing/qwen3/reasoning_parser.cpp b/src/llm/io_processing/qwen3/reasoning_parser.cpp index f7dccded40..87b3614920 100644 --- a/src/llm/io_processing/qwen3/reasoning_parser.cpp +++ b/src/llm/io_processing/qwen3/reasoning_parser.cpp @@ -18,12 +18,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" #include "reasoning_parser.hpp" @@ -31,7 +26,7 @@ namespace ovms { void Qwen3ReasoningParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { - std::string startReasoningTag = getParsingStartTag(); + std::string startReasoningTag = getParsingStartTags()[0]; std::string endReasoningTag = getParsingEndTag(); size_t startPos = parsedOutput.content.find(startReasoningTag); size_t endPos = parsedOutput.content.find(endReasoningTag); @@ -52,7 +47,7 @@ std::optional Qwen3ReasoningParser::parseChunk(const std::s return std::nullopt; } - if (chunk.find(getParsingStartTag()) != std::string::npos || chunk.find(getParsingEndTag()) != std::string::npos) { + if (chunk.find(getParsingStartTags()[0]) != std::string::npos || chunk.find(getParsingEndTag()) != std::string::npos) { return std::nullopt; } else { rapidjson::StringBuffer buffer; diff --git a/src/llm/io_processing/qwen3/reasoning_parser.hpp b/src/llm/io_processing/qwen3/reasoning_parser.hpp index 2be0f57e19..6254e874e5 100644 --- a/src/llm/io_processing/qwen3/reasoning_parser.hpp +++ b/src/llm/io_processing/qwen3/reasoning_parser.hpp @@ -20,12 +20,7 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#pragma warning(pop) +#include "src/port/rapidjson_document.hpp" #include "../base_output_parser.hpp" @@ -43,11 +38,12 @@ class Qwen3ReasoningParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional 
parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return parsingStartTag; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags{this->parsingStartTag}; + return parsingStartTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = {}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags{}; return specialParsingStartTags; } const std::string& getParsingEndTag() const override { diff --git a/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.cpp b/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.cpp index d90b816fb9..0479067fb6 100644 --- a/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.cpp +++ b/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.cpp @@ -127,6 +127,22 @@ static const ParametersTypeMap_t parseToolSchema(const std::string& functionName return result; } +// helper function to escape \n +static std::string escapeString(const std::string& input) { + std::string output; + output.reserve(input.size()); + for (char c : input) { + switch (c) { + case '\n': + output += "\\n"; + break; + default: + output += c; + } + } + return output; +} + static std::string setCorrectValueType(std::string& inputValue, const std::string& currentParameterName, const ParametersTypeMap_t& parametersType) { auto paramIt = parametersType.find(currentParameterName); if (paramIt == parametersType.end()) { @@ -157,10 +173,24 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& toolCalls) { auto previousState = this->currentState; switch (this->currentState) { case State::Content: { - DEFINE_TAG_POSITION_AND_BREAK_IF_NOT_FOUND(Qwen3CoderToolParser::TOOL_START_TAG); - this->lastProcessedPosition = pos + 
Qwen3CoderToolParser::TOOL_START_TAG.length(); - this->currentState = State::InsideToolCall; - this->toolCallPositions.begin.push(pos); + // normally we expect the TOOL_START_TAG tag, but we observed that the model sometimes generates FUNCTION_NAME_TAG directly, + // so we check for both tags and handle accordingly + auto posTool = this->streamContent.find(Qwen3CoderToolParser::TOOL_START_TAG, this->getLastProcessedPosition()); + auto posFunc = this->streamContent.find(Qwen3CoderToolParser::FUNCTION_NAME_TAG, this->getLastProcessedPosition()); + if (posFunc == std::string::npos && posTool == std::string::npos) { + SPDLOG_TRACE("Did not find: {} or {}", Qwen3CoderToolParser::TOOL_START_TAG, Qwen3CoderToolParser::FUNCTION_NAME_TAG); + } else if (posTool < posFunc) { + // TOOL_START_TAG found first + this->lastProcessedPosition = posTool + Qwen3CoderToolParser::TOOL_START_TAG.length(); + this->currentState = State::InsideToolCall; + this->toolCallPositions.begin.push(posTool); + } else { + // FUNCTION_NAME_TAG found first (or TOOL_START_TAG absent); we assume TOOL_START_TAG is missing + SPDLOG_DEBUG("Did not find: {}, assuming it should exist", Qwen3CoderToolParser::TOOL_START_TAG); + this->lastProcessedPosition = posFunc + Qwen3CoderToolParser::FUNCTION_NAME_TAG.length(); + this->currentState = State::InsideFunctionName; + this->toolCallPositions.begin.push(posFunc); + } break; } case State::InsideToolCall: { @@ -180,7 +210,6 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& toolCalls) { auto funcEnd = streamContent.find(Qwen3CoderToolParser::FUNCTION_END_TAG, this->lastProcessedPosition); auto paramStart = streamContent.find(Qwen3CoderToolParser::PARAMETER_NAME_TAG, this->lastProcessedPosition); if (funcEnd == std::string::npos && paramStart == std::string::npos) { - break; } else if (paramStart < funcEnd) { // next parameter this->lastProcessedPosition = paramStart + Qwen3CoderToolParser::PARAMETER_NAME_TAG.length(); this->currentState = State::InsideParameterName; @@ -207,7 +236,7 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& 
toolCalls) { if (paramIt == this->toolsParametersTypeMap.end()) { SPDLOG_DEBUG("Tool schema not found for tool: {}, leaving parameter: {} as string", this->currentFunction.name, this->currentParameterName); } else { - parameterValue = setCorrectValueType(parameterValue, this->currentParameterName, paramIt->second); + parameterValue = escapeString(setCorrectValueType(parameterValue, this->currentParameterName, paramIt->second)); } auto res = this->currentFunction.parameters.try_emplace(this->currentParameterName, parameterValue); if (!res.second) @@ -249,7 +278,7 @@ std::optional Qwen3CoderToolParserImpl::parseChunk(const std::strin } static ToolsParameterTypeMap_t createToolsParametersTypesMap(const ToolsSchemas_t& toolsSchemas) { - SPDLOG_TRACE("Creating tools parameters types map"); + SPDLOG_TRACE("Creating tools parameters types map with schemas size: {}", toolsSchemas.size()); ToolsParameterTypeMap_t toolsParametersTypes; for (const auto& [toolName, toolSchemaWrapper] : toolsSchemas) { const auto& toolSchemaStringRepr = toolSchemaWrapper.stringRepr; @@ -267,13 +296,13 @@ void Qwen3CoderToolParser::lazyFillInitToolParametersTypesMap() { SPDLOG_DEBUG("Filling tools parameters types map"); this->toolsParametersTypes = createToolsParametersTypesMap(this->toolSchemas); this->filledParametersTypesMap = true; + SPDLOG_DEBUG("Qwen3CoderToolParser created with {} tools", this->toolsParametersTypes.size()); } Qwen3CoderToolParser::Qwen3CoderToolParser(ov::genai::Tokenizer& tokenizer, const ToolsSchemas_t& toolSchemas) : BaseOutputParser(tokenizer), toolSchemas(toolSchemas), streamParser(this->toolsParametersTypes) { - SPDLOG_DEBUG("Qwen3CoderToolParser created with {} tools", toolsParametersTypes.size()); } void Qwen3CoderToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { diff --git a/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.hpp b/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.hpp index 48abf45fb9..dd1d6593d0 
100644 --- a/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.hpp +++ b/src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.hpp @@ -144,11 +144,12 @@ class Qwen3CoderToolParser : public BaseOutputParser { void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; - const std::string& getParsingStartTag() const override { - return TOOL_START_TAG; + const std::vector& getParsingStartTags() const override { + static const std::vector startTags = {TOOL_START_TAG, FUNCTION_NAME_TAG}; + return startTags; } - const std::unordered_set& getSpecialParsingStartTags() const override { - static const std::unordered_set specialParsingStartTags = {}; + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags = {}; return specialParsingStartTags; } const std::string& getParsingEndTag() const override { diff --git a/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp b/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp index c32645becc..daf908a00d 100644 --- a/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp +++ b/src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp @@ -39,6 +39,7 @@ static std::unique_ptr qwen3Tokenizer; static std::map toolSchemasInput = { {"string_tool", R"({"properties": {"arg1": {"type": "string", "description": "A string argument."}}, "required": ["arg1"]})"}, + {"cd", R"({"properties": {"folder": {"type": "string", "description": "Path"}}, "required": ["folder"]})"}, {"string_int_tool", R"({"properties":{"arg1":{"type":"string","description":"A string argument."},"arg2":{"type":"integer","description":"An integer argument."}},"required":["arg1", "arg2"]})"}, {"some_tool", R"({"properties":{"source":{"type":"string","description":"The name of the file or directory to 
copy."},"destination":{"type":"string","description":"The destination name to copy the file or directory to. If the destination is a directory, the source will be copied into this directory. No file paths allowed. "}},"required":[]})"}}; @@ -115,6 +116,22 @@ value1 EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1\"}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); } +TEST_F(Qwen3CoderOutputParserTest, Parse1ToolCall1Function1ArgumentNoProperBeginTag) { + std::string input = R"( + + +value1 + + +")"; + auto [generatedTensor, generatedTokens, parsedOutput] = generateParsedOutput(input); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "string_tool"); + // Qwen3CoderToolParserImpl removes newlines, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} TEST_F(Qwen3CoderOutputParserTest, Parse1ToolCallNestedXmlNotFromSchema) { std::string input = R"( " @@ -169,7 +186,7 @@ value1line2 ASSERT_EQ(parsedOutput.toolCalls.size(), 1); EXPECT_EQ(parsedOutput.toolCalls[0].name, "string_tool"); - EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1line1\nvalue1line2\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1line1\\nvalue1line2\"}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); } TEST_F(Qwen3CoderOutputParserTest, TestJustParserImplUnaryToolCall) { @@ -547,6 +564,12 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) { // if we don't get closing tag we don't emit tool call int i = -1; std::vector>> chunkToDeltaVec{ + // now we test a tool call improperly beginning with the function-name tag (no tool-call start tag) + // it's important that this comes before any tool-call start tag + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"", 
ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"string_tool"}}]}})"}, + {"value_before_tool_call", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"arg1\": \"value_before_tool_call\"}"}}]}})"}, + // now we test normal tool call {" \n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, @@ -554,7 +577,7 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) { {"ctio", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"n=st", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"ring_tool", ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {">", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"string_tool"}}]}})"}, + {">", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"string_tool"}}]}})"}, {"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"arg1\": \"STRING_VALUE\"}"}}]}})"}, + {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"arg1\": \"STRING_VALUE\"}"}}]}})"}, {" POTENTIALLY EXISINT CONTENT", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {" \n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"string_int_tool"}}]}})"}, + {">\n", ov::genai::GenerationFinishReason::NONE, 
R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":2,"function":{"name":"string_int_tool"}}]}})"}, {"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"ANOTHER_STRING_VALUE\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, @@ -582,8 +605,14 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) { {"1522\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"arg1\": \"\nANOTHER_STRING_VALUE\", \"arg2\": 3141522}"}}]}})"}, - {"CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, std::nullopt}}; + {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{\"arg1\": \"\\nANOTHER_STRING_VALUE\", \"arg2\": 3141522}"}}]}})"}, + {"CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + // now we test functool improperly beginning with + {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":3,"function":{"name":"string_tool"}}]}})"}, + {"value1", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":3,"function":{"arguments":"{\"arg1\": \"value1\"}"}}]}})"}, + {"NOTHING IMPORTANT HERE", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"part of bfcl 'draft'.\n\n\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":4,"function":{"name":"cd"}}]}})"}, + {"\n\nResearchDocs\n\n\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":4,"function":{"arguments":"{\"folder\": \"ResearchDocs\"}"}}]}})"}}; for (const auto& [chunk, finishReason, expectedDelta] : 
chunkToDeltaVec) { i++; std::optional doc = outputParser->parseChunk(chunk, true, ov::genai::GenerationFinishReason::NONE);