Skip to content
2 changes: 1 addition & 1 deletion spelling-whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp
41 changes: 35 additions & 6 deletions src/llm/io_processing/qwen3coder/qwen3coder_tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,22 @@ static const ParametersTypeMap_t parseToolSchema(const std::string& functionName
return result;
}

// helper function to double escape \n,\r, \t in string
static std::string escapeString(const std::string& input) {
std::string output;
output.reserve(input.size());
for (char c : input) {
switch (c) {
case '\n':
output += "\\n";
break;
default:
output += c;
}
}
return output;
}

static std::string setCorrectValueType(std::string& inputValue, const std::string& currentParameterName, const ParametersTypeMap_t& parametersType) {
auto paramIt = parametersType.find(currentParameterName);
if (paramIt == parametersType.end()) {
Expand Down Expand Up @@ -157,10 +173,24 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& toolCalls) {
auto previousState = this->currentState;
switch (this->currentState) {
case State::Content: {
DEFINE_TAG_POSITION_AND_BREAK_IF_NOT_FOUND(Qwen3CoderToolParser::TOOL_START_TAG);
this->lastProcessedPosition = pos + Qwen3CoderToolParser::TOOL_START_TAG.length();
this->currentState = State::InsideToolCall;
this->toolCallPositions.begin.push(pos);
// normally we expect <tool_call> tag but we observerd that sometimes model generates <function=...> directly
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// normally we expect <tool_call> tag but we observerd that sometimes model generates <function=...> directly
// normally we expect <tool_call> tag but we observed that sometimes model generates <function=...> directly

// so we will check for both tags and handle accordingly
auto posTool = this->streamContent.find(Qwen3CoderToolParser::TOOL_START_TAG, this->getLastProcessedPosition());
auto posFunc = this->streamContent.find(Qwen3CoderToolParser::FUNCTION_NAME_TAG, this->getLastProcessedPosition());
if (posFunc == std::string::npos && posTool == std::string::npos) {
SPDLOG_TRACE("Did not find: {} or {}", Qwen3CoderToolParser::TOOL_START_TAG, Qwen3CoderToolParser::FUNCTION_NAME_TAG);
} else if (posTool < posFunc) {
// found <tool_call> first
this->lastProcessedPosition = posTool + Qwen3CoderToolParser::TOOL_START_TAG.length();
this->currentState = State::InsideToolCall;
this->toolCallPositions.begin.push(posTool);
} else {
Comment on lines +182 to +187
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it cover well cases when posTool == std::string::npos or posFunc == std::string::npos?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then posTool != posFunc so we hit either else if or else

// found <function=...> first, we will assume <tool_call> is missing and we will add it
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"we will add it" - do you inject "<tool_call>" somewhere?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

misleading comment - i will remove it.

SPDLOG_DEBUG("Did not find: {}, assuming it should exist", Qwen3CoderToolParser::TOOL_START_TAG);
this->lastProcessedPosition = posFunc + Qwen3CoderToolParser::FUNCTION_NAME_TAG.length();
this->currentState = State::InsideFunctionName;
this->toolCallPositions.begin.push(posFunc);
}
break;
}
case State::InsideToolCall: {
Expand All @@ -180,7 +210,6 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& toolCalls) {
auto funcEnd = streamContent.find(Qwen3CoderToolParser::FUNCTION_END_TAG, this->lastProcessedPosition);
auto paramStart = streamContent.find(Qwen3CoderToolParser::PARAMETER_NAME_TAG, this->lastProcessedPosition);
if (funcEnd == std::string::npos && paramStart == std::string::npos) {
break;
} else if (paramStart < funcEnd) { // next parameter
this->lastProcessedPosition = paramStart + Qwen3CoderToolParser::PARAMETER_NAME_TAG.length();
this->currentState = State::InsideParameterName;
Expand All @@ -207,7 +236,7 @@ bool Qwen3CoderToolParserImpl::parseUntilStateChange(ToolCalls_t& toolCalls) {
if (paramIt == this->toolsParametersTypeMap.end()) {
SPDLOG_DEBUG("Tool schema not found for tool: {}, leaving parameter: {} as string", this->currentFunction.name, this->currentParameterName);
} else {
parameterValue = setCorrectValueType(parameterValue, this->currentParameterName, paramIt->second);
parameterValue = escapeString(setCorrectValueType(parameterValue, this->currentParameterName, paramIt->second));
}
auto res = this->currentFunction.parameters.try_emplace(this->currentParameterName, parameterValue);
if (!res.second)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class Qwen3CoderToolParser : public BaseOutputParser {
return TOOL_START_TAG;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {};
static const std::unordered_set<std::string> specialParsingStartTags = {FUNCTION_NAME_TAG};
return specialParsingStartTags;
}
const std::string& getParsingEndTag() const override {
Expand Down
36 changes: 30 additions & 6 deletions src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ value1
EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1\"}");
EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);
}
TEST_F(Qwen3CoderOutputParserTest, Parse1ToolCall1Function1ArgumentNoProperBeginTag) {
std::string input = R"(
<function=string_tool>
<parameter=arg1>
value1
</parameter>
</function>
</tool_call>")";
auto [generatedTensor, generatedTokens, parsedOutput] = generateParsedOutput(input);

ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
EXPECT_EQ(parsedOutput.toolCalls[0].name, "string_tool");
// Qwen3CoderToolParserImpl removes newlines, so we expect arguments value to be without spaces
EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1\"}");
EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);
}
TEST_F(Qwen3CoderOutputParserTest, Parse1ToolCallNestedXmlNotFromSchema) {
std::string input = R"(
"<tool_call>
Expand Down Expand Up @@ -169,7 +185,7 @@ value1line2

ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
EXPECT_EQ(parsedOutput.toolCalls[0].name, "string_tool");
EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1line1\nvalue1line2\"}");
EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\": \"value1line1\\nvalue1line2\"}");
EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);
}
TEST_F(Qwen3CoderOutputParserTest, TestJustParserImplUnaryToolCall) {
Expand Down Expand Up @@ -547,14 +563,19 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) {
// if we don't get closing tag we don't emit tool call
int i = -1;
std::vector<std::tuple<std::string, ov::genai::GenerationFinishReason, std::optional<std::string>>> chunkToDeltaVec{
// now we test functool improperly beginning with <function=... and then being finished by </tool_call>
// its important that this is before any <tool_call> tag
{"<function=string_tool><parameter=arg1>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"string_tool"}}]}})"},
{"value_before_tool_call</parameter></function></tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"arg1\": \"value_before_tool_call\"}"}}]}})"},
// now we test normal tool call
{" <too", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"l_cal", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"l>\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<fun", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"ctio", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"n=st", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"ring_tool", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{">", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"string_tool"}}]}})"},
{">", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"string_tool"}}]}})"},
{"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<paramete", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"r=a", ov::genai::GenerationFinishReason::NONE, std::nullopt},
Expand All @@ -566,12 +587,12 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) {
{"</pa", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"rameter>\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"</function>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"</tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"arg1\": \"STRING_VALUE\"}"}}]}})"},
{"</tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"arg1\": \"STRING_VALUE\"}"}}]}})"},
{" POTENTIALLY EXISINT CONTENT", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{" <tool", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{" <tool_call>\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<function=string_int_tool", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{">\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"string_int_tool"}}]}})"},
{">\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":2,"function":{"name":"string_int_tool"}}]}})"},
{"<parameter=arg1>\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"ANOTHER_STRING_VALUE\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
Expand All @@ -582,8 +603,11 @@ TEST_F(Qwen3CoderOutputParserTest, StreamingSimpleToolCall) {
{"1522\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"</parameter>\n", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"</function>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"</tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"arg1\": \"\nANOTHER_STRING_VALUE\", \"arg2\": 3141522}"}}]}})"},
{"CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, std::nullopt}};
{"</tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{\"arg1\": \"\\nANOTHER_STRING_VALUE\", \"arg2\": 3141522}"}}]}})"},
{"CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, std::nullopt},
// now we test functool improperly beginning with <function=... and then being finished by </tool_call>
{"<function=string_tool><parameter=arg1>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":3,"function":{"name":"string_tool"}}]}})"},
{"value1</parameter></function></tool_call>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":3,"function":{"arguments":"{\"arg1\": \"value1\"}"}}]}})"}};
for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) {
i++;
std::optional<rapidjson::Document> doc = outputParser->parseChunk(chunk, true, ov::genai::GenerationFinishReason::NONE);
Expand Down