test: add unit and integration tests for thinking tags removal

- Test thinking-tag processing for reasoning models such as DeepSeek-R1 and Qwen, controlled by the LLM_NODE_THINKING_TAGS_ENABLED environment variable.
pull/21897/head
kimtaewoong 11 months ago
parent e1aa04a4da
commit 33d239279b

@ -287,3 +287,121 @@ def test_extract_json():
]
result = {"name": "test", "age": 123}
assert all(_parse_structured_output(item) == result for item in llm_texts)
@pytest.mark.parametrize(
    ("thinking_tags_enabled", "should_preserve_tags"),
    [
        ("true", True),  # LLM_NODE_THINKING_TAGS_ENABLED=true -> tags should be preserved
        ("false", False),  # LLM_NODE_THINKING_TAGS_ENABLED=false -> tags should be removed
    ],
)
def test_execute_llm_with_thinking_tags(flask_req_ctx, thinking_tags_enabled, should_preserve_tags):
    """Test LLM node with thinking tags removal controlled via environment variable.

    The LLM node module is reloaded while ``LLM_NODE_THINKING_TAGS_ENABLED`` is
    patched so that the module picks up the new value, and reloaded once more
    after the environment has been restored so the setting does not leak into
    other tests.

    Args:
        flask_req_ctx: Fixture requested for its side effects only; it is not
            used directly in the body (presumably it provides a Flask request
            context -- confirm against the fixture definition).
        thinking_tags_enabled: Raw string assigned to the environment variable.
        should_preserve_tags: Whether ``<think>`` blocks are expected to
            survive in the node's ``text`` output.
    """
    import importlib
    import os

    from core.workflow.nodes.llm import node

    try:
        with patch.dict(os.environ, {"LLM_NODE_THINKING_TAGS_ENABLED": thinking_tags_enabled}):
            # Reload the module to pick up the environment variable change
            importlib.reload(node)

            node_instance = init_llm_node(
                config={
                    "id": "llm",
                    "data": {
                        "title": f"thinking tags test ({'preserved' if should_preserve_tags else 'removed'})",
                        "type": "llm",
                        "model": {
                            "provider": "langgenius/openrouter",
                            "name": "qwen/qwen-2.5-72b-instruct",
                            "mode": "chat",
                            "completion_params": {},
                        },
                        "prompt_template": [
                            {
                                "role": "system",
                                "text": "you are a helpful assistant.",
                            },
                            {"role": "user", "text": "Say hello"},
                        ],
                        "memory": None,
                        "context": {"enabled": False},
                        "vision": {"enabled": False},
                    },
                },
            )

            # Create mock LLM result with thinking tags
            mock_usage = LLMUsage(
                prompt_tokens=10,
                prompt_unit_price=Decimal("0.001"),
                prompt_price_unit=Decimal("1000"),
                prompt_price=Decimal("0.00001"),
                completion_tokens=15,
                completion_unit_price=Decimal("0.002"),
                completion_price_unit=Decimal("1000"),
                completion_price=Decimal("0.00003"),
                total_tokens=25,
                total_price=Decimal("0.00004"),
                currency="USD",
                latency=0.3,
            )

            # Mock response with thinking tags (simulating Qwen reasoning behavior)
            mock_message = AssistantPromptMessage(
                content="<think>Let me think about this greeting...</think>Hello! How can I help you today?"
            )

            mock_llm_result = LLMResult(
                model="qwen/qwen-2.5-72b-instruct",
                prompt_messages=[],
                message=mock_message,
                usage=mock_usage,
            )

            mock_model_instance = MagicMock()
            mock_model_instance.invoke_llm.return_value = mock_llm_result

            mock_model_config = MagicMock()
            mock_model_config.mode = "chat"
            mock_model_config.provider = "langgenius/openrouter"
            mock_model_config.model = "qwen/qwen-2.5-72b-instruct"
            mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"

            def mock_fetch_model_config_func(_node_data_model):
                return mock_model_instance, mock_model_config

            def mock_get_model_instance(_self, **kwargs):
                return mock_model_instance

            with (
                patch.object(node_instance, "_fetch_model_config", mock_fetch_model_config_func),
                patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance),
            ):
                # Execute node
                result = node_instance._run()
                assert isinstance(result, Generator)

                # Drain the generator while the patches are still active.
                completed_events = [item for item in result if isinstance(item, RunCompletedEvent)]
    finally:
        # The environment is restored at this point; reload again so the
        # module-level setting does not leak into tests that run afterwards.
        importlib.reload(node)

    # Without this guard the test would pass vacuously if the node never
    # emitted a completion event.
    assert completed_events, "LLM node did not emit a RunCompletedEvent"

    # Verify behavior based on the parameter
    for item in completed_events:
        assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
        output_text = item.run_result.outputs.get("text")
        assert output_text is not None

        if should_preserve_tags:
            # Verify thinking tags are preserved when enabled
            assert "<think>" in output_text
            assert "</think>" in output_text
            assert "Let me think about this greeting..." in output_text
            assert "Hello! How can I help you today?" in output_text
        else:
            # Verify thinking tags are removed when disabled
            assert "<think>" not in output_text
            assert "</think>" not in output_text
            assert "Hello! How can I help you today?" in output_text
            # Verify thinking content is not in output
            assert "Let me think about this greeting..." not in output_text

@ -662,3 +662,153 @@ class TestSaveMultimodalOutputAndConvertResultToMarkdown:
assert list(gen) == []
mock_file_saver.save_binary_string.assert_not_called()
mock_file_saver.save_remote_url.assert_not_called()
class TestThinkingTagsRemoval:
    """Test cases for thinking tags removal functionality in LLM Node.

    Most tests call ``llm_node._remove_thinking_tags`` directly, using the
    ``llm_node`` fixture. The last three tests check how the module-level
    ``LLM_NODE_THINKING_TAGS_ENABLED`` flag is derived from the environment.
    """

    @staticmethod
    def _load_thinking_tags_flag(env_value):
        """Reload the LLM node module under a controlled environment and return its flag.

        Args:
            env_value: String to assign to ``LLM_NODE_THINKING_TAGS_ENABLED``,
                or ``None`` to remove the variable from the environment.

        Returns:
            The value of ``LLM_NODE_THINKING_TAGS_ENABLED`` after the reload.
        """
        import importlib
        import os

        import core.workflow.nodes.llm.node as llm_node_module

        try:
            # patch.dict snapshots os.environ and restores it on exit, so the
            # mutations below never escape this helper.
            with mock.patch.dict("os.environ"):
                if env_value is None:
                    os.environ.pop("LLM_NODE_THINKING_TAGS_ENABLED", None)
                else:
                    os.environ["LLM_NODE_THINKING_TAGS_ENABLED"] = env_value
                # Need to reload to get the updated value
                importlib.reload(llm_node_module)
                return llm_node_module.LLM_NODE_THINKING_TAGS_ENABLED
        finally:
            # Reload under the restored environment so later tests do not
            # observe the value forced here (removes test-order dependence).
            importlib.reload(llm_node_module)

    def test_remove_single_thinking_tag(self, llm_node):
        """Test removal of single thinking tag block."""
        input_text = "<think>This is my thinking process</think>Hello, how can I help you?"
        expected = "Hello, how can I help you?"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_remove_multiple_thinking_tags(self, llm_node):
        """Test removal of multiple thinking tag blocks."""
        input_text = "<think>First thought</think>Hello<think>Second thought</think> World!"
        expected = "Hello World!"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_remove_multiline_thinking_tag(self, llm_node):
        """Test removal of multiline thinking tag blocks."""
        input_text = """<think>
This is a multiline
thinking process
with multiple lines
</think>Final answer here."""
        expected = "Final answer here."
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_case_insensitive_removal(self, llm_node):
        """Test case-insensitive thinking tag removal."""
        input_text = "<THINK>Uppercase thinking</THINK>Response"
        expected = "Response"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_mixed_case_removal(self, llm_node):
        """Test mixed case thinking tag removal."""
        input_text = "<Think>Mixed case thinking</Think>Response"
        expected = "Response"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_no_thinking_tags(self, llm_node):
        """Test text without thinking tags remains unchanged."""
        input_text = "Hello, this is a normal response without thinking tags."
        expected = input_text
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_empty_string(self, llm_node):
        """Test empty string handling."""
        input_text = ""
        expected = ""
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_only_thinking_tag(self, llm_node):
        """Test string with only thinking tag."""
        input_text = "<think>Only thinking, no response</think>"
        expected = ""
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_whitespace_handling(self, llm_node):
        """Test proper whitespace handling after tag removal."""
        input_text = "<think>Thinking</think> Response with spaces"
        expected = "Response with spaces"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_whitespace_after_tag(self, llm_node):
        """Test whitespace removal after thinking tags."""
        input_text = "<think>Thinking</think> \n \t Final response"
        expected = "Final response"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_none_input(self, llm_node):
        """Test None input handling."""
        result = llm_node._remove_thinking_tags(None)
        assert result is None

    def test_non_string_input(self, llm_node):
        """Test non-string input handling."""
        result = llm_node._remove_thinking_tags(123)
        assert result == 123

    def test_complex_real_world_example(self, llm_node):
        """Test with a complex real-world example from DeepSeek-R1."""
        input_text = """<think>
Okay, let me try to figure out what the user is asking here. The message is just "gdgd".
That's pretty short and doesn't make much sense on its own. I need to consider different
possibilities.
First, maybe it's a typo or a shorthand. "GDGD" could be an acronym. Let me think about
common acronyms. "GDGD" might stand for "Good Good Good Good" but that seems unlikely.
</think>It looks like your message might be incomplete or unclear. Could you please provide
more context or rephrase your question? I'm here to help!"""
        # NOTE(review): the input has a line break between "provide" and "more"
        # where the expected text has a single space -- confirm that
        # _remove_thinking_tags normalizes this, or adjust the literal.
        expected = (
            "It looks like your message might be incomplete or unclear. Could you please "
            "provide more context or rephrase your question? I'm here to help!"
        )
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_multiple_whitespace_tags(self, llm_node):
        """Test multiple thinking tags with various whitespace."""
        input_text = "<think>First</think> \n<think>Second</think> Final"
        expected = "Final"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_environment_variable_enabled(self):
        """Test that environment variable is properly read when enabled."""
        assert self._load_thinking_tags_flag("true") is True

    def test_environment_variable_disabled(self):
        """Test that environment variable is properly read when disabled."""
        assert self._load_thinking_tags_flag("false") is False

    def test_environment_variable_default(self):
        """Test that environment variable defaults to True."""
        # Default should be True for backward compatibility; the variable is
        # removed explicitly so the ambient environment cannot affect the result.
        assert self._load_thinking_tags_flag(None) is True

Loading…
Cancel
Save