From 33d239279bcbb67f7aebd6760fece8d96412b861 Mon Sep 17 00:00:00 2001
From: kimtaewoong
Date: Fri, 4 Jul 2025 03:37:30 +0900
Subject: [PATCH] test: add unit and integration tests for thinking tags removal

- Test thinking tags processing for reasoning models like DeepSeek-R1 and Qwen
  with environment variable configuration.
---
 .../workflow/nodes/test_llm.py                | 122 +++++++++++++
 .../core/workflow/nodes/llm/test_node.py      | 166 ++++++++++++++++++
 2 files changed, 288 insertions(+)

diff --git a/api/tests/integration_tests/workflow/nodes/test_llm.py b/api/tests/integration_tests/workflow/nodes/test_llm.py
index 389d1071f3..4f73fe6af9 100644
--- a/api/tests/integration_tests/workflow/nodes/test_llm.py
+++ b/api/tests/integration_tests/workflow/nodes/test_llm.py
@@ -287,3 +287,125 @@ def test_extract_json():
     ]
     result = {"name": "test", "age": 123}
     assert all(_parse_structured_output(item) == result for item in llm_texts)
+
+
+@pytest.mark.parametrize(
+    ("thinking_tags_enabled", "should_preserve_tags"),
+    [
+        ("true", True),  # LLM_NODE_THINKING_TAGS_ENABLED=true -> tags should be preserved
+        ("false", False),  # LLM_NODE_THINKING_TAGS_ENABLED=false -> tags should be removed
+    ],
+)
+def test_execute_llm_with_thinking_tags(flask_req_ctx, thinking_tags_enabled, should_preserve_tags):
+    """Test LLM node with thinking tags removal controlled via environment variable."""
+    import contextlib
+    import importlib
+    import os
+
+    from core.workflow.nodes.llm import node
+
+    with contextlib.ExitStack() as stack:
+        # Registered first so it runs last: once the environment has been restored,
+        # reload again so the patched flag does not leak into later tests.
+        stack.callback(importlib.reload, node)
+        stack.enter_context(patch.dict(os.environ, {"LLM_NODE_THINKING_TAGS_ENABLED": thinking_tags_enabled}))
+        # Reload the module to pick up the environment variable change
+        importlib.reload(node)
+
+        node_instance = init_llm_node(
+            config={
+                "id": "llm",
+                "data": {
+                    "title": f"thinking tags test ({'preserved' if should_preserve_tags else 'removed'})",
+                    "type": "llm",
+                    "model": {
+                        "provider": "langgenius/openrouter",
+                        "name": "qwen/qwen-2.5-72b-instruct",
+                        "mode": "chat",
+                        "completion_params": {},
+                    },
+                    "prompt_template": [
+                        {
+                            "role": "system",
+                            "text": "you are a helpful assistant.",
+                        },
+                        {"role": "user", "text": "Say hello"},
+                    ],
+                    "memory": None,
+                    "context": {"enabled": False},
+                    "vision": {"enabled": False},
+                },
+            },
+        )
+
+        # Create mock LLM result with thinking tags
+        mock_usage = LLMUsage(
+            prompt_tokens=10,
+            prompt_unit_price=Decimal("0.001"),
+            prompt_price_unit=Decimal("1000"),
+            prompt_price=Decimal("0.00001"),
+            completion_tokens=15,
+            completion_unit_price=Decimal("0.002"),
+            completion_price_unit=Decimal("1000"),
+            completion_price=Decimal("0.00003"),
+            total_tokens=25,
+            total_price=Decimal("0.00004"),
+            currency="USD",
+            latency=0.3,
+        )
+
+        # Mock response with thinking tags (simulating Qwen reasoning behavior)
+        mock_message = AssistantPromptMessage(
+            content="<think>Let me think about this greeting...</think>Hello! How can I help you today?"
+        )
+
+        mock_llm_result = LLMResult(
+            model="qwen/qwen-2.5-72b-instruct",
+            prompt_messages=[],
+            message=mock_message,
+            usage=mock_usage,
+        )
+
+        mock_model_instance = MagicMock()
+        mock_model_instance.invoke_llm.return_value = mock_llm_result
+
+        mock_model_config = MagicMock()
+        mock_model_config.mode = "chat"
+        mock_model_config.provider = "langgenius/openrouter"
+        mock_model_config.model = "qwen/qwen-2.5-72b-instruct"
+        mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"
+
+        def mock_fetch_model_config_func(_node_data_model):
+            return mock_model_instance, mock_model_config
+
+        def mock_get_model_instance(_self, **kwargs):
+            return mock_model_instance
+
+        with (
+            patch.object(node_instance, "_fetch_model_config", mock_fetch_model_config_func),
+            patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance),
+        ):
+            # Execute node
+            result = node_instance._run()
+            assert isinstance(result, Generator)
+
+            # Verify behavior based on the parameter
+            for item in result:
+                if isinstance(item, RunCompletedEvent):
+                    assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
+                    output_text = item.run_result.outputs.get("text")
+                    assert output_text is not None
+
+                    if should_preserve_tags:
+                        # Verify thinking tags are preserved when enabled
+                        assert "<think>" in output_text
+                        assert "</think>" in output_text
+                        assert "Let me think about this greeting..." in output_text
+                        assert "Hello! How can I help you today?" in output_text
+                    else:
+                        # Verify thinking tags are removed when disabled
+                        assert "<think>" not in output_text
+                        assert "</think>" not in output_text
+                        assert "Hello! How can I help you today?" in output_text
+                        # Verify thinking content is not in output
+                        assert "Let me think about this greeting..." not in output_text
diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
index 336c2befcc..5b5332e611 100644
--- a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
@@ -662,3 +662,169 @@ class TestSaveMultimodalOutputAndConvertResultToMarkdown:
         assert list(gen) == []
         mock_file_saver.save_binary_string.assert_not_called()
         mock_file_saver.save_remote_url.assert_not_called()
+
+
+class TestThinkingTagsRemoval:
+    """Test cases for thinking tags removal functionality in LLM Node."""
+
+    def test_remove_single_thinking_tag(self, llm_node):
+        """Test removal of single thinking tag block."""
+        input_text = "<think>This is my thinking process</think>Hello, how can I help you?"
+        expected = "Hello, how can I help you?"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_remove_multiple_thinking_tags(self, llm_node):
+        """Test removal of multiple thinking tag blocks."""
+        input_text = "<think>First thought</think>Hello<think>Second thought</think> World!"
+        expected = "Hello World!"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_remove_multiline_thinking_tag(self, llm_node):
+        """Test removal of multiline thinking tag blocks."""
+        input_text = """<think>
+This is a multiline
+thinking process
+with multiple lines
+</think>Final answer here."""
+        expected = "Final answer here."
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_case_insensitive_removal(self, llm_node):
+        """Test case-insensitive thinking tag removal."""
+        input_text = "<THINK>Uppercase thinking</THINK>Response"
+        expected = "Response"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_mixed_case_removal(self, llm_node):
+        """Test mixed case thinking tag removal."""
+        input_text = "<Think>Mixed case thinking</Think>Response"
+        expected = "Response"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_no_thinking_tags(self, llm_node):
+        """Test text without thinking tags remains unchanged."""
+        input_text = "Hello, this is a normal response without thinking tags."
+        expected = input_text
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_empty_string(self, llm_node):
+        """Test empty string handling."""
+        input_text = ""
+        expected = ""
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_only_thinking_tag(self, llm_node):
+        """Test string with only thinking tag."""
+        input_text = "<think>Only thinking, no response</think>"
+        expected = ""
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_whitespace_handling(self, llm_node):
+        """Test proper whitespace handling after tag removal."""
+        input_text = "<think>Thinking</think>   Response with spaces"
+        expected = "Response with spaces"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_whitespace_after_tag(self, llm_node):
+        """Test whitespace removal after thinking tags."""
+        input_text = "<think>Thinking</think> \n \t Final response"
+        expected = "Final response"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_none_input(self, llm_node):
+        """Test None input handling."""
+        result = llm_node._remove_thinking_tags(None)
+        assert result is None
+
+    def test_non_string_input(self, llm_node):
+        """Test non-string input handling."""
+        result = llm_node._remove_thinking_tags(123)
+        assert result == 123
+
+    def test_complex_real_world_example(self, llm_node):
+        """Test with a complex real-world example from DeepSeek-R1."""
+        input_text = """<think>
+
+Okay, let me try to figure out what the user is asking here. The message is just "gdgd".
+That's pretty short and doesn't make much sense on its own. I need to consider different
+possibilities.
+
+First, maybe it's a typo or a shorthand. "GDGD" could be an acronym. Let me think about
+common acronyms. "GDGD" might stand for "Good Good Good Good" but that seems unlikely.
+</think>
+It looks like your message might be incomplete or unclear. Could you please provide
+more context or rephrase your question? I'm here to help!"""
+
+        expected = (
+            "It looks like your message might be incomplete or unclear. Could you please "
+            "provide more context or rephrase your question? I'm here to help!"
+        )
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_multiple_whitespace_tags(self, llm_node):
+        """Test multiple thinking tags with various whitespace."""
+        input_text = "<think>First</think> \n<think>Second</think> Final"
+        expected = "Final"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    @staticmethod
+    def _reload_flag_with_env(value):
+        """Reload the LLM node module under a temporary environment and return its flag.
+
+        The tests assume the flag is evaluated when the module is imported, so the
+        module is reloaded to observe a changed environment. ``value`` is the string
+        assigned to LLM_NODE_THINKING_TAGS_ENABLED, or None to unset the variable.
+        """
+        import importlib
+        import os
+
+        import core.workflow.nodes.llm.node as node_module
+
+        try:
+            with mock.patch.dict("os.environ"):
+                if value is None:
+                    os.environ.pop("LLM_NODE_THINKING_TAGS_ENABLED", None)
+                else:
+                    os.environ["LLM_NODE_THINKING_TAGS_ENABLED"] = value
+                return importlib.reload(node_module).LLM_NODE_THINKING_TAGS_ENABLED
+        finally:
+            # The environment is restored by now; reload again so the patched
+            # value does not leak into tests that run afterwards.
+            importlib.reload(node_module)
+
+    def test_environment_variable_enabled(self):
+        """Test that environment variable is properly read when enabled."""
+        assert self._reload_flag_with_env("true") is True
+
+    def test_environment_variable_disabled(self):
+        """Test that environment variable is properly read when disabled."""
+        assert self._reload_flag_with_env("false") is False
+
+    def test_environment_variable_default(self):
+        """Test that environment variable defaults to True."""
+        # Default should be True for backward compatibility
+        assert self._reload_flag_with_env(None) is True