test: add unit and integration tests for thinking tags removal

- Test thinking-tag processing for reasoning models such as DeepSeek-R1 and Qwen, with removal controlled by the LLM_NODE_THINKING_TAGS_ENABLED environment variable.
11 months ago · 33d239279b
parent e1aa04a4da
commit 33d239279b
2 changed files with 268 additions and 0 deletions
--- a/api/tests/integration_tests/workflow/nodes/test_llm.py
+++ b/api/tests/integration_tests/workflow/nodes/test_llm.py
@ -287,3 +287,121 @@ def test_extract_json():
    ]
    result = {"name": "test", "age": 123}
    assert all(_parse_structured_output(item) == result for item in llm_texts)
+
+
+@pytest.mark.parametrize(
+    ("thinking_tags_enabled", "should_preserve_tags"),
+    [
+        ("true", True),  # LLM_NODE_THINKING_TAGS_ENABLED=true -> tags should be preserved
+        ("false", False),  # LLM_NODE_THINKING_TAGS_ENABLED=false -> tags should be removed
+    ],
+)
+def test_execute_llm_with_thinking_tags(flask_req_ctx, thinking_tags_enabled, should_preserve_tags):
+    """Test LLM node with thinking tags removal controlled via environment variable.
+
+    The model is mocked to answer with a ``<think>...</think>`` block followed by the visible
+    reply. The node module is reloaded while ``LLM_NODE_THINKING_TAGS_ENABLED`` is patched, and
+    the "text" output of every completed run event is checked for the presence or absence of
+    that block.
+
+    Args:
+        flask_req_ctx: Requested but not referenced in the body (presumably a fixture defined
+            outside this hunk -- TODO confirm).
+        thinking_tags_enabled: Raw value assigned to ``LLM_NODE_THINKING_TAGS_ENABLED``.
+        should_preserve_tags: Whether the ``<think>`` block is expected in the output text.
+    """
+    import importlib
+    import os
+
+    from core.workflow.nodes.llm import node
+
+    try:
+        with patch.dict(os.environ, {"LLM_NODE_THINKING_TAGS_ENABLED": thinking_tags_enabled}):
+            # Reload the module to pick up the environment variable change
+            importlib.reload(node)
+
+            node_instance = init_llm_node(
+                config={
+                    "id": "llm",
+                    "data": {
+                        "title": f"thinking tags test ({'preserved' if should_preserve_tags else 'removed'})",
+                        "type": "llm",
+                        "model": {
+                            "provider": "langgenius/openrouter",
+                            "name": "qwen/qwen-2.5-72b-instruct",
+                            "mode": "chat",
+                            "completion_params": {},
+                        },
+                        "prompt_template": [
+                            {
+                                "role": "system",
+                                "text": "you are a helpful assistant.",
+                            },
+                            {"role": "user", "text": "Say hello"},
+                        ],
+                        "memory": None,
+                        "context": {"enabled": False},
+                        "vision": {"enabled": False},
+                    },
+                },
+            )
+
+            # Create mock LLM result with thinking tags
+            mock_usage = LLMUsage(
+                prompt_tokens=10,
+                prompt_unit_price=Decimal("0.001"),
+                prompt_price_unit=Decimal("1000"),
+                prompt_price=Decimal("0.00001"),
+                completion_tokens=15,
+                completion_unit_price=Decimal("0.002"),
+                completion_price_unit=Decimal("1000"),
+                completion_price=Decimal("0.00003"),
+                total_tokens=25,
+                total_price=Decimal("0.00004"),
+                currency="USD",
+                latency=0.3,
+            )
+
+            # Mock response with thinking tags (simulating Qwen reasoning behavior)
+            mock_message = AssistantPromptMessage(
+                content="<think>Let me think about this greeting...</think>Hello! How can I help you today?"
+            )
+
+            mock_llm_result = LLMResult(
+                model="qwen/qwen-2.5-72b-instruct",
+                prompt_messages=[],
+                message=mock_message,
+                usage=mock_usage,
+            )
+
+            mock_model_instance = MagicMock()
+            mock_model_instance.invoke_llm.return_value = mock_llm_result
+
+            mock_model_config = MagicMock()
+            mock_model_config.mode = "chat"
+            mock_model_config.provider = "langgenius/openrouter"
+            mock_model_config.model = "qwen/qwen-2.5-72b-instruct"
+            mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"
+
+            def mock_fetch_model_config_func(_node_data_model):
+                return mock_model_instance, mock_model_config
+
+            def mock_get_model_instance(_self, **kwargs):
+                return mock_model_instance
+
+            with (
+                patch.object(node_instance, "_fetch_model_config", mock_fetch_model_config_func),
+                patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance),
+            ):
+                # Execute node
+                result = node_instance._run()
+                assert isinstance(result, Generator)
+
+                # Drain the generator while the mocks are still installed.
+                completed_events = [item for item in result if isinstance(item, RunCompletedEvent)]
+
+            # Without this guard the checks below would be skipped silently (and the test
+            # would pass) whenever the node never reports completion.
+            assert completed_events, "LLM node did not yield a RunCompletedEvent"
+
+            # Verify behavior based on the parameter
+            for event in completed_events:
+                assert event.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
+                output_text = event.run_result.outputs.get("text")
+                assert output_text is not None
+
+                # The visible reply must survive in both modes.
+                assert "Hello! How can I help you today?" in output_text
+
+                if should_preserve_tags:
+                    # Verify thinking tags are preserved when enabled
+                    assert "<think>" in output_text
+                    assert "</think>" in output_text
+                    assert "Let me think about this greeting..." in output_text
+                else:
+                    # Verify thinking tags are removed when disabled
+                    assert "<think>" not in output_text
+                    assert "</think>" not in output_text
+                    # Verify thinking content is not in output
+                    assert "Let me think about this greeting..." not in output_text
+    finally:
+        # patch.dict has restored os.environ by this point; reload once more so the value
+        # forced above does not stay baked into the module for tests that run afterwards.
+        importlib.reload(node)
--- a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
@ -662,3 +662,153 @@ class TestSaveMultimodalOutputAndConvertResultToMarkdown:
        assert list(gen) == []
        mock_file_saver.save_binary_string.assert_not_called()
        mock_file_saver.save_remote_url.assert_not_called()
+
+
+class TestThinkingTagsRemoval:
+    """Test cases for thinking tags removal functionality in LLM Node.
+
+    Most tests call ``llm_node._remove_thinking_tags`` directly (``llm_node`` is a fixture
+    defined outside this hunk). The last three tests check the module-level
+    ``LLM_NODE_THINKING_TAGS_ENABLED`` flag under different environment settings.
+    """
+
+    def test_remove_single_thinking_tag(self, llm_node):
+        """Test removal of single thinking tag block."""
+        input_text = "<think>This is my thinking process</think>Hello, how can I help you?"
+        expected = "Hello, how can I help you?"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_remove_multiple_thinking_tags(self, llm_node):
+        """Test removal of multiple thinking tag blocks."""
+        input_text = "<think>First thought</think>Hello<think>Second thought</think> World!"
+        expected = "Hello World!"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_remove_multiline_thinking_tag(self, llm_node):
+        """Test removal of multiline thinking tag blocks."""
+        input_text = """<think>
+This is a multiline
+thinking process
+with multiple lines
+</think>Final answer here."""
+        expected = "Final answer here."
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_case_insensitive_removal(self, llm_node):
+        """Test case-insensitive thinking tag removal."""
+        input_text = "<THINK>Uppercase thinking</THINK>Response"
+        expected = "Response"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_mixed_case_removal(self, llm_node):
+        """Test mixed case thinking tag removal."""
+        input_text = "<Think>Mixed case thinking</Think>Response"
+        expected = "Response"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_no_thinking_tags(self, llm_node):
+        """Test text without thinking tags remains unchanged."""
+        input_text = "Hello, this is a normal response without thinking tags."
+        expected = input_text
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_empty_string(self, llm_node):
+        """Test empty string handling."""
+        input_text = ""
+        expected = ""
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_only_thinking_tag(self, llm_node):
+        """Test string with only thinking tag."""
+        input_text = "<think>Only thinking, no response</think>"
+        expected = ""
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_whitespace_handling(self, llm_node):
+        """Test proper whitespace handling after tag removal."""
+        input_text = "<think>Thinking</think>   Response with spaces"
+        expected = "Response with spaces"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_whitespace_after_tag(self, llm_node):
+        """Test whitespace removal after thinking tags."""
+        input_text = "<think>Thinking</think>  \n  \t  Final response"
+        expected = "Final response"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_none_input(self, llm_node):
+        """Test None input handling."""
+        result = llm_node._remove_thinking_tags(None)
+        assert result is None
+
+    def test_non_string_input(self, llm_node):
+        """Test non-string input handling."""
+        result = llm_node._remove_thinking_tags(123)
+        assert result == 123
+
+    def test_complex_real_world_example(self, llm_node):
+        """Test with a complex real-world example from DeepSeek-R1."""
+        expected = (
+            "It looks like your message might be incomplete or unclear. Could you please "
+            "provide more context or rephrase your question? I'm here to help!"
+        )
+        thinking_block = """<think>
+
+Okay, let me try to figure out what the user is asking here. The message is just "gdgd".
+That's pretty short and doesn't make much sense on its own. I need to consider different
+possibilities.
+
+First, maybe it's a typo or a shorthand. "GDGD" could be an acronym. Let me think about
+common acronyms. "GDGD" might stand for "Good Good Good Good" but that seems unlikely.
+
+</think>"""
+        # Only the reasoning block spans several lines. The answer is appended exactly as
+        # expected, so the comparison does not hinge on how a line break inside the answer
+        # would be treated by the helper.
+        input_text = thinking_block + expected
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    def test_multiple_whitespace_tags(self, llm_node):
+        """Test multiple thinking tags with various whitespace."""
+        input_text = "<think>First</think>  \n<think>Second</think>   Final"
+        expected = "Final"
+
+        result = llm_node._remove_thinking_tags(input_text)
+        assert result == expected
+
+    @staticmethod
+    def _thinking_tags_flag_with_env(value):
+        """Return ``LLM_NODE_THINKING_TAGS_ENABLED`` as computed with the env var set to *value*.
+
+        Passing ``None`` removes the variable so the default is exercised. The LLM node module
+        is reloaded while the environment is patched (the flag is assumed to be read when the
+        module is imported -- TODO confirm) and reloaded once more after the environment has
+        been restored, so the temporary setting cannot leak into tests that run later.
+        """
+        import importlib
+        import os
+
+        import core.workflow.nodes.llm.node as node_module
+
+        env_name = "LLM_NODE_THINKING_TAGS_ENABLED"
+        try:
+            # patch.dict snapshots os.environ and restores it when the block exits.
+            with mock.patch.dict("os.environ"):
+                if value is None:
+                    os.environ.pop(env_name, None)
+                else:
+                    os.environ[env_name] = value
+                return importlib.reload(node_module).LLM_NODE_THINKING_TAGS_ENABLED
+        finally:
+            # Runs after the environment is restored: put the module back in its normal state.
+            importlib.reload(node_module)
+
+    def test_environment_variable_enabled(self):
+        """Test that environment variable is properly read when enabled."""
+        assert self._thinking_tags_flag_with_env("true") is True
+
+    def test_environment_variable_disabled(self):
+        """Test that environment variable is properly read when disabled."""
+        assert self._thinking_tags_flag_with_env("false") is False
+
+    def test_environment_variable_default(self):
+        """Test that environment variable defaults to True."""
+        # Default should be True for backward compatibility
+        assert self._thinking_tags_flag_with_env(None) is True