diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index be0675a0f2..6ca526919a 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -2,6 +2,8 @@ import base64 import io import json import logging +import os +import re from collections.abc import Generator, Mapping, Sequence from typing import TYPE_CHECKING, Any, Optional, cast @@ -96,6 +98,21 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# Environment variable to control thinking tags preservation (default: true to maintain backward compatibility) +# helper & env‑flag + + +def _env_flag(name: str, default: bool = True) -> bool: + """Return an env var as bool (1/0 | true/false | yes/no).""" + val = os.getenv(name) + if val is None: + return default + return val.lower() in {"1", "true", "yes"} + + +# keep `` blocks unless explicitly disabled +LLM_NODE_THINKING_TAGS_ENABLED = _env_flag("LLM_NODE_THINKING_TAGS_ENABLED") + class LLMNode(BaseNode[LLMNodeData]): _node_data_cls = LLMNodeData @@ -376,7 +393,12 @@ class LLMNode(BaseNode[LLMNodeData]): except OutputParserError as e: raise LLMNodeError(f"Failed to parse structured output: {e}") - yield ModelInvokeCompletedEvent(text=full_text_buffer.getvalue(), usage=usage, finish_reason=finish_reason) + # Apply thinking tags removal if disabled + result_text = full_text_buffer.getvalue() + if not LLM_NODE_THINKING_TAGS_ENABLED: + result_text = self._remove_thinking_tags(result_text) + + yield ModelInvokeCompletedEvent(text=result_text, usage=usage, finish_reason=finish_reason) def _image_file_to_markdown(self, file: "File", /): text_chunk = f"![]({file.generate_url()})" @@ -902,8 +924,13 @@ class LLMNode(BaseNode[LLMNodeData]): for text_part in self._save_multimodal_output_and_convert_result_to_markdown(invoke_result.message.content): buffer.write(text_part) + # Apply thinking tags removal if disabled + result_text = buffer.getvalue() + if not LLM_NODE_THINKING_TAGS_ENABLED: + result_text = self._remove_thinking_tags(result_text) + return ModelInvokeCompletedEvent( - text=buffer.getvalue(), + text=result_text, usage=invoke_result.usage, finish_reason=None, ) @@ -1004,6 +1031,32 @@ class LLMNode(BaseNode[LLMNodeData]): logger.warning("unknown contents type encountered, type=%s", type(contents)) yield str(contents) + def _remove_thinking_tags(self, text: str) -> str: + """ + Remove thinking tags like from the response text. + This handles reasoning models like qwen, deepseek-r1 that include thinking process. + + Args: + text: The text content to clean + + Returns: + Cleaned text with thinking tags removed + """ + if not isinstance(text, str) or not text.strip(): + return text + + # Remove ... blocks (case-insensitive, multiline) + # Pattern explanation: + # \s* - optional whitespace before + # .*? - the thinking tag block (non-greedy) + # \s* - optional whitespace after + cleaned_text = re.sub(r"\s*.*?\s*", " ", text, flags=re.IGNORECASE | re.DOTALL) + + # Clean up multiple spaces and strip + cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip() + + return cleaned_text + def _combine_message_content_with_role( *, contents: Optional[str | list[PromptMessageContentUnionTypes]] = None, role: PromptMessageRole diff --git a/api/tests/integration_tests/workflow/nodes/test_llm.py b/api/tests/integration_tests/workflow/nodes/test_llm.py index ff119b7482..de1f400ec1 100644 --- a/api/tests/integration_tests/workflow/nodes/test_llm.py +++ b/api/tests/integration_tests/workflow/nodes/test_llm.py @@ -286,3 +286,121 @@ def test_extract_json(): ] result = {"name": "test", "age": 123} assert all(_parse_structured_output(item) == result for item in llm_texts) + + +@pytest.mark.parametrize( + ("thinking_tags_enabled", "should_preserve_tags"), + [ + ("true", True), # LLM_NODE_THINKING_TAGS_ENABLED=true -> tags should be preserved + ("false", False), # LLM_NODE_THINKING_TAGS_ENABLED=false -> tags should be removed + ], +) +def test_execute_llm_with_thinking_tags(flask_req_ctx, thinking_tags_enabled, should_preserve_tags): + """Test LLM node with thinking tags removal controlled via environment variable.""" + import os + + with patch.dict(os.environ, {"LLM_NODE_THINKING_TAGS_ENABLED": thinking_tags_enabled}): + # Reload the module to pick up the environment variable change + import importlib + + from core.workflow.nodes.llm import node + + importlib.reload(node) + + node_instance = init_llm_node( + config={ + "id": "llm", + "data": { + "title": f"thinking tags test ({'preserved' if should_preserve_tags else 'removed'})", + "type": "llm", + "model": { + "provider": "langgenius/openrouter", + "name": "qwen/qwen-2.5-72b-instruct", + "mode": "chat", + "completion_params": {}, + }, + "prompt_template": [ + { + "role": "system", + "text": "you are a helpful assistant.", + }, + {"role": "user", "text": "Say hello"}, + ], + "memory": None, + "context": {"enabled": False}, + "vision": {"enabled": False}, + }, + }, + ) + + # Create mock LLM result with thinking tags + mock_usage = LLMUsage( + prompt_tokens=10, + prompt_unit_price=Decimal("0.001"), + prompt_price_unit=Decimal("1000"), + prompt_price=Decimal("0.00001"), + completion_tokens=15, + completion_unit_price=Decimal("0.002"), + completion_price_unit=Decimal("1000"), + completion_price=Decimal("0.00003"), + total_tokens=25, + total_price=Decimal("0.00004"), + currency="USD", + latency=0.3, + ) + + # Mock response with thinking tags (simulating Qwen reasoning behavior) + mock_message = AssistantPromptMessage( + content="Let me think about this greeting...Hello! How can I help you today?" + ) + + mock_llm_result = LLMResult( + model="qwen/qwen-2.5-72b-instruct", + prompt_messages=[], + message=mock_message, + usage=mock_usage, + ) + + mock_model_instance = MagicMock() + mock_model_instance.invoke_llm.return_value = mock_llm_result + + mock_model_config = MagicMock() + mock_model_config.mode = "chat" + mock_model_config.provider = "langgenius/openrouter" + mock_model_config.model = "qwen/qwen-2.5-72b-instruct" + mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b" + + def mock_fetch_model_config_func(_node_data_model): + return mock_model_instance, mock_model_config + + def mock_get_model_instance(_self, **kwargs): + return mock_model_instance + + with ( + patch.object(node_instance, "_fetch_model_config", mock_fetch_model_config_func), + patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance), + ): + # Execute node + result = node_instance._run() + assert isinstance(result, Generator) + + # Verify behavior based on the parameter + for item in result: + if isinstance(item, RunCompletedEvent): + assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED + output_text = item.run_result.outputs.get("text") + assert output_text is not None + + if should_preserve_tags: + # Verify thinking tags are preserved when enabled + assert "" in output_text + assert "" in output_text + assert "Let me think about this greeting..." in output_text + assert "Hello! How can I help you today?" in output_text + else: + # Verify thinking tags are removed when disabled + assert "" not in output_text + assert "" not in output_text + assert "Hello! How can I help you today?" in output_text + # Verify thinking content is not in output + assert "Let me think about this greeting..." not in output_text diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py index fefad0ec95..274ff5fb77 100644 --- a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py @@ -1,4 +1,6 @@ import base64 +import importlib +import os import uuid from collections.abc import Sequence from typing import Optional @@ -6,6 +8,7 @@ from unittest import mock import pytest +import core.workflow.nodes.llm.node from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_entities import CustomConfiguration, SystemConfiguration @@ -663,3 +666,104 @@ class TestSaveMultimodalOutputAndConvertResultToMarkdown: assert list(gen) == [] mock_file_saver.save_binary_string.assert_not_called() mock_file_saver.save_remote_url.assert_not_called() + + +class TestThinkingTagsRemoval: + """Test cases for thinking tags removal functionality in LLM Node.""" + + def test_remove_single_thinking_tag(self, llm_node): + """Test removal of single thinking tag block.""" + input_text = "This is my thinking processHello, how can I help you?" + expected = "Hello, how can I help you?" + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_remove_multiple_thinking_tags(self, llm_node): + """Test removal of multiple thinking tag blocks.""" + input_text = "First thoughtHelloSecond thought World!" + expected = "Hello World!" + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_remove_multiline_thinking_tag(self, llm_node): + """Test removal of multiline thinking tag blocks.""" + input_text = """ +This is a multiline +thinking process +with multiple lines +Final answer here.""" + expected = "Final answer here." + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_case_insensitive_removal(self, llm_node): + """Test case-insensitive thinking tag removal.""" + input_text = "Uppercase thinkingResponse" + expected = "Response" + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_no_thinking_tags(self, llm_node): + """Test text without thinking tags remains unchanged.""" + input_text = "Hello, this is a normal response without thinking tags." + expected = input_text + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_empty_string(self, llm_node): + """Test empty string handling.""" + input_text = "" + expected = "" + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_only_thinking_tag(self, llm_node): + """Test string with only thinking tag.""" + input_text = "Only thinking, no response" + expected = "" + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_whitespace_handling(self, llm_node): + """Test proper whitespace handling after tag removal.""" + input_text = "Thinking Response with spaces" + expected = "Response with spaces" + + result = llm_node._remove_thinking_tags(input_text) + assert result == expected + + def test_none_input(self, llm_node): + """Test None input handling.""" + result = llm_node._remove_thinking_tags(None) + assert result is None + + def test_non_string_input(self, llm_node): + """Test non-string input handling.""" + result = llm_node._remove_thinking_tags(123) + assert result == 123 + + @mock.patch.dict("os.environ", {"LLM_NODE_THINKING_TAGS_ENABLED": "true"}) + def test_environment_variable_enabled(self): + """Test that environment variable is properly read when enabled.""" + importlib.reload(core.workflow.nodes.llm.node) + assert core.workflow.nodes.llm.node.LLM_NODE_THINKING_TAGS_ENABLED is True + + @mock.patch.dict("os.environ", {"LLM_NODE_THINKING_TAGS_ENABLED": "false"}) + def test_environment_variable_disabled(self): + """Test that environment variable is properly read when disabled.""" + importlib.reload(core.workflow.nodes.llm.node) + assert core.workflow.nodes.llm.node.LLM_NODE_THINKING_TAGS_ENABLED is False + + def test_environment_variable_default(self): + """Test that environment variable defaults to True.""" + with mock.patch.dict("os.environ"): + os.environ.pop("LLM_NODE_THINKING_TAGS_ENABLED", None) + importlib.reload(core.workflow.nodes.llm.node) + assert core.workflow.nodes.llm.node.LLM_NODE_THINKING_TAGS_ENABLED is True diff --git a/docker/.env.example b/docker/.env.example index a05141569b..f220de8746 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -824,6 +824,10 @@ HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760 HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576 HTTP_REQUEST_NODE_SSL_VERIFY=True +# LLM node thinking tags preservation (default: true) +# Set to false to remove tags from reasoning models like DeepSeek-R1, Qwen +LLM_NODE_THINKING_TAGS_ENABLED=true + # Respect X-* headers to redirect clients RESPECT_XFORWARD_HEADERS_ENABLED=false diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 5962adb079..8b52419fa7 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -366,6 +366,7 @@ x-shared-env: &shared-api-worker-env HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} HTTP_REQUEST_NODE_SSL_VERIFY: ${HTTP_REQUEST_NODE_SSL_VERIFY:-True} + LLM_NODE_THINKING_TAGS_ENABLED: ${LLM_NODE_THINKING_TAGS_ENABLED:-true} RESPECT_XFORWARD_HEADERS_ENABLED: ${RESPECT_XFORWARD_HEADERS_ENABLED:-false} SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128} SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128}