pull/21897/merge
taewoong Kim 10 months ago committed by GitHub
commit 2444c7ef03
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -2,6 +2,8 @@ import base64
import io import io
import json import json
import logging import logging
import os
import re
from collections.abc import Generator, Mapping, Sequence from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, cast from typing import TYPE_CHECKING, Any, Optional, cast
@ -96,6 +98,21 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Environment variable controlling whether `<think>` blocks are preserved in
# LLM node output (default: true, to maintain backward compatibility).
def _env_flag(name: str, default: bool = True) -> bool:
    """Return the environment variable *name* interpreted as a boolean.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        ``True`` when the value is ``1``, ``true`` or ``yes`` (case-insensitive,
        surrounding whitespace ignored); ``default`` when the variable is unset
        or contains only whitespace; ``False`` for any other value.
    """
    val = os.getenv(name)
    if val is None:
        return default
    # Values coming from .env files or shells often carry stray whitespace or
    # a trailing newline; normalise before comparing.
    normalized = val.strip().lower()
    if not normalized:
        # A blank assignment (``NAME=``) carries no decision, so treat it like
        # an unset variable instead of silently flipping the flag to False.
        return default
    return normalized in {"1", "true", "yes"}


# keep `<think>` blocks unless explicitly disabled
LLM_NODE_THINKING_TAGS_ENABLED = _env_flag("LLM_NODE_THINKING_TAGS_ENABLED")
class LLMNode(BaseNode[LLMNodeData]): class LLMNode(BaseNode[LLMNodeData]):
_node_data_cls = LLMNodeData _node_data_cls = LLMNodeData
@ -376,7 +393,12 @@ class LLMNode(BaseNode[LLMNodeData]):
except OutputParserError as e: except OutputParserError as e:
raise LLMNodeError(f"Failed to parse structured output: {e}") raise LLMNodeError(f"Failed to parse structured output: {e}")
yield ModelInvokeCompletedEvent(text=full_text_buffer.getvalue(), usage=usage, finish_reason=finish_reason) # Apply thinking tags removal if disabled
result_text = full_text_buffer.getvalue()
if not LLM_NODE_THINKING_TAGS_ENABLED:
result_text = self._remove_thinking_tags(result_text)
yield ModelInvokeCompletedEvent(text=result_text, usage=usage, finish_reason=finish_reason)
def _image_file_to_markdown(self, file: "File", /): def _image_file_to_markdown(self, file: "File", /):
text_chunk = f"![]({file.generate_url()})" text_chunk = f"![]({file.generate_url()})"
@ -902,8 +924,13 @@ class LLMNode(BaseNode[LLMNodeData]):
for text_part in self._save_multimodal_output_and_convert_result_to_markdown(invoke_result.message.content): for text_part in self._save_multimodal_output_and_convert_result_to_markdown(invoke_result.message.content):
buffer.write(text_part) buffer.write(text_part)
# Apply thinking tags removal if disabled
result_text = buffer.getvalue()
if not LLM_NODE_THINKING_TAGS_ENABLED:
result_text = self._remove_thinking_tags(result_text)
return ModelInvokeCompletedEvent( return ModelInvokeCompletedEvent(
text=buffer.getvalue(), text=result_text,
usage=invoke_result.usage, usage=invoke_result.usage,
finish_reason=None, finish_reason=None,
) )
@ -1004,6 +1031,32 @@ class LLMNode(BaseNode[LLMNodeData]):
logger.warning("unknown contents type encountered, type=%s", type(contents)) logger.warning("unknown contents type encountered, type=%s", type(contents))
yield str(contents) yield str(contents)
def _remove_thinking_tags(self, text: str) -> str:
    """
    Remove ``<think>...</think>`` blocks from the response text.

    Reasoning models (e.g. qwen, deepseek-r1) may embed their thinking process
    in such blocks. Only the blocks and the whitespace directly around them are
    touched; the rest of the response keeps its original line breaks and
    spacing.

    Args:
        text: The text content to clean.

    Returns:
        The text with every thinking block (tags and content) removed and
        leading/trailing whitespace stripped. Non-string, empty or
        whitespace-only input, and text containing no thinking block, is
        returned unchanged.
    """
    if not isinstance(text, str) or not text.strip():
        return text

    def _separator(match: "re.Match[str]") -> str:
        # Replace the block plus its surrounding whitespace with the smallest
        # separator that keeps the neighbouring text apart: a paragraph break
        # or line break if one was there, otherwise a single space.
        newline_count = (match.group(1) + match.group(2)).count("\n")
        if newline_count >= 2:
            return "\n\n"
        if newline_count == 1:
            return "\n"
        return " "

    # Pattern explanation:
    #   (\s*)               - optional whitespace before <think>
    #   <think>.*?</think>  - the thinking block (non-greedy, spans newlines)
    #   (\s*)               - optional whitespace after </think>
    cleaned_text, removed_blocks = re.subn(
        r"(\s*)<think>.*?</think>(\s*)",
        _separator,
        text,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if not removed_blocks:
        # Nothing to remove: hand the response back untouched.
        return text

    # Do not collapse whitespace globally: that would flatten paragraphs, lists
    # and code blocks in the answer. Only trim the edges left by removal.
    return cleaned_text.strip()
def _combine_message_content_with_role( def _combine_message_content_with_role(
*, contents: Optional[str | list[PromptMessageContentUnionTypes]] = None, role: PromptMessageRole *, contents: Optional[str | list[PromptMessageContentUnionTypes]] = None, role: PromptMessageRole

@ -286,3 +286,121 @@ def test_extract_json():
] ]
result = {"name": "test", "age": 123} result = {"name": "test", "age": 123}
assert all(_parse_structured_output(item) == result for item in llm_texts) assert all(_parse_structured_output(item) == result for item in llm_texts)
@pytest.mark.parametrize(
    ("thinking_tags_enabled", "should_preserve_tags"),
    [
        ("true", True),  # LLM_NODE_THINKING_TAGS_ENABLED=true -> tags should be preserved
        ("false", False),  # LLM_NODE_THINKING_TAGS_ENABLED=false -> tags should be removed
    ],
)
def test_execute_llm_with_thinking_tags(flask_req_ctx, thinking_tags_enabled, should_preserve_tags):
    """Test LLM node with thinking tags removal controlled via environment variable.

    The flag is parsed from a patched environment and then patched onto the node
    module for the duration of the run. The module is deliberately NOT reloaded:
    a reload rebinds every class in the module and leaves the flag at the last
    parametrized value, which leaks into tests that run afterwards.
    """
    import os

    from core.workflow.nodes.llm import node as llm_node_module

    # Parse the flag exactly as the module does at import time.
    with patch.dict(os.environ, {"LLM_NODE_THINKING_TAGS_ENABLED": thinking_tags_enabled}):
        flag_value = llm_node_module._env_flag("LLM_NODE_THINKING_TAGS_ENABLED")
    assert flag_value is should_preserve_tags

    node_instance = init_llm_node(
        config={
            "id": "llm",
            "data": {
                "title": f"thinking tags test ({'preserved' if should_preserve_tags else 'removed'})",
                "type": "llm",
                "model": {
                    "provider": "langgenius/openrouter",
                    "name": "qwen/qwen-2.5-72b-instruct",
                    "mode": "chat",
                    "completion_params": {},
                },
                "prompt_template": [
                    {
                        "role": "system",
                        "text": "you are a helpful assistant.",
                    },
                    {"role": "user", "text": "Say hello"},
                ],
                "memory": None,
                "context": {"enabled": False},
                "vision": {"enabled": False},
            },
        },
    )

    # Create mock LLM result with thinking tags
    mock_usage = LLMUsage(
        prompt_tokens=10,
        prompt_unit_price=Decimal("0.001"),
        prompt_price_unit=Decimal("1000"),
        prompt_price=Decimal("0.00001"),
        completion_tokens=15,
        completion_unit_price=Decimal("0.002"),
        completion_price_unit=Decimal("1000"),
        completion_price=Decimal("0.00003"),
        total_tokens=25,
        total_price=Decimal("0.00004"),
        currency="USD",
        latency=0.3,
    )

    # Mock response with thinking tags (simulating Qwen reasoning behavior)
    mock_message = AssistantPromptMessage(
        content="<think>Let me think about this greeting...</think>Hello! How can I help you today?"
    )

    mock_llm_result = LLMResult(
        model="qwen/qwen-2.5-72b-instruct",
        prompt_messages=[],
        message=mock_message,
        usage=mock_usage,
    )

    mock_model_instance = MagicMock()
    mock_model_instance.invoke_llm.return_value = mock_llm_result

    mock_model_config = MagicMock()
    mock_model_config.mode = "chat"
    mock_model_config.provider = "langgenius/openrouter"
    mock_model_config.model = "qwen/qwen-2.5-72b-instruct"
    mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"

    def mock_fetch_model_config_func(_node_data_model):
        return mock_model_instance, mock_model_config

    def mock_get_model_instance(_self, **kwargs):
        return mock_model_instance

    with (
        patch.object(llm_node_module, "LLM_NODE_THINKING_TAGS_ENABLED", flag_value),
        patch.object(node_instance, "_fetch_model_config", mock_fetch_model_config_func),
        patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance),
    ):
        # Execute node
        result = node_instance._run()
        assert isinstance(result, Generator)
        # The generator is lazy: drain it while the patches are still active.
        completed_events = [item for item in result if isinstance(item, RunCompletedEvent)]

    # Without this check the test would pass vacuously if the node never completed.
    assert completed_events, "expected the node run to emit a RunCompletedEvent"

    # Verify behavior based on the parameter
    for item in completed_events:
        assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
        output_text = item.run_result.outputs.get("text")
        assert output_text is not None

        if should_preserve_tags:
            # Verify thinking tags are preserved when enabled
            assert "<think>" in output_text
            assert "</think>" in output_text
            assert "Let me think about this greeting..." in output_text
            assert "Hello! How can I help you today?" in output_text
        else:
            # Verify thinking tags are removed when disabled
            assert "<think>" not in output_text
            assert "</think>" not in output_text
            assert "Hello! How can I help you today?" in output_text
            # Verify thinking content is not in output
            assert "Let me think about this greeting..." not in output_text

@ -1,4 +1,6 @@
import base64 import base64
import importlib
import os
import uuid import uuid
from collections.abc import Sequence from collections.abc import Sequence
from typing import Optional from typing import Optional
@ -6,6 +8,7 @@ from unittest import mock
import pytest import pytest
import core.workflow.nodes.llm.node
from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import CustomConfiguration, SystemConfiguration from core.entities.provider_entities import CustomConfiguration, SystemConfiguration
@ -663,3 +666,104 @@ class TestSaveMultimodalOutputAndConvertResultToMarkdown:
assert list(gen) == [] assert list(gen) == []
mock_file_saver.save_binary_string.assert_not_called() mock_file_saver.save_binary_string.assert_not_called()
mock_file_saver.save_remote_url.assert_not_called() mock_file_saver.save_remote_url.assert_not_called()
class TestThinkingTagsRemoval:
    """Test cases for thinking tags removal functionality in LLM Node."""

    @pytest.fixture
    def restore_llm_node_module(self):
        """Undo the effects of ``importlib.reload`` on the LLM node module.

        Reloading re-executes the module inside its existing namespace, which
        rebinds its classes and the ``LLM_NODE_THINKING_TAGS_ENABLED`` constant.
        ``mock.patch.dict`` snapshots that namespace and restores it on exit, so
        the reload cannot leak into other tests or make results order-dependent.
        """
        with mock.patch.dict(vars(core.workflow.nodes.llm.node)):
            yield

    def test_remove_single_thinking_tag(self, llm_node):
        """Test removal of single thinking tag block."""
        input_text = "<think>This is my thinking process</think>Hello, how can I help you?"
        expected = "Hello, how can I help you?"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_remove_multiple_thinking_tags(self, llm_node):
        """Test removal of multiple thinking tag blocks."""
        input_text = "<think>First thought</think>Hello<think>Second thought</think> World!"
        expected = "Hello World!"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_remove_multiline_thinking_tag(self, llm_node):
        """Test removal of multiline thinking tag blocks."""
        input_text = """<think>
This is a multiline
thinking process
with multiple lines
</think>Final answer here."""
        expected = "Final answer here."
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_case_insensitive_removal(self, llm_node):
        """Test case-insensitive thinking tag removal."""
        input_text = "<THINK>Uppercase thinking</THINK>Response"
        expected = "Response"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_no_thinking_tags(self, llm_node):
        """Test text without thinking tags remains unchanged."""
        input_text = "Hello, this is a normal response without thinking tags."
        expected = input_text
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_empty_string(self, llm_node):
        """Test empty string handling."""
        input_text = ""
        expected = ""
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_only_thinking_tag(self, llm_node):
        """Test string with only thinking tag."""
        input_text = "<think>Only thinking, no response</think>"
        expected = ""
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_whitespace_handling(self, llm_node):
        """Test proper whitespace handling after tag removal."""
        input_text = "<think>Thinking</think> Response with spaces"
        expected = "Response with spaces"
        result = llm_node._remove_thinking_tags(input_text)
        assert result == expected

    def test_none_input(self, llm_node):
        """Test None input handling."""
        result = llm_node._remove_thinking_tags(None)
        assert result is None

    def test_non_string_input(self, llm_node):
        """Test non-string input handling."""
        result = llm_node._remove_thinking_tags(123)
        assert result == 123

    @mock.patch.dict("os.environ", {"LLM_NODE_THINKING_TAGS_ENABLED": "true"})
    def test_environment_variable_enabled(self, restore_llm_node_module):
        """Test that environment variable is properly read when enabled."""
        importlib.reload(core.workflow.nodes.llm.node)
        assert core.workflow.nodes.llm.node.LLM_NODE_THINKING_TAGS_ENABLED is True

    @mock.patch.dict("os.environ", {"LLM_NODE_THINKING_TAGS_ENABLED": "false"})
    def test_environment_variable_disabled(self, restore_llm_node_module):
        """Test that environment variable is properly read when disabled."""
        importlib.reload(core.workflow.nodes.llm.node)
        assert core.workflow.nodes.llm.node.LLM_NODE_THINKING_TAGS_ENABLED is False

    def test_environment_variable_default(self, restore_llm_node_module):
        """Test that environment variable defaults to True."""
        with mock.patch.dict("os.environ"):
            # Make sure the variable is absent even if the host environment sets it.
            os.environ.pop("LLM_NODE_THINKING_TAGS_ENABLED", None)
            importlib.reload(core.workflow.nodes.llm.node)
            assert core.workflow.nodes.llm.node.LLM_NODE_THINKING_TAGS_ENABLED is True

@ -824,6 +824,10 @@ HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576 HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
HTTP_REQUEST_NODE_SSL_VERIFY=True HTTP_REQUEST_NODE_SSL_VERIFY=True
# LLM node: preserve <think>...</think> blocks in model output (default: true)
# Set to false to strip these blocks (the tags AND the reasoning text inside them)
# from the output of reasoning models such as DeepSeek-R1 and Qwen
LLM_NODE_THINKING_TAGS_ENABLED=true
# Respect X-* headers to redirect clients # Respect X-* headers to redirect clients
RESPECT_XFORWARD_HEADERS_ENABLED=false RESPECT_XFORWARD_HEADERS_ENABLED=false

@ -366,6 +366,7 @@ x-shared-env: &shared-api-worker-env
HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760}
HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576}
HTTP_REQUEST_NODE_SSL_VERIFY: ${HTTP_REQUEST_NODE_SSL_VERIFY:-True} HTTP_REQUEST_NODE_SSL_VERIFY: ${HTTP_REQUEST_NODE_SSL_VERIFY:-True}
LLM_NODE_THINKING_TAGS_ENABLED: ${LLM_NODE_THINKING_TAGS_ENABLED:-true}
RESPECT_XFORWARD_HEADERS_ENABLED: ${RESPECT_XFORWARD_HEADERS_ENABLED:-false} RESPECT_XFORWARD_HEADERS_ENABLED: ${RESPECT_XFORWARD_HEADERS_ENABLED:-false}
SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128} SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128}
SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128} SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128}

Loading…
Cancel
Save