feat: add thinking tags control for reasoning models in LLM node

- Enable the LLM node to keep or strip the thinking tags (<think>...</think>) emitted by reasoning models
(such as DeepSeek-R1 and Qwen), controlled by the LLM_NODE_THINKING_TAGS_ENABLED environment variable.
pull/21897/head
kimtaewoong 11 months ago
parent f71dc0c5a1
commit e1aa04a4da

@ -2,6 +2,8 @@ import base64
import io
import json
import logging
import os
import re
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, cast
@ -96,6 +98,9 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# Controls whether <think>...</think> blocks produced by reasoning models are kept in the LLM node's output text.
# Read once, at import time, from the LLM_NODE_THINKING_TAGS_ENABLED environment variable. Only the value "true"
# (compared case-insensitively) keeps the tags; any other value makes the node strip them.
# Defaults to "true" so existing deployments keep their current output (backward compatibility).
LLM_NODE_THINKING_TAGS_ENABLED = os.getenv("LLM_NODE_THINKING_TAGS_ENABLED", "true").lower() == "true"
class LLMNode(BaseNode[LLMNodeData]):
_node_data_cls = LLMNodeData
@ -374,7 +379,12 @@ class LLMNode(BaseNode[LLMNodeData]):
except OutputParserError as e:
raise LLMNodeError(f"Failed to parse structured output: {e}")
yield ModelInvokeCompletedEvent(text=full_text_buffer.getvalue(), usage=usage, finish_reason=finish_reason)
# Apply thinking tags removal if disabled
result_text = full_text_buffer.getvalue()
if not LLM_NODE_THINKING_TAGS_ENABLED:
result_text = self._remove_thinking_tags(result_text)
yield ModelInvokeCompletedEvent(text=result_text, usage=usage, finish_reason=finish_reason)
def _image_file_to_markdown(self, file: "File", /):
text_chunk = f"![]({file.generate_url()})"
@ -900,8 +910,13 @@ class LLMNode(BaseNode[LLMNodeData]):
for text_part in self._save_multimodal_output_and_convert_result_to_markdown(invoke_result.message.content):
buffer.write(text_part)
# Apply thinking tags removal if disabled
result_text = buffer.getvalue()
if not LLM_NODE_THINKING_TAGS_ENABLED:
result_text = self._remove_thinking_tags(result_text)
return ModelInvokeCompletedEvent(
text=buffer.getvalue(),
text=result_text,
usage=invoke_result.usage,
finish_reason=None,
)
@ -1002,6 +1017,32 @@ class LLMNode(BaseNode[LLMNodeData]):
logger.warning("unknown contents type encountered, type=%s", type(contents))
yield str(contents)
def _remove_thinking_tags(self, text: str) -> str:
"""
Remove thinking tags like <think></think> from the response text.
This handles reasoning models like qwen, deepseek-r1 that include thinking process.
Args:
text: The text content to clean
Returns:
Cleaned text with thinking tags removed
"""
if not isinstance(text, str) or not text.strip():
return text
# Remove <think>...</think> blocks (case-insensitive, multiline)
# Pattern explanation:
# \s* - optional whitespace before <think>
# <think>.*?</think> - the thinking tag block (non-greedy)
# \s* - optional whitespace after </think>
cleaned_text = re.sub(r"\s*<think>.*?</think>\s*", " ", text, flags=re.IGNORECASE | re.DOTALL)
# Clean up multiple spaces and strip
cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
return cleaned_text
def _combine_message_content_with_role(
*, contents: Optional[str | list[PromptMessageContentUnionTypes]] = None, role: PromptMessageRole
@ -1141,4 +1182,4 @@ def _handle_completion_template(
contents=[TextPromptMessageContent(data=result_text)], role=PromptMessageRole.USER
)
prompt_messages.append(prompt_message)
return prompt_messages
return prompt_messages
Loading…
Cancel
Save