From 7c28647cf5ef5140e523c7617df9c75aef1fcea1 Mon Sep 17 00:00:00 2001 From: ytqh Date: Wed, 19 Mar 2025 15:31:50 +0800 Subject: [PATCH] Revert "hide thinking process" This reverts commit 31cea3cd6163c762bc03911413200a3b5deaa88b. --- .../base_app_generate_response_converter.py | 69 +------------------ 1 file changed, 3 insertions(+), 66 deletions(-) diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py index 4295cc5162..be4027132b 100644 --- a/api/core/app/apps/base_app_generate_response_converter.py +++ b/api/core/app/apps/base_app_generate_response_converter.py @@ -1,6 +1,4 @@ -import json import logging -import re from abc import ABC, abstractmethod from collections.abc import Generator, Mapping from typing import Any, Union @@ -20,66 +18,7 @@ class AppGenerateResponseConverter(ABC): response: Union[AppBlockingResponse, Generator[AppStreamResponse, Any, None]], invoke_from: InvokeFrom, ) -> Mapping[str, Any] | Generator[str, None, None]: - if invoke_from == InvokeFrom.SERVICE_API: - if isinstance(response, AppBlockingResponse): - return cls.convert_blocking_full_response(response) - else: - - def _generate_full_response() -> Generator[str, Any, None]: - # Track if we're currently in a thinking process section - is_thinking_process = False - thinking_tag = "" - # Buffer to accumulate thinking process content - thinking_buffer = "" - - for chunk in cls.convert_stream_full_response(response): - - if chunk == "ping": - yield f"event: {chunk}\n\n" - continue - - try: - chunk_dict = json.loads(chunk) - except json.JSONDecodeError: - yield f"data: {chunk}\n\n" - continue - - if ( - not is_thinking_process - and "data" in chunk_dict - and "inputs" in chunk_dict["data"] - and chunk_dict["data"]["inputs"] is not None - and "thinking_tag" in chunk_dict["data"]["inputs"] - ): - is_thinking_process = True - elif not is_thinking_process: - yield f"data: {chunk}\n\n" - continue - - if not "answer" in chunk_dict: - yield f"data: {chunk}\n\n" - continue - - # extract message text from chunk - message_text = chunk_dict["answer"] - - thinking_buffer += message_text - - if not thinking_tag in thinking_buffer: # thinking process end, return as usual - continue - - # remove tags and its content - remaining_buffer = thinking_buffer.split(thinking_tag, 1)[1].strip() - thinking_tag = None - is_thinking_process = False - - chunk_dict["answer"] = remaining_buffer - chunk = json.dumps(chunk_dict) - - yield f"data: {chunk}\n\n" - - return _generate_full_response() - elif invoke_from == InvokeFrom.DEBUGGER: + if invoke_from in {InvokeFrom.DEBUGGER, InvokeFrom.SERVICE_API}: if isinstance(response, AppBlockingResponse): return cls.convert_blocking_full_response(response) else: @@ -174,10 +113,8 @@ class AppGenerateResponseConverter(ABC): ProviderTokenNotInitError: {"code": "provider_not_initialize", "status": 400}, QuotaExceededError: { "code": "provider_quota_exceeded", - "message": ( - "Your quota for Dify Hosted Model Provider has been exhausted. " - "Please go to Settings -> Model Provider to complete your own provider credentials." - ), + "message": "Your quota for Dify Hosted Model Provider has been exhausted. " + "Please go to Settings -> Model Provider to complete your own provider credentials.", "status": 400, }, ModelCurrentlyNotSupportError: {"code": "model_currently_not_support", "status": 400},