From f3c8625fe27527386cfd0f8b8f3b538b93929f98 Mon Sep 17 00:00:00 2001
From: Novice <857526207@qq.com>
Date: Thu, 3 Jul 2025 14:40:47 +0800
Subject: [PATCH 1/6] fix: The statistics page cannot display the tokens
 consumed by agent node (#21861)

---
 .../model_runtime/entities/llm_entities.py    | 31 +++++++++++++++++++
 api/core/workflow/nodes/tool/tool_node.py     |  9 ++++--
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/api/core/model_runtime/entities/llm_entities.py b/api/core/model_runtime/entities/llm_entities.py
index e52b0eba55..ace2c1f770 100644
--- a/api/core/model_runtime/entities/llm_entities.py
+++ b/api/core/model_runtime/entities/llm_entities.py
@@ -53,6 +53,37 @@ class LLMUsage(ModelUsage):
             latency=0.0,
         )
 
+    @classmethod
+    def from_metadata(cls, metadata: dict) -> "LLMUsage":
+        """
+        Create LLMUsage instance from metadata dictionary with default values.
+
+        If only total_tokens is positive, it is also reported as completion_tokens.
+
+        Args:
+            metadata: Dictionary containing usage metadata; missing or None entries use the defaults
+        """
+        usage = {key: value for key, value in metadata.items() if value is not None}  # None counts as missing
+        total_tokens = usage.get("total_tokens", 0)
+        completion_tokens = usage.get("completion_tokens", 0)
+        if total_tokens > 0 and completion_tokens == 0:
+            completion_tokens = total_tokens
+
+        return cls(
+            prompt_tokens=usage.get("prompt_tokens", 0),
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+            prompt_unit_price=Decimal(str(usage.get("prompt_unit_price", 0))),
+            completion_unit_price=Decimal(str(usage.get("completion_unit_price", 0))),
+            total_price=Decimal(str(usage.get("total_price", 0))),
+            currency=usage.get("currency", "USD"),
+            prompt_price_unit=Decimal(str(usage.get("prompt_price_unit", 0))),
+            completion_price_unit=Decimal(str(usage.get("completion_price_unit", 0))),
+            prompt_price=Decimal(str(usage.get("prompt_price", 0))),
+            completion_price=Decimal(str(usage.get("completion_price", 0))),
+            latency=usage.get("latency", 0.0),
+        )
+
     def plus(self, other: "LLMUsage") -> "LLMUsage":
         """
         Add two LLMUsage instances together.
diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py
index 4d15d78a95..a4be02d863 100644
--- a/api/core/workflow/nodes/tool/tool_node.py
+++ b/api/core/workflow/nodes/tool/tool_node.py
@@ -6,6 +6,7 @@ from sqlalchemy.orm import Session
 
 from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler
 from core.file import File, FileTransferMethod
+from core.model_runtime.entities.llm_entities import LLMUsage
 from core.plugin.impl.exc import PluginDaemonClientSideError
 from core.plugin.impl.plugin import PluginInstaller
 from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter
@@ -208,7 +209,7 @@ class ToolNode(BaseNode[ToolNodeData]):
 
         agent_logs: list[AgentLogEvent] = []
         agent_execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] = {}
-
+        llm_usage: LLMUsage | None = None
         variables: dict[str, Any] = {}
 
         for message in message_stream:
@@ -276,9 +277,10 @@ class ToolNode(BaseNode[ToolNodeData]):
             elif message.type == ToolInvokeMessage.MessageType.JSON:
                 assert isinstance(message.message, ToolInvokeMessage.JsonMessage)
                 if self.node_type == NodeType.AGENT:
-                    msg_metadata = message.message.json_object.pop("execution_metadata", {})
+                    msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {})
+                    llm_usage = LLMUsage.from_metadata(msg_metadata)
                     agent_execution_metadata = {
-                        key: value
+                        WorkflowNodeExecutionMetadataKey(key): value
                         for key, value in msg_metadata.items()
                         if key in WorkflowNodeExecutionMetadataKey.__members__.values()
                     }
@@ -377,6 +379,7 @@ class ToolNode(BaseNode[ToolNodeData]):
                     WorkflowNodeExecutionMetadataKey.AGENT_LOG: agent_logs,
                 },
                 inputs=parameters_for_log,
+                llm_usage=llm_usage,
             )
         )
 

From 47954aa28469f2913b9eecb53c88b080ca3fab81 Mon Sep 17 00:00:00 2001
From: cutiechi <superchijinpeng@gmail.com>
Date: Thu, 3 Jul 2025 14:50:53 +0800
Subject: [PATCH 2/6] feat(api): validate and reject external datasets in
 document update (#21783)

---
 api/controllers/service_api/dataset/document.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py
index d64d9df059..d571b21a0a 100644
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@@ -211,6 +211,9 @@ class DocumentAddByFileApi(DatasetApiResource):
         if not dataset:
             raise ValueError("Dataset does not exist.")
 
+        if dataset.provider == "external":
+            raise ValueError("External datasets are not supported.")
+
         indexing_technique = args.get("indexing_technique") or dataset.indexing_technique
         if not indexing_technique:
             raise ValueError("indexing_technique is required.")
@@ -301,6 +304,9 @@ class DocumentUpdateByFileApi(DatasetApiResource):
         if not dataset:
             raise ValueError("Dataset does not exist.")
 
+        if dataset.provider == "external":
+            raise ValueError("External datasets are not supported.")
+
         # indexing_technique is already set in dataset since this is an update
         args["indexing_technique"] = dataset.indexing_technique
 

From 06c3deff11511c47bd302f97914cbbbe73c1f35a Mon Sep 17 00:00:00 2001
From: GuanMu <ballmanjq@gmail.com>
Date: Thu, 3 Jul 2025 16:07:07 +0800
Subject: [PATCH 3/6] Fix: Add title attribute to edit time text for improved
 accessibility (#21871)

---
 web/app/(commonLayout)/apps/AppCard.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/app/(commonLayout)/apps/AppCard.tsx b/web/app/(commonLayout)/apps/AppCard.tsx
index b7c9a2eee4..f50cc10520 100644
--- a/web/app/(commonLayout)/apps/AppCard.tsx
+++ b/web/app/(commonLayout)/apps/AppCard.tsx
@@ -339,7 +339,7 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => {
             <div className='flex items-center gap-1 text-[10px] font-medium leading-[18px] text-text-tertiary'>
               <div className='truncate' title={app.author_name}>{app.author_name}</div>
               <div>·</div>
-              <div className='truncate'>{EditTimeText}</div>
+              <div className='truncate' title={EditTimeText}>{EditTimeText}</div>
             </div>
           </div>
           <div className='flex h-5 w-5 shrink-0 items-center justify-center'>

From ed54bd5121342f154227b5794232ddba6cdb3c27 Mon Sep 17 00:00:00 2001
From: Joel <iamjoel007@gmail.com>
Date: Thu, 3 Jul 2025 16:43:11 +0800
Subject: [PATCH 4/6] fix: search marketplace plugins only when marketplace is enabled (#21880)

---
 .../components/workflow/block-selector/all-tools.tsx   |  2 +-
 .../nodes/_base/components/agent-strategy-selector.tsx | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/web/app/components/workflow/block-selector/all-tools.tsx b/web/app/components/workflow/block-selector/all-tools.tsx
index 36831aee3c..e57a6bd3f7 100644
--- a/web/app/components/workflow/block-selector/all-tools.tsx
+++ b/web/app/components/workflow/block-selector/all-tools.tsx
@@ -90,7 +90,7 @@ const AllTools = ({
   const { enable_marketplace } = useGlobalPublicStore(s => s.systemFeatures)
 
   useEffect(() => {
-    if (enable_marketplace) return
+    if (!enable_marketplace) return
     if (searchText || tags.length > 0) {
       fetchPlugins({
         query: searchText,
diff --git a/web/app/components/workflow/nodes/_base/components/agent-strategy-selector.tsx b/web/app/components/workflow/nodes/_base/components/agent-strategy-selector.tsx
index dd6a1c6a22..f262ae7e34 100644
--- a/web/app/components/workflow/nodes/_base/components/agent-strategy-selector.tsx
+++ b/web/app/components/workflow/nodes/_base/components/agent-strategy-selector.tsx
@@ -22,6 +22,7 @@ import type { ListRef } from '@/app/components/workflow/block-selector/market-pl
 import PluginList, { type ListProps } from '@/app/components/workflow/block-selector/market-place-plugin/list'
 import { useMarketplacePlugins } from '@/app/components/plugins/marketplace/hooks'
 import { ToolTipContent } from '@/app/components/base/tooltip/content'
+import { useGlobalPublicStore } from '@/context/global-public-context'
 
 const DEFAULT_TAGS: ListProps['tags'] = []
 
@@ -131,7 +132,10 @@ export const AgentStrategySelector = memo((props: AgentStrategySelectorProps) =>
     plugins: notInstalledPlugins = [],
   } = useMarketplacePlugins()
 
+  const { enable_marketplace } = useGlobalPublicStore(s => s.systemFeatures)
+
   useEffect(() => {
+    if (!enable_marketplace) return
     if (query) {
       fetchPlugins({
         query,
@@ -158,7 +162,7 @@ export const AgentStrategySelector = memo((props: AgentStrategySelectorProps) =>
           alt='icon'
         /></div>}
         <p
-          className={classNames(value ? 'text-components-input-text-filled' : 'text-components-input-text-placeholder', 'text-xs px-1')}
+          className={classNames(value ? 'text-components-input-text-filled' : 'text-components-input-text-placeholder', 'px-1 text-xs')}
         >
           {value?.agent_strategy_label || t('workflow.nodes.agent.strategy.selectTip')}
         </p>
@@ -215,7 +219,8 @@ export const AgentStrategySelector = memo((props: AgentStrategySelectorProps) =>
             }}
             className='h-full max-h-full max-w-none overflow-y-auto'
             indexBarClassName='top-0 xl:top-36' showWorkflowEmpty={false} hasSearchText={false} />
-          <PluginList
+          {enable_marketplace
+          && <PluginList
             ref={pluginRef}
             wrapElemRef={wrapElemRef}
             list={notInstalledPlugins}
@@ -223,6 +228,7 @@ export const AgentStrategySelector = memo((props: AgentStrategySelectorProps) =>
             tags={DEFAULT_TAGS}
             disableMaxWidth
           />
+          }
         </main>
       </div>
     </PortalToFollowElemContent>

From 1c7404099ddce2baf3391393ce01e655b33e31b3 Mon Sep 17 00:00:00 2001
From: baonudesifeizhai <85092850+baonudesifeizhai@users.noreply.github.com>
Date: Thu, 3 Jul 2025 05:06:49 -0400
Subject: [PATCH 5/6] fix: prevent timeout in file encoding detection for large
 files (#21453)

Co-authored-by: crazywoola <427733928@qq.com>
---
 api/core/rag/extractor/helpers.py        | 10 +++++++---
 api/core/rag/extractor/text_extractor.py |  6 +++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/api/core/rag/extractor/helpers.py b/api/core/rag/extractor/helpers.py
index 69ca9d5d63..3d2fb55d9a 100644
--- a/api/core/rag/extractor/helpers.py
+++ b/api/core/rag/extractor/helpers.py
@@ -1,7 +1,6 @@
 """Document loader helpers."""
 
 import concurrent.futures
-from pathlib import Path
 from typing import NamedTuple, Optional, cast
 
 
@@ -16,7 +15,7 @@ class FileEncoding(NamedTuple):
     """The language of the file."""
 
 
-def detect_file_encodings(file_path: str, timeout: int = 5) -> list[FileEncoding]:
+def detect_file_encodings(file_path: str, timeout: int = 5, sample_size: int = 1024 * 1024) -> list[FileEncoding]:
     """Try to detect the file encoding.
 
     Returns a list of `FileEncoding` tuples with the detected encodings ordered
@@ -25,11 +24,16 @@ def detect_file_encodings(file_path: str, timeout: int = 5) -> list[FileEncoding
     Args:
         file_path: The path to the file to detect the encoding for.
         timeout: The timeout in seconds for the encoding detection.
+        sample_size: The number of bytes to read for encoding detection. Default is 1MB.
+                    For large files, reading only a sample is sufficient and prevents timeout.
     """
     import chardet
 
     def read_and_detect(file_path: str) -> list[dict]:
-        rawdata = Path(file_path).read_bytes()
+        with open(file_path, "rb") as f:
+            # Read only a sample of the file for encoding detection
+            # This prevents timeout on large files while still providing accurate encoding detection
+            rawdata = f.read(sample_size)
         return cast(list[dict], chardet.detect_all(rawdata))
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
diff --git a/api/core/rag/extractor/text_extractor.py b/api/core/rag/extractor/text_extractor.py
index b2b51d71d7..a00d328cb1 100644
--- a/api/core/rag/extractor/text_extractor.py
+++ b/api/core/rag/extractor/text_extractor.py
@@ -36,8 +36,12 @@ class TextExtractor(BaseExtractor):
                         break
                     except UnicodeDecodeError:
                         continue
+                else:
+                    raise RuntimeError(
+                        f"Decode failed: {self._file_path}, all detected encodings failed. Original error: {e}"
+                    )
             else:
-                raise RuntimeError(f"Error loading {self._file_path}") from e
+                raise RuntimeError(f"Decode failed: {self._file_path}, specified encoding failed. Original error: {e}")
         except Exception as e:
             raise RuntimeError(f"Error loading {self._file_path}") from e
 

From a79f37b686ca4a487df4311f413cc90e52e7af44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?= <hjlarry@163.com>
Date: Thu, 3 Jul 2025 17:10:01 +0800
Subject: [PATCH 6/6] fix: tts tool must choose a voice (#21877)

---
 api/core/tools/builtin_tool/providers/audio/tools/tts.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/api/core/tools/builtin_tool/providers/audio/tools/tts.py b/api/core/tools/builtin_tool/providers/audio/tools/tts.py
index 9b104b00f5..f191968812 100644
--- a/api/core/tools/builtin_tool/providers/audio/tools/tts.py
+++ b/api/core/tools/builtin_tool/providers/audio/tools/tts.py
@@ -31,6 +31,14 @@ class TTSTool(BuiltinTool):
             model_type=ModelType.TTS,
             model=model,
         )
+        if not voice:
+            voices = model_instance.get_tts_voices()
+            if voices:
+                voice = voices[0].get("value")
+                if not voice:
+                    raise ValueError("Sorry, no voice available.")
+            else:
+                raise ValueError("Sorry, no voice available.")
         tts = model_instance.invoke_tts(
             content_text=tool_parameters.get("text"),  # type: ignore
             user=user_id,