text extractor tool
parent
67b1190535
commit
bc7cc06572
Binary file not shown.
|
Before Width: | Height: | Size: 153 KiB |
@ -1,20 +0,0 @@
|
|||||||
from typing import Any
|
|
||||||
|
|
||||||
from core.tools.errors import ToolProviderCredentialValidationError
|
|
||||||
from core.tools.provider.builtin.dalle.tools.dalle2 import DallE2Tool
|
|
||||||
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
|
|
||||||
|
|
||||||
|
|
||||||
class DALLEProvider(BuiltinToolProviderController):
    """Built-in tool provider for DALL-E."""

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """
        Validate the credentials by invoking the DALL-E 2 tool with them.

        :param credentials: provider credentials handed to the tool runtime
        :raises ToolProviderCredentialValidationError: when the probe invocation
            raises anything; the original exception is attached as the cause
        """
        # Fixed probe request: one small image.
        probe_parameters = {
            "prompt": "cute girl, blue eyes, white hair, anime style",
            "size": "small",
            "n": 1,
        }
        try:
            DallE2Tool().fork_tool_runtime(
                runtime={
                    "credentials": credentials,
                }
            ).invoke(
                user_id="",
                tool_parameters=probe_parameters,
            )
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible in tracebacks.
            raise ToolProviderCredentialValidationError(str(e)) from e
|
|
||||||
@ -1,61 +0,0 @@
|
|||||||
# Provider manifest for the DALL-E tool: identity metadata plus the
# credential fields requested from the user.
identity:
  author: Dify
  name: dalle
  label:
    en_US: DALL-E
    zh_Hans: DALL-E 绘画
    pt_BR: DALL-E
  description:
    en_US: DALL-E art
    zh_Hans: DALL-E 绘画
    pt_BR: DALL-E art
  icon: icon.png
  tags:
    - image
    - productivity
credentials_for_provider:
  # Required secret field.
  openai_api_key:
    type: secret-input
    required: true
    label:
      en_US: OpenAI API key
      zh_Hans: OpenAI API key
      pt_BR: OpenAI API key
    help:
      en_US: Please input your OpenAI API key
      zh_Hans: 请输入你的 OpenAI API key
      pt_BR: Please input your OpenAI API key
    placeholder:
      en_US: Please input your OpenAI API key
      zh_Hans: 请输入你的 OpenAI API key
      pt_BR: Please input your OpenAI API key
  # Optional plain-text field.
  openai_organization_id:
    type: text-input
    required: false
    label:
      en_US: OpenAI organization ID
      zh_Hans: OpenAI organization ID
      pt_BR: OpenAI organization ID
    help:
      en_US: Please input your OpenAI organization ID
      zh_Hans: 请输入你的 OpenAI organization ID
      pt_BR: Please input your OpenAI organization ID
    placeholder:
      en_US: Please input your OpenAI organization ID
      zh_Hans: 请输入你的 OpenAI organization ID
      pt_BR: Please input your OpenAI organization ID
  # Optional plain-text field.
  openai_base_url:
    type: text-input
    required: false
    label:
      en_US: OpenAI base URL
      zh_Hans: OpenAI base URL
      pt_BR: OpenAI base URL
    help:
      en_US: Please input your OpenAI base URL
      zh_Hans: 请输入你的 OpenAI base URL
      pt_BR: Please input your OpenAI base URL
    placeholder:
      en_US: Please input your OpenAI base URL
      zh_Hans: 请输入你的 OpenAI base URL
      pt_BR: Please input your OpenAI base URL
|
|
||||||
@ -1,32 +0,0 @@
|
|||||||
from base64 import b64decode
|
|
||||||
from typing import Any, Union
|
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
from yarl import URL
|
|
||||||
from core.file.enums import FileType
|
|
||||||
|
|
||||||
from core.file.file_manager import download
|
|
||||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
|
||||||
from core.tools.errors import ToolParameterValidationError
|
|
||||||
from core.tools.tool.builtin_tool import BuiltinTool
|
|
||||||
|
|
||||||
|
|
||||||
# Tool that validates a "file" parameter as a document and downloads it.
# NOTE(review): the success path ends in `return result`, but no `result` is
# assigned anywhere in this block, and `file_binary` is never used after the
# download. As written, a valid document would hit a NameError. The intended
# return value cannot be determined from this block; confirm before reuse.
class FileExtractorTool(BuiltinTool):
|
|
||||||
def _invoke(
|
|
||||||
self,
|
|
||||||
user_id: str,
|
|
||||||
tool_parameters: dict[str, Any],
|
|
||||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
|
||||||
"""
|
|
||||||
invoke tools
|
|
||||||
"""
|
|
||||||
# read the "file" parameter; it is checked against FileType.DOCUMENT below
|
|
||||||
file = tool_parameters.get("file")
|
|
||||||
if file and file.type != FileType.DOCUMENT:
|
|
||||||
raise ToolParameterValidationError("Not a valid document")
|
|
||||||
|
|
||||||
if file:
|
|
||||||
# NOTE(review): the downloaded content is stored but never read afterwards
file_binary = download(file)
|
|
||||||
else:
|
|
||||||
# reached when the "file" parameter is missing or falsy
raise ToolParameterValidationError("Please provide either file")
|
|
||||||
# NOTE(review): `result` is undefined in this block; see the note at the top
return result
|
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 4.3 KiB |
@ -0,0 +1,10 @@
|
|||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from core.tools.errors import ToolProviderCredentialValidationError
|
||||||
|
from core.tools.provider.builtin.dalle.tools.dalle2 import DallE2Tool
|
||||||
|
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
|
||||||
|
|
||||||
|
|
||||||
|
class FileExtractorProvider(BuiltinToolProviderController):
    """Built-in tool provider for the file extractor."""

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """
        Accept any credentials; no validation is performed.

        :param credentials: provider credentials (ignored)
        """
        return None
|
||||||
@ -0,0 +1,15 @@
|
|||||||
|
# Provider manifest for the file extractor tool (identity metadata only).
identity:
  author: Jyong
  name: file_extractor
  label:
    en_US: File Extractor
    zh_Hans: 文件提取
    pt_BR: File Extractor
  description:
    en_US: Extract text from file
    zh_Hans: 从文件中提取文本
    pt_BR: Extract text from file
  icon: icon.png
  tags:
    - utilities
    - productivity
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
from base64 import b64decode
|
||||||
|
import tempfile
|
||||||
|
from typing import Any, Union
|
||||||
|
|
||||||
|
from openai import OpenAI
|
||||||
|
from yarl import URL
|
||||||
|
from core.file.enums import FileType
|
||||||
|
|
||||||
|
from core.file.file_manager import download_to_target_path
|
||||||
|
from core.rag.extractor.text_extractor import TextExtractor
|
||||||
|
from core.rag.splitter.fixed_text_splitter import FixedRecursiveCharacterTextSplitter
|
||||||
|
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||||
|
from core.tools.errors import ToolParameterValidationError
|
||||||
|
from core.tools.tool.builtin_tool import BuiltinTool
|
||||||
|
|
||||||
|
|
||||||
|
class FileExtractorTool(BuiltinTool):
    """Built-in tool that extracts the text of a document file and splits it into chunks."""

    def _invoke(
        self,
        user_id: str,
        tool_parameters: dict[str, Any],
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Extract text from the ``text_file`` parameter and return it in chunks.

        :param user_id: id of the invoking user (not read by this method)
        :param tool_parameters: reads ``text_file`` (the document to process),
            ``max_token`` (chunk size, default 500) and ``separator``
            (fixed separator, default is two newlines)
        :return: the message produced by ``create_json_message`` from the
            JSON-encoded list of chunk texts
        :raises ToolParameterValidationError: when no file is given or the
            file's type is not ``FileType.DOCUMENT``
        """
        # Imported locally because json is not among the imports visible at the
        # top of this module; without it the success path fails with NameError.
        import json

        # Document file supplied by the caller.
        file = tool_parameters.get("text_file")
        if not file:
            raise ToolParameterValidationError("Please provide either file")
        if file.type != FileType.DOCUMENT:
            raise ToolParameterValidationError("Not a valid document")

        # Extraction and splitting both happen inside the ``with`` so the
        # downloaded file still exists while it is being read.
        with tempfile.TemporaryDirectory() as temp_dir:
            # presumably returns the path of the downloaded copy — TODO confirm
            file_path = download_to_target_path(file, temp_dir)
            extractor = TextExtractor(file_path, autodetect_encoding=True)
            documents = extractor.extract()
            character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder(
                chunk_size=tool_parameters.get("max_token", 500),
                chunk_overlap=0,
                fixed_separator=tool_parameters.get("separator", "\n\n"),
                separators=["\n\n", "。", ". ", " ", ""],
                embedding_model_instance=None,
            )
            chunks = character_splitter.split_documents(documents)
            return self.create_json_message(json.dumps([chunk.page_content for chunk in chunks]))
|
||||||
Loading…
Reference in New Issue