chore: remove model as tool (#4409)
parent
1d0f88264f
commit
182dadd433
@ -1,20 +0,0 @@
|
||||
# Model-as-tool manifest for the Anthropic provider.
# Lists the Claude 3 models exposed as tools (labels suggest vision use;
# confirm against the loader) and the property naming their image parameter.
provider: anthropic
label:
  en_US: Anthropic Model Tools
  zh_Hans: Anthropic 模型能力
  pt_BR: Anthropic Model Tools
models:
  - type: llm
    model: claude-3-sonnet-20240229
    label:
      zh_Hans: Claude3 Sonnet 视觉
      en_US: Claude3 Sonnet Vision
    properties:
      # name of the tool parameter that carries the uploaded image id
      image_parameter_name: image_id
  - type: llm
    model: claude-3-opus-20240229
    label:
      zh_Hans: Claude3 Opus 视觉
      en_US: Claude3 Opus Vision
    properties:
      image_parameter_name: image_id
|
||||
@ -1,13 +0,0 @@
|
||||
# Model-as-tool manifest for the Google provider.
# Exposes gemini-pro-vision as a tool; `image_parameter_name` names the tool
# parameter carrying the uploaded image id.
provider: google
label:
  en_US: Google Model Tools
  zh_Hans: Google 模型能力
  pt_BR: Google Model Tools
models:
  - type: llm
    model: gemini-pro-vision
    label:
      zh_Hans: Gemini Pro 视觉
      en_US: Gemini Pro Vision
    properties:
      image_parameter_name: image_id
|
||||
@ -1,13 +0,0 @@
|
||||
# Model-as-tool manifest for the OpenAI provider.
# Exposes gpt-4-vision-preview as a tool; `image_parameter_name` names the
# tool parameter carrying the uploaded image id.
provider: openai
label:
  en_US: OpenAI Model Tools
  zh_Hans: OpenAI 模型能力
  pt_BR: OpenAI Model Tools
models:
  - type: llm
    model: gpt-4-vision-preview
    label:
      zh_Hans: GPT-4 视觉
      en_US: GPT-4 Vision
    properties:
      image_parameter_name: image_id
|
||||
@ -1,13 +0,0 @@
|
||||
# Model-as-tool manifest for the ZhipuAI provider.
# Exposes glm-4v as a tool; `image_parameter_name` names the tool parameter
# carrying the uploaded image id.
provider: zhipuai
label:
  en_US: ZhipuAI Model Tools
  zh_Hans: ZhipuAI 模型能力
  pt_BR: ZhipuAI Model Tools
models:
  - type: llm
    model: glm-4v
    label:
      zh_Hans: GLM-4 视觉
      en_US: GLM-4 Vision
    properties:
      image_parameter_name: image_id
|
||||
@ -1,244 +0,0 @@
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
from core.entities.model_entities import ModelStatus
|
||||
from core.errors.error import ProviderTokenNotInitError
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.entities.model_entities import ModelFeature, ModelType
|
||||
from core.provider_manager import ProviderConfiguration, ProviderManager, ProviderModelBundle
|
||||
from core.tools.entities.common_entities import I18nObject
|
||||
from core.tools.entities.tool_entities import (
|
||||
ModelToolPropertyKey,
|
||||
ToolDescription,
|
||||
ToolIdentity,
|
||||
ToolParameter,
|
||||
ToolProviderCredentials,
|
||||
ToolProviderIdentity,
|
||||
ToolProviderType,
|
||||
)
|
||||
from core.tools.errors import ToolNotFoundError
|
||||
from core.tools.provider.tool_provider import ToolProviderController
|
||||
from core.tools.tool.model_tool import ModelTool
|
||||
from core.tools.tool.tool import Tool
|
||||
from core.tools.utils.configuration import ModelToolConfigurationManager
|
||||
|
||||
|
||||
class ModelToolProviderController(ToolProviderController):
    """
    Tool provider that exposes a model provider's vision-capable LLMs as tools.

    Wraps a ``ProviderConfiguration`` and builds one ``ModelTool`` per LLM of
    that provider that advertises the VISION feature.
    """
    # the wrapped model provider configuration (None until loaded)
    configuration: ProviderConfiguration = None
    # True only when every model of the provider has ACTIVE status
    is_active: bool = False

    def __init__(self, configuration: ProviderConfiguration = None, **kwargs):
        """
        Init the provider.

        :param configuration: the model provider configuration to wrap
        """
        super().__init__(**kwargs)
        self.configuration = configuration

    @staticmethod
    def from_db(configuration: ProviderConfiguration = None) -> 'ModelToolProviderController':
        """
        Build a controller from a provider configuration loaded from the db.

        :param configuration: the configuration of the provider
        :return: the controller, or None when no configuration is given
        :raises RuntimeError: when no model-tool configuration exists for the provider
        """
        if configuration is None:
            return None

        # the provider is active only if every one of its models is active
        is_active = all(
            model.status == ModelStatus.ACTIVE
            for model in configuration.get_provider_models()
        )

        # get the static model-tool configuration for this provider
        model_tool_configuration = ModelToolConfigurationManager.get_configuration(configuration.provider.provider)
        if model_tool_configuration is None:
            raise RuntimeError(f'no configuration found for provider {configuration.provider.provider}')

        # resolve the display label: prefer the model-tool configuration's
        # label, falling back to the provider's own label.  (The original code
        # re-assigned the deepcopied label's fields from their own source — a
        # no-op — so a plain deepcopy preserves behavior.)
        if model_tool_configuration.label:
            label = deepcopy(model_tool_configuration.label)
        else:
            label = I18nObject(
                en_US=configuration.provider.label.en_US,
                zh_Hans=configuration.provider.label.zh_Hans
            )

        return ModelToolProviderController(
            is_active=is_active,
            identity=ToolProviderIdentity(
                author='Dify',
                name=configuration.provider.provider,
                description=I18nObject(
                    zh_Hans=f'{label.zh_Hans} 模型能力提供商',
                    en_US=f'{label.en_US} model capability provider'
                ),
                label=I18nObject(
                    zh_Hans=label.zh_Hans,
                    en_US=label.en_US
                ),
                icon=configuration.provider.icon_small.en_US,
            ),
            configuration=configuration,
            credentials_schema={},
        )

    @staticmethod
    def is_configuration_valid(configuration: ProviderConfiguration) -> bool:
        """
        Check if the configuration has a model that can be used as a tool
        (i.e. an LLM with the VISION feature).
        """
        return any(
            model.model_type == ModelType.LLM and ModelFeature.VISION in (model.features or [])
            for model in configuration.get_provider_models()
        )

    def _get_model_tools(self, tenant_id: str = None) -> list[ModelTool]:
        """
        Build the list of vision tools this provider can offer.

        :param tenant_id: tenant to load configurations for; a dummy id is
            used when not given
        :return: list of tools (also cached on ``self.tools``)
        :raises RuntimeError: when no model-tool configuration exists for the provider
        """
        tenant_id = tenant_id or 'ffffffff-ffff-ffff-ffff-ffffffffffff'
        provider_manager = ProviderManager()
        # lazily resolve the configuration from the provider manager
        if self.configuration is None:
            configurations = provider_manager.get_configurations(tenant_id=tenant_id).values()
            self.configuration = next(filter(lambda x: x.provider == self.identity.name, configurations), None)

        tools: list[ModelTool] = []
        if not self.configuration:
            return tools

        configuration = self.configuration

        provider_configuration = ModelToolConfigurationManager.get_configuration(configuration.provider.provider)
        if provider_configuration is None:
            raise RuntimeError(f'no configuration found for provider {configuration.provider.provider}')

        for model in configuration.get_provider_models():
            # skip models without a static tool configuration
            model_configuration = ModelToolConfigurationManager.get_model_configuration(self.configuration.provider.provider, model.model)
            if model_configuration is None:
                continue

            # only vision-capable LLMs are exposed as tools
            if model.model_type == ModelType.LLM and ModelFeature.VISION in (model.features or []):
                provider_instance = configuration.get_provider_instance()
                model_type_instance = provider_instance.get_model_instance(model.model_type)
                provider_model_bundle = ProviderModelBundle(
                    configuration=configuration,
                    provider_instance=provider_instance,
                    model_type_instance=model_type_instance
                )

                # credentials may not be initialized yet; still expose the
                # tool, just without a usable model instance
                try:
                    model_instance = ModelInstance(provider_model_bundle, model.model)
                except ProviderTokenNotInitError:
                    model_instance = None

                tools.append(ModelTool(
                    identity=ToolIdentity(
                        author='Dify',
                        name=model.model,
                        label=model_configuration.label,
                    ),
                    parameters=[
                        ToolParameter(
                            name=ModelToolPropertyKey.IMAGE_PARAMETER_NAME.value,
                            label=I18nObject(zh_Hans='图片ID', en_US='Image ID'),
                            human_description=I18nObject(zh_Hans='图片ID', en_US='Image ID'),
                            type=ToolParameter.ToolParameterType.STRING,
                            form=ToolParameter.ToolParameterForm.LLM,
                            required=True,
                            default=Tool.VARIABLE_KEY.IMAGE.value
                        )
                    ],
                    description=ToolDescription(
                        human=I18nObject(zh_Hans='图生文工具', en_US='Convert image to text'),
                        llm='Vision tool used to extract text and other visual information from images, can be used for OCR, image captioning, etc.',
                    ),
                    is_team_authorization=model.status == ModelStatus.ACTIVE,
                    tool_type=ModelTool.ModelToolType.VISION,
                    model_instance=model_instance,
                    model=model.model,
                ))

        self.tools = tools
        return tools

    def get_credentials_schema(self) -> dict[str, ToolProviderCredentials]:
        """
        Returns the credentials schema of the provider.

        Model tools need no extra credentials, so this is always empty.

        :return: the credentials schema
        """
        return {}

    def get_tools(self, user_id: str, tenant_id: str) -> list[ModelTool]:
        """
        Returns a list of tools that the provider can provide.

        :return: list of tools
        """
        return self._get_model_tools(tenant_id=tenant_id)

    def get_tool(self, tool_name: str) -> ModelTool:
        """
        Get tool by name.

        :param tool_name: the name of the tool
        :return: the tool
        :raises ValueError: when no tool with that name exists
        """
        if self.tools is None:
            self.get_tools(user_id='', tenant_id=self.configuration.tenant_id)

        for tool in self.tools:
            if tool.identity.name == tool_name:
                return tool

        raise ValueError(f'tool {tool_name} not found')

    def get_parameters(self, tool_name: str) -> list[ToolParameter]:
        """
        Returns the parameters of the tool.

        :param tool_name: the name of the tool, defined in `get_tools`
        :return: list of parameters
        :raises ToolNotFoundError: when no tool with that name exists
        """
        # BUG FIX: the original called self.get_tools() with no arguments,
        # but get_tools requires user_id and tenant_id, so this always raised
        # TypeError.  Lazy-load the tool list the same way get_tool() does.
        if self.tools is None:
            self.get_tools(user_id='', tenant_id=self.configuration.tenant_id)
        tool = next(filter(lambda x: x.identity.name == tool_name, self.tools), None)
        if tool is None:
            raise ToolNotFoundError(f'tool {tool_name} not found')
        return tool.parameters

    @property
    def app_type(self) -> ToolProviderType:
        """
        Returns the type of the provider.

        :return: type of the provider
        """
        return ToolProviderType.MODEL

    def validate_credentials(self, credentials: dict[str, Any]) -> None:
        """
        Validate the credentials of the provider.

        No-op: model tools carry no provider-level credentials.

        :param credentials: the credentials of the tool
        """
        pass

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """
        Validate the credentials of the provider.

        No-op: model tools carry no provider-level credentials.

        :param credentials: the credentials of the tool
        """
        pass
|
||||
@ -1,159 +0,0 @@
|
||||
from base64 import b64encode
|
||||
from enum import Enum
|
||||
from typing import Any, cast
|
||||
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
PromptMessageContent,
|
||||
PromptMessageContentType,
|
||||
SystemPromptMessage,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
from core.tools.entities.tool_entities import ModelToolPropertyKey, ToolInvokeMessage, ToolProviderType
|
||||
from core.tools.tool.tool import Tool
|
||||
|
||||
VISION_PROMPT = """## Image Recognition Task
|
||||
### Task Description
|
||||
I require a powerful vision language model for an image recognition task. The model should be capable of extracting various details from the images, including but not limited to text content, layout distribution, color distribution, main subjects, and emotional expressions.
|
||||
### Specific Requirements
|
||||
1. **Text Content Extraction:** Ensure that the model accurately recognizes and extracts text content from the images, regardless of text size, font, or color.
|
||||
2. **Layout Distribution Analysis:** The model should analyze the layout structure of the images, capturing the relationships between various elements and providing detailed information about the image layout.
|
||||
3. **Color Distribution Analysis:** Extract information about color distribution in the images, including primary colors, color combinations, and other relevant details.
|
||||
4. **Main Subject Recognition:** The model should accurately identify the main subjects in the images and provide detailed descriptions of these subjects.
|
||||
5. **Emotional Expression Analysis:** Analyze and describe the emotions or expressions conveyed in the images based on facial expressions, postures, and other relevant features.
|
||||
### Additional Considerations
|
||||
- Ensure that the extracted information is as comprehensive and accurate as possible.
|
||||
- For each task, provide confidence scores or relevance scores for the model outputs to assess the reliability of the results.
|
||||
- If necessary, pose specific questions for different tasks to guide the model in better understanding the images and providing relevant information."""
|
||||
|
||||
class ModelTool(Tool):
    """
    Tool that wraps a vision-capable LLM so it can be invoked like any other
    tool: it resolves an uploaded image, sends it to the model with the
    VISION_PROMPT, and returns the extracted text.
    """

    class ModelToolType(Enum):
        """
        The type of the model tool.
        """
        VISION = 'vision'

    # the wrapped model: {'model_instance': ..., 'model': ..., 'properties': ...}
    model_configuration: dict[str, Any] = None
    tool_type: ModelToolType

    def __init__(self, model_instance: ModelInstance = None, model: str = None,
                 tool_type: ModelToolType = ModelToolType.VISION,
                 properties: dict[ModelToolPropertyKey, Any] = None,
                 **kwargs):
        """
        Init the tool.

        :param model_instance: the model instance used to invoke the model
            (None when the provider's credentials are not initialized)
        :param model: the model name
        :param tool_type: the capability of the tool (currently only VISION)
        :param properties: model tool properties, e.g. the image parameter name
        """
        kwargs['model_configuration'] = {
            'model_instance': model_instance,
            'model': model,
            'properties': properties
        }
        kwargs['tool_type'] = tool_type
        super().__init__(**kwargs)

    def fork_tool_runtime(self, meta: dict[str, Any]) -> 'Tool':
        """
        Fork a new tool with meta data.

        :param meta: the meta data of a tool call processing, tenant_id is required
        :return: the new tool
        """
        return self.__class__(
            identity=self.identity.copy() if self.identity else None,
            parameters=self.parameters.copy() if self.parameters else None,
            description=self.description.copy() if self.description else None,
            model_instance=self.model_configuration['model_instance'],
            model=self.model_configuration['model'],
            # BUG FIX: the original dropped 'properties' here, leaving it None
            # on the forked tool, so _invoke_llm_vision crashed on
            # self.model_configuration['properties'].get(...)
            properties=self.model_configuration['properties'],
            tool_type=self.tool_type,
            runtime=Tool.Runtime(**meta)
        )

    def validate_credentials(self, credentials: dict[str, Any], parameters: dict[str, Any], format_only: bool = False) -> None:
        """
        Validate the credentials for Model tool.

        No-op: the wrapped model carries its own credentials.
        """
        pass

    def tool_provider_type(self) -> ToolProviderType:
        # NOTE(review): reports BUILT_IN rather than a model-specific type —
        # looks suspicious, but kept as-is since callers may depend on it;
        # confirm before changing.
        return ToolProviderType.BUILT_IN

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Invoke the model tool, dispatching on the tool type.

        :param user_id: id of the invoking user, forwarded to the model
        :param tool_parameters: tool parameters; remaining entries are passed
            to the model as model parameters
        """
        model_instance = self.model_configuration['model_instance']
        if not model_instance:
            return self.create_text_message('the tool is not configured correctly')

        if self.tool_type == ModelTool.ModelToolType.VISION:
            return self._invoke_llm_vision(user_id, tool_parameters)
        else:
            return self.create_text_message('the tool is not configured correctly')

    def _invoke_llm_vision(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Run the vision LLM over an uploaded image and return the extracted text.
        """
        # resolve the image variable: an explicit image id wins, falling back
        # to the default image variable in either case
        image_parameter_name = self.model_configuration['properties'].get(ModelToolPropertyKey.IMAGE_PARAMETER_NAME, 'image_id')
        image_id = tool_parameters.pop(image_parameter_name, '')
        if not image_id:
            image = self.get_default_image_variable()
            if not image:
                return self.create_text_message('Please upload an image or input image_id')
        else:
            image = self.get_variable(image_id)
            if not image:
                image = self.get_default_image_variable()
                if not image:
                    return self.create_text_message('Please upload an image or input image_id')
        # (the original repeated the `if not image` guard here; both branches
        # above already guarantee image is set, so it was dead code)

        # load the image file contents
        image = self.get_variable_file(image.name)
        if not image:
            return self.create_text_message('Failed to get image')

        # organize prompt messages: the vision system prompt plus a user
        # message carrying the instruction and the base64-encoded image
        prompt_messages = [
            SystemPromptMessage(
                content=VISION_PROMPT
            ),
            UserPromptMessage(
                content=[
                    PromptMessageContent(
                        type=PromptMessageContentType.TEXT,
                        data='Recognize the image and extract the information from the image.'
                    ),
                    PromptMessageContent(
                        type=PromptMessageContentType.IMAGE,
                        data=f'data:image/png;base64,{b64encode(image).decode("utf-8")}'
                    )
                ]
            )
        ]

        llm_instance = cast(LargeLanguageModel, self.model_configuration['model_instance'])
        # non-streaming invocation; leftover tool_parameters act as model parameters
        result: LLMResult = llm_instance.invoke(
            model=self.model_configuration['model'],
            credentials=self.runtime.credentials,
            prompt_messages=prompt_messages,
            model_parameters=tool_parameters,
            tools=[],
            stop=[],
            stream=False,
            user=user_id,
        )

        if not result:
            return self.create_text_message('Failed to extract information from the image')

        # surface the model's answer as a plain text message
        content = result.message.content
        if not content:
            return self.create_text_message('Failed to extract information from the image')

        return self.create_text_message(content)
|
||||
Loading…
Reference in New Issue