commit 38d1c85c57 (branch: main)
@@ -0,0 +1,14 @@
from pydantic import BaseModel, Field


class MatrixoneConfig(BaseModel):
    """Matrixone vector database configuration."""

    MATRIXONE_HOST: str = Field(default="localhost", description="Host address of the Matrixone server")
    MATRIXONE_PORT: int = Field(default=6001, description="Port number of the Matrixone server")
    MATRIXONE_USER: str = Field(default="dump", description="Username for authenticating with Matrixone")
    MATRIXONE_PASSWORD: str = Field(default="111", description="Password for authenticating with Matrixone")
    MATRIXONE_DATABASE: str = Field(default="dify", description="Name of the Matrixone database to connect to")
    MATRIXONE_METRIC: str = Field(
        default="l2", description="Distance metric type for vector similarity search (cosine or l2)"
    )
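For illustration, a minimal sketch of how these settings behave. MatrixoneConfig here is the settings class above; the note that same-named environment variables override the defaults is an assumption based on Dify's usual config pattern, not shown in this hunk:

cfg = MatrixoneConfig()
assert cfg.MATRIXONE_HOST == "localhost"  # Field defaults apply when nothing is set
assert cfg.MATRIXONE_PORT == 6001
cfg = MatrixoneConfig(MATRIXONE_METRIC="cosine")  # per-field override, e.g. sourced from the environment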
@@ -0,0 +1,421 @@
import logging
from typing import Any, NoReturn

from flask import Response
from flask_restful import Resource, fields, inputs, marshal, marshal_with, reqparse
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden

from controllers.console import api
from controllers.console.app.error import (
    DraftWorkflowNotExist,
)
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from controllers.web.error import InvalidArgumentError, NotFoundError
from core.variables.segment_group import SegmentGroup
from core.variables.segments import ArrayFileSegment, FileSegment, Segment
from core.variables.types import SegmentType
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
from factories.file_factory import build_from_mapping, build_from_mappings
from factories.variable_factory import build_segment_with_type
from libs.login import current_user, login_required
from models import App, AppMode, db
from models.workflow import WorkflowDraftVariable
from services.workflow_draft_variable_service import WorkflowDraftVariableList, WorkflowDraftVariableService
from services.workflow_service import WorkflowService

logger = logging.getLogger(__name__)
def _convert_values_to_json_serializable_object(value: Segment) -> Any:
    if isinstance(value, FileSegment):
        return value.value.model_dump()
    elif isinstance(value, ArrayFileSegment):
        return [i.model_dump() for i in value.value]
    elif isinstance(value, SegmentGroup):
        return [_convert_values_to_json_serializable_object(i) for i in value.value]
    else:
        return value.value


def _serialize_var_value(variable: WorkflowDraftVariable) -> Any:
    value = variable.get_value()
    # Create a copy of the value to avoid affecting the model cache.
    value = value.model_copy(deep=True)
    # Refresh the URL signature before returning it to the client.
    if isinstance(value, FileSegment):
        file = value.value
        file.remote_url = file.generate_url()
    elif isinstance(value, ArrayFileSegment):
        files = value.value
        for file in files:
            file.remote_url = file.generate_url()
    return _convert_values_to_json_serializable_object(value)


def _create_pagination_parser():
    parser = reqparse.RequestParser()
    parser.add_argument(
        "page",
        type=inputs.int_range(1, 100_000),
        required=False,
        default=1,
        location="args",
        help="the page of data requested",
    )
    parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
    return parser


_WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS = {
    "id": fields.String,
    "type": fields.String(attribute=lambda model: model.get_variable_type()),
    "name": fields.String,
    "description": fields.String,
    "selector": fields.List(fields.String, attribute=lambda model: model.get_selector()),
    "value_type": fields.String,
    "edited": fields.Boolean(attribute=lambda model: model.edited),
    "visible": fields.Boolean,
}

_WORKFLOW_DRAFT_VARIABLE_FIELDS = dict(
    _WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS,
    value=fields.Raw(attribute=_serialize_var_value),
)

_WORKFLOW_DRAFT_ENV_VARIABLE_FIELDS = {
    "id": fields.String,
    "type": fields.String(attribute=lambda _: "env"),
    "name": fields.String,
    "description": fields.String,
    "selector": fields.List(fields.String, attribute=lambda model: model.get_selector()),
    "value_type": fields.String,
    "edited": fields.Boolean(attribute=lambda model: model.edited),
    "visible": fields.Boolean,
}

_WORKFLOW_DRAFT_ENV_VARIABLE_LIST_FIELDS = {
    "items": fields.List(fields.Nested(_WORKFLOW_DRAFT_ENV_VARIABLE_FIELDS)),
}


def _get_items(var_list: WorkflowDraftVariableList) -> list[WorkflowDraftVariable]:
    return var_list.variables


_WORKFLOW_DRAFT_VARIABLE_LIST_WITHOUT_VALUE_FIELDS = {
    "items": fields.List(fields.Nested(_WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS), attribute=_get_items),
    "total": fields.Raw(),
}

_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS = {
    "items": fields.List(fields.Nested(_WORKFLOW_DRAFT_VARIABLE_FIELDS), attribute=_get_items),
}
def _api_prerequisite(f):
    """Common prerequisites for all draft workflow variable APIs.

    It ensures the following conditions are satisfied:

    - Dify has been properly set up.
    - The requesting user is logged in and initialized.
    - The requested app is a workflow or a chat flow.
    - The requesting user has edit permission for the app.
    """

    @setup_required
    @login_required
    @account_initialization_required
    @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
    def wrapper(*args, **kwargs):
        if not current_user.is_editor:
            raise Forbidden()
        return f(*args, **kwargs)

    return wrapper
class WorkflowVariableCollectionApi(Resource):
    @_api_prerequisite
    @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_WITHOUT_VALUE_FIELDS)
    def get(self, app_model: App):
        """
        Get draft workflow variables.
        """
        parser = _create_pagination_parser()
        args = parser.parse_args()

        # Ensure the draft workflow exists for this app.
        workflow_service = WorkflowService()
        workflow_exist = workflow_service.is_workflow_exist(app_model=app_model)
        if not workflow_exist:
            raise DraftWorkflowNotExist()

        # Fetch draft variables by app_model.
        with Session(bind=db.engine, expire_on_commit=False) as session:
            draft_var_srv = WorkflowDraftVariableService(
                session=session,
            )
            workflow_vars = draft_var_srv.list_variables_without_values(
                app_id=app_model.id,
                page=args.page,
                limit=args.limit,
            )

        return workflow_vars

    @_api_prerequisite
    def delete(self, app_model: App):
        draft_var_srv = WorkflowDraftVariableService(
            session=db.session(),
        )
        draft_var_srv.delete_workflow_variables(app_model.id)
        db.session.commit()
        return Response("", 204)
def validate_node_id(node_id: str) -> NoReturn | None:
    if node_id in [
        CONVERSATION_VARIABLE_NODE_ID,
        SYSTEM_VARIABLE_NODE_ID,
    ]:
        # NOTE(QuantumGhost): While we store system and conversation variables as node variables
        # with specific `node_id`s in the database, we still want to keep the APIs separate. By
        # disallowing access to system and conversation variables in `WorkflowDraftNodeVariableListApi`,
        # we mitigate the risk of API users depending on the implementation details of the API.
        #
        # ref: [Hyrum's Law](https://www.hyrumslaw.com/)
        raise InvalidArgumentError(
            f"invalid node_id, please use the corresponding api for conversation and system variables, node_id={node_id}",
        )
    return None
class NodeVariableCollectionApi(Resource):
    @_api_prerequisite
    @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
    def get(self, app_model: App, node_id: str):
        validate_node_id(node_id)
        with Session(bind=db.engine, expire_on_commit=False) as session:
            draft_var_srv = WorkflowDraftVariableService(
                session=session,
            )
            node_vars = draft_var_srv.list_node_variables(app_model.id, node_id)

        return node_vars

    @_api_prerequisite
    def delete(self, app_model: App, node_id: str):
        validate_node_id(node_id)
        srv = WorkflowDraftVariableService(db.session())
        srv.delete_node_variables(app_model.id, node_id)
        db.session.commit()
        return Response("", 204)
class VariableApi(Resource):
    _PATCH_NAME_FIELD = "name"
    _PATCH_VALUE_FIELD = "value"

    @_api_prerequisite
    @marshal_with(_WORKFLOW_DRAFT_VARIABLE_FIELDS)
    def get(self, app_model: App, variable_id: str):
        draft_var_srv = WorkflowDraftVariableService(
            session=db.session(),
        )
        variable = draft_var_srv.get_variable(variable_id=variable_id)
        if variable is None:
            raise NotFoundError(description=f"variable not found, id={variable_id}")
        if variable.app_id != app_model.id:
            raise NotFoundError(description=f"variable not found, id={variable_id}")
        return variable

    @_api_prerequisite
    @marshal_with(_WORKFLOW_DRAFT_VARIABLE_FIELDS)
    def patch(self, app_model: App, variable_id: str):
        # Request payload for file types:
        #
        # Local File:
        #
        #     {
        #         "type": "image",
        #         "transfer_method": "local_file",
        #         "url": "",
        #         "upload_file_id": "daded54f-72c7-4f8e-9d18-9b0abdd9f190"
        #     }
        #
        # Remote File:
        #
        #     {
        #         "type": "image",
        #         "transfer_method": "remote_url",
        #         "url": "http://127.0.0.1:5001/files/1602650a-4fe4-423c-85a2-af76c083e3c4/file-preview?timestamp=1750041099&nonce=...&sign=...=",
        #         "upload_file_id": "1602650a-4fe4-423c-85a2-af76c083e3c4"
        #     }

        parser = reqparse.RequestParser()
        parser.add_argument(self._PATCH_NAME_FIELD, type=str, required=False, nullable=True, location="json")
        # Parse 'value' field as-is to maintain its original data structure
        parser.add_argument(self._PATCH_VALUE_FIELD, type=lambda x: x, required=False, nullable=True, location="json")

        draft_var_srv = WorkflowDraftVariableService(
            session=db.session(),
        )
        args = parser.parse_args(strict=True)

        variable = draft_var_srv.get_variable(variable_id=variable_id)
        if variable is None:
            raise NotFoundError(description=f"variable not found, id={variable_id}")
        if variable.app_id != app_model.id:
            raise NotFoundError(description=f"variable not found, id={variable_id}")

        new_name = args.get(self._PATCH_NAME_FIELD, None)
        raw_value = args.get(self._PATCH_VALUE_FIELD, None)
        if new_name is None and raw_value is None:
            return variable

        new_value = None
        if raw_value is not None:
            if variable.value_type == SegmentType.FILE:
                if not isinstance(raw_value, dict):
                    raise InvalidArgumentError(description=f"expected dict for file, got {type(raw_value)}")
                raw_value = build_from_mapping(mapping=raw_value, tenant_id=app_model.tenant_id)
            elif variable.value_type == SegmentType.ARRAY_FILE:
                if not isinstance(raw_value, list):
                    raise InvalidArgumentError(description=f"expected list for files, got {type(raw_value)}")
                if len(raw_value) > 0 and not isinstance(raw_value[0], dict):
                    raise InvalidArgumentError(description=f"expected dict for files[0], got {type(raw_value)}")
                raw_value = build_from_mappings(mappings=raw_value, tenant_id=app_model.tenant_id)
            new_value = build_segment_with_type(variable.value_type, raw_value)
        draft_var_srv.update_variable(variable, name=new_name, value=new_value)
        db.session.commit()
        return variable

    @_api_prerequisite
    def delete(self, app_model: App, variable_id: str):
        draft_var_srv = WorkflowDraftVariableService(
            session=db.session(),
        )
        variable = draft_var_srv.get_variable(variable_id=variable_id)
        if variable is None:
            raise NotFoundError(description=f"variable not found, id={variable_id}")
        if variable.app_id != app_model.id:
            raise NotFoundError(description=f"variable not found, id={variable_id}")
        draft_var_srv.delete_variable(variable)
        db.session.commit()
        return Response("", 204)
class VariableResetApi(Resource):
    @_api_prerequisite
    def put(self, app_model: App, variable_id: str):
        draft_var_srv = WorkflowDraftVariableService(
            session=db.session(),
        )

        workflow_srv = WorkflowService()
        draft_workflow = workflow_srv.get_draft_workflow(app_model)
        if draft_workflow is None:
            raise NotFoundError(
                f"Draft workflow not found, app_id={app_model.id}",
            )
        variable = draft_var_srv.get_variable(variable_id=variable_id)
        if variable is None:
            raise NotFoundError(description=f"variable not found, id={variable_id}")
        if variable.app_id != app_model.id:
            raise NotFoundError(description=f"variable not found, id={variable_id}")

        resetted = draft_var_srv.reset_variable(draft_workflow, variable)
        db.session.commit()
        if resetted is None:
            return Response("", 204)
        else:
            return marshal(resetted, _WORKFLOW_DRAFT_VARIABLE_FIELDS)
def _get_variable_list(app_model: App, node_id) -> WorkflowDraftVariableList:
    with Session(bind=db.engine, expire_on_commit=False) as session:
        draft_var_srv = WorkflowDraftVariableService(
            session=session,
        )
        if node_id == CONVERSATION_VARIABLE_NODE_ID:
            draft_vars = draft_var_srv.list_conversation_variables(app_model.id)
        elif node_id == SYSTEM_VARIABLE_NODE_ID:
            draft_vars = draft_var_srv.list_system_variables(app_model.id)
        else:
            draft_vars = draft_var_srv.list_node_variables(app_id=app_model.id, node_id=node_id)
    return draft_vars


class ConversationVariableCollectionApi(Resource):
    @_api_prerequisite
    @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
    def get(self, app_model: App):
        # NOTE(QuantumGhost): Prefill conversation variables into the draft variables table
        # so their IDs can be returned to the caller.
        workflow_srv = WorkflowService()
        draft_workflow = workflow_srv.get_draft_workflow(app_model)
        if draft_workflow is None:
            raise NotFoundError(description=f"draft workflow not found, id={app_model.id}")
        draft_var_srv = WorkflowDraftVariableService(db.session())
        draft_var_srv.prefill_conversation_variable_default_values(draft_workflow)
        db.session.commit()
        return _get_variable_list(app_model, CONVERSATION_VARIABLE_NODE_ID)


class SystemVariableCollectionApi(Resource):
    @_api_prerequisite
    @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
    def get(self, app_model: App):
        return _get_variable_list(app_model, SYSTEM_VARIABLE_NODE_ID)
class EnvironmentVariableCollectionApi(Resource):
    @_api_prerequisite
    def get(self, app_model: App):
        """
        Get environment variables of the draft workflow.
        """
        # Fetch draft workflow by app_model.
        workflow_service = WorkflowService()
        workflow = workflow_service.get_draft_workflow(app_model=app_model)
        if workflow is None:
            raise DraftWorkflowNotExist()

        env_vars = workflow.environment_variables
        env_vars_list = []
        for v in env_vars:
            env_vars_list.append(
                {
                    "id": v.id,
                    "type": "env",
                    "name": v.name,
                    "description": v.description,
                    "selector": v.selector,
                    "value_type": v.value_type.value,
                    "value": v.value,
                    # Editing is not tracked for env vars.
                    "edited": False,
                    "visible": True,
                    "editable": True,
                }
            )

        return {"items": env_vars_list}
api.add_resource(
    WorkflowVariableCollectionApi,
    "/apps/<uuid:app_id>/workflows/draft/variables",
)
api.add_resource(NodeVariableCollectionApi, "/apps/<uuid:app_id>/workflows/draft/nodes/<string:node_id>/variables")
api.add_resource(VariableApi, "/apps/<uuid:app_id>/workflows/draft/variables/<uuid:variable_id>")
api.add_resource(VariableResetApi, "/apps/<uuid:app_id>/workflows/draft/variables/<uuid:variable_id>/reset")

api.add_resource(ConversationVariableCollectionApi, "/apps/<uuid:app_id>/workflows/draft/conversation-variables")
api.add_resource(SystemVariableCollectionApi, "/apps/<uuid:app_id>/workflows/draft/system-variables")
api.add_resource(EnvironmentVariableCollectionApi, "/apps/<uuid:app_id>/workflows/draft/environment-variables")
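For reference, a hedged sketch of exercising these endpoints with requests. The base URL, token, and IDs are illustrative placeholders, and the /console/api prefix is an assumption about how this blueprint is mounted:

import requests

BASE = "http://127.0.0.1:5001/console/api"
APP_ID = "00000000-0000-0000-0000-000000000000"        # placeholder app id
VARIABLE_ID = "00000000-0000-0000-0000-000000000001"   # placeholder variable id
HEADERS = {"Authorization": "Bearer <console-access-token>"}

# List draft variables without values (paginated).
resp = requests.get(
    f"{BASE}/apps/{APP_ID}/workflows/draft/variables",
    params={"page": 1, "limit": 20},
    headers=HEADERS,
)
print(resp.json())

# Rename a variable and set a new value in one PATCH.
resp = requests.patch(
    f"{BASE}/apps/{APP_ID}/workflows/draft/variables/{VARIABLE_ID}",
    json={"name": "renamed", "value": "new value"},
    headers=HEADERS,
)
print(resp.json())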
@@ -1 +1,11 @@
from typing import Any

# TODO(QuantumGhost): Refactor variable type identification. Instead of directly
# comparing `dify_model_identity` with constants throughout the codebase, extract
# this logic into a dedicated function. This would encapsulate the implementation
# details of how different variable types are identified.
FILE_MODEL_IDENTITY = "__dify__file__"


def maybe_file_object(o: Any) -> bool:
    return isinstance(o, dict) and o.get("dify_model_identity") == FILE_MODEL_IDENTITY
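A quick illustration of the predicate; the mapping below is a hand-written stand-in for a serialized File model, not real output:

serialized = {"dify_model_identity": FILE_MODEL_IDENTITY, "url": "https://example.com/a.png"}
assert maybe_file_object(serialized)
assert not maybe_file_object({"url": "https://example.com/a.png"})  # missing identity key
assert not maybe_file_object(FILE_MODEL_IDENTITY)                   # not a dict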
@@ -0,0 +1,233 @@
import json
import logging
import uuid
from functools import wraps
from typing import Any, Optional

from mo_vector.client import MoVectorClient  # type: ignore
from pydantic import BaseModel, model_validator

from configs import dify_config
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from models.dataset import Dataset

logger = logging.getLogger(__name__)


class MatrixoneConfig(BaseModel):
    host: str = "localhost"
    port: int = 6001
    user: str = "dump"
    password: str = "111"
    database: str = "dify"
    metric: str = "l2"

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict) -> dict:
        # Use .get() so a missing key raises the intended ValueError, not a KeyError.
        if not values.get("host"):
            raise ValueError("config host is required")
        if not values.get("port"):
            raise ValueError("config port is required")
        if not values.get("user"):
            raise ValueError("config user is required")
        if not values.get("password"):
            raise ValueError("config password is required")
        if not values.get("database"):
            raise ValueError("config database is required")
        return values
def ensure_client(func):
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        if self.client is None:
            self.client = self._get_client(None, False)
        return func(self, *args, **kwargs)

    return wrapper
class MatrixoneVector(BaseVector):
    """
    Matrixone vector storage implementation.
    """

    def __init__(self, collection_name: str, config: MatrixoneConfig):
        super().__init__(collection_name)
        self.config = config
        self.collection_name = collection_name.lower()
        self.client = None

    @property
    def collection_name(self):
        return self._collection_name

    @collection_name.setter
    def collection_name(self, value):
        self._collection_name = value

    def get_type(self) -> str:
        return VectorType.MATRIXONE

    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        if self.client is None:
            self.client = self._get_client(len(embeddings[0]), True)
        return self.add_texts(texts, embeddings)
    def _get_client(self, dimension: Optional[int] = None, create_table: bool = False) -> MoVectorClient:
        """
        Create a new client for the collection.

        The collection will be created if it doesn't exist.
        """
        lock_name = f"vector_indexing_lock_{self._collection_name}"
        with redis_client.lock(lock_name, timeout=20):
            client = MoVectorClient(
                connection_string=f"mysql+pymysql://{self.config.user}:{self.config.password}@{self.config.host}:{self.config.port}/{self.config.database}",
                table_name=self.collection_name,
                vector_dimension=dimension,
                create_table=create_table,
            )
            collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
            if redis_client.get(collection_exist_cache_key):
                return client
            try:
                client.create_full_text_index()
            except Exception:
                logger.exception("Failed to create full text index")
            redis_client.set(collection_exist_cache_key, 1, ex=3600)
            return client
    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        if self.client is None:
            self.client = self._get_client(len(embeddings[0]), True)
        assert self.client is not None
        ids = []
        # Keep ids aligned with documents; fall back to a random UUID when the
        # metadata is missing or carries no doc_id.
        for doc in documents:
            if doc.metadata is not None:
                doc_id = doc.metadata.get("doc_id", str(uuid.uuid4()))
            else:
                doc_id = str(uuid.uuid4())
            ids.append(doc_id)
        self.client.insert(
            texts=[doc.page_content for doc in documents],
            embeddings=embeddings,
            metadatas=[doc.metadata for doc in documents],
            ids=ids,
        )
        return ids
    @ensure_client
    def text_exists(self, id: str) -> bool:
        assert self.client is not None
        result = self.client.get(ids=[id])
        return len(result) > 0

    @ensure_client
    def delete_by_ids(self, ids: list[str]) -> None:
        assert self.client is not None
        if not ids:
            return
        self.client.delete(ids=ids)

    @ensure_client
    def get_ids_by_metadata_field(self, key: str, value: str):
        assert self.client is not None
        results = self.client.query_by_metadata(filter={key: value})
        return [result.id for result in results]

    @ensure_client
    def delete_by_metadata_field(self, key: str, value: str) -> None:
        assert self.client is not None
        self.client.delete(filter={key: value})
    @ensure_client
    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
        assert self.client is not None
        top_k = kwargs.get("top_k", 5)
        document_ids_filter = kwargs.get("document_ids_filter")
        filter = None
        if document_ids_filter:
            filter = {"document_id": {"$in": document_ids_filter}}

        results = self.client.query(
            query_vector=query_vector,
            k=top_k,
            filter=filter,
        )

        docs = []
        # TODO: add the score threshold to the query
        for result in results:
            metadata = result.metadata
            docs.append(
                Document(
                    page_content=result.document,
                    metadata=metadata,
                )
            )
        return docs
    @ensure_client
    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
        assert self.client is not None
        top_k = kwargs.get("top_k", 5)
        document_ids_filter = kwargs.get("document_ids_filter")
        filter = None
        if document_ids_filter:
            filter = {"document_id": {"$in": document_ids_filter}}
        score_threshold = float(kwargs.get("score_threshold", 0.0))

        results = self.client.full_text_query(
            keywords=[query],
            k=top_k,
            filter=filter,
        )

        docs = []
        for result in results:
            metadata = result.metadata
            if isinstance(metadata, str):
                metadata = json.loads(metadata)
            score = 1 - result.distance
            if score >= score_threshold:
                metadata["score"] = score
                docs.append(
                    Document(
                        page_content=result.document,
                        metadata=metadata,
                    )
                )
        return docs
    @ensure_client
    def delete(self) -> None:
        assert self.client is not None
        self.client.delete()


class MatrixoneVectorFactory(AbstractVectorFactory):
    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> MatrixoneVector:
        if dataset.index_struct_dict:
            class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
            collection_name = class_prefix
        else:
            dataset_id = dataset.id
            collection_name = Dataset.gen_collection_name_by_id(dataset_id)
            dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.MATRIXONE, collection_name))

        config = MatrixoneConfig(
            host=dify_config.MATRIXONE_HOST or "localhost",
            port=dify_config.MATRIXONE_PORT or 6001,
            user=dify_config.MATRIXONE_USER or "dump",
            password=dify_config.MATRIXONE_PASSWORD or "111",
            database=dify_config.MATRIXONE_DATABASE or "dify",
            metric=dify_config.MATRIXONE_METRIC or "l2",
        )
        return MatrixoneVector(collection_name=collection_name, config=config)
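A rough usage sketch of the class above, assuming a reachable Matrixone instance and Redis (both needed by _get_client); the embeddings are toy 4-dimensional values, not real model output:

config = MatrixoneConfig(host="localhost", port=6001, user="dump", password="111", database="dify")
vector = MatrixoneVector(collection_name="Vector_index_test", config=config)

docs = [Document(page_content="hello matrixone", metadata={"doc_id": "doc-1", "document_id": "d-1"})]
embeddings = [[0.1, 0.2, 0.3, 0.4]]  # toy vectors; real ones come from the embedding model

vector.create(docs, embeddings)  # creates the table on first call, then inserts
hits = vector.search_by_vector([0.1, 0.2, 0.3, 0.4], top_k=5)
print([d.page_content for d in hits])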
@@ -1,8 +1,26 @@
import json
from collections.abc import Iterable, Sequence

from .segment_group import SegmentGroup
from .segments import ArrayFileSegment, FileSegment, Segment


def to_selector(node_id: str, name: str, paths: Iterable[str] = ()) -> Sequence[str]:
    selectors = [node_id, name]
    if paths:
        selectors.extend(paths)
    return selectors


class SegmentJSONEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, ArrayFileSegment):
            return [v.model_dump() for v in o.value]
        elif isinstance(o, FileSegment):
            return o.value.model_dump()
        elif isinstance(o, SegmentGroup):
            return [self.default(seg) for seg in o.value]
        elif isinstance(o, Segment):
            return o.value
        else:
            return super().default(o)
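A short usage sketch of the encoder; StringSegment is assumed to be one of the Segment subclasses exported from .segments, and the module path in the import is an assumption:

import json

from core.variables.segments import StringSegment
from core.variables.utils import SegmentJSONEncoder, to_selector  # module path assumed

selector = to_selector("node_1", "output", ["text"])   # ["node_1", "output", "text"]
payload = json.dumps(StringSegment(value="hello"), cls=SegmentJSONEncoder)  # '"hello"'
print(selector, payload)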
@@ -0,0 +1,39 @@
import abc
from typing import Protocol

from core.variables import Variable


class ConversationVariableUpdater(Protocol):
    """
    ConversationVariableUpdater defines an abstraction for updating conversation variable values.

    It is intended for use by `v1.VariableAssignerNode` and `v2.VariableAssignerNode` when updating
    conversation variables.

    Implementations may choose to batch updates. If batching is used, the `flush` method
    should be implemented to persist buffered changes, and `update` should handle
    buffering accordingly.

    Note: Since implementations may buffer updates, instances of ConversationVariableUpdater
    are not thread-safe. Each VariableAssignerNode should create its own instance during execution.
    """

    @abc.abstractmethod
    def update(self, conversation_id: str, variable: "Variable") -> None:
        """
        Updates the value of the specified conversation variable in the underlying storage.

        :param conversation_id: The ID of the conversation to update. Typically references `ConversationVariable.id`.
        :param variable: The `Variable` instance containing the updated value.
        """
        pass

    @abc.abstractmethod
    def flush(self):
        """
        Flushes all pending updates to the underlying storage system.

        If the implementation does not buffer updates, this method can be a no-op.
        """
        pass
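To make the batching contract concrete, a minimal in-memory sketch; this class is hypothetical and simply buffers pairs until flush(), where a real implementation would write to storage:

class BufferedConversationVariableUpdater:
    """Hypothetical updater: buffers updates and persists them on flush()."""

    def __init__(self) -> None:
        self._pending: list[tuple[str, Variable]] = []

    def update(self, conversation_id: str, variable: "Variable") -> None:
        # Buffer only; nothing is persisted until flush() is called.
        self._pending.append((conversation_id, variable))

    def flush(self) -> None:
        # A real implementation would upsert rows (e.g. ConversationVariable) here.
        for conversation_id, variable in self._pending:
            print(f"persist {variable.name} for conversation {conversation_id}")
        self._pending.clear()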