Merge branch 'plugins/beta' into dev/plugin-deploy

Branch: feat/custom-tool-input
Author: Yeuoly (1 year ago)
Commit: 1097bf314a

@@ -8,7 +8,7 @@ inputs:
   poetry-version:
     description: Poetry version to set up
     required: true
-    default: '1.8.4'
+    default: '2.0.1'
   poetry-lockfile:
     description: Path to the Poetry lockfile to restore cache from
     required: true

@@ -43,19 +43,17 @@ jobs:
         run: poetry install -C api --with dev
       - name: Check dependencies in pyproject.toml
-        run: poetry run -C api bash dev/pytest/pytest_artifacts.sh
+        run: poetry run -P api bash dev/pytest/pytest_artifacts.sh
       - name: Run Unit tests
-        run: poetry run -C api bash dev/pytest/pytest_unit_tests.sh
+        run: poetry run -P api bash dev/pytest/pytest_unit_tests.sh
       - name: Run dify config tests
-        run: poetry run -C api python dev/pytest/pytest_config_tests.py
+        run: poetry run -P api python dev/pytest/pytest_config_tests.py
       - name: Run mypy
         run: |
-          pushd api
-          poetry run python -m mypy --install-types --non-interactive .
-          popd
+          poetry run -C api python -m mypy --install-types --non-interactive .
       - name: Set up dotenvs
         run: |

@@ -75,4 +73,4 @@ jobs:
           ssrf_proxy
       - name: Run Workflow
-        run: poetry run -C api bash dev/pytest/pytest_workflow.sh
+        run: poetry run -P api bash dev/pytest/pytest_workflow.sh
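Note on the `-C` → `-P` swaps above: the Poetry pin moves from 1.8.4 to 2.0.1 in the same commit, and Poetry 2.x distinguishes, as far as I can tell, `-P/--project` (use the given project's environment while keeping the current working directory, so repo-root-relative paths like `dev/pytest/…` still resolve) from `-C/--directory` (which changes the working directory first). That would also explain why the mypy step keeps `-C api`: it is meant to run from inside `api/`.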

@@ -39,12 +39,12 @@ jobs:
         if: steps.changed-files.outputs.any_changed == 'true'
         run: |
           poetry run -C api ruff --version
-          poetry run -C api ruff check ./api
-          poetry run -C api ruff format --check ./api
+          poetry run -C api ruff check ./
+          poetry run -C api ruff format --check ./
       - name: Dotenv check
         if: steps.changed-files.outputs.any_changed == 'true'
-        run: poetry run -C api dotenv-linter ./api/.env.example ./web/.env.example
+        run: poetry run -P api dotenv-linter ./api/.env.example ./web/.env.example
       - name: Lint hints
         if: failure()

@@ -87,7 +87,35 @@ jobs:
       - name: Web style check
         if: steps.changed-files.outputs.any_changed == 'true'
-        run: echo "${{ steps.changed-files.outputs.all_changed_files }}" | sed 's|web/||g' | xargs pnpm eslint # wait for next lint support eslint v9
+        run: yarn run lint
+
+  docker-compose-template:
+    name: Docker Compose Template
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Check changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          files: |
+            docker/generate_docker_compose
+            docker/.env.example
+            docker/docker-compose-template.yaml
+            docker/docker-compose.yaml
+      - name: Generate Docker Compose
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: |
+          cd docker
+          ./generate_docker_compose
+      - name: Check for changes
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: git diff --exit-code
+
   docker-compose-template:

@@ -70,4 +70,4 @@ jobs:
           tidb
       - name: Test Vector Stores
-        run: poetry run -C api bash dev/pytest/pytest_vdb.sh
+        run: poetry run -P api bash dev/pytest/pytest_vdb.sh

.gitignore
@@ -197,3 +197,6 @@ api/.vscode
 # pnpm
 /.pnpm-store
+
+# plugin migrate
+plugins.jsonl

@@ -422,8 +422,7 @@ POSITION_PROVIDER_INCLUDES=
 POSITION_PROVIDER_EXCLUDES=

 # Plugin configuration
-PLUGIN_API_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi
-PLUGIN_API_URL=http://127.0.0.1:5002
+PLUGIN_DAEMON_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi
 PLUGIN_DAEMON_URL=http://127.0.0.1:5002
 PLUGIN_REMOTE_INSTALL_PORT=5003
 PLUGIN_REMOTE_INSTALL_HOST=localhost

@@ -436,7 +435,7 @@ MARKETPLACE_ENABLED=true
 MARKETPLACE_API_URL=https://marketplace.dify.ai

 # Endpoint configuration
-ENDPOINT_URL_TEMPLATE=http://localhost/e/{hook_id}
+ENDPOINT_URL_TEMPLATE=http://localhost:5002/e/{hook_id}

 # Reset password token expiry minutes
 RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5

@@ -53,10 +53,12 @@ ignore = [
     "FURB152", # math-constant
     "UP007", # non-pep604-annotation
     "UP032", # f-string
+    "UP045", # non-pep604-annotation-optional
     "B005", # strip-with-multi-characters
     "B006", # mutable-argument-default
     "B007", # unused-loop-control-variable
     "B026", # star-arg-unpacking-after-keyword-arg
+    "B903", # class-as-data-structure
     "B904", # raise-without-from-inside-except
     "B905", # zip-without-explicit-strict
     "N806", # non-lowercase-variable-in-function

@@ -4,7 +4,7 @@ FROM python:3.12-slim-bookworm AS base
 WORKDIR /app/api

 # Install Poetry
-ENV POETRY_VERSION=1.8.4
+ENV POETRY_VERSION=2.0.1

 # if you located in China, you can use aliyun mirror to speed up
 # RUN pip install --no-cache-dir poetry==${POETRY_VERSION} -i https://mirrors.aliyun.com/pypi/simple/

@@ -55,6 +55,7 @@ RUN apt-get update \
     && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
     && apt-get update \
     # For Security
+    # && apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.19+dfsg-1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
     # install a chinese font to support the use of tools like matplotlib
     && apt-get install -y fonts-noto-cjk \
     && apt-get autoremove -y \

@@ -79,5 +79,5 @@
 2. Run the tests locally with mocked system environment variables in `tool.pytest_env` section in `pyproject.toml`

    ```bash
-   poetry run -C api bash dev/pytest/pytest_all_tests.sh
+   poetry run -P api bash dev/pytest/pytest_all_tests.sh
    ```

@@ -141,10 +141,10 @@ class PluginConfig(BaseSettings):
     PLUGIN_DAEMON_URL: HttpUrl = Field(
         description="Plugin API URL",
-        default="http://plugin:5002",
+        default="http://localhost:5002",
     )

-    PLUGIN_API_KEY: str = Field(
+    PLUGIN_DAEMON_KEY: str = Field(
         description="Plugin API key",
         default="plugin-api-key",
     )
@@ -200,7 +200,7 @@ class EndpointConfig(BaseSettings):
     )

     CONSOLE_WEB_URL: str = Field(
-        description="Base URL for the console web interface," "used for frontend references and CORS configuration",
+        description="Base URL for the console web interface,used for frontend references and CORS configuration",
         default="",
     )

@@ -181,7 +181,7 @@ class HostedFetchAppTemplateConfig(BaseSettings):
     """

     HOSTED_FETCH_APP_TEMPLATES_MODE: str = Field(
-        description="Mode for fetching app templates: remote, db, or builtin" " default to remote,",
+        description="Mode for fetching app templates: remote, db, or builtin default to remote,",
         default="remote",
     )

@@ -59,7 +59,7 @@ class InsertExploreAppListApi(Resource):
         with Session(db.engine) as session:
             app = session.execute(select(App).filter(App.id == args["app_id"])).scalar_one_or_none()

         if not app:
-            raise NotFound(f'App \'{args["app_id"]}\' is not found')
+            raise NotFound(f"App '{args['app_id']}' is not found")

         site = app.site
         if not site:

@@ -22,7 +22,7 @@ from controllers.console.wraps import account_initialization_required, setup_req
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
 from libs.login import login_required
-from models.model import AppMode
+from models import App, AppMode
 from services.audio_service import AudioService
 from services.errors.audio import (
     AudioTooLargeServiceError,
@@ -79,7 +79,7 @@ class ChatMessageTextApi(Resource):
     @login_required
     @account_initialization_required
     @get_app_model
-    def post(self, app_model):
+    def post(self, app_model: App):
         from werkzeug.exceptions import InternalServerError

         try:

@@ -98,9 +98,13 @@ class ChatMessageTextApi(Resource):
                 and app_model.workflow.features_dict
             ):
                 text_to_speech = app_model.workflow.features_dict.get("text_to_speech")
+                if text_to_speech is None:
+                    raise ValueError("TTS is not enabled")
                 voice = args.get("voice") or text_to_speech.get("voice")
             else:
                 try:
+                    if app_model.app_model_config is None:
+                        raise ValueError("AppModelConfig not found")
                     voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
                 except Exception:
                     voice = None

@@ -135,7 +135,7 @@ class DataSourceNotionListApi(Resource):
                     data_source_info = json.loads(document.data_source_info)
                     exist_page_ids.append(data_source_info["notion_page_id"])
             # get all authorized pages
-            data_source_bindings = session.execute(
+            data_source_bindings = session.scalars(
                 select(DataSourceOauthBinding).filter_by(
                     tenant_id=current_user.current_tenant_id, provider="notion", disabled=False
                 )

@@ -52,12 +52,12 @@ class DatasetListApi(Resource):
         # provider = request.args.get("provider", default="vendor")
         search = request.args.get("keyword", default=None, type=str)
         tag_ids = request.args.getlist("tag_ids")
+        include_all = request.args.get("include_all", default="false").lower() == "true"

         if ids:
             datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
         else:
             datasets, total = DatasetService.get_datasets(
-                page, limit, current_user.current_tenant_id, current_user, search, tag_ids
+                page, limit, current_user.current_tenant_id, current_user, search, tag_ids, include_all
             )

         # check embedding setting
# check embedding setting # check embedding setting
@@ -457,7 +457,7 @@ class DatasetIndexingEstimateApi(Resource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider " "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)
@@ -619,8 +619,7 @@ class DatasetRetrievalSettingApi(Resource):
         vector_type = dify_config.VECTOR_STORE
         match vector_type:
             case (
-                VectorType.MILVUS
-                | VectorType.RELYT
+                VectorType.RELYT
                 | VectorType.PGVECTOR
                 | VectorType.TIDB_VECTOR
                 | VectorType.CHROMA

@@ -645,6 +644,7 @@ class DatasetRetrievalSettingApi(Resource):
                 | VectorType.TIDB_ON_QDRANT
                 | VectorType.LINDORM
                 | VectorType.COUCHBASE
+                | VectorType.MILVUS
             ):
                 return {
                     "retrieval_method": [

@@ -362,8 +362,7 @@ class DatasetInitApi(Resource):
             )
         except InvokeAuthorizationError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -540,8 +539,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
             return response.model_dump(), 200
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -168,8 +168,7 @@ class DatasetDocumentSegmentApi(Resource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -217,8 +216,7 @@ class DatasetDocumentSegmentAddApi(Resource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -267,8 +265,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -368,9 +365,9 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
             result = []
             for index, row in df.iterrows():
                 if document.doc_form == "qa_model":
-                    data = {"content": row[0], "answer": row[1]}
+                    data = {"content": row.iloc[0], "answer": row.iloc[1]}
                 else:
-                    data = {"content": row[0]}
+                    data = {"content": row.iloc[0]}
                 result.append(data)
             if len(result) == 0:
                 raise ValueError("The CSV file is empty.")

@@ -437,8 +434,7 @@ class ChildChunkAddApi(Resource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -32,7 +32,7 @@ class ConversationListApi(InstalledAppResource):
         pinned = None
         if "pinned" in args and args["pinned"] is not None:
-            pinned = True if args["pinned"] == "true" else False
+            pinned = args["pinned"] == "true"

         try:
             with Session(db.engine) as session:

@@ -65,7 +65,7 @@ def enterprise_inner_api_user_auth(view):
 def plugin_inner_api_only(view):
     @wraps(view)
     def decorated(*args, **kwargs):
-        if not dify_config.PLUGIN_API_KEY:
+        if not dify_config.PLUGIN_DAEMON_KEY:
             abort(404)

         # get header 'X-Inner-Api-Key'

@@ -7,4 +7,4 @@ api = ExternalApi(bp)
 from . import index
 from .app import app, audio, completion, conversation, file, message, workflow
-from .dataset import dataset, document, hit_testing, segment
+from .dataset import dataset, document, hit_testing, segment, upload_file

@@ -31,8 +31,11 @@ class DatasetListApi(DatasetApiResource):
         # provider = request.args.get("provider", default="vendor")
         search = request.args.get("keyword", default=None, type=str)
         tag_ids = request.args.getlist("tag_ids")
+        include_all = request.args.get("include_all", default="false").lower() == "true"

-        datasets, total = DatasetService.get_datasets(page, limit, tenant_id, current_user, search, tag_ids)
+        datasets, total = DatasetService.get_datasets(
+            page, limit, tenant_id, current_user, search, tag_ids, include_all
+        )

         # check embedding setting
         provider_manager = ProviderManager()
         configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)

@@ -53,8 +53,7 @@ class SegmentApi(DatasetApiResource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -95,8 +94,7 @@ class SegmentApi(DatasetApiResource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -175,8 +173,7 @@ class DatasetSegmentApi(DatasetApiResource):
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
-                "No Embedding Model available. Please configure a valid provider "
-                "in the Settings -> Model Provider."
+                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)

@@ -0,0 +1,54 @@
+from werkzeug.exceptions import NotFound
+
+from controllers.service_api import api
+from controllers.service_api.wraps import (
+    DatasetApiResource,
+)
+from core.file import helpers as file_helpers
+from extensions.ext_database import db
+from models.dataset import Dataset
+from models.model import UploadFile
+from services.dataset_service import DocumentService
+
+
+class UploadFileApi(DatasetApiResource):
+    def get(self, tenant_id, dataset_id, document_id):
+        """Get upload file."""
+        # check dataset
+        dataset_id = str(dataset_id)
+        tenant_id = str(tenant_id)
+        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
+        if not dataset:
+            raise NotFound("Dataset not found.")
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset.id, document_id)
+        if not document:
+            raise NotFound("Document not found.")
+        # check upload file
+        if document.data_source_type != "upload_file":
+            raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
+        data_source_info = document.data_source_info_dict
+        if data_source_info and "upload_file_id" in data_source_info:
+            file_id = data_source_info["upload_file_id"]
+            upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
+            if not upload_file:
+                raise NotFound("UploadFile not found.")
+        else:
+            raise ValueError("Upload file id not found in document data source info.")
+
+        url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
+        return {
+            "id": upload_file.id,
+            "name": upload_file.name,
+            "size": upload_file.size,
+            "extension": upload_file.extension,
+            "url": url,
+            "download_url": f"{url}&as_attachment=true",
+            "mime_type": upload_file.mime_type,
+            "created_by": upload_file.created_by,
+            "created_at": upload_file.created_at.timestamp(),
+        }, 200
+
+
+api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")
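For context, a hedged sketch of how the new endpoint might be called; the host, `/v1` prefix, and credentials are assumptions (not confirmed by this diff), and the UUIDs are placeholders:

```python
# Hypothetical client call for the route registered above.
import requests

resp = requests.get(
    "http://localhost:5001/v1/datasets/<dataset_uuid>/documents/<document_uuid>/upload-file",
    headers={"Authorization": "Bearer <dataset-api-key>"},
    timeout=10,
)
resp.raise_for_status()
info = resp.json()
print(info["name"], info["mime_type"], info["download_url"])
```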

@@ -195,7 +195,11 @@ def validate_and_get_api_token(scope: str | None = None):
     with Session(db.engine, expire_on_commit=False) as session:
         update_stmt = (
             update(ApiToken)
-            .where(ApiToken.token == auth_token, ApiToken.last_used_at < cutoff_time, ApiToken.type == scope)
+            .where(
+                ApiToken.token == auth_token,
+                (ApiToken.last_used_at.is_(None) | (ApiToken.last_used_at < cutoff_time)),
+                ApiToken.type == scope,
+            )
             .values(last_used_at=current_time)
             .returning(ApiToken)
         )

@@ -236,7 +240,7 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]
             tenant_id=app_model.tenant_id,
             app_id=app_model.id,
             type="service_api",
-            is_anonymous=True if user_id == "DEFAULT-USER" else False,
+            is_anonymous=user_id == "DEFAULT-USER",
             session_id=user_id,
         )
         db.session.add(end_user)
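The `is_(None)` test added above is worth spelling out: in SQL, `NULL < cutoff_time` evaluates to NULL rather than true, so a token whose `last_used_at` was never set could never match the old predicate, and its timestamp would never be initialized. A minimal, self-contained reproduction (the table and values are illustrative, not Dify's actual schema):

```python
from datetime import datetime, timedelta

import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
meta = sa.MetaData()
tokens = sa.Table(
    "api_tokens",
    meta,
    sa.Column("token", sa.String, primary_key=True),
    sa.Column("last_used_at", sa.DateTime, nullable=True),
)
meta.create_all(engine)

cutoff_time = datetime.utcnow() - timedelta(minutes=1)
with engine.begin() as conn:
    conn.execute(tokens.insert().values(token="t1", last_used_at=None))

    # Old predicate: NULL < cutoff_time is unknown, so the never-used token is skipped.
    old = conn.execute(sa.select(tokens).where(tokens.c.last_used_at < cutoff_time)).fetchall()

    # New predicate: rows that were never used are matched explicitly.
    new = conn.execute(
        sa.select(tokens).where(tokens.c.last_used_at.is_(None) | (tokens.c.last_used_at < cutoff_time))
    ).fetchall()

print(len(old), len(new))  # 0 1
```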

@@ -39,7 +39,7 @@ class ConversationListApi(WebApiResource):
         pinned = None
         if "pinned" in args and args["pinned"] is not None:
-            pinned = True if args["pinned"] == "true" else False
+            pinned = args["pinned"] == "true"

         try:
             with Session(db.engine) as session:

@@ -168,7 +168,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
             self.save_agent_thought(
                 agent_thought=agent_thought,
-                tool_name=scratchpad.action.action_name if scratchpad.action else "",
+                tool_name=(scratchpad.action.action_name if scratchpad.action and not scratchpad.is_final() else ""),
                 tool_input={scratchpad.action.action_name: scratchpad.action.action_input} if scratchpad.action else {},
                 tool_invoke_meta={},
                 thought=scratchpad.thought or "",

@@ -167,8 +167,7 @@ class AppQueueManager:
         else:
             if isinstance(data, DeclarativeMeta) or hasattr(data, "_sa_instance_state"):
                 raise TypeError(
-                    "Critical Error: Passing SQLAlchemy Model instances "
-                    "that cause thread safety issues is not allowed."
+                    "Critical Error: Passing SQLAlchemy Model instances that cause thread safety issues is not allowed."
                 )

@@ -89,6 +89,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):
             Conversation.id == conversation_id,
             Conversation.app_id == app_model.id,
             Conversation.status == "normal",
+            Conversation.is_deleted.is_(False),
         ]

         if isinstance(user, Account):

@@ -145,7 +145,7 @@ class MessageCycleManage:
             # get extension
             if "." in message_file.url:
-                extension = f'.{message_file.url.split(".")[-1]}'
+                extension = f".{message_file.url.split('.')[-1]}"
                 if len(extension) > 10:
                     extension = ".bin"
             else:

@@ -62,8 +62,9 @@ class ApiExternalDataTool(ExternalDataTool):
         if not api_based_extension:
             raise ValueError(
-                "[External data tool] API query failed, variable: {}, "
-                "error: api_based_extension_id is invalid".format(self.variable)
+                "[External data tool] API query failed, variable: {}, error: api_based_extension_id is invalid".format(
+                    self.variable
+                )
             )

         # decrypt api_key

@@ -33,7 +33,7 @@ def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str,
     sign = hmac.new(key, msg.encode(), hashlib.sha256).digest()
     encoded_sign = base64.urlsafe_b64encode(sign).decode()

-    return f"{url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}&user_id={user_id}"
+    return f"{url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}&user_id={user_id}&tenant_id={tenant_id}"


 def verify_plugin_file_signature(

@@ -90,7 +90,7 @@ class File(BaseModel):
     def markdown(self) -> str:
         url = self.generate_url()
         if self.type == FileType.IMAGE:
-            text = f'![{self.filename or ""}]({url})'
+            text = f"![{self.filename or ''}]({url})"
         else:
             text = f"[{self.filename or url}]({url})"

@@ -530,7 +530,6 @@ class IndexingRunner:
         # chunk nodes by chunk size
         indexing_start_at = time.perf_counter()
         tokens = 0
-        chunk_size = 10
         if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
             # create keyword index
             create_keyword_thread = threading.Thread(

@@ -539,11 +538,22 @@ class IndexingRunner:
             )
             create_keyword_thread.start()

+        max_workers = 10
         if dataset.indexing_technique == "high_quality":
-            with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                 futures = []
-                for i in range(0, len(documents), chunk_size):
-                    chunk_documents = documents[i : i + chunk_size]
+
+                # Distribute documents into multiple groups based on the hash values of page_content
+                # This is done to prevent multiple threads from processing the same document,
+                # Thereby avoiding potential database insertion deadlocks
+                document_groups: list[list[Document]] = [[] for _ in range(max_workers)]
+                for document in documents:
+                    hash = helper.generate_text_hash(document.page_content)
+                    group_index = int(hash, 16) % max_workers
+                    document_groups[group_index].append(document)
+                for chunk_documents in document_groups:
+                    if len(chunk_documents) == 0:
+                        continue
                     futures.append(
                         executor.submit(
                             self._process_chunk,
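The grouping added above replaces fixed-size slicing with content-addressed sharding: two copies of the same text always hash to the same group, so they are serialized onto one thread instead of racing to insert the same row. A standalone sketch of the idea (`generate_text_hash` below is a stand-in for Dify's helper, not its actual implementation):

```python
import hashlib


def generate_text_hash(text: str) -> str:
    # Stand-in: any stable hex digest of the content works for sharding.
    return hashlib.sha256(text.encode()).hexdigest()


def shard_by_content(texts: list[str], max_workers: int = 10) -> list[list[str]]:
    groups: list[list[str]] = [[] for _ in range(max_workers)]
    for text in texts:
        group_index = int(generate_text_hash(text), 16) % max_workers
        groups[group_index].append(text)
    return groups


docs = ["alpha", "beta", "alpha"]  # duplicated content
groups = shard_by_content(docs)
assert any(group.count("alpha") == 2 for group in groups)  # duplicates share one group/thread
```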

@@ -131,7 +131,7 @@ JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = (
 SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
     "Please help me predict the three most likely questions that human would ask, "
     "and keeping each question under 20 characters.\n"
-    "MAKE SURE your output is the SAME language as the Assistant's latest response"
+    "MAKE SURE your output is the SAME language as the Assistant's latest response. "
     "The output must be an array in JSON format following the specified schema:\n"
     '["question1","question2","question3"]\n'
 )

@@ -1,6 +1,9 @@
+import logging
 from threading import Lock
 from typing import Any

+logger = logging.getLogger(__name__)
+
 _tokenizer: Any = None
 _lock = Lock()

@@ -43,5 +46,6 @@ class GPT2Tokenizer:
             base_path = abspath(__file__)
             gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
             _tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
+            logger.info("Fallback to Transformers' GPT-2 tokenizer from tiktoken")
             return _tokenizer

@ -1,42 +0,0 @@
model: ernie-lite-pro-128k
label:
en_US: Ernie-Lite-Pro-128K
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
min: 0.1
max: 1.0
default: 0.8
- name: top_p
use_template: top_p
- name: min_output_tokens
label:
en_US: "Min Output Tokens"
zh_Hans: "最小输出Token数"
use_template: max_tokens
min: 2
max: 2048
help:
zh_Hans: 指定模型最小输出token数
en_US: Specifies the lower limit on the length of generated results.
- name: max_output_tokens
label:
en_US: "Max Output Tokens"
zh_Hans: "最大输出Token数"
use_template: max_tokens
min: 2
max: 2048
default: 2048
help:
zh_Hans: 指定模型最大输出token数
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty

@@ -87,6 +87,6 @@ class CommonValidator:
             if value.lower() not in {"true", "false"}:
                 raise ValueError(f"Variable {credential_form_schema.variable} should be true or false")

-            value = True if value.lower() == "true" else False
+            value = value.lower() == "true"

         return value

@@ -6,6 +6,7 @@ from pydantic import BaseModel, ValidationInfo, field_validator
 class TracingProviderEnum(Enum):
     LANGFUSE = "langfuse"
     LANGSMITH = "langsmith"
+    OPIK = "opik"


 class BaseTracingConfig(BaseModel):

@@ -56,5 +57,36 @@ class LangSmithConfig(BaseTracingConfig):
         return v


+class OpikConfig(BaseTracingConfig):
+    """
+    Model class for Opik tracing config.
+    """
+
+    api_key: str | None = None
+    project: str | None = None
+    workspace: str | None = None
+    url: str = "https://www.comet.com/opik/api/"
+
+    @field_validator("project")
+    @classmethod
+    def project_validator(cls, v, info: ValidationInfo):
+        if v is None or v == "":
+            v = "Default Project"
+
+        return v
+
+    @field_validator("url")
+    @classmethod
+    def url_validator(cls, v, info: ValidationInfo):
+        if v is None or v == "":
+            v = "https://www.comet.com/opik/api/"
+        if not v.startswith(("https://", "http://")):
+            raise ValueError("url must start with https:// or http://")
+        if not v.endswith("/api/"):
+            raise ValueError("url should ends with /api/")
+
+        return v
+
+
 OPS_FILE_PATH = "ops_trace/"
 OPS_TRACE_FAILED_KEY = "FAILED_OPS_TRACE"
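A usage sketch of the `OpikConfig` validators added above, assuming this commit is applied (pydantic v2 raises `ValidationError`, a `ValueError` subclass, when a validator rejects a value):

```python
from core.ops.entities.config_entity import OpikConfig

cfg = OpikConfig(api_key="sk-demo", project="")
assert cfg.project == "Default Project"               # empty project falls back
assert cfg.url == "https://www.comet.com/opik/api/"   # default endpoint

try:
    OpikConfig(url="https://comet.example.com/")      # missing the /api/ suffix
except ValueError as exc:
    print(exc)  # message includes: url should ends with /api/
```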

@@ -0,0 +1,469 @@
+import json
+import logging
+import os
+import uuid
+from datetime import datetime, timedelta
+from typing import Optional, cast
+
+from opik import Opik, Trace
+from opik.id_helpers import uuid4_to_uuid7
+
+from core.ops.base_trace_instance import BaseTraceInstance
+from core.ops.entities.config_entity import OpikConfig
+from core.ops.entities.trace_entity import (
+    BaseTraceInfo,
+    DatasetRetrievalTraceInfo,
+    GenerateNameTraceInfo,
+    MessageTraceInfo,
+    ModerationTraceInfo,
+    SuggestedQuestionTraceInfo,
+    ToolTraceInfo,
+    TraceTaskName,
+    WorkflowTraceInfo,
+)
+from extensions.ext_database import db
+from models.model import EndUser, MessageFile
+from models.workflow import WorkflowNodeExecution
+
+logger = logging.getLogger(__name__)
+
+
+def wrap_dict(key_name, data):
+    """Make sure that the input data is a dict"""
+    if not isinstance(data, dict):
+        return {key_name: data}
+
+    return data
+
+
+def wrap_metadata(metadata, **kwargs):
+    """Add common metatada to all Traces and Spans"""
+    metadata["created_from"] = "dify"
+
+    metadata.update(kwargs)
+
+    return metadata
+
+
+def prepare_opik_uuid(user_datetime: Optional[datetime], user_uuid: Optional[str]):
+    """Opik needs UUIDv7 while Dify uses UUIDv4 for identifier of most
+    messages and objects. The type-hints of BaseTraceInfo indicates that
+    objects start_time and message_id could be null which means we cannot map
+    it to a UUIDv7. Given that we have no way to identify that object
+    uniquely, generate a new random one UUIDv7 in that case.
+    """
+
+    if user_datetime is None:
+        user_datetime = datetime.now()
+
+    if user_uuid is None:
+        user_uuid = str(uuid.uuid4())
+
+    return uuid4_to_uuid7(user_datetime, user_uuid)
+
+
+class OpikDataTrace(BaseTraceInstance):
+    def __init__(
+        self,
+        opik_config: OpikConfig,
+    ):
+        super().__init__(opik_config)
+        self.opik_client = Opik(
+            project_name=opik_config.project,
+            workspace=opik_config.workspace,
+            host=opik_config.url,
+            api_key=opik_config.api_key,
+        )
+        self.project = opik_config.project
+        self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001")
+
+    def trace(self, trace_info: BaseTraceInfo):
+        if isinstance(trace_info, WorkflowTraceInfo):
+            self.workflow_trace(trace_info)
+        if isinstance(trace_info, MessageTraceInfo):
+            self.message_trace(trace_info)
+        if isinstance(trace_info, ModerationTraceInfo):
+            self.moderation_trace(trace_info)
+        if isinstance(trace_info, SuggestedQuestionTraceInfo):
+            self.suggested_question_trace(trace_info)
+        if isinstance(trace_info, DatasetRetrievalTraceInfo):
+            self.dataset_retrieval_trace(trace_info)
+        if isinstance(trace_info, ToolTraceInfo):
+            self.tool_trace(trace_info)
+        if isinstance(trace_info, GenerateNameTraceInfo):
+            self.generate_name_trace(trace_info)
+
+    def workflow_trace(self, trace_info: WorkflowTraceInfo):
+        dify_trace_id = trace_info.workflow_run_id
+        opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
+        workflow_metadata = wrap_metadata(
+            trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id
+        )
+        root_span_id = None
+
+        if trace_info.message_id:
+            dify_trace_id = trace_info.message_id
+            opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
+
+            trace_data = {
+                "id": opik_trace_id,
+                "name": TraceTaskName.MESSAGE_TRACE.value,
+                "start_time": trace_info.start_time,
+                "end_time": trace_info.end_time,
+                "metadata": workflow_metadata,
+                "input": wrap_dict("input", trace_info.workflow_run_inputs),
+                "output": wrap_dict("output", trace_info.workflow_run_outputs),
+                "tags": ["message", "workflow"],
+                "project_name": self.project,
+            }
+            self.add_trace(trace_data)
+
+            root_span_id = prepare_opik_uuid(trace_info.start_time, trace_info.workflow_run_id)
+            span_data = {
+                "id": root_span_id,
+                "parent_span_id": None,
+                "trace_id": opik_trace_id,
+                "name": TraceTaskName.WORKFLOW_TRACE.value,
+                "input": wrap_dict("input", trace_info.workflow_run_inputs),
+                "output": wrap_dict("output", trace_info.workflow_run_outputs),
+                "start_time": trace_info.start_time,
+                "end_time": trace_info.end_time,
+                "metadata": workflow_metadata,
+                "tags": ["workflow"],
+                "project_name": self.project,
+            }
+            self.add_span(span_data)
+        else:
+            trace_data = {
+                "id": opik_trace_id,
+                "name": TraceTaskName.MESSAGE_TRACE.value,
+                "start_time": trace_info.start_time,
+                "end_time": trace_info.end_time,
+                "metadata": workflow_metadata,
+                "input": wrap_dict("input", trace_info.workflow_run_inputs),
+                "output": wrap_dict("output", trace_info.workflow_run_outputs),
+                "tags": ["workflow"],
+                "project_name": self.project,
+            }
+            self.add_trace(trace_data)
+
+        # through workflow_run_id get all_nodes_execution
+        workflow_nodes_execution_id_records = (
+            db.session.query(WorkflowNodeExecution.id)
+            .filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id)
+            .all()
+        )
+
+        for node_execution_id_record in workflow_nodes_execution_id_records:
+            node_execution = (
+                db.session.query(
+                    WorkflowNodeExecution.id,
+                    WorkflowNodeExecution.tenant_id,
+                    WorkflowNodeExecution.app_id,
+                    WorkflowNodeExecution.title,
+                    WorkflowNodeExecution.node_type,
+                    WorkflowNodeExecution.status,
+                    WorkflowNodeExecution.inputs,
+                    WorkflowNodeExecution.outputs,
+                    WorkflowNodeExecution.created_at,
+                    WorkflowNodeExecution.elapsed_time,
+                    WorkflowNodeExecution.process_data,
+                    WorkflowNodeExecution.execution_metadata,
+                )
+                .filter(WorkflowNodeExecution.id == node_execution_id_record.id)
+                .first()
+            )
+
+            if not node_execution:
+                continue
+
+            node_execution_id = node_execution.id
+            tenant_id = node_execution.tenant_id
+            app_id = node_execution.app_id
+            node_name = node_execution.title
+            node_type = node_execution.node_type
+            status = node_execution.status
+            if node_type == "llm":
+                inputs = (
+                    json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {}
+                )
+            else:
+                inputs = json.loads(node_execution.inputs) if node_execution.inputs else {}
+            outputs = json.loads(node_execution.outputs) if node_execution.outputs else {}
+            created_at = node_execution.created_at or datetime.now()
+            elapsed_time = node_execution.elapsed_time
+            finished_at = created_at + timedelta(seconds=elapsed_time)
+
+            execution_metadata = (
+                json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {}
+            )
+            metadata = execution_metadata.copy()
+            metadata.update(
+                {
+                    "workflow_run_id": trace_info.workflow_run_id,
+                    "node_execution_id": node_execution_id,
+                    "tenant_id": tenant_id,
+                    "app_id": app_id,
+                    "app_name": node_name,
+                    "node_type": node_type,
+                    "status": status,
+                }
+            )
+
+            process_data = json.loads(node_execution.process_data) if node_execution.process_data else {}
+
+            provider = None
+            model = None
+            total_tokens = 0
+            completion_tokens = 0
+            prompt_tokens = 0
+
+            if process_data and process_data.get("model_mode") == "chat":
+                run_type = "llm"
+                provider = process_data.get("model_provider", None)
+                model = process_data.get("model_name", "")
+                metadata.update(
+                    {
+                        "ls_provider": provider,
+                        "ls_model_name": model,
+                    }
+                )
+
+                try:
+                    if outputs.get("usage"):
+                        total_tokens = outputs["usage"].get("total_tokens", 0)
+                        prompt_tokens = outputs["usage"].get("prompt_tokens", 0)
+                        completion_tokens = outputs["usage"].get("completion_tokens", 0)
+                except Exception:
+                    logger.error("Failed to extract usage", exc_info=True)
+            else:
+                run_type = "tool"
+
+            parent_span_id = trace_info.workflow_app_log_id or trace_info.workflow_run_id
+            if not total_tokens:
+                total_tokens = execution_metadata.get("total_tokens", 0)
+
+            span_data = {
+                "trace_id": opik_trace_id,
+                "id": prepare_opik_uuid(created_at, node_execution_id),
+                "parent_span_id": prepare_opik_uuid(trace_info.start_time, parent_span_id),
+                "name": node_type,
+                "type": run_type,
+                "start_time": created_at,
+                "end_time": finished_at,
+                "metadata": wrap_metadata(metadata),
+                "input": wrap_dict("input", inputs),
+                "output": wrap_dict("output", outputs),
+                "tags": ["node_execution"],
+                "project_name": self.project,
+                "usage": {
+                    "total_tokens": total_tokens,
+                    "completion_tokens": completion_tokens,
+                    "prompt_tokens": prompt_tokens,
+                },
+                "model": model,
+                "provider": provider,
+            }
+
+            self.add_span(span_data)
+
+    def message_trace(self, trace_info: MessageTraceInfo):
+        # get message file data
+        file_list = cast(list[str], trace_info.file_list) or []
+        message_file_data: Optional[MessageFile] = trace_info.message_file_data
+
+        if message_file_data is not None:
+            file_url = f"{self.file_base_url}/{message_file_data.url}" if message_file_data else ""
+            file_list.append(file_url)
+
+        message_data = trace_info.message_data
+        if message_data is None:
+            return
+
+        metadata = trace_info.metadata
+        message_id = trace_info.message_id
+
+        user_id = message_data.from_account_id
+        metadata["user_id"] = user_id
+        metadata["file_list"] = file_list
+
+        if message_data.from_end_user_id:
+            end_user_data: Optional[EndUser] = (
+                db.session.query(EndUser).filter(EndUser.id == message_data.from_end_user_id).first()
+            )
+            if end_user_data is not None:
+                end_user_id = end_user_data.session_id
+                metadata["end_user_id"] = end_user_id
+
+        trace_data = {
+            "id": prepare_opik_uuid(trace_info.start_time, message_id),
+            "name": TraceTaskName.MESSAGE_TRACE.value,
+            "start_time": trace_info.start_time,
+            "end_time": trace_info.end_time,
+            "metadata": wrap_metadata(metadata),
+            "input": trace_info.inputs,
+            "output": message_data.answer,
+            "tags": ["message", str(trace_info.conversation_mode)],
+            "project_name": self.project,
+        }
+        trace = self.add_trace(trace_data)
+
+        span_data = {
+            "trace_id": trace.id,
+            "name": "llm",
+            "type": "llm",
+            "start_time": trace_info.start_time,
+            "end_time": trace_info.end_time,
+            "metadata": wrap_metadata(metadata),
+            "input": {"input": trace_info.inputs},
+            "output": {"output": message_data.answer},
+            "tags": ["llm", str(trace_info.conversation_mode)],
+            "usage": {
+                "completion_tokens": trace_info.answer_tokens,
+                "prompt_tokens": trace_info.message_tokens,
+                "total_tokens": trace_info.total_tokens,
+            },
+            "project_name": self.project,
+        }
+        self.add_span(span_data)
+
+    def moderation_trace(self, trace_info: ModerationTraceInfo):
+        if trace_info.message_data is None:
+            return
+
+        start_time = trace_info.start_time or trace_info.message_data.created_at
+
+        span_data = {
+            "trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
+            "name": TraceTaskName.MODERATION_TRACE.value,
+            "type": "tool",
+            "start_time": start_time,
+            "end_time": trace_info.end_time or trace_info.message_data.updated_at,
+            "metadata": wrap_metadata(trace_info.metadata),
+            "input": wrap_dict("input", trace_info.inputs),
+            "output": {
+                "action": trace_info.action,
+                "flagged": trace_info.flagged,
+                "preset_response": trace_info.preset_response,
+                "inputs": trace_info.inputs,
+            },
+            "tags": ["moderation"],
+        }
+
+        self.add_span(span_data)
+
+    def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):
+        message_data = trace_info.message_data
+        if message_data is None:
+            return
+
+        start_time = trace_info.start_time or message_data.created_at
+
+        span_data = {
+            "trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
+            "name": TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
+            "type": "tool",
+            "start_time": start_time,
+            "end_time": trace_info.end_time or message_data.updated_at,
+            "metadata": wrap_metadata(trace_info.metadata),
+            "input": wrap_dict("input", trace_info.inputs),
+            "output": wrap_dict("output", trace_info.suggested_question),
+            "tags": ["suggested_question"],
+        }
+
+        self.add_span(span_data)
+
+    def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):
+        if trace_info.message_data is None:
+            return
+
+        start_time = trace_info.start_time or trace_info.message_data.created_at
+
+        span_data = {
+            "trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
+            "name": TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
+            "type": "tool",
+            "start_time": start_time,
+            "end_time": trace_info.end_time or trace_info.message_data.updated_at,
+            "metadata": wrap_metadata(trace_info.metadata),
+            "input": wrap_dict("input", trace_info.inputs),
+            "output": {"documents": trace_info.documents},
+            "tags": ["dataset_retrieval"],
+        }
+
+        self.add_span(span_data)
+
+    def tool_trace(self, trace_info: ToolTraceInfo):
+        span_data = {
+            "trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
+            "name": trace_info.tool_name,
+            "type": "tool",
+            "start_time": trace_info.start_time,
+            "end_time": trace_info.end_time,
+            "metadata": wrap_metadata(trace_info.metadata),
+            "input": wrap_dict("input", trace_info.tool_inputs),
+            "output": wrap_dict("output", trace_info.tool_outputs),
+            "tags": ["tool", trace_info.tool_name],
+        }
+
+        self.add_span(span_data)
+
+    def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
+        trace_data = {
+            "id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
+            "name": TraceTaskName.GENERATE_NAME_TRACE.value,
+            "start_time": trace_info.start_time,
+            "end_time": trace_info.end_time,
+            "metadata": wrap_metadata(trace_info.metadata),
+            "input": trace_info.inputs,
+            "output": trace_info.outputs,
+            "tags": ["generate_name"],
+            "project_name": self.project,
+        }
+        trace = self.add_trace(trace_data)
+
+        span_data = {
+            "trace_id": trace.id,
+            "name": TraceTaskName.GENERATE_NAME_TRACE.value,
+            "start_time": trace_info.start_time,
+            "end_time": trace_info.end_time,
+            "metadata": wrap_metadata(trace_info.metadata),
+            "input": wrap_dict("input", trace_info.inputs),
+            "output": wrap_dict("output", trace_info.outputs),
+            "tags": ["generate_name"],
+        }
+
+        self.add_span(span_data)
+
+    def add_trace(self, opik_trace_data: dict) -> Trace:
+        try:
+            trace = self.opik_client.trace(**opik_trace_data)
+            logger.debug("Opik Trace created successfully")
+            return trace
+        except Exception as e:
+            raise ValueError(f"Opik Failed to create trace: {str(e)}")
+
+    def add_span(self, opik_span_data: dict):
+        try:
+            self.opik_client.span(**opik_span_data)
+            logger.debug("Opik Span created successfully")
+        except Exception as e:
+            raise ValueError(f"Opik Failed to create span: {str(e)}")
+
+    def api_check(self):
+        try:
+            self.opik_client.auth_check()
+            return True
+        except Exception as e:
+            logger.info(f"Opik API check failed: {str(e)}", exc_info=True)
+            raise ValueError(f"Opik API check failed: {str(e)}")
+
+    def get_project_url(self):
+        try:
+            return self.opik_client.get_project_url(project_name=self.project)
+        except Exception as e:
+            logger.info(f"Opik get run url failed: {str(e)}", exc_info=True)
+            raise ValueError(f"Opik get run url failed: {str(e)}")

@@ -17,6 +17,7 @@ from core.ops.entities.config_entity import (
     OPS_FILE_PATH,
     LangfuseConfig,
     LangSmithConfig,
+    OpikConfig,
     TracingProviderEnum,
 )
 from core.ops.entities.trace_entity import (

@@ -32,6 +33,7 @@ from core.ops.entities.trace_entity import (
 )
 from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace
 from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace
+from core.ops.opik_trace.opik_trace import OpikDataTrace
 from core.ops.utils import get_message_data
 from extensions.ext_database import db
 from extensions.ext_storage import storage

@@ -52,6 +54,12 @@ provider_config_map: dict[str, dict[str, Any]] = {
         "other_keys": ["project", "endpoint"],
         "trace_instance": LangSmithDataTrace,
     },
+    TracingProviderEnum.OPIK.value: {
+        "config_class": OpikConfig,
+        "secret_keys": ["api_key"],
+        "other_keys": ["project", "url", "workspace"],
+        "trace_instance": OpikDataTrace,
+    },
 }

@@ -30,7 +30,7 @@ from core.plugin.manager.exc import (
 )

 plugin_daemon_inner_api_baseurl = dify_config.PLUGIN_DAEMON_URL
-plugin_daemon_inner_api_key = dify_config.PLUGIN_API_KEY
+plugin_daemon_inner_api_key = dify_config.PLUGIN_DAEMON_KEY

 T = TypeVar("T", bound=(BaseModel | dict | list | bool | str))

@@ -48,8 +48,10 @@ class PluginToolManager(BasePluginManager):
         tool_provider_id = GenericProviderID(provider)

         def transformer(json_response: dict[str, Any]) -> dict:
-            for tool in json_response.get("data", {}).get("declaration", {}).get("tools", []):
-                tool["identity"]["provider"] = tool_provider_id.provider_name
+            data = json_response.get("data")
+            if data:
+                for tool in data.get("declaration", {}).get("tools", []):
+                    tool["identity"]["provider"] = tool_provider_id.provider_name
             return json_response

@@ -23,7 +23,12 @@ from core.helper import encrypter
 from core.helper.model_provider_cache import ProviderCredentialsCache, ProviderCredentialsCacheType
 from core.helper.position_helper import is_filtered
 from core.model_runtime.entities.model_entities import ModelType
-from core.model_runtime.entities.provider_entities import CredentialFormSchema, FormType, ProviderEntity
+from core.model_runtime.entities.provider_entities import (
+    ConfigurateMethod,
+    CredentialFormSchema,
+    FormType,
+    ProviderEntity,
+)
 from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
 from extensions import ext_hosting_provider
 from extensions.ext_database import db

@@ -839,11 +844,18 @@ class ProviderManager:
         :return:
         """
         # Get provider model credential secret variables
-        model_credential_secret_variables = self._extract_secret_variables(
-            provider_entity.model_credential_schema.credential_form_schemas
-            if provider_entity.model_credential_schema
-            else []
-        )
+        if ConfigurateMethod.PREDEFINED_MODEL in provider_entity.configurate_methods:
+            model_credential_secret_variables = self._extract_secret_variables(
+                provider_entity.provider_credential_schema.credential_form_schemas
+                if provider_entity.provider_credential_schema
+                else []
+            )
+        else:
+            model_credential_secret_variables = self._extract_secret_variables(
+                provider_entity.model_credential_schema.credential_form_schemas
+                if provider_entity.model_credential_schema
+                else []
+            )

         model_settings: list[ModelSettings] = []
         if not provider_model_settings:

@@ -258,7 +258,7 @@ class LindormVectorStore(BaseVector):
             hnsw_ef_construction = kwargs.pop("hnsw_ef_construction", 500)
             ivfpq_m = kwargs.pop("ivfpq_m", dimension)
             nlist = kwargs.pop("nlist", 1000)
-            centroids_use_hnsw = kwargs.pop("centroids_use_hnsw", True if nlist >= 5000 else False)
+            centroids_use_hnsw = kwargs.pop("centroids_use_hnsw", nlist >= 5000)
             centroids_hnsw_m = kwargs.pop("centroids_hnsw_m", 24)
             centroids_hnsw_ef_construct = kwargs.pop("centroids_hnsw_ef_construct", 500)
             centroids_hnsw_ef_search = kwargs.pop("centroids_hnsw_ef_search", 100)

@@ -305,7 +305,7 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict:
     if method_name == "ivfpq":
         ivfpq_m = kwargs["ivfpq_m"]
         nlist = kwargs["nlist"]
-        centroids_use_hnsw = True if nlist > 10000 else False
+        centroids_use_hnsw = nlist > 10000
         centroids_hnsw_m = 24
         centroids_hnsw_ef_construct = 500
         centroids_hnsw_ef_search = 100

@@ -57,6 +57,11 @@ CREATE TABLE IF NOT EXISTS {table_name} (
 ) using heap;
 """

+SQL_CREATE_INDEX = """
+CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
+USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
+"""
+

 class PGVector(BaseVector):
     def __init__(self, collection_name: str, config: PGVectorConfig):

@@ -205,7 +210,10 @@ class PGVector(BaseVector):
                 with self._get_cursor() as cur:
                     cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
                     cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension))
-                    # TODO: create index https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
+                    # PG hnsw index only support 2000 dimension or less
+                    # ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
+                    if dimension <= 2000:
+                        cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name))
                 redis_client.set(collection_exist_cache_key, 1, ex=3600)
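The dimension guard above matches pgvector's documented limit: its `hnsw` index type only supports vectors up to 2000 dimensions. A small illustration of what gets created versus skipped (the table name is hypothetical):

```python
SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
"""

for dimension in (1536, 3072):
    if dimension <= 2000:
        print(SQL_CREATE_INDEX.format(table_name="embedding_demo"))  # index created
    else:
        print(f"-- {dimension}-dim vectors: hnsw unsupported, queries fall back to a full scan")
```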

@@ -74,7 +74,7 @@ class CacheEmbedding(Embeddings):
                     embedding_queue_embeddings.append(normalized_embedding)
             except IntegrityError:
                 db.session.rollback()
-            except Exception as e:
+            except Exception:
                 logging.exception("Failed transform embedding")
             cache_embeddings = []
             try:

@@ -31,7 +31,7 @@ class FirecrawlApp:
                     "markdown": data.get("markdown"),
                 }
             else:
-                raise Exception(f'Failed to scrape URL. Error: {response_data["error"]}')
+                raise Exception(f"Failed to scrape URL. Error: {response_data['error']}")
         elif response.status_code in {402, 409, 500}:
             error_message = response.json().get("error", "Unknown error occurred")

@@ -358,8 +358,7 @@ class NotionExtractor(BaseExtractor):
         if not data_source_binding:
             raise Exception(
-                f"No notion data source binding found for tenant {tenant_id} "
-                f"and notion workspace {notion_workspace_id}"
+                f"No notion data source binding found for tenant {tenant_id} and notion workspace {notion_workspace_id}"
             )

         return cast(str, data_source_binding.access_token)

@@ -112,7 +112,7 @@ class QAIndexProcessor(BaseIndexProcessor):
             df = pd.read_csv(file)
             text_docs = []
             for index, row in df.iterrows():
-                data = Document(page_content=row[0], metadata={"answer": row[1]})
+                data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]})
                 text_docs.append(data)
             if len(text_docs) == 0:
                 raise ValueError("The CSV file is empty.")

@@ -94,9 +94,9 @@ class ApiTool(Tool):
         if "api_key_header_prefix" in credentials:
             api_key_header_prefix = credentials["api_key_header_prefix"]
             if api_key_header_prefix == "basic" and credentials["api_key_value"]:
-                credentials["api_key_value"] = f'Basic {credentials["api_key_value"]}'
+                credentials["api_key_value"] = f"Basic {credentials['api_key_value']}"
             elif api_key_header_prefix == "bearer" and credentials["api_key_value"]:
-                credentials["api_key_value"] = f'Bearer {credentials["api_key_value"]}'
+                credentials["api_key_value"] = f"Bearer {credentials['api_key_value']}"
             elif api_key_header_prefix == "custom":
                 pass

@ -48,7 +48,9 @@ class PluginToolProviderController(BuiltinToolProviderController):
""" """
return tool with given name return tool with given name
""" """
tool_entity = next(tool_entity for tool_entity in self.entity.tools if tool_entity.identity.name == tool_name) tool_entity = next(
(tool_entity for tool_entity in self.entity.tools if tool_entity.identity.name == tool_name), None
)
if not tool_entity: if not tool_entity:
raise ValueError(f"Tool with name {tool_name} not found") raise ValueError(f"Tool with name {tool_name} not found")

@ -39,7 +39,7 @@ class ToolFileMessageTransformer:
conversation_id=conversation_id, conversation_id=conversation_id,
) )
url = f'/files/tools/{file.id}{guess_extension(file.mimetype) or ".png"}' url = f"/files/tools/{file.id}{guess_extension(file.mimetype) or '.png'}"
yield ToolInvokeMessage( yield ToolInvokeMessage(
type=ToolInvokeMessage.MessageType.IMAGE_LINK, type=ToolInvokeMessage.MessageType.IMAGE_LINK,
@ -115,4 +115,4 @@ class ToolFileMessageTransformer:
@classmethod @classmethod
def get_tool_file_url(cls, tool_file_id: str, extension: Optional[str]) -> str: def get_tool_file_url(cls, tool_file_id: str, extension: Optional[str]) -> str:
return f'/files/tools/{tool_file_id}{extension or ".bin"}' return f"/files/tools/{tool_file_id}{extension or '.bin'}"

@ -5,6 +5,7 @@ from json import loads as json_loads
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from typing import Optional from typing import Optional
from flask import request
from requests import get from requests import get
from yaml import YAMLError, safe_load # type: ignore from yaml import YAMLError, safe_load # type: ignore
@ -29,6 +30,10 @@ class ApiBasedToolSchemaParser:
raise ToolProviderNotFoundError("No server found in the openapi yaml.") raise ToolProviderNotFoundError("No server found in the openapi yaml.")
server_url = openapi["servers"][0]["url"] server_url = openapi["servers"][0]["url"]
request_env = request.headers.get("X-Request-Env")
if request_env:
matched_servers = [server["url"] for server in openapi["servers"] if server["env"] == request_env]
server_url = matched_servers[0] if matched_servers else server_url
# list all interfaces # list all interfaces
interfaces = [] interfaces = []
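The header-based server selection can be sketched as below. Note that the `env` key on server objects is a custom extension here, not part of the OpenAPI spec, and this sketch uses `.get` to tolerate servers that omit it:

def pick_server_url(servers: list[dict], request_env: str | None) -> str:
    server_url = servers[0]["url"]  # default: first declared server
    if request_env:
        matched = [s["url"] for s in servers if s.get("env") == request_env]
        server_url = matched[0] if matched else server_url
    return server_url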
@ -112,7 +117,7 @@ class ApiBasedToolSchemaParser:
llm_description=property.get("description", ""), llm_description=property.get("description", ""),
default=property.get("default", None), default=property.get("default", None),
placeholder=I18nObject( placeholder=I18nObject(
en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "") en_US=property.get("description", ""), zh_Hans=property.get("description", "")
), ),
) )
@ -144,7 +149,7 @@ class ApiBasedToolSchemaParser:
if not path: if not path:
path = str(uuid.uuid4()) path = str(uuid.uuid4())
interface["operation"]["operationId"] = f'{path}_{interface["method"]}' interface["operation"]["operationId"] = f"{path}_{interface['method']}"
bundles.append( bundles.append(
ApiToolBundle( ApiToolBundle(

@ -134,6 +134,10 @@ class ArrayStringSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_STRING value_type: SegmentType = SegmentType.ARRAY_STRING
value: Sequence[str] value: Sequence[str]
@property
def text(self) -> str:
return json.dumps(self.value)
class ArrayNumberSegment(ArraySegment): class ArrayNumberSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_NUMBER value_type: SegmentType = SegmentType.ARRAY_NUMBER
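The new `text` property renders the whole array as one JSON string rather than a Python repr, which keeps the output parseable downstream. Behavior in miniature:

import json

value = ["foo", "bar"]
# json.dumps yields '["foo", "bar"]', a valid JSON array string.
assert json.dumps(value) == '["foo", "bar"]'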

@ -1,6 +1,7 @@
import logging import logging
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from collections.abc import Generator from collections.abc import Generator
from typing import Optional
from core.workflow.entities.variable_pool import VariablePool from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine.entities.event import GraphEngineEvent, NodeRunExceptionEvent, NodeRunSucceededEvent from core.workflow.graph_engine.entities.event import GraphEngineEvent, NodeRunExceptionEvent, NodeRunSucceededEvent
@ -48,25 +49,35 @@ class StreamProcessor(ABC):
# removing the node may shortcut the answer node, so comment this code out for now # removing the node may shortcut the answer node, so comment this code out for now
# there is no effect on the answer node or the workflow; when we have a better solution # there is no effect on the answer node or the workflow; when we have a better solution
# we can re-enable this code. Issues: #11542 #9560 #10638 #10564 # we can re-enable this code. Issues: #11542 #9560 #10638 #10564
ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id) # ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id)
if "answer" in ids: # if "answer" in ids:
continue # continue
else: # else:
reachable_node_ids.extend(ids) # reachable_node_ids.extend(ids)
# The branch_identify parameter is added to ensure that
# only nodes in the correct logical branch are included.
ids = self._fetch_node_ids_in_reachable_branch(edge.target_node_id, run_result.edge_source_handle)
reachable_node_ids.extend(ids)
else: else:
unreachable_first_node_ids.append(edge.target_node_id) unreachable_first_node_ids.append(edge.target_node_id)
for node_id in unreachable_first_node_ids: for node_id in unreachable_first_node_ids:
self._remove_node_ids_in_unreachable_branch(node_id, reachable_node_ids) self._remove_node_ids_in_unreachable_branch(node_id, reachable_node_ids)
def _fetch_node_ids_in_reachable_branch(self, node_id: str) -> list[str]: def _fetch_node_ids_in_reachable_branch(self, node_id: str, branch_identify: Optional[str] = None) -> list[str]:
node_ids = [] node_ids = []
for edge in self.graph.edge_mapping.get(node_id, []): for edge in self.graph.edge_mapping.get(node_id, []):
if edge.target_node_id == self.graph.root_node_id: if edge.target_node_id == self.graph.root_node_id:
continue continue
# Only follow edges that match the branch_identify or have no run_condition
if edge.run_condition and edge.run_condition.branch_identify:
if not branch_identify or edge.run_condition.branch_identify != branch_identify:
continue
node_ids.append(edge.target_node_id) node_ids.append(edge.target_node_id)
node_ids.extend(self._fetch_node_ids_in_reachable_branch(edge.target_node_id)) node_ids.extend(self._fetch_node_ids_in_reachable_branch(edge.target_node_id, branch_identify))
return node_ids return node_ids
def _remove_node_ids_in_unreachable_branch(self, node_id: str, reachable_node_ids: list[str]) -> None: def _remove_node_ids_in_unreachable_branch(self, node_id: str, reachable_node_ids: list[str]) -> None:
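The branch-aware traversal can be reduced to the following sketch, with a simplified edge type standing in for the graph's `run_condition.branch_identify`:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Edge:
    target: str
    branch: Optional[str] = None  # stand-in for run_condition.branch_identify

def reachable(edges: dict[str, list[Edge]], node: str, branch: Optional[str]) -> list[str]:
    ids: list[str] = []
    for edge in edges.get(node, []):
        # Skip edges bound to a branch other than the one actually taken.
        if edge.branch and (not branch or edge.branch != branch):
            continue
        ids.append(edge.target)
        ids.extend(reachable(edges, edge.target, branch))
    return ids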

@ -253,9 +253,9 @@ class Executor:
) )
if executor_response.size > threshold_size: if executor_response.size > threshold_size:
raise ResponseSizeError( raise ResponseSizeError(
f'{"File" if executor_response.is_file else "Text"} size is too large,' f"{'File' if executor_response.is_file else 'Text'} size is too large,"
f' max size is {threshold_size / 1024 / 1024:.2f} MB,' f" max size is {threshold_size / 1024 / 1024:.2f} MB,"
f' but current size is {executor_response.readable_size}.' f" but current size is {executor_response.readable_size}."
) )
return executor_response return executor_response
@ -338,7 +338,7 @@ class Executor:
if self.auth.config and self.auth.config.header: if self.auth.config and self.auth.config.header:
authorization_header = self.auth.config.header authorization_header = self.auth.config.header
if k.lower() == authorization_header.lower(): if k.lower() == authorization_header.lower():
raw += f'{k}: {"*" * len(v)}\r\n' raw += f"{k}: {'*' * len(v)}\r\n"
continue continue
raw += f"{k}: {v}\r\n" raw += f"{k}: {v}\r\n"

@ -1,4 +1,5 @@
import json import json
from collections.abc import Sequence
from typing import Any, cast from typing import Any, cast
from core.variables import SegmentType, Variable from core.variables import SegmentType, Variable
@ -31,7 +32,7 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
inputs = self.node_data.model_dump() inputs = self.node_data.model_dump()
process_data: dict[str, Any] = {} process_data: dict[str, Any] = {}
# NOTE: This node has no outputs # NOTE: This node has no outputs
updated_variables: list[Variable] = [] updated_variable_selectors: list[Sequence[str]] = []
try: try:
for item in self.node_data.items: for item in self.node_data.items:
@ -98,7 +99,8 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
value=item.value, value=item.value,
) )
variable = variable.model_copy(update={"value": updated_value}) variable = variable.model_copy(update={"value": updated_value})
updated_variables.append(variable) self.graph_runtime_state.variable_pool.add(variable.selector, variable)
updated_variable_selectors.append(variable.selector)
except VariableOperatorNodeError as e: except VariableOperatorNodeError as e:
return NodeRunResult( return NodeRunResult(
status=WorkflowNodeExecutionStatus.FAILED, status=WorkflowNodeExecutionStatus.FAILED,
@ -107,9 +109,15 @@ class VariableAssignerNode(BaseNode[VariableAssignerNodeData]):
error=str(e), error=str(e),
) )
# `updated_variable_selectors` is a list of list[str], which is not hashable,
# so remove the duplicated items first.
updated_variable_selectors = list(set(map(tuple, updated_variable_selectors)))
# Update variables # Update variables
for variable in updated_variables: for selector in updated_variable_selectors:
self.graph_runtime_state.variable_pool.add(variable.selector, variable) variable = self.graph_runtime_state.variable_pool.get(selector)
if not isinstance(variable, Variable):
raise VariableNotFoundError(variable_selector=selector)
process_data[variable.name] = variable.value process_data[variable.name] = variable.value
if variable.selector[0] == CONVERSATION_VARIABLE_NODE_ID: if variable.selector[0] == CONVERSATION_VARIABLE_NODE_ID:
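The tuple conversion exists only because `list` is unhashable and therefore cannot go into a `set`. In miniature:

selectors = [["conversation", "x"], ["node1", "y"], ["conversation", "x"]]
# map(tuple, ...) makes the selectors hashable; the set drops the duplicate.
unique = list(set(map(tuple, selectors)))
assert len(unique) == 2  # order of the result is unspecified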

@ -26,7 +26,7 @@ def handle(sender, **kwargs):
tool_runtime=tool_runtime, tool_runtime=tool_runtime,
provider_name=tool_entity.provider_name, provider_name=tool_entity.provider_name,
provider_type=tool_entity.provider_type, provider_type=tool_entity.provider_type,
identity_id=f'WORKFLOW.{app.id}.{node_data.get("id")}', identity_id=f"WORKFLOW.{app.id}.{node_data.get('id')}",
) )
manager.delete_tool_parameters_cache() manager.delete_tool_parameters_cache()
except: except:

@ -34,7 +34,7 @@ class OpenDALStorage(BaseStorage):
root = kwargs.get("root", "storage") root = kwargs.get("root", "storage")
Path(root).mkdir(parents=True, exist_ok=True) Path(root).mkdir(parents=True, exist_ok=True)
self.op = opendal.Operator(scheme=scheme, **kwargs) self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore
logger.debug(f"opendal operator created with scheme {scheme}") logger.debug(f"opendal operator created with scheme {scheme}")
retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True) retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True)
self.op = self.op.layer(retry_layer) self.op = self.op.layer(retry_layer)

@ -1,6 +1,6 @@
from flask_restful import fields # type: ignore from flask_restful import fields # type: ignore
from libs.helper import TimestampField from libs.helper import AvatarUrlField, TimestampField
simple_account_fields = {"id": fields.String, "name": fields.String, "email": fields.String} simple_account_fields = {"id": fields.String, "name": fields.String, "email": fields.String}
@ -8,6 +8,7 @@ account_fields = {
"id": fields.String, "id": fields.String,
"name": fields.String, "name": fields.String,
"avatar": fields.String, "avatar": fields.String,
"avatar_url": AvatarUrlField,
"email": fields.String, "email": fields.String,
"is_password_set": fields.Boolean, "is_password_set": fields.Boolean,
"interface_language": fields.String, "interface_language": fields.String,
@ -22,6 +23,7 @@ account_with_role_fields = {
"id": fields.String, "id": fields.String,
"name": fields.String, "name": fields.String,
"avatar": fields.String, "avatar": fields.String,
"avatar_url": AvatarUrlField,
"email": fields.String, "email": fields.String,
"last_login_at": TimestampField, "last_login_at": TimestampField,
"last_active_at": TimestampField, "last_active_at": TimestampField,

@ -43,6 +43,18 @@ class AppIconUrlField(fields.Raw):
return None return None
class AvatarUrlField(fields.Raw):
def output(self, key, obj):
if obj is None:
return None
from models.account import Account
if isinstance(obj, Account) and obj.avatar is not None:
return file_helpers.get_signed_file_url(obj.avatar)
return None
class TimestampField(fields.Raw): class TimestampField(fields.Raw):
def format(self, value) -> int: def format(self, value) -> int:
return int(value.timestamp()) return int(value.timestamp())

@ -13,6 +13,7 @@ from typing import Any, cast
from sqlalchemy import func from sqlalchemy import func
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped
from configs import dify_config from configs import dify_config
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -515,7 +516,7 @@ class DocumentSegment(db.Model): # type: ignore[name-defined]
tenant_id = db.Column(StringUUID, nullable=False) tenant_id = db.Column(StringUUID, nullable=False)
dataset_id = db.Column(StringUUID, nullable=False) dataset_id = db.Column(StringUUID, nullable=False)
document_id = db.Column(StringUUID, nullable=False) document_id = db.Column(StringUUID, nullable=False)
position = db.Column(db.Integer, nullable=False) position: Mapped[int]
content = db.Column(db.Text, nullable=False) content = db.Column(db.Text, nullable=False)
answer = db.Column(db.Text, nullable=True) answer = db.Column(db.Text, nullable=True)
word_count = db.Column(db.Integer, nullable=False) word_count = db.Column(db.Integer, nullable=False)
@ -582,7 +583,7 @@ class DocumentSegment(db.Model): # type: ignore[name-defined]
return [] return []
else: else:
return [] return []
@property @property
def sign_content(self): def sign_content(self):
return self.get_sign_content() return self.get_sign_content()
@ -747,7 +748,7 @@ class DatasetKeywordTable(db.Model): # type: ignore[name-defined]
if keyword_table_text: if keyword_table_text:
return json.loads(keyword_table_text.decode("utf-8"), cls=SetDecoder) return json.loads(keyword_table_text.decode("utf-8"), cls=SetDecoder)
return None return None
except Exception as e: except Exception:
logging.exception(f"Failed to load keyword table from file: {file_key}") logging.exception(f"Failed to load keyword table from file: {file_key}")
return None return None

@ -1486,9 +1486,8 @@ class ApiToken(Base):
def generate_api_key(prefix, n): def generate_api_key(prefix, n):
while True: while True:
result = prefix + generate_string(n) result = prefix + generate_string(n)
while db.session.query(ApiToken).filter(ApiToken.token == result).count() > 0: if db.session.query(ApiToken).filter(ApiToken.token == result).count() > 0:
result = prefix + generate_string(n) continue
return result return result
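The rewrite flattens the redundant nested loop into a single retry loop: on a collision it simply continues and regenerates. A sketch with a pluggable uniqueness check standing in for the database query:

import secrets

def generate_api_key(prefix: str, n: int, exists) -> str:
    # `exists` stands in for the ApiToken uniqueness query.
    while True:
        result = prefix + secrets.token_urlsafe(n)[:n]
        if exists(result):
            continue  # collision: regenerate and re-check
        return result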

1734
api/poetry.lock generated

File diff suppressed because it is too large

@ -1,9 +1,10 @@
[project] [project]
name = "dify-api" name = "dify-api"
requires-python = ">=3.11,<3.13" requires-python = ">=3.11,<3.13"
dynamic = [ "dependencies" ]
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core>=2.0.0"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
@ -48,6 +49,7 @@ numpy = "~1.26.4"
oci = "~2.135.1" oci = "~2.135.1"
openai = "~1.52.0" openai = "~1.52.0"
openpyxl = "~3.1.5" openpyxl = "~3.1.5"
opik = "~1.3.4"
pandas = { version = "~2.2.2", extras = ["performance", "excel"] } pandas = { version = "~2.2.2", extras = ["performance", "excel"] }
pandas-stubs = "~2.2.3.241009" pandas-stubs = "~2.2.3.241009"
psycogreen = "~1.0.2" psycogreen = "~1.0.2"
@ -157,4 +159,4 @@ pytest-mock = "~3.14.0"
optional = true optional = true
[tool.poetry.group.lint.dependencies] [tool.poetry.group.lint.dependencies]
dotenv-linter = "~0.5.0" dotenv-linter = "~0.5.0"
ruff = "~0.8.1" ruff = "~0.9.2"

@ -7,7 +7,7 @@ env =
CODE_EXECUTION_API_KEY = dify-sandbox CODE_EXECUTION_API_KEY = dify-sandbox
CODE_EXECUTION_ENDPOINT = http://127.0.0.1:8194 CODE_EXECUTION_ENDPOINT = http://127.0.0.1:8194
CODE_MAX_STRING_LENGTH = 80000 CODE_MAX_STRING_LENGTH = 80000
PLUGIN_API_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi PLUGIN_DAEMON_KEY=lYkiYYT6owG+71oLerGzA7GXCgOT++6ovaezWAjpCjf+Sjc3ZtU+qUEi
PLUGIN_DAEMON_URL=http://127.0.0.1:5002 PLUGIN_DAEMON_URL=http://127.0.0.1:5002
PLUGIN_MAX_PACKAGE_SIZE=15728640 PLUGIN_MAX_PACKAGE_SIZE=15728640
INNER_API_KEY_FOR_PLUGIN=QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1 INNER_API_KEY_FOR_PLUGIN=QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1

@ -286,7 +286,7 @@ class AppAnnotationService:
df = pd.read_csv(file) df = pd.read_csv(file)
result = [] result = []
for index, row in df.iterrows(): for index, row in df.iterrows():
content = {"question": row[0], "answer": row[1]} content = {"question": row.iloc[0], "answer": row.iloc[1]}
result.append(content) result.append(content)
if len(result) == 0: if len(result) == 0:
raise ValueError("The CSV file is empty.") raise ValueError("The CSV file is empty.")

@ -1,7 +1,7 @@
import logging import logging
import uuid import uuid
from enum import StrEnum from enum import StrEnum
from typing import Optional, cast from typing import Optional
from urllib.parse import urlparse from urllib.parse import urlparse
from uuid import uuid4 from uuid import uuid4
@ -159,15 +159,6 @@ class AppDslService:
status=ImportStatus.FAILED, status=ImportStatus.FAILED,
error="Empty content from url", error="Empty content from url",
) )
try:
content = cast(bytes, content).decode("utf-8")
except UnicodeDecodeError as e:
return Import(
id=import_id,
status=ImportStatus.FAILED,
error=f"Error decoding content: {e}",
)
except Exception as e: except Exception as e:
return Import( return Import(
id=import_id, id=import_id,

@ -82,7 +82,7 @@ class AudioService:
from app import app from app import app
from extensions.ext_database import db from extensions.ext_database import db
def invoke_tts(text_content: str, app_model, voice: Optional[str] = None): def invoke_tts(text_content: str, app_model: App, voice: Optional[str] = None):
with app.app_context(): with app.app_context():
if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}: if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}:
workflow = app_model.workflow workflow = app_model.workflow
@ -95,6 +95,8 @@ class AudioService:
voice = features_dict["text_to_speech"].get("voice") if voice is None else voice voice = features_dict["text_to_speech"].get("voice") if voice is None else voice
else: else:
if app_model.app_model_config is None:
raise ValueError("AppModelConfig not found")
text_to_speech_dict = app_model.app_model_config.text_to_speech_dict text_to_speech_dict = app_model.app_model_config.text_to_speech_dict
if not text_to_speech_dict.get("enabled"): if not text_to_speech_dict.get("enabled"):

@ -4,13 +4,16 @@ import logging
import random import random
import time import time
import uuid import uuid
from collections import Counter
from typing import Any, Optional from typing import Any, Optional
from flask_login import current_user # type: ignore from flask_login import current_user # type: ignore
from sqlalchemy import func from sqlalchemy import func
from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound from werkzeug.exceptions import NotFound
from configs import dify_config from configs import dify_config
from core.entities import DEFAULT_PLUGIN_ID
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.model_manager import ModelManager from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.model_entities import ModelType
@ -71,7 +74,7 @@ from tasks.sync_website_document_indexing_task import sync_website_document_inde
class DatasetService: class DatasetService:
@staticmethod @staticmethod
def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None): def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False):
query = Dataset.query.filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc()) query = Dataset.query.filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc())
if user: if user:
@ -86,7 +89,7 @@ class DatasetService:
else: else:
return [], 0 return [], 0
else: else:
if user.current_role != TenantAccountRole.OWNER: if user.current_role != TenantAccountRole.OWNER or not include_all:
# show all datasets that the user has permission to access # show all datasets that the user has permission to access
if permitted_dataset_ids: if permitted_dataset_ids:
query = query.filter( query = query.filter(
@ -221,8 +224,7 @@ class DatasetService:
) )
except LLMBadRequestError: except LLMBadRequestError:
raise ValueError( raise ValueError(
"No Embedding Model available. Please configure a valid provider " "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"in the Settings -> Model Provider."
) )
except ProviderTokenNotInitError as ex: except ProviderTokenNotInitError as ex:
raise ValueError(f"The dataset in unavailable, due to: {ex.description}") raise ValueError(f"The dataset in unavailable, due to: {ex.description}")
@ -267,7 +269,15 @@ class DatasetService:
external_knowledge_api_id = data.get("external_knowledge_api_id", None) external_knowledge_api_id = data.get("external_knowledge_api_id", None)
if not external_knowledge_api_id: if not external_knowledge_api_id:
raise ValueError("External knowledge api id is required.") raise ValueError("External knowledge api id is required.")
external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(dataset_id=dataset_id).first()
with Session(db.engine) as session:
external_knowledge_binding = (
session.query(ExternalKnowledgeBindings).filter_by(dataset_id=dataset_id).first()
)
if not external_knowledge_binding:
raise ValueError("External knowledge binding not found.")
if ( if (
external_knowledge_binding.external_knowledge_id != external_knowledge_id external_knowledge_binding.external_knowledge_id != external_knowledge_id
or external_knowledge_binding.external_knowledge_api_id != external_knowledge_api_id or external_knowledge_binding.external_knowledge_api_id != external_knowledge_api_id
@ -315,8 +325,19 @@ class DatasetService:
except ProviderTokenNotInitError as ex: except ProviderTokenNotInitError as ex:
raise ValueError(ex.description) raise ValueError(ex.description)
else: else:
# add default plugin id to both setting sets, to make sure the plugin model provider is consistent
plugin_model_provider = dataset.embedding_model_provider
if "/" not in plugin_model_provider:
plugin_model_provider = f"{DEFAULT_PLUGIN_ID}/{plugin_model_provider}/{plugin_model_provider}"
new_plugin_model_provider = data["embedding_model_provider"]
if "/" not in new_plugin_model_provider:
new_plugin_model_provider = (
f"{DEFAULT_PLUGIN_ID}/{new_plugin_model_provider}/{new_plugin_model_provider}"
)
if ( if (
data["embedding_model_provider"] != dataset.embedding_model_provider new_plugin_model_provider != plugin_model_provider
or data["embedding_model"] != dataset.embedding_model or data["embedding_model"] != dataset.embedding_model
): ):
action = "update" action = "update"
@ -859,7 +880,7 @@ class DocumentService:
position = DocumentService.get_documents_position(dataset.id) position = DocumentService.get_documents_position(dataset.id)
document_ids = [] document_ids = []
duplicate_document_ids = [] duplicate_document_ids = []
if knowledge_config.data_source.info_list.data_source_type == "upload_file": if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
for file_id in upload_file_list: for file_id in upload_file_list:
file = ( file = (
@ -901,7 +922,7 @@ class DocumentService:
document = DocumentService.build_document( document = DocumentService.build_document(
dataset, dataset,
dataset_process_rule.id, # type: ignore dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type, knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form, knowledge_config.doc_form,
knowledge_config.doc_language, knowledge_config.doc_language,
data_source_info, data_source_info,
@ -916,8 +937,8 @@ class DocumentService:
document_ids.append(document.id) document_ids.append(document.id)
documents.append(document) documents.append(document)
position += 1 position += 1
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
notion_info_list = knowledge_config.data_source.info_list.notion_info_list notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
if not notion_info_list: if not notion_info_list:
raise ValueError("No notion info list found.") raise ValueError("No notion info list found.")
exist_page_ids = [] exist_page_ids = []
@ -956,7 +977,7 @@ class DocumentService:
document = DocumentService.build_document( document = DocumentService.build_document(
dataset, dataset,
dataset_process_rule.id, # type: ignore dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type, knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form, knowledge_config.doc_form,
knowledge_config.doc_language, knowledge_config.doc_language,
data_source_info, data_source_info,
@ -976,8 +997,8 @@ class DocumentService:
# delete not selected documents # delete not selected documents
if len(exist_document) > 0: if len(exist_document) > 0:
clean_notion_document_task.delay(list(exist_document.values()), dataset.id) clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
website_info = knowledge_config.data_source.info_list.website_info_list website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
if not website_info: if not website_info:
raise ValueError("No website info list found.") raise ValueError("No website info list found.")
urls = website_info.urls urls = website_info.urls
@ -996,7 +1017,7 @@ class DocumentService:
document = DocumentService.build_document( document = DocumentService.build_document(
dataset, dataset,
dataset_process_rule.id, # type: ignore dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type, knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form, knowledge_config.doc_form,
knowledge_config.doc_language, knowledge_config.doc_language,
data_source_info, data_source_info,
@ -1195,20 +1216,20 @@ class DocumentService:
if features.billing.enabled: if features.billing.enabled:
count = 0 count = 0
if knowledge_config.data_source.info_list.data_source_type == "upload_file": if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
upload_file_list = ( upload_file_list = (
knowledge_config.data_source.info_list.file_info_list.file_ids knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
if knowledge_config.data_source.info_list.file_info_list if knowledge_config.data_source.info_list.file_info_list # type: ignore
else [] else []
) )
count = len(upload_file_list) count = len(upload_file_list)
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
notion_info_list = knowledge_config.data_source.info_list.notion_info_list notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
if notion_info_list: if notion_info_list:
for notion_info in notion_info_list: for notion_info in notion_info_list:
count = count + len(notion_info.pages) count = count + len(notion_info.pages)
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
website_info = knowledge_config.data_source.info_list.website_info_list website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
if website_info: if website_info:
count = len(website_info.urls) count = len(website_info.urls)
batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT)
@ -1239,7 +1260,7 @@ class DocumentService:
dataset = Dataset( dataset = Dataset(
tenant_id=tenant_id, tenant_id=tenant_id,
name="", name="",
data_source_type=knowledge_config.data_source.info_list.data_source_type, data_source_type=knowledge_config.data_source.info_list.data_source_type, # type: ignore
indexing_technique=knowledge_config.indexing_technique, indexing_technique=knowledge_config.indexing_technique,
created_by=account.id, created_by=account.id,
embedding_model=knowledge_config.embedding_model, embedding_model=knowledge_config.embedding_model,
@ -1614,8 +1635,11 @@ class SegmentService:
segment.answer = args.answer segment.answer = args.answer
segment.word_count += len(args.answer) if args.answer else 0 segment.word_count += len(args.answer) if args.answer else 0
word_count_change = segment.word_count - word_count_change word_count_change = segment.word_count - word_count_change
keyword_changed = False
if args.keywords: if args.keywords:
segment.keywords = args.keywords if Counter(segment.keywords) != Counter(args.keywords):
segment.keywords = args.keywords
keyword_changed = True
segment.enabled = True segment.enabled = True
segment.disabled_at = None segment.disabled_at = None
segment.disabled_by = None segment.disabled_by = None
@ -1626,13 +1650,6 @@ class SegmentService:
document.word_count = max(0, document.word_count + word_count_change) document.word_count = max(0, document.word_count + word_count_change)
db.session.add(document) db.session.add(document)
# update segment index task # update segment index task
if args.enabled:
VectorService.create_segments_vector(
[args.keywords] if args.keywords else None,
[segment],
dataset,
document.doc_form,
)
if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks: if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks:
# regenerate child chunks # regenerate child chunks
# get embedding model instance # get embedding model instance
@ -1665,6 +1682,14 @@ class SegmentService:
VectorService.generate_child_chunks( VectorService.generate_child_chunks(
segment, document, dataset, embedding_model_instance, processing_rule, True segment, document, dataset, embedding_model_instance, processing_rule, True
) )
elif document.doc_form in (IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX):
if args.enabled or keyword_changed:
VectorService.create_segments_vector(
[args.keywords] if args.keywords else None,
[segment],
dataset,
document.doc_form,
)
else: else:
segment_hash = helper.generate_text_hash(content) segment_hash = helper.generate_text_hash(content)
tokens = 0 tokens = 0
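Using `Counter` compares the keyword lists as multisets, so a mere reordering is not treated as a change and does not trigger re-indexing:

from collections import Counter

old_keywords = ["alpha", "beta"]
new_keywords = ["beta", "alpha"]
keyword_changed = Counter(old_keywords) != Counter(new_keywords)
assert keyword_changed is False  # same multiset, nothing to re-index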

@ -97,7 +97,7 @@ class KnowledgeConfig(BaseModel):
original_document_id: Optional[str] = None original_document_id: Optional[str] = None
duplicate: bool = True duplicate: bool = True
indexing_technique: Literal["high_quality", "economy"] indexing_technique: Literal["high_quality", "economy"]
data_source: DataSource data_source: Optional[DataSource] = None
process_rule: Optional[ProcessRule] = None process_rule: Optional[ProcessRule] = None
retrieval_model: Optional[RetrievalModel] = None retrieval_model: Optional[RetrievalModel] = None
doc_form: str = "text_model" doc_form: str = "text_model"

@ -155,7 +155,7 @@ class ExternalDatasetService:
if custom_parameters: if custom_parameters:
for parameter in custom_parameters: for parameter in custom_parameters:
if parameter.get("required", False) and not process_parameter.get(parameter.get("name")): if parameter.get("required", False) and not process_parameter.get(parameter.get("name")):
raise ValueError(f'{parameter.get("name")} is required') raise ValueError(f"{parameter.get('name')} is required")
@staticmethod @staticmethod
def process_external_api( def process_external_api(

@ -59,6 +59,15 @@ class OpsService:
except Exception: except Exception:
new_decrypt_tracing_config.update({"project_url": "https://smith.langchain.com/"}) new_decrypt_tracing_config.update({"project_url": "https://smith.langchain.com/"})
if tracing_provider == "opik" and (
"project_url" not in decrypt_tracing_config or not decrypt_tracing_config.get("project_url")
):
try:
project_url = OpsTraceManager.get_trace_config_project_url(decrypt_tracing_config, tracing_provider)
new_decrypt_tracing_config.update({"project_url": project_url})
except Exception:
new_decrypt_tracing_config.update({"project_url": "https://www.comet.com/opik/"})
trace_config_data.tracing_config = new_decrypt_tracing_config trace_config_data.tracing_config = new_decrypt_tracing_config
return trace_config_data.to_dict() return trace_config_data.to_dict()
@ -92,7 +101,7 @@ class OpsService:
if tracing_provider == "langfuse": if tracing_provider == "langfuse":
project_key = OpsTraceManager.get_trace_config_project_key(tracing_config, tracing_provider) project_key = OpsTraceManager.get_trace_config_project_key(tracing_config, tracing_provider)
project_url = "{host}/project/{key}".format(host=tracing_config.get("host"), key=project_key) project_url = "{host}/project/{key}".format(host=tracing_config.get("host"), key=project_key)
elif tracing_provider == "langsmith": elif tracing_provider in ("langsmith", "opik"):
project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider) project_url = OpsTraceManager.get_trace_config_project_url(tracing_config, tracing_provider)
else: else:
project_url = None project_url = None

@ -5,7 +5,8 @@ import uuid
import click import click
from celery import shared_task # type: ignore from celery import shared_task # type: ignore
from sqlalchemy import func from sqlalchemy import func, select
from sqlalchemy.orm import Session
from core.model_manager import ModelManager from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.model_entities import ModelType
@ -18,7 +19,12 @@ from services.vector_service import VectorService
@shared_task(queue="dataset") @shared_task(queue="dataset")
def batch_create_segment_to_index_task( def batch_create_segment_to_index_task(
job_id: str, content: list, dataset_id: str, document_id: str, tenant_id: str, user_id: str job_id: str,
content: list,
dataset_id: str,
document_id: str,
tenant_id: str,
user_id: str,
): ):
""" """
Async batch create segment to index Async batch create segment to index
@ -37,25 +43,35 @@ def batch_create_segment_to_index_task(
indexing_cache_key = "segment_batch_import_{}".format(job_id) indexing_cache_key = "segment_batch_import_{}".format(job_id)
try: try:
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() with Session(db.engine) as session:
if not dataset: dataset = session.get(Dataset, dataset_id)
raise ValueError("Dataset not exist.") if not dataset:
raise ValueError("Dataset not exist.")
dataset_document = db.session.query(Document).filter(Document.id == document_id).first() dataset_document = session.get(Document, document_id)
if not dataset_document: if not dataset_document:
raise ValueError("Document not exist.") raise ValueError("Document not exist.")
if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed": if (
raise ValueError("Document is not available.") not dataset_document.enabled
document_segments = [] or dataset_document.archived
embedding_model = None or dataset_document.indexing_status != "completed"
if dataset.indexing_technique == "high_quality": ):
model_manager = ModelManager() raise ValueError("Document is not available.")
embedding_model = model_manager.get_model_instance( document_segments = []
tenant_id=dataset.tenant_id, embedding_model = None
provider=dataset.embedding_model_provider, if dataset.indexing_technique == "high_quality":
model_type=ModelType.TEXT_EMBEDDING, model_manager = ModelManager()
model=dataset.embedding_model, embedding_model = model_manager.get_model_instance(
tenant_id=dataset.tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
)
word_count_change = 0
segments_to_insert: list[str] = []
max_position_stmt = select(func.max(DocumentSegment.position)).where(
DocumentSegment.document_id == dataset_document.id
) )
word_count_change = 0 word_count_change = 0
if embedding_model: if embedding_model:
@ -103,7 +119,10 @@ def batch_create_segment_to_index_task(
redis_client.setex(indexing_cache_key, 600, "completed") redis_client.setex(indexing_cache_key, 600, "completed")
end_at = time.perf_counter() end_at = time.perf_counter()
logging.info( logging.info(
click.style("Segment batch created job: {} latency: {}".format(job_id, end_at - start_at), fg="green") click.style(
"Segment batch created job: {} latency: {}".format(job_id, end_at - start_at),
fg="green",
)
) )
except Exception: except Exception:
logging.exception("Segments batch created index failed") logging.exception("Segments batch created index failed")

@ -44,6 +44,6 @@ def test_duplicated_dependency_crossing_groups() -> None:
dependency_names = list(dependencies.keys()) dependency_names = list(dependencies.keys())
all_dependency_names.extend(dependency_names) all_dependency_names.extend(dependency_names)
expected_all_dependency_names = set(all_dependency_names) expected_all_dependency_names = set(all_dependency_names)
assert sorted(expected_all_dependency_names) == sorted( assert sorted(expected_all_dependency_names) == sorted(all_dependency_names), (
all_dependency_names "Duplicated dependencies crossing groups are found"
), "Duplicated dependencies crossing groups are found" )

@ -85,7 +85,7 @@ VOLC_EMBEDDING_ENDPOINT_ID=
ZHINAO_API_KEY= ZHINAO_API_KEY=
# Plugin configuration # Plugin configuration
PLUGIN_API_KEY= PLUGIN_DAEMON_KEY=
PLUGIN_DAEMON_URL= PLUGIN_DAEMON_URL=
INNER_API_KEY= INNER_API_KEY=

@ -4,7 +4,6 @@ from app_fixture import mock_user # type: ignore
def test_post_requires_login(app): def test_post_requires_login(app):
with app.test_client() as client: with app.test_client() as client, patch("flask_login.utils._get_user", mock_user):
with patch("flask_login.utils._get_user", mock_user): response = client.get("/console/api/data-source/integrates")
response = client.get("/console/api/data-source/integrates") assert response.status_code == 200
assert response.status_code == 200

@ -89,9 +89,9 @@ class TestOpenSearchVector:
print("Actual document ID:", hits_by_vector[0].metadata["document_id"] if hits_by_vector else "No hits") print("Actual document ID:", hits_by_vector[0].metadata["document_id"] if hits_by_vector else "No hits")
assert len(hits_by_vector) > 0, f"Expected at least one hit, got {len(hits_by_vector)}" assert len(hits_by_vector) > 0, f"Expected at least one hit, got {len(hits_by_vector)}"
assert ( assert hits_by_vector[0].metadata["document_id"] == self.example_doc_id, (
hits_by_vector[0].metadata["document_id"] == self.example_doc_id f"Expected document ID {self.example_doc_id}, got {hits_by_vector[0].metadata['document_id']}"
), f"Expected document ID {self.example_doc_id}, got {hits_by_vector[0].metadata['document_id']}" )
def test_get_ids_by_metadata_field(self): def test_get_ids_by_metadata_field(self):
mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}} mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}}

@ -434,11 +434,11 @@ def test_fetch_files_with_non_existent_variable(llm_node):
# jinja2_variables=[], # jinja2_variables=[],
# ) # )
# # Verify the result # # Verify the result
# assert len(prompt_messages) == len(scenario.expected_messages), f"Scenario failed: {scenario.description}" # assert len(prompt_messages) == len(scenario.expected_messages), f"Scenario failed: {scenario.description}"
# assert ( # assert prompt_messages == scenario.expected_messages, (
# prompt_messages == scenario.expected_messages # f"Message content mismatch in scenario: {scenario.description}"
# ), f"Message content mismatch in scenario: {scenario.description}" # )
def test_handle_list_messages_basic(llm_node): def test_handle_list_messages_basic(llm_node):

@ -401,8 +401,7 @@ def test__convert_to_llm_node_for_workflow_advanced_completion_model(default_var
prompt_template = PromptTemplateEntity( prompt_template = PromptTemplateEntity(
prompt_type=PromptTemplateEntity.PromptType.ADVANCED, prompt_type=PromptTemplateEntity.PromptType.ADVANCED,
advanced_completion_prompt_template=AdvancedCompletionPromptTemplateEntity( advanced_completion_prompt_template=AdvancedCompletionPromptTemplateEntity(
prompt="You are a helpful assistant named {{name}}.\n\nContext:\n{{#context#}}\n\n" prompt="You are a helpful assistant named {{name}}.\n\nContext:\n{{#context#}}\n\nHuman: hi\nAssistant: ",
"Human: hi\nAssistant: ",
role_prefix=AdvancedCompletionPromptTemplateEntity.RolePrefixEntity(user="Human", assistant="Assistant"), role_prefix=AdvancedCompletionPromptTemplateEntity.RolePrefixEntity(user="Human", assistant="Assistant"),
), ),
) )

@ -20,8 +20,8 @@ BASE_API_AND_DOCKER_CONFIG_SET_DIFF = {
"OCI_ENDPOINT", "OCI_ENDPOINT",
"OCI_REGION", "OCI_REGION",
"OCI_SECRET_KEY", "OCI_SECRET_KEY",
"PLUGIN_API_KEY", "PLUGIN_DAEMON_KEY",
"PLUGIN_API_URL", "PLUGIN_DAEMON_URL",
"PLUGIN_REMOTE_INSTALL_HOST", "PLUGIN_REMOTE_INSTALL_HOST",
"PLUGIN_REMOTE_INSTALL_PORT", "PLUGIN_REMOTE_INSTALL_PORT",
"REDIS_DB", "REDIS_DB",
@ -66,8 +66,8 @@ BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF = {
"PGVECTO_RS_PASSWORD", "PGVECTO_RS_PASSWORD",
"PGVECTO_RS_PORT", "PGVECTO_RS_PORT",
"PGVECTO_RS_USER", "PGVECTO_RS_USER",
"PLUGIN_API_KEY", "PLUGIN_DAEMON_KEY",
"PLUGIN_API_URL", "PLUGIN_DAEMON_URL",
"PLUGIN_REMOTE_INSTALL_HOST", "PLUGIN_REMOTE_INSTALL_HOST",
"PLUGIN_REMOTE_INSTALL_PORT", "PLUGIN_REMOTE_INSTALL_PORT",
"RESPECT_XFORWARD_HEADERS_ENABLED", "RESPECT_XFORWARD_HEADERS_ENABLED",

@ -9,10 +9,10 @@ if ! command -v ruff &> /dev/null || ! command -v dotenv-linter &> /dev/null; th
fi fi
# run ruff linter # run ruff linter
poetry run -C api ruff check --fix ./api poetry run -C api ruff check --fix ./
# run ruff formatter # run ruff formatter
poetry run -C api ruff format ./api poetry run -C api ruff format ./
# run dotenv-linter linter # run dotenv-linter linter
poetry run -C api dotenv-linter ./api/.env.example ./web/.env.example poetry run -P api dotenv-linter ./api/.env.example ./web/.env.example

@ -12,7 +12,7 @@ if [ $? -ne 0 ]; then
# update poetry.lock # update poetry.lock
# refreshing lockfile only without updating locked versions # refreshing lockfile only without updating locked versions
echo "poetry.lock is outdated, refreshing without updating locked versions ..." echo "poetry.lock is outdated, refreshing without updating locked versions ..."
poetry lock -C api --no-update poetry lock -C api
else else
echo "poetry.lock is ready." echo "poetry.lock is ready."
fi fi

@ -1,13 +0,0 @@
services:
# Chroma vector store.
chroma:
image: ghcr.io/chroma-core/chroma:0.5.20
restart: always
volumes:
- ./volumes/chroma:/chroma/chroma
environment:
CHROMA_SERVER_AUTHN_CREDENTIALS: difyai123456
CHROMA_SERVER_AUTHN_PROVIDER: chromadb.auth.token_authn.TokenAuthenticationServerProvider
IS_PERSISTENT: TRUE
ports:
- "8000:8000"

@ -1,109 +0,0 @@
version: '3'
services:
# The postgres database.
db:
image: postgres:15-alpine
restart: always
environment:
# The password for the default postgres user.
POSTGRES_PASSWORD: difyai123456
# The name of the default postgres database.
POSTGRES_DB: dify
# postgres data directory
PGDATA: /var/lib/postgresql/data/pgdata
volumes:
- ./volumes/db/data:/var/lib/postgresql/data
ports:
- "5432:5432"
# The redis cache.
redis:
image: redis:6-alpine
restart: always
volumes:
# Mount the redis data directory to the container.
- ./volumes/redis/data:/data
# Set the redis password when startup redis server.
command: redis-server --requirepass difyai123456
ports:
- "6379:6379"
# The Weaviate vector store.
weaviate:
image: semitechnologies/weaviate:1.19.0
restart: always
volumes:
# Mount the Weaviate data directory to the container.
- ./volumes/weaviate:/var/lib/weaviate
environment:
# The Weaviate configurations
# You can refer to the [Weaviate](https://weaviate.io/developers/weaviate/config-refs/env-vars) documentation for more information.
QUERY_DEFAULTS_LIMIT: 25
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'false'
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
DEFAULT_VECTORIZER_MODULE: 'none'
CLUSTER_HOSTNAME: 'node1'
AUTHENTICATION_APIKEY_ENABLED: 'true'
AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih'
AUTHENTICATION_APIKEY_USERS: 'hello@dify.ai'
AUTHORIZATION_ADMINLIST_ENABLED: 'true'
AUTHORIZATION_ADMINLIST_USERS: 'hello@dify.ai'
ports:
- "8080:8080"
# The DifySandbox
sandbox:
image: langgenius/dify-sandbox:0.2.1
restart: always
environment:
# The DifySandbox configurations
# Make sure you are changing this key for your deployment with a strong key.
# You can generate a strong key using `openssl rand -base64 42`.
API_KEY: dify-sandbox
GIN_MODE: 'release'
WORKER_TIMEOUT: 15
ENABLE_NETWORK: 'true'
HTTP_PROXY: 'http://ssrf_proxy:3128'
HTTPS_PROXY: 'http://ssrf_proxy:3128'
SANDBOX_PORT: 8194
volumes:
- ./volumes/sandbox/dependencies:/dependencies
networks:
- ssrf_proxy_network
# ssrf_proxy server
# for more information, please refer to
# https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
ssrf_proxy:
image: ubuntu/squid:latest
restart: always
ports:
- "3128:3128"
- "8194:8194"
volumes:
# please modify the squid.conf file to fit your network environment.
- ./volumes/ssrf_proxy/squid.conf:/etc/squid/squid.conf
networks:
- ssrf_proxy_network
- default
# Qdrant vector store.
# uncomment to use qdrant as vector store.
# (if uncommented, you need to comment out the weaviate service above,
# and set VECTOR_STORE to qdrant in the api & worker service.)
# qdrant:
# image: qdrant/qdrant:1.7.3
# restart: always
# volumes:
# - ./volumes/qdrant:/qdrant/storage
# environment:
# QDRANT_API_KEY: 'difyai123456'
# ports:
# - "6333:6333"
# - "6334:6334"
networks:
# create a network between sandbox, api and ssrf_proxy, and can not access outside.
ssrf_proxy_network:
driver: bridge
internal: true

@ -1,64 +0,0 @@
version: '3.5'
services:
etcd:
container_name: milvus-etcd
image: quay.io/coreos/etcd:v3.5.5
environment:
- ETCD_AUTO_COMPACTION_MODE=revision
- ETCD_AUTO_COMPACTION_RETENTION=1000
- ETCD_QUOTA_BACKEND_BYTES=4294967296
- ETCD_SNAPSHOT_COUNT=50000
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
healthcheck:
test: ["CMD", "etcdctl", "endpoint", "health"]
interval: 30s
timeout: 20s
retries: 3
minio:
container_name: milvus-minio
image: minio/minio:RELEASE.2023-03-20T20-16-18Z
environment:
MINIO_ACCESS_KEY: minioadmin
MINIO_SECRET_KEY: minioadmin
ports:
- "9001:9001"
- "9000:9000"
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
command: minio server /minio_data --console-address ":9001"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3
milvus-standalone:
container_name: milvus-standalone
image: milvusdb/milvus:v2.4.6
command: ["milvus", "run", "standalone"]
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
common.security.authorizationEnabled: true
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
interval: 30s
start_period: 90s
timeout: 20s
retries: 3
ports:
- "19530:19530"
- "9091:9091"
depends_on:
- "etcd"
- "minio"
networks:
default:
name: milvus

@ -1,40 +0,0 @@
services:
opensearch: # This is also the hostname of the container within the Docker network (i.e. https://opensearch/)
image: opensearchproject/opensearch:latest # Specifying the latest available image - modify if you want a specific version
container_name: opensearch
environment:
- discovery.type=single-node
- bootstrap.memory_lock=true # Disable JVM heap memory swapping
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx1024m" # Set min and max JVM heap sizes to at least 50% of system RAM
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=Qazwsxedc!@#123 # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later
ulimits:
memlock:
soft: -1 # Set memlock to unlimited (no soft or hard limit)
hard: -1
nofile:
soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536
hard: 65536
volumes:
- ./volumes/opensearch/data:/usr/share/opensearch/data # Creates volume called opensearch-data1 and mounts it to the container
ports:
- 9200:9200 # REST API
- 9600:9600 # Performance Analyzer
networks:
- opensearch-net # All of the containers will join the same Docker bridge network
opensearch-dashboards:
image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes
container_name: opensearch-dashboards
ports:
- 5601:5601 # Map host port 5601 to container port 5601
expose:
- "5601" # Expose port 5601 for web access to OpenSearch Dashboards
environment:
OPENSEARCH_HOSTS: '["https://opensearch:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query
volumes:
- ./volumes/opensearch/opensearch_dashboards.yml:/usr/share/opensearch-dashboards/config/opensearch_dashboards.yml
networks:
- opensearch-net
networks:
opensearch-net:
driver: bridge

@ -1,17 +0,0 @@
services:
# oracle 23 ai vector store.
oracle:
image: container-registry.oracle.com/database/free:latest
restart: always
ports:
- 1521:1521
volumes:
- type: volume
source: oradata_vector
target: /opt/oracle/oradata
- ./startupscripts:/opt/oracle/scripts/startup
environment:
- ORACLE_PWD=Dify123456
- ORACLE_CHARACTERSET=AL32UTF8
volumes:
oradata_vector:

@ -1,23 +0,0 @@
services:
# The pgvecto-rs database.
pgvecto-rs:
image: tensorchord/pgvecto-rs:pg16-v0.2.0
restart: always
environment:
PGUSER: postgres
# The password for the default postgres user.
POSTGRES_PASSWORD: difyai123456
# The name of the default postgres database.
POSTGRES_DB: dify
# postgres data directory
PGDATA: /var/lib/postgresql/data/pgdata
volumes:
- ./volumes/pgvectors/data:/var/lib/postgresql/data
# uncomment to expose db(postgresql) port to host
ports:
- "5431:5432"
healthcheck:
test: [ "CMD", "pg_isready" ]
interval: 1s
timeout: 3s
retries: 30

@ -1,23 +0,0 @@
services:
# Qdrant vector store.
pgvector:
image: pgvector/pgvector:pg16
restart: always
environment:
PGUSER: postgres
# The password for the default postgres user.
POSTGRES_PASSWORD: difyai123456
# The name of the default postgres database.
POSTGRES_DB: dify
# postgres data directory
PGDATA: /var/lib/postgresql/data/pgdata
volumes:
- ./volumes/pgvector/data:/var/lib/postgresql/data
# uncomment to expose db(postgresql) port to host
ports:
- "5433:5432"
healthcheck:
test: [ "CMD", "pg_isready" ]
interval: 1s
timeout: 3s
retries: 30

Binary file not shown.


@ -1,12 +0,0 @@
services:
# Qdrant vector store.
qdrant:
image: langgenius/qdrant:v1.7.3
restart: always
volumes:
- ./volumes/qdrant:/qdrant/storage
environment:
QDRANT_API_KEY: 'difyai123456'
ports:
- "6333:6333"
- "6334:6334"

@ -1,597 +0,0 @@
version: '3'
services:
# API service
api:
image: langgenius/dify-api:1.0.0-beta.1
restart: always
environment:
# Startup mode, 'api' starts the API server.
MODE: api
# The log level for the application. Supported values are `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`
LOG_LEVEL: INFO
# enable DEBUG mode to output more logs
# DEBUG : true
# A secret key that is used for securely signing the session cookie and encrypting sensitive information on the database. You can generate a strong key using `openssl rand -base64 42`.
SECRET_KEY: sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
# The base URL of console application web frontend, refers to the Console base URL of WEB service if console domain is
# different from api or web app domain.
# example: http://cloud.dify.ai
CONSOLE_WEB_URL: ''
# Password for admin user initialization.
# If left unset, admin user will not be prompted for a password when creating the initial admin account.
INIT_PASSWORD: ''
# The base URL of console application api server, refers to the Console base URL of WEB service if console domain is
# different from api or web app domain.
# example: http://cloud.dify.ai
CONSOLE_API_URL: ''
# The URL prefix for Service API endpoints, refers to the base URL of the current API service if api domain is
# different from console domain.
# example: http://api.dify.ai
SERVICE_API_URL: ''
# The URL prefix for Web APP frontend, refers to the Web App base URL of WEB service if web app domain is different from
# console or api domain.
# example: http://udify.app
APP_WEB_URL: ''
# File preview or download Url prefix.
# used to display File preview or download Url to the front-end or as Multi-model inputs;
# Url is signed and has expiration time.
FILES_URL: ''
# File Access Time specifies a time interval in seconds for the file to be accessed.
# The default value is 300 seconds.
FILES_ACCESS_TIMEOUT: 300
# The maximum number of active requests for the application, where 0 means unlimited, should be a non-negative integer.
APP_MAX_ACTIVE_REQUESTS: 0
# When enabled, migrations will be executed prior to application startup and the application will start after the migrations have completed.
MIGRATION_ENABLED: 'true'
# The configurations of postgres database connection.
# It is consistent with the configuration in the 'db' service below.
DB_USERNAME: postgres
DB_PASSWORD: difyai123456
DB_HOST: db
DB_PORT: 5432
DB_DATABASE: dify
# The configurations of redis connection.
# It is consistent with the configuration in the 'redis' service below.
REDIS_HOST: redis
REDIS_PORT: 6379
REDIS_USERNAME: ''
REDIS_PASSWORD: difyai123456
REDIS_USE_SSL: 'false'
# use redis db 0 for redis cache
REDIS_DB: 0
# The configurations of celery broker.
# Use redis as the broker, and redis db 1 for celery broker.
CELERY_BROKER_URL: redis://:difyai123456@redis:6379/1
# Specifies the allowed origins for cross-origin requests to the Web API, e.g. https://dify.app or * for all origins.
WEB_API_CORS_ALLOW_ORIGINS: '*'
# Specifies the allowed origins for cross-origin requests to the console API, e.g. https://cloud.dify.ai or * for all origins.
CONSOLE_CORS_ALLOW_ORIGINS: '*'
# CSRF Cookie settings
# Controls whether a cookie is sent with cross-site requests,
# providing some protection against cross-site request forgery attacks
#
# Default: `SameSite=Lax, Secure=false, HttpOnly=true`
# This default configuration supports same-origin requests using either HTTP or HTTPS,
# but does not support cross-origin requests. It is suitable for local debugging purposes.
#
# If you want to enable cross-origin support,
# you must use the HTTPS protocol and set the configuration to `SameSite=None, Secure=true, HttpOnly=true`.
#
# The type of storage to use for storing user files. Supported values are `local`, `s3`, `azure-blob` and `google-storage`. Default: `local`.
STORAGE_TYPE: local
# The path to the local storage directory, either relative to the root path of the API service code or an absolute path (e.g. `storage` or `/home/john/storage`). Default: `storage`.
# Only available when STORAGE_TYPE is `local`.
STORAGE_LOCAL_PATH: storage
# The S3 storage configurations, only available when STORAGE_TYPE is `s3`.
S3_USE_AWS_MANAGED_IAM: 'false'
S3_ENDPOINT: 'https://xxx.r2.cloudflarestorage.com'
S3_BUCKET_NAME: 'difyai'
S3_ACCESS_KEY: 'ak-difyai'
S3_SECRET_KEY: 'sk-difyai'
S3_REGION: 'us-east-1'
# The Azure Blob storage configurations, only available when STORAGE_TYPE is `azure-blob`.
AZURE_BLOB_ACCOUNT_NAME: 'difyai'
AZURE_BLOB_ACCOUNT_KEY: 'difyai'
AZURE_BLOB_CONTAINER_NAME: 'difyai-container'
AZURE_BLOB_ACCOUNT_URL: 'https://<your_account_name>.blob.core.windows.net'
# The Google storage configurations, only available when STORAGE_TYPE is `google-storage`.
GOOGLE_STORAGE_BUCKET_NAME: 'your-bucket-name'
# If you want to use Application Default Credentials, you can leave GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64 empty.
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64: 'your-google-service-account-json-base64-string'
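# For example, to produce the base64 string from a downloaded service account key file
# (shell sketch; flags differ by platform):
#   base64 -w 0 service-account.json      # GNU coreutils
#   base64 -i service-account.json        # macOS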
# The Alibaba Cloud OSS configurations, only available when STORAGE_TYPE is `aliyun-oss`
ALIYUN_OSS_BUCKET_NAME: 'your-bucket-name'
ALIYUN_OSS_ACCESS_KEY: 'your-access-key'
ALIYUN_OSS_SECRET_KEY: 'your-secret-key'
ALIYUN_OSS_ENDPOINT: 'https://oss-ap-southeast-1-internal.aliyuncs.com'
ALIYUN_OSS_REGION: 'ap-southeast-1'
ALIYUN_OSS_AUTH_VERSION: 'v4'
# The Tencent COS storage configurations, only available when STORAGE_TYPE is `tencent-cos`.
TENCENT_COS_BUCKET_NAME: 'your-bucket-name'
TENCENT_COS_SECRET_KEY: 'your-secret-key'
TENCENT_COS_SECRET_ID: 'your-secret-id'
TENCENT_COS_REGION: 'your-region'
TENCENT_COS_SCHEME: 'your-scheme'
# The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`, `relyt`, `pgvector`, `chroma`, `opensearch` and `tidb_vector`.
VECTOR_STORE: weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
WEAVIATE_ENDPOINT: http://weaviate:8080
# The Weaviate API key.
WEAVIATE_API_KEY: WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
# The Qdrant endpoint URL. Only available when VECTOR_STORE is `qdrant`.
QDRANT_URL: http://qdrant:6333
# The Qdrant API key.
QDRANT_API_KEY: difyai123456
# The Qdrant client timeout setting.
QDRANT_CLIENT_TIMEOUT: 20
# Whether the Qdrant client uses gRPC mode.
QDRANT_GRPC_ENABLED: 'false'
# The Qdrant server gRPC port.
QDRANT_GRPC_PORT: 6334
# Milvus configuration. Only available when VECTOR_STORE is `milvus`.
# The Milvus URI.
MILVUS_URI: http://127.0.0.1:19530
# The Milvus token.
MILVUS_TOKEN: ''
# The Milvus username.
MILVUS_USER: root
# The Milvus password.
MILVUS_PASSWORD: Milvus
# relyt configurations
RELYT_HOST: db
RELYT_PORT: 5432
RELYT_USER: postgres
RELYT_PASSWORD: difyai123456
RELYT_DATABASE: postgres
# pgvector configurations
PGVECTOR_HOST: pgvector
PGVECTOR_PORT: 5432
PGVECTOR_USER: postgres
PGVECTOR_PASSWORD: difyai123456
PGVECTOR_DATABASE: dify
# tidb vector configurations
TIDB_VECTOR_HOST: tidb
TIDB_VECTOR_PORT: 4000
TIDB_VECTOR_USER: xxx.root
TIDB_VECTOR_PASSWORD: xxxxxx
TIDB_VECTOR_DATABASE: dify
# oracle configurations
ORACLE_HOST: oracle
ORACLE_PORT: 1521
ORACLE_USER: dify
ORACLE_PASSWORD: dify
ORACLE_DATABASE: FREEPDB1
# Chroma configuration
CHROMA_HOST: 127.0.0.1
CHROMA_PORT: 8000
CHROMA_TENANT: default_tenant
CHROMA_DATABASE: default_database
CHROMA_AUTH_PROVIDER: chromadb.auth.token_authn.TokenAuthClientProvider
CHROMA_AUTH_CREDENTIALS: xxxxxx
# ElasticSearch Config
ELASTICSEARCH_HOST: 127.0.0.1
ELASTICSEARCH_PORT: 9200
ELASTICSEARCH_USERNAME: elastic
ELASTICSEARCH_PASSWORD: elastic
# Mail configuration. Supported values: resend, smtp.
MAIL_TYPE: ''
# Default sender email address, used when no sender is specified.
MAIL_DEFAULT_SEND_FROM: 'YOUR EMAIL FROM (eg: no-reply <no-reply@dify.ai>)'
SMTP_SERVER: ''
SMTP_PORT: 465
SMTP_USERNAME: ''
SMTP_PASSWORD: ''
SMTP_USE_TLS: 'true'
SMTP_OPPORTUNISTIC_TLS: 'false'
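# An illustrative SMTP setup (placeholder values, adapt to your provider):
#   MAIL_TYPE: smtp
#   SMTP_SERVER: smtp.example.com
#   SMTP_PORT: 465
#   SMTP_USERNAME: no-reply@example.com
#   SMTP_PASSWORD: your-smtp-password
#   SMTP_USE_TLS: 'true'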
# The API key for Resend (https://resend.com).
RESEND_API_KEY: ''
RESEND_API_URL: https://api.resend.com
# The DSN for Sentry error reporting. If not set, Sentry error reporting will be disabled.
SENTRY_DSN: ''
# The sample rate for Sentry events. Default: `1.0`
SENTRY_TRACES_SAMPLE_RATE: 1.0
# The sample rate for Sentry profiles. Default: `1.0`
SENTRY_PROFILES_SAMPLE_RATE: 1.0
# Notion import configuration; supported integration types: public, internal.
NOTION_INTEGRATION_TYPE: public
NOTION_CLIENT_SECRET: your-client-secret
NOTION_CLIENT_ID: your-client-id
NOTION_INTERNAL_SECRET: your-internal-secret
# The sandbox service endpoint.
CODE_EXECUTION_ENDPOINT: "http://sandbox:8194"
CODE_EXECUTION_API_KEY: dify-sandbox
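# Note: CODE_EXECUTION_API_KEY must match the API_KEY of the 'sandbox' service below,
# and CODE_EXECUTION_ENDPOINT points at that service's SANDBOX_PORT (8194).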
CODE_MAX_NUMBER: 9223372036854775807
CODE_MIN_NUMBER: -9223372036854775808
CODE_MAX_STRING_LENGTH: 80000
TEMPLATE_TRANSFORM_MAX_LENGTH: 80000
CODE_MAX_STRING_ARRAY_LENGTH: 30
CODE_MAX_OBJECT_ARRAY_LENGTH: 30
CODE_MAX_NUMBER_ARRAY_LENGTH: 1000
# SSRF Proxy server
SSRF_PROXY_HTTP_URL: 'http://ssrf_proxy:3128'
SSRF_PROXY_HTTPS_URL: 'http://ssrf_proxy:3128'
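# Both URLs point at the 'ssrf_proxy' squid service defined below (port 3128), so that
# user-triggered outbound requests are filtered by the proxy instead of leaving directly.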
# Indexing configuration
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: 4000
depends_on:
- db
- redis
volumes:
# Mount the storage directory to the container, for storing user files.
- ./volumes/app/storage:/app/api/storage
# uncomment to expose dify-api port to host
# ports:
# - "5001:5001"
networks:
- ssrf_proxy_network
- default
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:1.0.0-beta.1
restart: always
environment:
CONSOLE_WEB_URL: ''
# Startup mode, 'worker' starts the Celery worker for processing the queue.
MODE: worker
# --- All the configurations below are the same as those in the 'api' service. ---
# The log level for the application. Supported values are `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`
LOG_LEVEL: INFO
# A secret key that is used for securely signing the session cookie and encrypting sensitive information on the database. You can generate a strong key using `openssl rand -base64 42`.
# same as the API service
SECRET_KEY: sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
# The configurations of postgres database connection.
# It is consistent with the configuration in the 'db' service below.
DB_USERNAME: postgres
DB_PASSWORD: difyai123456
DB_HOST: db
DB_PORT: 5432
DB_DATABASE: dify
# The configurations of redis cache connection.
REDIS_HOST: redis
REDIS_PORT: 6379
REDIS_USERNAME: ''
REDIS_PASSWORD: difyai123456
REDIS_DB: 0
REDIS_USE_SSL: 'false'
# The configurations of celery broker.
CELERY_BROKER_URL: redis://:difyai123456@redis:6379/1
# The type of storage to use for storing user files. Supported values are `local`, `s3`, `azure-blob` and `google-storage`. Default: `local`.
STORAGE_TYPE: local
STORAGE_LOCAL_PATH: storage
# The S3 storage configurations, only available when STORAGE_TYPE is `s3`.
S3_USE_AWS_MANAGED_IAM: 'false'
S3_ENDPOINT: 'https://xxx.r2.cloudflarestorage.com'
S3_BUCKET_NAME: 'difyai'
S3_ACCESS_KEY: 'ak-difyai'
S3_SECRET_KEY: 'sk-difyai'
S3_REGION: 'us-east-1'
# The Azure Blob storage configurations, only available when STORAGE_TYPE is `azure-blob`.
AZURE_BLOB_ACCOUNT_NAME: 'difyai'
AZURE_BLOB_ACCOUNT_KEY: 'difyai'
AZURE_BLOB_CONTAINER_NAME: 'difyai-container'
AZURE_BLOB_ACCOUNT_URL: 'https://<your_account_name>.blob.core.windows.net'
# The Google storage configurations, only available when STORAGE_TYPE is `google-storage`.
GOOGLE_STORAGE_BUCKET_NAME: 'your-bucket-name'
# If you want to use Application Default Credentials, you can leave GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64 empty.
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64: 'your-google-service-account-json-base64-string'
# The Alibaba Cloud OSS configurations, only available when STORAGE_TYPE is `aliyun-oss`
ALIYUN_OSS_BUCKET_NAME: 'your-bucket-name'
ALIYUN_OSS_ACCESS_KEY: 'your-access-key'
ALIYUN_OSS_SECRET_KEY: 'your-secret-key'
ALIYUN_OSS_ENDPOINT: 'https://oss-ap-southeast-1-internal.aliyuncs.com'
ALIYUN_OSS_REGION: 'ap-southeast-1'
ALIYUN_OSS_AUTH_VERSION: 'v4'
# The Tencent COS storage configurations, only available when STORAGE_TYPE is `tencent-cos`.
TENCENT_COS_BUCKET_NAME: 'your-bucket-name'
TENCENT_COS_SECRET_KEY: 'your-secret-key'
TENCENT_COS_SECRET_ID: 'your-secret-id'
TENCENT_COS_REGION: 'your-region'
TENCENT_COS_SCHEME: 'your-scheme'
# The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`, `relyt`, `pgvector`, `chroma`, `opensearch` and `tidb_vector`.
VECTOR_STORE: weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
WEAVIATE_ENDPOINT: http://weaviate:8080
# The Weaviate API key.
WEAVIATE_API_KEY: WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
# The Qdrant endpoint URL. Only available when VECTOR_STORE is `qdrant`.
QDRANT_URL: http://qdrant:6333
# The Qdrant API key.
QDRANT_API_KEY: difyai123456
# The Qdrant client timeout setting.
QDRANT_CLIENT_TIMEOUT: 20
# Whether the Qdrant client uses gRPC mode.
QDRANT_GRPC_ENABLED: 'false'
# The Qdrant server gRPC port.
QDRANT_GRPC_PORT: 6334
# Milvus configuration. Only available when VECTOR_STORE is `milvus`.
# The Milvus URI.
MILVUS_URI: http://127.0.0.1:19530
# The Milvus token.
MILVUS_TOKEN: ''
# The Milvus username.
MILVUS_USER: root
# The Milvus password.
MILVUS_PASSWORD: Milvus
# Mail configuration. Supported values: resend, smtp.
MAIL_TYPE: ''
# Default sender email address, used when no sender is specified.
MAIL_DEFAULT_SEND_FROM: 'YOUR EMAIL FROM (eg: no-reply <no-reply@dify.ai>)'
SMTP_SERVER: ''
SMTP_PORT: 465
SMTP_USERNAME: ''
SMTP_PASSWORD: ''
SMTP_USE_TLS: 'true'
SMTP_OPPORTUNISTIC_TLS: 'false'
# The API key for Resend (https://resend.com).
RESEND_API_KEY: ''
RESEND_API_URL: https://api.resend.com
# relyt configurations
RELYT_HOST: db
RELYT_PORT: 5432
RELYT_USER: postgres
RELYT_PASSWORD: difyai123456
RELYT_DATABASE: postgres
# Tencent vector database configurations
TENCENT_VECTOR_DB_URL: http://127.0.0.1
TENCENT_VECTOR_DB_API_KEY: dify
TENCENT_VECTOR_DB_TIMEOUT: 30
TENCENT_VECTOR_DB_USERNAME: dify
TENCENT_VECTOR_DB_DATABASE: dify
TENCENT_VECTOR_DB_SHARD: 1
TENCENT_VECTOR_DB_REPLICAS: 2
# OpenSearch configuration
OPENSEARCH_HOST: 127.0.0.1
OPENSEARCH_PORT: 9200
OPENSEARCH_USER: admin
OPENSEARCH_PASSWORD: admin
OPENSEARCH_SECURE: 'true'
# pgvector configurations
PGVECTOR_HOST: pgvector
PGVECTOR_PORT: 5432
PGVECTOR_USER: postgres
PGVECTOR_PASSWORD: difyai123456
PGVECTOR_DATABASE: dify
# tidb vector configurations
TIDB_VECTOR_HOST: tidb
TIDB_VECTOR_PORT: 4000
TIDB_VECTOR_USER: xxx.root
TIDB_VECTOR_PASSWORD: xxxxxx
TIDB_VECTOR_DATABASE: dify
# oracle configurations
ORACLE_HOST: oracle
ORACLE_PORT: 1521
ORACLE_USER: dify
ORACLE_PASSWORD: dify
ORACLE_DATABASE: FREEPDB1
# Chroma configuration
CHROMA_HOST: 127.0.0.1
CHROMA_PORT: 8000
CHROMA_TENANT: default_tenant
CHROMA_DATABASE: default_database
CHROMA_AUTH_PROVIDER: chromadb.auth.token_authn.TokenAuthClientProvider
CHROMA_AUTH_CREDENTIALS: xxxxxx
# ElasticSearch Config
ELASTICSEARCH_HOST: 127.0.0.1
ELASTICSEARCH_PORT: 9200
ELASTICSEARCH_USERNAME: elastic
ELASTICSEARCH_PASSWORD: elastic
# Notion import configuration; supported integration types: public, internal.
NOTION_INTEGRATION_TYPE: public
NOTION_CLIENT_SECRET: your-client-secret
NOTION_CLIENT_ID: your-client-id
NOTION_INTERNAL_SECRET: your-internal-secret
# Indexing configuration
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: 1000
CREATE_TIDB_SERVICE_JOB_ENABLED: false
depends_on:
- db
- redis
volumes:
# Mount the storage directory to the container, for storing user files.
- ./volumes/app/storage:/app/api/storage
networks:
- ssrf_proxy_network
- default
# Frontend web application.
web:
image: langgenius/dify-web:1.0.0-beta.1
restart: always
environment:
# The base URL of the console API server. Set this to the Console base URL of
# the WEB service if the console domain differs from the api or web app domain.
# example: http://cloud.dify.ai
CONSOLE_API_URL: ''
# The URL for the Web APP api server. Set this to the Web App base URL of the
# WEB service if the web app domain differs from the console or api domain.
# example: http://udify.app
APP_API_URL: ''
# The DSN for Sentry error reporting. If not set, Sentry error reporting will be disabled.
SENTRY_DSN: ''
# uncomment to expose dify-web port to host
# ports:
# - "3000:3000"
# The postgres database.
db:
image: postgres:15-alpine
restart: always
environment:
PGUSER: postgres
# The password for the default postgres user.
POSTGRES_PASSWORD: difyai123456
# The name of the default postgres database.
POSTGRES_DB: dify
# postgres data directory
PGDATA: /var/lib/postgresql/data/pgdata
volumes:
- ./volumes/db/data:/var/lib/postgresql/data
# Notice: if you use Windows WSL2, postgres may not work properly due to an NTFS issue; you can use a named volume for the data directory instead.
# If you use the following config, you need to uncomment the 'volumes' configuration at the end of the file.
# - postgres:/var/lib/postgresql/data
# uncomment to expose db(postgresql) port to host
# ports:
# - "5432:5432"
healthcheck:
test: [ "CMD", "pg_isready" ]
interval: 1s
timeout: 3s
retries: 30
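# pg_isready exits 0 once postgres accepts connections; with the settings above, compose
# probes every second and marks the container unhealthy after 30 consecutive failures.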
# The redis cache.
redis:
image: redis:6-alpine
restart: always
volumes:
# Mount the redis data directory to the container.
- ./volumes/redis/data:/data
# Set the redis password when starting the redis server.
command: redis-server --requirepass difyai123456
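# The password set here must match REDIS_PASSWORD and the password embedded in
# CELERY_BROKER_URL in the api and worker services above.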
healthcheck:
test: [ "CMD", "redis-cli", "ping" ]
# uncomment to expose redis port to host
# ports:
# - "6379:6379"
# The Weaviate vector store.
weaviate:
image: semitechnologies/weaviate:1.19.0
restart: always
volumes:
# Mount the Weaviate data directory to the container.
- ./volumes/weaviate:/var/lib/weaviate
environment:
# The Weaviate configurations
# You can refer to the [Weaviate](https://weaviate.io/developers/weaviate/config-refs/env-vars) documentation for more information.
QUERY_DEFAULTS_LIMIT: 25
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'false'
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
DEFAULT_VECTORIZER_MODULE: 'none'
CLUSTER_HOSTNAME: 'node1'
AUTHENTICATION_APIKEY_ENABLED: 'true'
AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih'
AUTHENTICATION_APIKEY_USERS: 'hello@dify.ai'
AUTHORIZATION_ADMINLIST_ENABLED: 'true'
AUTHORIZATION_ADMINLIST_USERS: 'hello@dify.ai'
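# AUTHENTICATION_APIKEY_ALLOWED_KEYS must contain the WEAVIATE_API_KEY used by the api
# and worker services above; AUTHENTICATION_APIKEY_USERS maps the allowed keys to users.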
# uncomment to expose weaviate port to host
# ports:
# - "8080:8080"
# The DifySandbox
sandbox:
image: langgenius/dify-sandbox:0.2.1
restart: always
environment:
# The DifySandbox configurations
# Make sure you are changing this key for your deployment with a strong key.
# You can generate a strong key using `openssl rand -base64 42`.
API_KEY: dify-sandbox
GIN_MODE: 'release'
WORKER_TIMEOUT: 15
ENABLE_NETWORK: 'true'
HTTP_PROXY: 'http://ssrf_proxy:3128'
HTTPS_PROXY: 'http://ssrf_proxy:3128'
SANDBOX_PORT: 8194
volumes:
- ./volumes/sandbox/dependencies:/dependencies
networks:
- ssrf_proxy_network
# ssrf_proxy server
# for more information, please refer to
# https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed
ssrf_proxy:
image: ubuntu/squid:latest
restart: always
volumes:
# Please modify the squid.conf file to fit your network environment.
- ./volumes/ssrf_proxy/squid.conf:/etc/squid/squid.conf
networks:
- ssrf_proxy_network
- default
# Qdrant vector store.
# uncomment to use qdrant as vector store.
# (if uncommented, you need to comment out the weaviate service above,
# and set VECTOR_STORE to qdrant in the api & worker service.)
# qdrant:
# image: langgenius/qdrant:v1.7.3
# restart: always
# volumes:
# - ./volumes/qdrant:/qdrant/storage
# environment:
# QDRANT_API_KEY: 'difyai123456'
# # uncomment to expose qdrant port to host
# # ports:
# # - "6333:6333"
# # - "6334:6334"
# The pgvector vector database.
# Uncomment to use pgvector as the vector store.
# pgvector:
# image: pgvector/pgvector:pg16
# restart: always
# environment:
# PGUSER: postgres
# # The password for the default postgres user.
# POSTGRES_PASSWORD: difyai123456
# # The name of the default postgres database.
# POSTGRES_DB: dify
# # postgres data directory
# PGDATA: /var/lib/postgresql/data/pgdata
# volumes:
# - ./volumes/pgvector/data:/var/lib/postgresql/data
# # uncomment to expose db(postgresql) port to host
# # ports:
# # - "5433:5432"
# healthcheck:
# test: [ "CMD", "pg_isready" ]
# interval: 1s
# timeout: 3s
# retries: 30
# The oracle vector database.
# Uncomment to use oracle23ai as the vector store. You also need to uncomment the 'oradata' volumes block at the end of the file.
# oracle:
# image: container-registry.oracle.com/database/free:latest
# restart: always
# ports:
# - 1521:1521
# volumes:
# - type: volume
# source: oradata
# target: /opt/oracle/oradata
# - ./startupscripts:/opt/oracle/scripts/startup
# environment:
# - ORACLE_PWD=Dify123456
# - ORACLE_CHARACTERSET=AL32UTF8
# The nginx reverse proxy.
# used for reverse proxying the API service and Web service.
nginx:
image: nginx:latest
restart: always
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf
- ./nginx/proxy.conf:/etc/nginx/proxy.conf
- ./nginx/conf.d:/etc/nginx/conf.d
#- ./nginx/ssl:/etc/ssl
depends_on:
- api
- web
ports:
- "80:80"
#- "443:443"
# Notice: if you use Windows WSL2, postgres may not work properly due to an NTFS issue; you can use a named volume for the data directory instead.
# volumes:
#   postgres:
networks:
# Create a network shared by sandbox, api and ssrf_proxy; it cannot access the outside.
ssrf_proxy_network:
driver: bridge
internal: true
#volumes:
# oradata:

@ -1,38 +0,0 @@
server {
listen 80;
server_name _;
location /console/api {
proxy_pass http://api:5001;
include proxy.conf;
}
location /api {
proxy_pass http://api:5001;
include proxy.conf;
}
location /v1 {
proxy_pass http://api:5001;
include proxy.conf;
}
location /files {
proxy_pass http://api:5001;
include proxy.conf;
}
location / {
proxy_pass http://web:3000;
include proxy.conf;
}
# If you want to support HTTPS, please uncomment the code snippet below
#listen 443 ssl;
#ssl_certificate ./../ssl/your_cert_file.cer;
#ssl_certificate_key ./../ssl/your_cert_key.key;
#ssl_protocols TLSv1.1 TLSv1.2 TLSv1.3;
#ssl_prefer_server_ciphers on;
#ssl_session_cache shared:SSL:10m;
#ssl_session_timeout 10m;
}

@ -1,32 +0,0 @@
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
#gzip on;
client_max_body_size 15M;
include /etc/nginx/conf.d/*.conf;
}

@ -1,8 +0,0 @@
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_buffering off;
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;

@ -1,5 +0,0 @@
show pdbs;
ALTER SYSTEM SET PROCESSES=500 SCOPE=SPFILE;
alter session set container= freepdb1;
create user dify identified by dify DEFAULT TABLESPACE users quota unlimited on users;
grant DB_DEVELOPER_ROLE to dify;

@ -1,222 +0,0 @@
---
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
# Description:
# Default configuration for OpenSearch Dashboards
# OpenSearch Dashboards is served by a back end server. This setting specifies the port to use.
# server.port: 5601
# Specifies the address to which the OpenSearch Dashboards server will bind. IP addresses and host names are both valid values.
# The default is 'localhost', which usually means remote machines will not be able to connect.
# To allow connections from remote users, set this parameter to a non-loopback address.
# server.host: "localhost"
# Enables you to specify a path to mount OpenSearch Dashboards at if you are running behind a proxy.
# Use the `server.rewriteBasePath` setting to tell OpenSearch Dashboards if it should remove the basePath
# from requests it receives, and to prevent a deprecation warning at startup.
# This setting cannot end in a slash.
# server.basePath: ""
# Specifies whether OpenSearch Dashboards should rewrite requests that are prefixed with
# `server.basePath` or require that they are rewritten by your reverse proxy.
# server.rewriteBasePath: false
# The maximum payload size in bytes for incoming server requests.
# server.maxPayloadBytes: 1048576
# The OpenSearch Dashboards server's name. This is used for display purposes.
# server.name: "your-hostname"
# The URLs of the OpenSearch instances to use for all your queries.
# opensearch.hosts: ["http://localhost:9200"]
# OpenSearch Dashboards uses an index in OpenSearch to store saved searches, visualizations and
# dashboards. OpenSearch Dashboards creates a new index if the index doesn't already exist.
# opensearchDashboards.index: ".opensearch_dashboards"
# The default application to load.
# opensearchDashboards.defaultAppId: "home"
# Setting for an optimized healthcheck that only uses the local OpenSearch node to do Dashboards healthcheck.
# This setting should be used for large clusters or for clusters with ingest-heavy nodes.
# It allows Dashboards to only healthcheck using the local OpenSearch node rather than fan out requests across all nodes.
#
# It requires the user to create an OpenSearch node attribute with the same name as the value used in the setting
# This node attribute should assign all nodes of the same cluster an integer value that increments with each new cluster that is spun up
# e.g. in opensearch.yml file you would set the value to a setting using node.attr.cluster_id:
# Should only be enabled if there is a corresponding node attribute created in your OpenSearch config that matches the value here
# opensearch.optimizedHealthcheckId: "cluster_id"
# If your OpenSearch is protected with basic authentication, these settings provide
# the username and password that the OpenSearch Dashboards server uses to perform maintenance on the OpenSearch Dashboards
# index at startup. Your OpenSearch Dashboards users still need to authenticate with OpenSearch, which
# is proxied through the OpenSearch Dashboards server.
# opensearch.username: "opensearch_dashboards_system"
# opensearch.password: "pass"
# Enables SSL and paths to the PEM-format SSL certificate and SSL key files, respectively.
# These settings enable SSL for outgoing requests from the OpenSearch Dashboards server to the browser.
# server.ssl.enabled: false
# server.ssl.certificate: /path/to/your/server.crt
# server.ssl.key: /path/to/your/server.key
# Optional settings that provide the paths to the PEM-format SSL certificate and key files.
# These files are used to verify the identity of OpenSearch Dashboards to OpenSearch and are required when
# xpack.security.http.ssl.client_authentication in OpenSearch is set to required.
# opensearch.ssl.certificate: /path/to/your/client.crt
# opensearch.ssl.key: /path/to/your/client.key
# Optional setting that enables you to specify a path to the PEM file for the certificate
# authority for your OpenSearch instance.
# opensearch.ssl.certificateAuthorities: [ "/path/to/your/CA.pem" ]
# To disregard the validity of SSL certificates, change this setting's value to 'none'.
# opensearch.ssl.verificationMode: full
# Time in milliseconds to wait for OpenSearch to respond to pings. Defaults to the value of
# the opensearch.requestTimeout setting.
# opensearch.pingTimeout: 1500
# Time in milliseconds to wait for responses from the back end or OpenSearch. This value
# must be a positive integer.
# opensearch.requestTimeout: 30000
# List of OpenSearch Dashboards client-side headers to send to OpenSearch. To send *no* client-side
# headers, set this value to [] (an empty list).
# opensearch.requestHeadersWhitelist: [ authorization ]
# Header names and values that are sent to OpenSearch. Any custom headers cannot be overwritten
# by client-side headers, regardless of the opensearch.requestHeadersWhitelist configuration.
# opensearch.customHeaders: {}
# Time in milliseconds for OpenSearch to wait for responses from shards. Set to 0 to disable.
# opensearch.shardTimeout: 30000
# Logs queries sent to OpenSearch. Requires logging.verbose set to true.
# opensearch.logQueries: false
# Specifies the path where OpenSearch Dashboards creates the process ID file.
# pid.file: /var/run/opensearchDashboards.pid
# Enables you to specify a file where OpenSearch Dashboards stores log output.
# logging.dest: stdout
# Set the value of this setting to true to suppress all logging output.
# logging.silent: false
# Set the value of this setting to true to suppress all logging output other than error messages.
# logging.quiet: false
# Set the value of this setting to true to log all events, including system usage information
# and all requests.
# logging.verbose: false
# Set the interval in milliseconds to sample system and process performance
# metrics. Minimum is 100ms. Defaults to 5000.
# ops.interval: 5000
# Specifies locale to be used for all localizable strings, dates and number formats.
# Supported languages are the following: English - en , by default , Chinese - zh-CN .
# i18n.locale: "en"
# Set the allowlist to check input graphite Url. Allowlist is the default check list.
# vis_type_timeline.graphiteAllowedUrls: ['https://www.hostedgraphite.com/UID/ACCESS_KEY/graphite']
# Set the blocklist to check input graphite Url. Blocklist is an IP list.
# Below is an example for reference
# vis_type_timeline.graphiteBlockedIPs: [
# //Loopback
# '127.0.0.0/8',
# '::1/128',
# //Link-local Address for IPv6
# 'fe80::/10',
# //Private IP address for IPv4
# '10.0.0.0/8',
# '172.16.0.0/12',
# '192.168.0.0/16',
# //Unique local address (ULA)
# 'fc00::/7',
# //Reserved IP address
# '0.0.0.0/8',
# '100.64.0.0/10',
# '192.0.0.0/24',
# '192.0.2.0/24',
# '198.18.0.0/15',
# '192.88.99.0/24',
# '198.51.100.0/24',
# '203.0.113.0/24',
# '224.0.0.0/4',
# '240.0.0.0/4',
# '255.255.255.255/32',
# '::/128',
# '2001:db8::/32',
# 'ff00::/8',
# ]
# vis_type_timeline.graphiteBlockedIPs: []
# opensearchDashboards.branding:
# logo:
# defaultUrl: ""
# darkModeUrl: ""
# mark:
# defaultUrl: ""
# darkModeUrl: ""
# loadingLogo:
# defaultUrl: ""
# darkModeUrl: ""
# faviconUrl: ""
# applicationTitle: ""
# Set the value of this setting to true to capture region blocked warnings and errors
# for your map rendering services.
# map.showRegionBlockedWarning: false
# Set the value of this setting to false to suppress search usage telemetry
# for reducing the load of OpenSearch cluster.
# data.search.usageTelemetry.enabled: false
# 2.4 renames 'wizard.enabled: false' to 'vis_builder.enabled: false'
# Set the value of this setting to false to disable VisBuilder
# functionality in Visualization.
# vis_builder.enabled: false
# 2.4 New Experimental Feature
# Set the value of this setting to true to enable the experimental multiple data source
# support feature. Use with caution.
# data_source.enabled: false
# Set the value of these settings to customize crypto materials to encryption saved credentials
# in data sources.
# data_source.encryption.wrappingKeyName: 'changeme'
# data_source.encryption.wrappingKeyNamespace: 'changeme'
# data_source.encryption.wrappingKey: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
# 2.6 New ML Commons Dashboards Feature
# Set the value of this setting to true to enable the ml commons dashboards
# ml_commons_dashboards.enabled: false
# 2.12 New experimental Assistant Dashboards Feature
# Set the value of this setting to true to enable the assistant dashboards
# assistant.chat.enabled: false
# 2.13 New Query Assistant Feature
# Set the value of this setting to false to disable the query assistant
# observability.query_assist.enabled: false
# 2.14 Enable Ui Metric Collectors in Usage Collector
# Set the value of this setting to true to enable UI Metric collections
# usageCollection.uiMetric.enabled: false
opensearch.hosts: [https://localhost:9200]
opensearch.ssl.verificationMode: none
opensearch.username: admin
opensearch.password: 'Qazwsxedc!@#123'
opensearch.requestHeadersWhitelist: [authorization, securitytenant]
opensearch_security.multitenancy.enabled: true
opensearch_security.multitenancy.tenants.preferred: [Private, Global]
opensearch_security.readonly_mode.roles: [kibana_read_only]
# Use this setting if you are running opensearch-dashboards without https
opensearch_security.cookie.secure: false
server.host: '0.0.0.0'
