diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 615c253967..d93ca97f46 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -145,7 +145,9 @@ class DatasetListApi(DatasetApiResource): external_knowledge_id=args["external_knowledge_id"], embedding_model_provider=args["embedding_model_provider"], embedding_model_name=args["embedding_model"], - retrieval_model=RetrievalModel(**args["retrieval_model"]), + retrieval_model=RetrievalModel(**args["retrieval_model"]) + if args["retrieval_model"] is not None + else None, ) except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py index 969cd112ee..11f245812e 100644 --- a/api/core/helper/ssrf_proxy.py +++ b/api/core/helper/ssrf_proxy.py @@ -48,25 +48,26 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT, ) + if "ssl_verify" not in kwargs: + kwargs["ssl_verify"] = HTTP_REQUEST_NODE_SSL_VERIFY + + ssl_verify = kwargs.pop("ssl_verify") + retries = 0 while retries <= max_retries: try: if dify_config.SSRF_PROXY_ALL_URL: - with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL, verify=HTTP_REQUEST_NODE_SSL_VERIFY) as client: + with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL, verify=ssl_verify) as client: response = client.request(method=method, url=url, **kwargs) elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL: proxy_mounts = { - "http://": httpx.HTTPTransport( - proxy=dify_config.SSRF_PROXY_HTTP_URL, verify=HTTP_REQUEST_NODE_SSL_VERIFY - ), - "https://": httpx.HTTPTransport( - proxy=dify_config.SSRF_PROXY_HTTPS_URL, verify=HTTP_REQUEST_NODE_SSL_VERIFY - ), + "http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL, verify=ssl_verify), + "https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL, verify=ssl_verify), } - with httpx.Client(mounts=proxy_mounts, verify=HTTP_REQUEST_NODE_SSL_VERIFY) as client: + with httpx.Client(mounts=proxy_mounts, verify=ssl_verify) as client: response = client.request(method=method, url=url, **kwargs) else: - with httpx.Client(verify=HTTP_REQUEST_NODE_SSL_VERIFY) as client: + with httpx.Client(verify=ssl_verify) as client: response = client.request(method=method, url=url, **kwargs) if response.status_code not in STATUS_FORCELIST: diff --git a/api/core/workflow/nodes/http_request/entities.py b/api/core/workflow/nodes/http_request/entities.py index 054e30f0aa..8d7ba25d47 100644 --- a/api/core/workflow/nodes/http_request/entities.py +++ b/api/core/workflow/nodes/http_request/entities.py @@ -90,6 +90,7 @@ class HttpRequestNodeData(BaseNodeData): params: str body: Optional[HttpRequestNodeBody] = None timeout: Optional[HttpRequestNodeTimeout] = None + ssl_verify: Optional[bool] = dify_config.HTTP_REQUEST_NODE_SSL_VERIFY class Response: diff --git a/api/core/workflow/nodes/http_request/executor.py b/api/core/workflow/nodes/http_request/executor.py index f7fa8d670c..5d466e645f 100644 --- a/api/core/workflow/nodes/http_request/executor.py +++ b/api/core/workflow/nodes/http_request/executor.py @@ -88,6 +88,7 @@ class Executor: self.method = node_data.method self.auth = node_data.authorization self.timeout = timeout + self.ssl_verify = node_data.ssl_verify self.params = [] self.headers = {} self.content = None @@ -316,6 +317,7 @@ class Executor: "headers": headers, "params": self.params, "timeout": (self.timeout.connect, self.timeout.read, self.timeout.write), + "ssl_verify": self.ssl_verify, "follow_redirects": True, "max_retries": self.max_retries, } diff --git a/api/core/workflow/nodes/http_request/node.py b/api/core/workflow/nodes/http_request/node.py index 467161d5ed..fd2b0f9ae8 100644 --- a/api/core/workflow/nodes/http_request/node.py +++ b/api/core/workflow/nodes/http_request/node.py @@ -51,6 +51,7 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]): "max_read_timeout": dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT, "max_write_timeout": dify_config.HTTP_REQUEST_MAX_WRITE_TIMEOUT, }, + "ssl_verify": dify_config.HTTP_REQUEST_NODE_SSL_VERIFY, }, "retry_config": { "max_retries": dify_config.SSRF_DEFAULT_MAX_RETRIES, diff --git a/api/core/workflow/nodes/list_operator/node.py b/api/core/workflow/nodes/list_operator/node.py index 432c57294e..04ccfc5405 100644 --- a/api/core/workflow/nodes/list_operator/node.py +++ b/api/core/workflow/nodes/list_operator/node.py @@ -149,7 +149,10 @@ class ListOperatorNode(BaseNode[ListOperatorNodeData]): def _extract_slice( self, variable: Union[ArrayFileSegment, ArrayNumberSegment, ArrayStringSegment] ) -> Union[ArrayFileSegment, ArrayNumberSegment, ArrayStringSegment]: - value = int(self.graph_runtime_state.variable_pool.convert_template(self.node_data.extract_by.serial).text) - 1 + value = int(self.graph_runtime_state.variable_pool.convert_template(self.node_data.extract_by.serial).text) + if value < 1: + raise ValueError(f"Invalid serial index: must be >= 1, got {value}") + value -= 1 if len(variable.value) > int(value): result = variable.value[value] else: diff --git a/api/test_workflow_deletion.py b/api/tests/unit_tests/services/workflow/test_workflow_deletion.py similarity index 100% rename from api/test_workflow_deletion.py rename to api/tests/unit_tests/services/workflow/test_workflow_deletion.py diff --git a/docker/.env.example b/docker/.env.example index acb09c0d4f..e49e8fee89 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -174,6 +174,12 @@ CELERY_MIN_WORKERS= API_TOOL_DEFAULT_CONNECT_TIMEOUT=10 API_TOOL_DEFAULT_READ_TIMEOUT=60 +# ------------------------------- +# Datasource Configuration +# -------------------------------- +ENABLE_WEBSITE_JINAREADER=true +ENABLE_WEBSITE_FIRECRAWL=true +ENABLE_WEBSITE_WATERCRAWL=true # ------------------------------ # Database Configuration diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 86976063c3..a8f7b755fb 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -75,7 +75,9 @@ services: MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} - + ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} + ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} + ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} # The postgres database. db: image: postgres:15-alpine diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index e9c8c8715a..25b0c56561 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -43,6 +43,9 @@ x-shared-env: &shared-api-worker-env CELERY_MIN_WORKERS: ${CELERY_MIN_WORKERS:-} API_TOOL_DEFAULT_CONNECT_TIMEOUT: ${API_TOOL_DEFAULT_CONNECT_TIMEOUT:-10} API_TOOL_DEFAULT_READ_TIMEOUT: ${API_TOOL_DEFAULT_READ_TIMEOUT:-60} + ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} + ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} + ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} DB_USERNAME: ${DB_USERNAME:-postgres} DB_PASSWORD: ${DB_PASSWORD:-difyai123456} DB_HOST: ${DB_HOST:-db} @@ -543,7 +546,9 @@ services: MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} - + ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} + ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} + ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} # The postgres database. db: image: postgres:15-alpine diff --git a/web/.env.example b/web/.env.example index 51dc3d6b3c..1c3f42ddfc 100644 --- a/web/.env.example +++ b/web/.env.example @@ -49,3 +49,8 @@ NEXT_PUBLIC_MAX_PARALLEL_LIMIT=10 # The maximum number of iterations for agent setting NEXT_PUBLIC_MAX_ITERATIONS_NUM=5 + +NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=true +NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=true +NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=true + diff --git a/web/app/components/datasets/create/step-one/index.tsx b/web/app/components/datasets/create/step-one/index.tsx index 6f4231bb1f..38c885ebe2 100644 --- a/web/app/components/datasets/create/step-one/index.tsx +++ b/web/app/components/datasets/create/step-one/index.tsx @@ -20,7 +20,7 @@ import { useProviderContext } from '@/context/provider-context' import VectorSpaceFull from '@/app/components/billing/vector-space-full' import classNames from '@/utils/classnames' import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' - +import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' type IStepOneProps = { datasetId?: string dataSourceType?: DataSourceType @@ -126,9 +126,7 @@ const StepOne = ({ return true if (files.some(file => !file.file.id)) return true - if (isShowVectorSpaceFull) - return true - return false + return isShowVectorSpaceFull }, [files, isShowVectorSpaceFull]) return ( @@ -193,7 +191,8 @@ const StepOne = ({ {t('datasetCreation.stepOne.dataSourceType.notion')} -