Merge branch 'langgenius:main' into main

pull/19119/head
sondin 1 year ago committed by GitHub
commit e9599876a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -34,4 +34,4 @@ if you see such error message when you open this project in codespaces:
![Alt text](troubleshooting.png) ![Alt text](troubleshooting.png)
A simple workaround is to change the `/signin` endpoint to another one, log in with your GitHub account, close the tab, and then change it back to the `/signin` endpoint. After that, everything will work fine. A simple workaround is to change the `/signin` endpoint to another one, log in with your GitHub account, close the tab, and then change it back to the `/signin` endpoint. After that, everything will work fine.
The reason is that the `signin` endpoint is not allowed in Codespaces; details can be found [here](https://github.com/orgs/community/discussions/5204). The reason is that the `signin` endpoint is not allowed in Codespaces; details can be found [here](https://github.com/orgs/community/discussions/5204).

@ -2,7 +2,7 @@
// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda // README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
{ {
"name": "Python 3.12", "name": "Python 3.12",
"build": { "build": {
"context": "..", "context": "..",
"dockerfile": "Dockerfile" "dockerfile": "Dockerfile"
}, },

@ -1,3 +1,3 @@
This file is copied into the container along with environment.yml* from the parent This file is copied into the container along with environment.yml* from the parent
folder. This file is included to prevent the Dockerfile COPY instruction from folder. This file is included to prevent the Dockerfile COPY instruction from
failing if no environment.yml is found. failing if no environment.yml is found.

@ -5,18 +5,35 @@ root = true
# Unix-style newlines with a newline ending every file # Unix-style newlines with a newline ending every file
[*] [*]
charset = utf-8
end_of_line = lf end_of_line = lf
insert_final_newline = true insert_final_newline = true
trim_trailing_whitespace = true
[*.py]
indent_size = 4
indent_style = space
[*.{yml,yaml}]
indent_style = space
indent_size = 2
[*.toml]
indent_size = 4
indent_style = space
# Markdown and MDX are whitespace sensitive languages.
# Do not remove trailing spaces.
[*.{md,mdx}]
trim_trailing_whitespace = false
# Matches multiple files with brace expansion notation # Matches multiple files with brace expansion notation
# Set default charset # Set default charset
[*.{js,tsx}] [*.{js,tsx}]
charset = utf-8
indent_style = space indent_style = space
indent_size = 2 indent_size = 2
# Matches the exact files package.json
# Matches the exact files either package.json or .travis.yml [package.json]
[{package.json,.travis.yml}]
indent_style = space indent_style = space
indent_size = 2 indent_size = 2

2
.gitattributes vendored

@ -1,5 +1,5 @@
# Ensure that .sh scripts use LF as line separator, even if they are checked out # Ensure that .sh scripts use LF as line separator, even if they are checked out
# to Windows(NTFS) file-system, by a user of Docker for Windows. # to Windows(NTFS) file-system, by a user of Docker for Windows.
# These .sh scripts will be run from the Container after `docker compose up -d`. # These .sh scripts will be run from the Container after `docker compose up -d`.
# If they appear to be CRLF style, Dash from the Container will fail to execute # If they appear to be CRLF style, Dash from the Container will fail to execute
# them. # them.

@ -0,0 +1,22 @@
{
"Verbose": false,
"Debug": false,
"IgnoreDefaults": false,
"SpacesAfterTabs": false,
"NoColor": false,
"Exclude": [
"^web/public/vs/",
"^web/public/pdf.worker.min.mjs$",
"web/app/components/base/icons/src/vender/"
],
"AllowedContentTypes": [],
"PassedFiles": [],
"Disable": {
"EndOfLine": false,
"Indentation": false,
"IndentSize": true,
"InsertFinalNewline": false,
"TrimTrailingWhitespace": false,
"MaxLineLength": false
}
}

@ -9,6 +9,12 @@ concurrency:
group: style-${{ github.head_ref || github.run_id }} group: style-${{ github.head_ref || github.run_id }}
cancel-in-progress: true cancel-in-progress: true
permissions:
checks: write
statuses: write
contents: read
jobs: jobs:
python-style: python-style:
name: Python Style name: Python Style
@ -163,3 +169,14 @@ jobs:
VALIDATE_DOCKERFILE_HADOLINT: true VALIDATE_DOCKERFILE_HADOLINT: true
VALIDATE_XML: true VALIDATE_XML: true
VALIDATE_YAML: true VALIDATE_YAML: true
- name: EditorConfig checks
uses: super-linter/super-linter/slim@v7
env:
DEFAULT_BRANCH: main
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
IGNORE_GENERATED_FILES: true
IGNORE_GITIGNORED_FILES: true
# EditorConfig validation
VALIDATE_EDITORCONFIG: true
EDITORCONFIG_FILE_NAME: editorconfig-checker.json

@ -90,4 +90,4 @@ Recomendamos revisar este documento cuidadosamente antes de proceder con la conf
No dudes en contactarnos si encuentras algún problema durante el proceso de configuración. No dudes en contactarnos si encuentras algún problema durante el proceso de configuración.
## Obteniendo Ayuda ## Obteniendo Ayuda
Si alguna vez te quedas atascado o tienes una pregunta urgente mientras contribuyes, simplemente envíanos tus consultas a través del issue relacionado de GitHub, o únete a nuestro [Discord](https://discord.gg/8Tpq4AcN9c) para una charla rápida. Si alguna vez te quedas atascado o tienes una pregunta urgente mientras contribuyes, simplemente envíanos tus consultas a través del issue relacionado de GitHub, o únete a nuestro [Discord](https://discord.gg/8Tpq4AcN9c) para una charla rápida.

@ -90,4 +90,4 @@ Nous recommandons de revoir attentivement ce document avant de procéder à la c
N'hésitez pas à nous contacter si vous rencontrez des problèmes pendant le processus de configuration. N'hésitez pas à nous contacter si vous rencontrez des problèmes pendant le processus de configuration.
## Obtenir de l'aide ## Obtenir de l'aide
Si jamais vous êtes bloqué ou avez une question urgente en contribuant, envoyez-nous simplement vos questions via le problème GitHub concerné, ou rejoignez notre [Discord](https://discord.gg/8Tpq4AcN9c) pour une discussion rapide. Si jamais vous êtes bloqué ou avez une question urgente en contribuant, envoyez-nous simplement vos questions via le problème GitHub concerné, ou rejoignez notre [Discord](https://discord.gg/8Tpq4AcN9c) pour une discussion rapide.

@ -90,4 +90,4 @@ PR 설명에 기존 이슈를 연결하거나 새 이슈를 여는 것을 잊지
설정 과정에서 문제가 발생하면 언제든지 연락해 주세요. 설정 과정에서 문제가 발생하면 언제든지 연락해 주세요.
## 도움 받기 ## 도움 받기
기여하는 동안 막히거나 긴급한 질문이 있으면, 관련 GitHub 이슈를 통해 질문을 보내거나, 빠른 대화를 위해 우리의 [Discord](https://discord.gg/8Tpq4AcN9c)에 참여하세요. 기여하는 동안 막히거나 긴급한 질문이 있으면, 관련 GitHub 이슈를 통해 질문을 보내거나, 빠른 대화를 위해 우리의 [Discord](https://discord.gg/8Tpq4AcN9c)에 참여하세요.

@ -90,4 +90,4 @@ Recomendamos revisar este documento cuidadosamente antes de prosseguir com a con
Sinta-se à vontade para entrar em contato se encontrar quaisquer problemas durante o processo de configuração. Sinta-se à vontade para entrar em contato se encontrar quaisquer problemas durante o processo de configuração.
## Obtendo Ajuda ## Obtendo Ajuda
Se você ficar preso ou tiver uma dúvida urgente enquanto contribui, simplesmente envie suas perguntas através do problema relacionado no GitHub, ou entre no nosso [Discord](https://discord.gg/8Tpq4AcN9c) para uma conversa rápida. Se você ficar preso ou tiver uma dúvida urgente enquanto contribui, simplesmente envie suas perguntas através do problema relacionado no GitHub, ou entre no nosso [Discord](https://discord.gg/8Tpq4AcN9c) para uma conversa rápida.

@ -90,4 +90,4 @@ Kuruluma geçmeden önce bu belgeyi dikkatlice incelemenizi öneririz, çünkü
Kurulum süreci sırasında herhangi bir sorunla karşılaşırsanız bizimle iletişime geçmekten çekinmeyin. Kurulum süreci sırasında herhangi bir sorunla karşılaşırsanız bizimle iletişime geçmekten çekinmeyin.
## Yardım Almak ## Yardım Almak
Katkıda bulunurken takılırsanız veya acil bir sorunuz olursa, sorularınızı ilgili GitHub sorunu aracılığıyla bize gönderin veya hızlı bir sohbet için [Discord'umuza](https://discord.gg/8Tpq4AcN9c) katılın. Katkıda bulunurken takılırsanız veya acil bir sorunuz olursa, sorularınızı ilgili GitHub sorunu aracılığıyla bize gönderin veya hızlı bir sohbet için [Discord'umuza](https://discord.gg/8Tpq4AcN9c) katılın.

@ -1,259 +1,259 @@
![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) ![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab)
<p align="center"> <p align="center">
📌 <a href="https://dify.ai/blog/introducing-dify-workflow-file-upload-a-demo-on-ai-podcast">Predstavljamo nalaganje datotek Dify Workflow: znova ustvarite Google NotebookLM Podcast</a> 📌 <a href="https://dify.ai/blog/introducing-dify-workflow-file-upload-a-demo-on-ai-podcast">Predstavljamo nalaganje datotek Dify Workflow: znova ustvarite Google NotebookLM Podcast</a>
</p> </p>
<p align="center"> <p align="center">
<a href="https://cloud.dify.ai">Dify Cloud</a> · <a href="https://cloud.dify.ai">Dify Cloud</a> ·
<a href="https://docs.dify.ai/getting-started/install-self-hosted">Samostojno gostovanje</a> · <a href="https://docs.dify.ai/getting-started/install-self-hosted">Samostojno gostovanje</a> ·
<a href="https://docs.dify.ai">Dokumentacija</a> · <a href="https://docs.dify.ai">Dokumentacija</a> ·
<a href="https://dify.ai/pricing">Pregled ponudb izdelkov Dify</a> <a href="https://dify.ai/pricing">Pregled ponudb izdelkov Dify</a>
</p> </p>
<p align="center"> <p align="center">
<a href="https://dify.ai" target="_blank"> <a href="https://dify.ai" target="_blank">
<img alt="Static Badge" src="https://img.shields.io/badge/Product-F04438"></a> <img alt="Static Badge" src="https://img.shields.io/badge/Product-F04438"></a>
<a href="https://dify.ai/pricing" target="_blank"> <a href="https://dify.ai/pricing" target="_blank">
<img alt="Static Badge" src="https://img.shields.io/badge/free-pricing?logo=free&color=%20%23155EEF&label=pricing&labelColor=%20%23528bff"></a> <img alt="Static Badge" src="https://img.shields.io/badge/free-pricing?logo=free&color=%20%23155EEF&label=pricing&labelColor=%20%23528bff"></a>
<a href="https://discord.gg/FngNHpbcY7" target="_blank"> <a href="https://discord.gg/FngNHpbcY7" target="_blank">
<img src="https://img.shields.io/discord/1082486657678311454?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb" <img src="https://img.shields.io/discord/1082486657678311454?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb"
alt="chat on Discord"></a> alt="chat on Discord"></a>
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank"> <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5" <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a> alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank"> <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff" <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a> alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank"> <a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a> <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank"> <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
<img alt="Commits last month" src="https://img.shields.io/github/commit-activity/m/langgenius/dify?labelColor=%20%2332b583&color=%20%2312b76a"></a> <img alt="Commits last month" src="https://img.shields.io/github/commit-activity/m/langgenius/dify?labelColor=%20%2332b583&color=%20%2312b76a"></a>
<a href="https://github.com/langgenius/dify/" target="_blank"> <a href="https://github.com/langgenius/dify/" target="_blank">
<img alt="Issues closed" src="https://img.shields.io/github/issues-search?query=repo%3Alanggenius%2Fdify%20is%3Aclosed&label=issues%20closed&labelColor=%20%237d89b0&color=%20%235d6b98"></a> <img alt="Issues closed" src="https://img.shields.io/github/issues-search?query=repo%3Alanggenius%2Fdify%20is%3Aclosed&label=issues%20closed&labelColor=%20%237d89b0&color=%20%235d6b98"></a>
<a href="https://github.com/langgenius/dify/discussions/" target="_blank"> <a href="https://github.com/langgenius/dify/discussions/" target="_blank">
<img alt="Discussion posts" src="https://img.shields.io/github/discussions/langgenius/dify?labelColor=%20%239b8afb&color=%20%237a5af8"></a> <img alt="Discussion posts" src="https://img.shields.io/github/discussions/langgenius/dify?labelColor=%20%239b8afb&color=%20%237a5af8"></a>
</p> </p>
<p align="center"> <p align="center">
<a href="./README.md"><img alt="README in English" src="https://img.shields.io/badge/English-d9d9d9"></a> <a href="./README.md"><img alt="README in English" src="https://img.shields.io/badge/English-d9d9d9"></a>
<a href="./README_CN.md"><img alt="简体中文版自述文件" src="https://img.shields.io/badge/简体中文-d9d9d9"></a> <a href="./README_CN.md"><img alt="简体中文版自述文件" src="https://img.shields.io/badge/简体中文-d9d9d9"></a>
<a href="./README_JA.md"><img alt="日本語のREADME" src="https://img.shields.io/badge/日本語-d9d9d9"></a> <a href="./README_JA.md"><img alt="日本語のREADME" src="https://img.shields.io/badge/日本語-d9d9d9"></a>
<a href="./README_ES.md"><img alt="README en Español" src="https://img.shields.io/badge/Español-d9d9d9"></a> <a href="./README_ES.md"><img alt="README en Español" src="https://img.shields.io/badge/Español-d9d9d9"></a>
<a href="./README_FR.md"><img alt="README en Français" src="https://img.shields.io/badge/Français-d9d9d9"></a> <a href="./README_FR.md"><img alt="README en Français" src="https://img.shields.io/badge/Français-d9d9d9"></a>
<a href="./README_KL.md"><img alt="README tlhIngan Hol" src="https://img.shields.io/badge/Klingon-d9d9d9"></a> <a href="./README_KL.md"><img alt="README tlhIngan Hol" src="https://img.shields.io/badge/Klingon-d9d9d9"></a>
<a href="./README_KR.md"><img alt="README in Korean" src="https://img.shields.io/badge/한국어-d9d9d9"></a> <a href="./README_KR.md"><img alt="README in Korean" src="https://img.shields.io/badge/한국어-d9d9d9"></a>
<a href="./README_AR.md"><img alt="README بالعربية" src="https://img.shields.io/badge/العربية-d9d9d9"></a> <a href="./README_AR.md"><img alt="README بالعربية" src="https://img.shields.io/badge/العربية-d9d9d9"></a>
<a href="./README_TR.md"><img alt="Türkçe README" src="https://img.shields.io/badge/Türkçe-d9d9d9"></a> <a href="./README_TR.md"><img alt="Türkçe README" src="https://img.shields.io/badge/Türkçe-d9d9d9"></a>
<a href="./README_VI.md"><img alt="README Tiếng Việt" src="https://img.shields.io/badge/Ti%E1%BA%BFng%20Vi%E1%BB%87t-d9d9d9"></a> <a href="./README_VI.md"><img alt="README Tiếng Việt" src="https://img.shields.io/badge/Ti%E1%BA%BFng%20Vi%E1%BB%87t-d9d9d9"></a>
<a href="./README_SI.md"><img alt="README Slovenščina" src="https://img.shields.io/badge/Sloven%C5%A1%C4%8Dina-d9d9d9"></a> <a href="./README_SI.md"><img alt="README Slovenščina" src="https://img.shields.io/badge/Sloven%C5%A1%C4%8Dina-d9d9d9"></a>
<a href="./README_BN.md"><img alt="README in বাংলা" src="https://img.shields.io/badge/বাংলা-d9d9d9"></a> <a href="./README_BN.md"><img alt="README in বাংলা" src="https://img.shields.io/badge/বাংলা-d9d9d9"></a>
</p> </p>
Dify je odprtokodna platforma za razvoj aplikacij LLM. Njegov intuitivni vmesnik združuje agentski potek dela z umetno inteligenco, cevovod RAG, zmogljivosti agentov, upravljanje modelov, funkcije opazovanja in več, kar vam omogoča hiter prehod od prototipa do proizvodnje. Dify je odprtokodna platforma za razvoj aplikacij LLM. Njegov intuitivni vmesnik združuje agentski potek dela z umetno inteligenco, cevovod RAG, zmogljivosti agentov, upravljanje modelov, funkcije opazovanja in več, kar vam omogoča hiter prehod od prototipa do proizvodnje.
## Hitri začetek ## Hitri začetek
> Preden namestite Dify, se prepričajte, da vaša naprava izpolnjuje naslednje minimalne sistemske zahteve: > Preden namestite Dify, se prepričajte, da vaša naprava izpolnjuje naslednje minimalne sistemske zahteve:
> >
>- CPU >= 2 Core >- CPU >= 2 Core
>- RAM >= 4 GiB >- RAM >= 4 GiB
</br> </br>
Najlažji način za zagon strežnika Dify je prek docker compose . Preden zaženete Dify z naslednjimi ukazi, se prepričajte, da sta Docker in Docker Compose nameščena na vašem računalniku: Najlažji način za zagon strežnika Dify je prek docker compose . Preden zaženete Dify z naslednjimi ukazi, se prepričajte, da sta Docker in Docker Compose nameščena na vašem računalniku:
```bash ```bash
cd dify cd dify
cd docker cd docker
cp .env.example .env cp .env.example .env
docker compose up -d docker compose up -d
``` ```
Po zagonu lahko dostopate do nadzorne plošče Dify v brskalniku na [http://localhost/install](http://localhost/install) in začnete postopek inicializacije. Po zagonu lahko dostopate do nadzorne plošče Dify v brskalniku na [http://localhost/install](http://localhost/install) in začnete postopek inicializacije.
#### Iskanje pomoči #### Iskanje pomoči
Prosimo, glejte naša pogosta vprašanja [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) če naletite na težave pri nastavitvi Dify. Če imate še vedno težave, se obrnite na [skupnost ali nas](#community--contact). Prosimo, glejte naša pogosta vprašanja [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) če naletite na težave pri nastavitvi Dify. Če imate še vedno težave, se obrnite na [skupnost ali nas](#community--contact).
> Če želite prispevati k Difyju ali narediti dodaten razvoj, glejte naš vodnik za [uvajanje iz izvorne kode](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code) > Če želite prispevati k Difyju ali narediti dodaten razvoj, glejte naš vodnik za [uvajanje iz izvorne kode](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code)
## Ključne značilnosti ## Ključne značilnosti
**1. Potek dela**: **1. Potek dela**:
Zgradite in preizkusite zmogljive poteke dela AI na vizualnem platnu, pri čemer izkoristite vse naslednje funkcije in več. Zgradite in preizkusite zmogljive poteke dela AI na vizualnem platnu, pri čemer izkoristite vse naslednje funkcije in več.
https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa
**2. Celovita podpora za modele**: **2. Celovita podpora za modele**:
Brezhibna integracija s stotinami lastniških/odprtokodnih LLM-jev ducatov ponudnikov sklepanja in samostojnih rešitev, ki pokrivajo GPT, Mistral, Llama3 in vse modele, združljive z API-jem OpenAI. Celoten seznam podprtih ponudnikov modelov najdete [tukaj](https://docs.dify.ai/getting-started/readme/model-providers). Brezhibna integracija s stotinami lastniških/odprtokodnih LLM-jev ducatov ponudnikov sklepanja in samostojnih rešitev, ki pokrivajo GPT, Mistral, Llama3 in vse modele, združljive z API-jem OpenAI. Celoten seznam podprtih ponudnikov modelov najdete [tukaj](https://docs.dify.ai/getting-started/readme/model-providers).
![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3)
**3. Prompt IDE**: **3. Prompt IDE**:
intuitivni vmesnik za ustvarjanje pozivov, primerjavo zmogljivosti modela in dodajanje dodatnih funkcij, kot je pretvorba besedila v govor, aplikaciji, ki temelji na klepetu. intuitivni vmesnik za ustvarjanje pozivov, primerjavo zmogljivosti modela in dodajanje dodatnih funkcij, kot je pretvorba besedila v govor, aplikaciji, ki temelji na klepetu.
**4. RAG Pipeline**: **4. RAG Pipeline**:
Obsežne zmogljivosti RAG, ki pokrivajo vse od vnosa dokumenta do priklica, s podporo za ekstrakcijo besedila iz datotek PDF, PPT in drugih običajnih formatov dokumentov. Obsežne zmogljivosti RAG, ki pokrivajo vse od vnosa dokumenta do priklica, s podporo za ekstrakcijo besedila iz datotek PDF, PPT in drugih običajnih formatov dokumentov.
**5. Agent capabilities**: **5. Agent capabilities**:
definirate lahko agente, ki temeljijo na klicanju funkcij LLM ali ReAct, in dodate vnaprej izdelana orodja ali orodja po meri za agenta. Dify ponuja več kot 50 vgrajenih orodij za agente AI, kot so Google Search, DALL·E, Stable Diffusion in WolframAlpha. definirate lahko agente, ki temeljijo na klicanju funkcij LLM ali ReAct, in dodate vnaprej izdelana orodja ali orodja po meri za agenta. Dify ponuja več kot 50 vgrajenih orodij za agente AI, kot so Google Search, DALL·E, Stable Diffusion in WolframAlpha.
**6. LLMOps**: **6. LLMOps**:
Spremljajte in analizirajte dnevnike aplikacij in učinkovitost skozi čas. Pozive, nabore podatkov in modele lahko nenehno izboljšujete na podlagi proizvodnih podatkov in opomb. Spremljajte in analizirajte dnevnike aplikacij in učinkovitost skozi čas. Pozive, nabore podatkov in modele lahko nenehno izboljšujete na podlagi proizvodnih podatkov in opomb.
**7. Backend-as-a-Service**: **7. Backend-as-a-Service**:
Vse ponudbe Difyja so opremljene z ustreznimi API-ji, tako da lahko Dify brez težav integrirate v svojo poslovno logiko. Vse ponudbe Difyja so opremljene z ustreznimi API-ji, tako da lahko Dify brez težav integrirate v svojo poslovno logiko.
## Primerjava Funkcij ## Primerjava Funkcij
<table style="width: 100%;"> <table style="width: 100%;">
<tr> <tr>
<th align="center">Funkcija</th> <th align="center">Funkcija</th>
<th align="center">Dify.AI</th> <th align="center">Dify.AI</th>
<th align="center">LangChain</th> <th align="center">LangChain</th>
<th align="center">Flowise</th> <th align="center">Flowise</th>
<th align="center">OpenAI Assistants API</th> <th align="center">OpenAI Assistants API</th>
</tr> </tr>
<tr> <tr>
<td align="center">Programski pristop</td> <td align="center">Programski pristop</td>
<td align="center">API + usmerjeno v aplikacije</td> <td align="center">API + usmerjeno v aplikacije</td>
<td align="center">Python koda</td> <td align="center">Python koda</td>
<td align="center">Usmerjeno v aplikacije</td> <td align="center">Usmerjeno v aplikacije</td>
<td align="center">Usmerjeno v API</td> <td align="center">Usmerjeno v API</td>
</tr> </tr>
<tr> <tr>
<td align="center">Podprti LLM-ji</td> <td align="center">Podprti LLM-ji</td>
<td align="center">Bogata izbira</td> <td align="center">Bogata izbira</td>
<td align="center">Bogata izbira</td> <td align="center">Bogata izbira</td>
<td align="center">Bogata izbira</td> <td align="center">Bogata izbira</td>
<td align="center">Samo OpenAI</td> <td align="center">Samo OpenAI</td>
</tr> </tr>
<tr> <tr>
<td align="center">RAG pogon</td> <td align="center">RAG pogon</td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
</tr> </tr>
<tr> <tr>
<td align="center">Agent</td> <td align="center">Agent</td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
</tr> </tr>
<tr> <tr>
<td align="center">Potek dela</td> <td align="center">Potek dela</td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
</tr> </tr>
<tr> <tr>
<td align="center">Spremljanje</td> <td align="center">Spremljanje</td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
</tr> </tr>
<tr> <tr>
<td align="center">Funkcija za podjetja (SSO/nadzor dostopa)</td> <td align="center">Funkcija za podjetja (SSO/nadzor dostopa)</td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
</tr> </tr>
<tr> <tr>
<td align="center">Lokalna namestitev</td> <td align="center">Lokalna namestitev</td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
<td align="center"></td> <td align="center"></td>
</tr> </tr>
</table> </table>
## Uporaba Dify ## Uporaba Dify
- **Cloud </br>** - **Cloud </br>**
Gostimo storitev Dify Cloud za vsakogar, ki jo lahko preizkusite brez nastavitev. Zagotavlja vse zmožnosti različice za samostojno namestitev in vključuje 200 brezplačnih klicev GPT-4 v načrtu peskovnika. Gostimo storitev Dify Cloud za vsakogar, ki jo lahko preizkusite brez nastavitev. Zagotavlja vse zmožnosti različice za samostojno namestitev in vključuje 200 brezplačnih klicev GPT-4 v načrtu peskovnika.
- **Self-hosting Dify Community Edition</br>** - **Self-hosting Dify Community Edition</br>**
Hitro zaženite Dify v svojem okolju s tem [začetnim vodnikom](#quick-start) . Za dodatne reference in podrobnejša navodila uporabite našo [dokumentacijo](https://docs.dify.ai) . Hitro zaženite Dify v svojem okolju s tem [začetnim vodnikom](#quick-start) . Za dodatne reference in podrobnejša navodila uporabite našo [dokumentacijo](https://docs.dify.ai) .
- **Dify za podjetja/organizacije</br>** - **Dify za podjetja/organizacije</br>**
Ponujamo dodatne funkcije, osredotočene na podjetja. Zabeležite svoja vprašanja prek tega klepetalnega robota ali nam pošljite e-pošto, da se pogovorimo o potrebah podjetja. </br> Ponujamo dodatne funkcije, osredotočene na podjetja. Zabeležite svoja vprašanja prek tega klepetalnega robota ali nam pošljite e-pošto, da se pogovorimo o potrebah podjetja. </br>
> Za novoustanovljena podjetja in mala podjetja, ki uporabljajo AWS, si oglejte Dify Premium na AWS Marketplace in ga z enim klikom uvedite v svoj AWS VPC. To je cenovno ugodna ponudba AMI z možnostjo ustvarjanja aplikacij z logotipom in blagovno znamko po meri. > Za novoustanovljena podjetja in mala podjetja, ki uporabljajo AWS, si oglejte Dify Premium na AWS Marketplace in ga z enim klikom uvedite v svoj AWS VPC. To je cenovno ugodna ponudba AMI z možnostjo ustvarjanja aplikacij z logotipom in blagovno znamko po meri.
## Staying ahead ## Staying ahead
Star Dify on GitHub and be instantly notified of new releases. Star Dify on GitHub and be instantly notified of new releases.
![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4)
## Napredne nastavitve ## Napredne nastavitve
Če morate prilagoditi konfiguracijo, si oglejte komentarje v naši datoteki .env.example in posodobite ustrezne vrednosti v svoji .env datoteki. Poleg tega boste morda morali prilagoditi samo datoteko docker-compose.yaml, na primer spremeniti različice slike, preslikave vrat ali namestitve nosilca, glede na vaše specifično okolje in zahteve za uvajanje. Po kakršnih koli spremembah ponovno zaženite docker-compose up -d. Celoten seznam razpoložljivih spremenljivk okolja najdete tukaj . Če morate prilagoditi konfiguracijo, si oglejte komentarje v naši datoteki .env.example in posodobite ustrezne vrednosti v svoji .env datoteki. Poleg tega boste morda morali prilagoditi samo datoteko docker-compose.yaml, na primer spremeniti različice slike, preslikave vrat ali namestitve nosilca, glede na vaše specifično okolje in zahteve za uvajanje. Po kakršnih koli spremembah ponovno zaženite docker-compose up -d. Celoten seznam razpoložljivih spremenljivk okolja najdete tukaj .
Če želite konfigurirati visoko razpoložljivo nastavitev, so na voljo Helm Charts in datoteke YAML, ki jih prispeva skupnost, ki omogočajo uvedbo Difyja v Kubernetes. Če želite konfigurirati visoko razpoložljivo nastavitev, so na voljo Helm Charts in datoteke YAML, ki jih prispeva skupnost, ki omogočajo uvedbo Difyja v Kubernetes.
- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
- [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes) - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s) - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
#### Uporaba Terraform za uvajanje #### Uporaba Terraform za uvajanje
namestite Dify v Cloud Platform z enim klikom z uporabo [terraform](https://www.terraform.io/) namestite Dify v Cloud Platform z enim klikom z uporabo [terraform](https://www.terraform.io/)
##### Azure Global ##### Azure Global
- [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform) - [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform)
##### Google Cloud ##### Google Cloud
- [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) - [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
#### Uporaba AWS CDK za uvajanje #### Uporaba AWS CDK za uvajanje
Uvedite Dify v AWS z uporabo [CDK](https://aws.amazon.com/cdk/) Uvedite Dify v AWS z uporabo [CDK](https://aws.amazon.com/cdk/)
##### AWS ##### AWS
- [AWS CDK by @KevinZhao](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) - [AWS CDK by @KevinZhao](https://github.com/aws-samples/solution-for-deploying-dify-on-aws)
## Prispevam ## Prispevam
Za tiste, ki bi radi prispevali kodo, si oglejte naš vodnik za prispevke . Hkrati vas prosimo, da podprete Dify tako, da ga delite na družbenih medijih ter na dogodkih in konferencah. Za tiste, ki bi radi prispevali kodo, si oglejte naš vodnik za prispevke . Hkrati vas prosimo, da podprete Dify tako, da ga delite na družbenih medijih ter na dogodkih in konferencah.
> Iščemo sodelavce za pomoč pri prevajanju Difyja v jezike, ki niso mandarinščina ali angleščina. Če želite pomagati, si oglejte i18n README za več informacij in nam pustite komentar v kanalu global-users našega strežnika skupnosti Discord . > Iščemo sodelavce za pomoč pri prevajanju Difyja v jezike, ki niso mandarinščina ali angleščina. Če želite pomagati, si oglejte i18n README za več informacij in nam pustite komentar v kanalu global-users našega strežnika skupnosti Discord .
## Skupnost in stik ## Skupnost in stik
* [GitHub Discussion](https://github.com/langgenius/dify/discussions). Najboljše za: izmenjavo povratnih informacij in postavljanje vprašanj. * [GitHub Discussion](https://github.com/langgenius/dify/discussions). Najboljše za: izmenjavo povratnih informacij in postavljanje vprašanj.
* [GitHub Issues](https://github.com/langgenius/dify/issues). Najboljše za: hrošče, na katere naletite pri uporabi Dify.AI, in predloge funkcij. Oglejte si naš [vodnik za prispevke](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). * [GitHub Issues](https://github.com/langgenius/dify/issues). Najboljše za: hrošče, na katere naletite pri uporabi Dify.AI, in predloge funkcij. Oglejte si naš [vodnik za prispevke](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
* [Discord](https://discord.gg/FngNHpbcY7). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. * [Discord](https://discord.gg/FngNHpbcY7). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo.
* [X(Twitter)](https://twitter.com/dify_ai). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. * [X(Twitter)](https://twitter.com/dify_ai). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo.
**Contributors** **Contributors**
<a href="https://github.com/langgenius/dify/graphs/contributors"> <a href="https://github.com/langgenius/dify/graphs/contributors">
<img src="https://contrib.rocks/image?repo=langgenius/dify" /> <img src="https://contrib.rocks/image?repo=langgenius/dify" />
</a> </a>
## Star history ## Star history
[![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) [![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date)
## Varnostno razkritje ## Varnostno razkritje
Zaradi zaščite vaše zasebnosti se izogibajte objavljanju varnostnih vprašanj na GitHub. Namesto tega pošljite vprašanja na security@dify.ai in zagotovili vam bomo podrobnejši odgovor. Zaradi zaščite vaše zasebnosti se izogibajte objavljanju varnostnih vprašanj na GitHub. Namesto tega pošljite vprašanja na security@dify.ai in zagotovili vam bomo podrobnejši odgovor.
## Licenca ## Licenca
To skladišče je na voljo pod [odprtokodno licenco Dify](LICENSE) , ki je v bistvu Apache 2.0 z nekaj dodatnimi omejitvami. To skladišče je na voljo pod [odprtokodno licenco Dify](LICENSE) , ki je v bistvu Apache 2.0 z nekaj dodatnimi omejitvami.

@ -16,4 +16,4 @@ logs
.ruff_cache .ruff_cache
# venv # venv
.venv .venv

@ -52,7 +52,6 @@ def initialize_extensions(app: DifyApp):
ext_mail, ext_mail,
ext_migrate, ext_migrate,
ext_otel, ext_otel,
ext_otel_patch,
ext_proxy_fix, ext_proxy_fix,
ext_redis, ext_redis,
ext_repositories, ext_repositories,
@ -85,7 +84,6 @@ def initialize_extensions(app: DifyApp):
ext_proxy_fix, ext_proxy_fix,
ext_blueprints, ext_blueprints,
ext_commands, ext_commands,
ext_otel_patch, # Apply patch before initializing OpenTelemetry
ext_otel, ext_otel,
] ]
for ext in extensions: for ext in extensions:

@ -444,13 +444,13 @@ def convert_to_agent_apps():
WHERE a.mode = 'chat' WHERE a.mode = 'chat'
AND am.agent_mode is not null AND am.agent_mode is not null
AND ( AND (
am.agent_mode like '%"strategy": "function_call"%' am.agent_mode like '%"strategy": "function_call"%'
OR am.agent_mode like '%"strategy": "react"%' OR am.agent_mode like '%"strategy": "react"%'
) )
AND ( AND (
am.agent_mode like '{"enabled": true%' am.agent_mode like '{"enabled": true%'
OR am.agent_mode like '{"max_iteration": %' OR am.agent_mode like '{"max_iteration": %'
) ORDER BY a.created_at DESC LIMIT 1000 ) ORDER BY a.created_at DESC LIMIT 1000
""" """
with db.engine.begin() as conn: with db.engine.begin() as conn:
@ -818,8 +818,9 @@ def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[
click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green")) click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green"))
@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
@click.command("clear-orphaned-file-records", help="Clear orphaned file records.") @click.command("clear-orphaned-file-records", help="Clear orphaned file records.")
def clear_orphaned_file_records(): def clear_orphaned_file_records(force: bool):
""" """
Clear orphaned file records in the database. Clear orphaned file records in the database.
""" """
@ -845,7 +846,15 @@ def clear_orphaned_file_records():
# notify user and ask for confirmation # notify user and ask for confirmation
click.echo( click.echo(
click.style("This command will find and delete orphaned file records in the following tables:", fg="yellow") click.style(
"This command will first find and delete orphaned file records from the message_files table,", fg="yellow"
)
)
click.echo(
click.style(
"and then it will find and delete orphaned file records in the following tables:",
fg="yellow",
)
) )
for files_table in files_tables: for files_table in files_tables:
click.echo(click.style(f"- {files_table['table']}", fg="yellow")) click.echo(click.style(f"- {files_table['table']}", fg="yellow"))
@ -878,11 +887,55 @@ def clear_orphaned_file_records():
fg="yellow", fg="yellow",
) )
) )
click.confirm("Do you want to proceed?", abort=True) if not force:
click.confirm("Do you want to proceed?", abort=True)
# start the cleanup process # start the cleanup process
click.echo(click.style("Starting orphaned file records cleanup.", fg="white")) click.echo(click.style("Starting orphaned file records cleanup.", fg="white"))
# clean up the orphaned records in the message_files table where message_id doesn't exist in messages table
try:
click.echo(
click.style("- Listing message_files records where message_id doesn't exist in messages table", fg="white")
)
query = (
"SELECT mf.id, mf.message_id "
"FROM message_files mf LEFT JOIN messages m ON mf.message_id = m.id "
"WHERE m.id IS NULL"
)
orphaned_message_files = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
for i in rs:
orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])})
if orphaned_message_files:
click.echo(click.style(f"Found {len(orphaned_message_files)} orphaned message_files records:", fg="white"))
for record in orphaned_message_files:
click.echo(click.style(f" - id: {record['id']}, message_id: {record['message_id']}", fg="black"))
if not force:
click.confirm(
(
f"Do you want to proceed "
f"to delete all {len(orphaned_message_files)} orphaned message_files records?"
),
abort=True,
)
click.echo(click.style("- Deleting orphaned message_files records", fg="white"))
query = "DELETE FROM message_files WHERE id IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
click.echo(
click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green")
)
else:
click.echo(click.style("No orphaned message_files records found. There is nothing to delete.", fg="green"))
except Exception as e:
click.echo(click.style(f"Error deleting orphaned message_files records: {str(e)}", fg="red"))
# clean up the orphaned records in the rest of the *_files tables
try: try:
# fetch file id and keys from each table # fetch file id and keys from each table
all_files_in_tables = [] all_files_in_tables = []
@ -964,7 +1017,8 @@ def clear_orphaned_file_records():
click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white")) click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white"))
for file in orphaned_files: for file in orphaned_files:
click.echo(click.style(f"- orphaned file id: {file}", fg="black")) click.echo(click.style(f"- orphaned file id: {file}", fg="black"))
click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True) if not force:
click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True)
# delete orphaned records for each file # delete orphaned records for each file
try: try:
@ -979,8 +1033,9 @@ def clear_orphaned_file_records():
click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green")) click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green"))
@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
@click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.") @click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.")
def remove_orphaned_files_on_storage(): def remove_orphaned_files_on_storage(force: bool):
""" """
Remove orphaned files on the storage. Remove orphaned files on the storage.
""" """
@ -1028,7 +1083,8 @@ def remove_orphaned_files_on_storage():
fg="yellow", fg="yellow",
) )
) )
click.confirm("Do you want to proceed?", abort=True) if not force:
click.confirm("Do you want to proceed?", abort=True)
# start the cleanup process # start the cleanup process
click.echo(click.style("Starting orphaned files cleanup.", fg="white")) click.echo(click.style("Starting orphaned files cleanup.", fg="white"))
@ -1069,7 +1125,8 @@ def remove_orphaned_files_on_storage():
click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white")) click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white"))
for file in orphaned_files: for file in orphaned_files:
click.echo(click.style(f"- orphaned file: {file}", fg="black")) click.echo(click.style(f"- orphaned file: {file}", fg="black"))
click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True) if not force:
click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True)
# delete orphaned files # delete orphaned files
removed_files = 0 removed_files = 0

@ -1,4 +1,5 @@
from typing import Optional import enum
from typing import Literal, Optional
from pydantic import Field, PositiveInt from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
@ -9,6 +10,14 @@ class OpenSearchConfig(BaseSettings):
Configuration settings for OpenSearch Configuration settings for OpenSearch
""" """
class AuthMethod(enum.StrEnum):
"""
Authentication method for OpenSearch
"""
BASIC = "basic"
AWS_MANAGED_IAM = "aws_managed_iam"
OPENSEARCH_HOST: Optional[str] = Field( OPENSEARCH_HOST: Optional[str] = Field(
description="Hostname or IP address of the OpenSearch server (e.g., 'localhost' or 'opensearch.example.com')", description="Hostname or IP address of the OpenSearch server (e.g., 'localhost' or 'opensearch.example.com')",
default=None, default=None,
@ -19,6 +28,16 @@ class OpenSearchConfig(BaseSettings):
default=9200, default=9200,
) )
OPENSEARCH_SECURE: bool = Field(
description="Whether to use SSL/TLS encrypted connection for OpenSearch (True for HTTPS, False for HTTP)",
default=False,
)
OPENSEARCH_AUTH_METHOD: AuthMethod = Field(
description="Authentication method for OpenSearch connection (default is 'basic')",
default=AuthMethod.BASIC,
)
OPENSEARCH_USER: Optional[str] = Field( OPENSEARCH_USER: Optional[str] = Field(
description="Username for authenticating with OpenSearch", description="Username for authenticating with OpenSearch",
default=None, default=None,
@ -29,7 +48,11 @@ class OpenSearchConfig(BaseSettings):
default=None, default=None,
) )
OPENSEARCH_SECURE: bool = Field( OPENSEARCH_AWS_REGION: Optional[str] = Field(
description="Whether to use SSL/TLS encrypted connection for OpenSearch (True for HTTPS, False for HTTP)", description="AWS region for OpenSearch (e.g. 'us-west-2')",
default=False, default=None,
)
OPENSEARCH_AWS_SERVICE: Optional[Literal["es", "aoss"]] = Field(
description="AWS service for OpenSearch (e.g. 'aoss' for OpenSearch Serverless)", default=None
) )

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field( CURRENT_VERSION: str = Field(
description="Dify version", description="Dify version",
default="1.3.0", default="1.3.1",
) )
COMMIT_SHA: str = Field( COMMIT_SHA: str = Field(

@ -16,11 +16,25 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
if dify_config.ETL_TYPE == "Unstructured": if dify_config.ETL_TYPE == "Unstructured":
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"] DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "vtt", "properties"]
DOCUMENT_EXTENSIONS.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub")) DOCUMENT_EXTENSIONS.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
if dify_config.UNSTRUCTURED_API_URL: if dify_config.UNSTRUCTURED_API_URL:
DOCUMENT_EXTENSIONS.append("ppt") DOCUMENT_EXTENSIONS.append("ppt")
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
else: else:
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"] DOCUMENT_EXTENSIONS = [
"txt",
"markdown",
"md",
"mdx",
"pdf",
"html",
"htm",
"xlsx",
"xls",
"docx",
"csv",
"vtt",
"properties",
]
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])

@ -89,7 +89,7 @@ class FilePreviewApi(Resource):
if args["as_attachment"]: if args["as_attachment"]:
encoded_filename = quote(upload_file.name) encoded_filename = quote(upload_file.name)
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}" response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
response.headers["Content-Type"] = "application/octet-stream" response.headers["Content-Type"] = "application/octet-stream"
return response return response

@ -1,4 +1,4 @@
ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible.
{{instruction}} {{instruction}}
@ -47,7 +47,7 @@ Thought:""" # noqa: E501
ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES = """Observation: {{observation}} ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES = """Observation: {{observation}}
Thought:""" Thought:"""
ENGLISH_REACT_CHAT_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. ENGLISH_REACT_CHAT_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible.
{{instruction}} {{instruction}}

@ -381,6 +381,8 @@ class WorkflowCycleManage:
workflow_node_execution.elapsed_time = elapsed_time workflow_node_execution.elapsed_time = elapsed_time
workflow_node_execution.execution_metadata = execution_metadata workflow_node_execution.execution_metadata = execution_metadata
self._workflow_node_execution_repository.update(workflow_node_execution)
return workflow_node_execution return workflow_node_execution
def _handle_workflow_node_execution_retried( def _handle_workflow_node_execution_retried(

@ -10,13 +10,13 @@ class NodeJsTemplateTransformer(TemplateTransformer):
f""" f"""
// declare main function // declare main function
{cls._code_placeholder} {cls._code_placeholder}
// decode and prepare input object // decode and prepare input object
var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8')) var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8'))
// execute main function // execute main function
var output_obj = main(inputs_obj) var output_obj = main(inputs_obj)
// convert output to json and print // convert output to json and print
var output_json = JSON.stringify(output_obj) var output_json = JSON.stringify(output_obj)
var result = `<<RESULT>>${{output_json}}<<RESULT>>` var result = `<<RESULT>>${{output_json}}<<RESULT>>`

@ -21,20 +21,20 @@ class Jinja2TemplateTransformer(TemplateTransformer):
import jinja2 import jinja2
template = jinja2.Template('''{cls._code_placeholder}''') template = jinja2.Template('''{cls._code_placeholder}''')
return template.render(**inputs) return template.render(**inputs)
import json import json
from base64 import b64decode from base64 import b64decode
# decode and prepare input dict # decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8')) inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function # execute main function
output = main(**inputs_obj) output = main(**inputs_obj)
# convert output and print # convert output and print
result = f'''<<RESULT>>{{output}}<<RESULT>>''' result = f'''<<RESULT>>{{output}}<<RESULT>>'''
print(result) print(result)
""") """)
return runner_script return runner_script
@ -43,15 +43,15 @@ class Jinja2TemplateTransformer(TemplateTransformer):
preload_script = dedent(""" preload_script = dedent("""
import jinja2 import jinja2
from base64 import b64decode from base64 import b64decode
def _jinja2_preload_(): def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue # prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('{{s}}') template = jinja2.Template('{{s}}')
template.render(s='a') template.render(s='a')
if __name__ == '__main__': if __name__ == '__main__':
_jinja2_preload_() _jinja2_preload_()
""") """)
return preload_script return preload_script

@ -9,16 +9,16 @@ class Python3TemplateTransformer(TemplateTransformer):
runner_script = dedent(f""" runner_script = dedent(f"""
# declare main function # declare main function
{cls._code_placeholder} {cls._code_placeholder}
import json import json
from base64 import b64decode from base64 import b64decode
# decode and prepare input dict # decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8')) inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function # execute main function
output_obj = main(**inputs_obj) output_obj = main(**inputs_obj)
# convert output to json and print # convert output to json and print
output_json = json.dumps(output_obj, indent=4) output_json = json.dumps(output_obj, indent=4)
result = f'''<<RESULT>>{{output_json}}<<RESULT>>''' result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''

@ -3,6 +3,8 @@ import logging
import re import re
from typing import Optional, cast from typing import Optional, cast
import json_repair
from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser
from core.llm_generator.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser from core.llm_generator.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
from core.llm_generator.prompts import ( from core.llm_generator.prompts import (
@ -366,7 +368,20 @@ class LLMGenerator:
), ),
) )
generated_json_schema = cast(str, response.message.content) raw_content = response.message.content
if not isinstance(raw_content, str):
raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}")
try:
parsed_content = json.loads(raw_content)
except json.JSONDecodeError:
parsed_content = json_repair.loads(raw_content)
if not isinstance(parsed_content, dict | list):
raise ValueError(f"Failed to parse structured output from llm: {raw_content}")
generated_json_schema = json.dumps(parsed_content, indent=2, ensure_ascii=False)
return {"output": generated_json_schema, "error": ""} return {"output": generated_json_schema, "error": ""}
except InvokeError as e: except InvokeError as e:

@ -1,5 +1,5 @@
# Written by YORKI MINAKO🤡, Edited by Xiaoyi # Written by YORKI MINAKO🤡, Edited by Xiaoyi
CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what the user's input language actually is. CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what the user's input language actually is.
Notice: the language type user uses could be diverse, which can be English, Chinese, Italian, Español, Arabic, Japanese, French, and etc. Notice: the language type user uses could be diverse, which can be English, Chinese, Italian, Español, Arabic, Japanese, French, and etc.
ENSURE your output is in the SAME language as the user's input! ENSURE your output is in the SAME language as the user's input!
Your output is restricted only to: (Input language) Intention + Subject(short as possible) Your output is restricted only to: (Input language) Intention + Subject(short as possible)
@ -58,7 +58,7 @@ User Input: yo, 你今天咋样?
"Your Output": "查询今日我的状态☺️" "Your Output": "查询今日我的状态☺️"
} }
User Input: User Input:
""" # noqa: E501 """ # noqa: E501
PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE = ( PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE = (
@ -163,11 +163,11 @@ Here is a task description for which I would like you to create a high-quality p
{{TASK_DESCRIPTION}} {{TASK_DESCRIPTION}}
</task_description> </task_description>
Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include: Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include:
- Do not include <input> or <output> section and variables in the prompt, assume user will add them at their own will. - Do not include <input> or <output> section and variables in the prompt, assume user will add them at their own will.
- Clear instructions for the AI that will be using this prompt, demarcated with <instruction> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag. - Clear instructions for the AI that will be using this prompt, demarcated with <instruction> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag.
- Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not include variables in the prompt. Give three pairs of input and output examples. - Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not include variables in the prompt. Give three pairs of input and output examples.
- Include other relevant sections demarcated with appropriate XML tags like <examples>, <instruction>. - Include other relevant sections demarcated with appropriate XML tags like <examples>, <instruction>.
- Use the same language as task description. - Use the same language as task description.
- Output in ``` xml ``` and start with <instruction> - Output in ``` xml ``` and start with <instruction>
Please generate the full prompt template with at least 300 words and output only the prompt template. Please generate the full prompt template with at least 300 words and output only the prompt template.
""" # noqa: E501 """ # noqa: E501
@ -178,28 +178,28 @@ Here is a task description for which I would like you to create a high-quality p
{{TASK_DESCRIPTION}} {{TASK_DESCRIPTION}}
</task_description> </task_description>
Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include: Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include:
- Descriptive variable names surrounded by {{ }} (two curly brackets) to indicate where the actual values will be substituted in. Choose variable names that clearly indicate the type of value expected. Variable names have to be composed of number, english alphabets and underline and nothing else. - Descriptive variable names surrounded by {{ }} (two curly brackets) to indicate where the actual values will be substituted in. Choose variable names that clearly indicate the type of value expected. Variable names have to be composed of number, english alphabets and underline and nothing else.
- Clear instructions for the AI that will be using this prompt, demarcated with <instruction> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag. - Clear instructions for the AI that will be using this prompt, demarcated with <instruction> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag.
- Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not use curly brackets any other than in <instruction> section. - Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not use curly brackets any other than in <instruction> section.
- Any other relevant sections demarcated with appropriate XML tags like <input>, <output>, etc. - Any other relevant sections demarcated with appropriate XML tags like <input>, <output>, etc.
- Use the same language as task description. - Use the same language as task description.
- Output in ``` xml ``` and start with <instruction> - Output in ``` xml ``` and start with <instruction>
Please generate the full prompt template and output only the prompt template. Please generate the full prompt template and output only the prompt template.
""" # noqa: E501 """ # noqa: E501
RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE = """ RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE = """
I need to extract the following information from the input text. The <information to be extracted> tag specifies the 'type', 'description' and 'required' of the information to be extracted. I need to extract the following information from the input text. The <information to be extracted> tag specifies the 'type', 'description' and 'required' of the information to be extracted.
<information to be extracted> <information to be extracted>
variables name bounded two double curly brackets. Variable name has to be composed of number, english alphabets and underline and nothing else. variables name bounded two double curly brackets. Variable name has to be composed of number, english alphabets and underline and nothing else.
</information to be extracted> </information to be extracted>
Step 1: Carefully read the input and understand the structure of the expected output. Step 1: Carefully read the input and understand the structure of the expected output.
Step 2: Extract relevant parameters from the provided text based on the name and description of object. Step 2: Extract relevant parameters from the provided text based on the name and description of object.
Step 3: Structure the extracted parameters to JSON object as specified in <structure>. Step 3: Structure the extracted parameters to JSON object as specified in <structure>.
Step 4: Ensure that the list of variable_names is properly formatted and valid. The output should not contain any XML tags. Output an empty list if there is no valid variable name in input text. Step 4: Ensure that the list of variable_names is properly formatted and valid. The output should not contain any XML tags. Output an empty list if there is no valid variable name in input text.
### Structure ### Structure
Here is the structure of the expected output, I should always follow the output structure. Here is the structure of the expected output, I should always follow the output structure.
["variable_name_1", "variable_name_2"] ["variable_name_1", "variable_name_2"]
### Input Text ### Input Text
@ -214,13 +214,13 @@ I should always output a valid list. Output nothing other than the list of varia
RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE = """ RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE = """
<instruction> <instruction>
Step 1: Identify the purpose of the chatbot from the variable {{TASK_DESCRIPTION}} and infer chatbot's tone (e.g., friendly, professional, etc.) to add personality traits. Step 1: Identify the purpose of the chatbot from the variable {{TASK_DESCRIPTION}} and infer chatbot's tone (e.g., friendly, professional, etc.) to add personality traits.
Step 2: Create a coherent and engaging opening statement. Step 2: Create a coherent and engaging opening statement.
Step 3: Ensure the output is welcoming and clearly explains what the chatbot is designed to do. Do not include any XML tags in the output. Step 3: Ensure the output is welcoming and clearly explains what the chatbot is designed to do. Do not include any XML tags in the output.
Please use the same language as the user's input language. If user uses chinese then generate opening statement in chinese, if user uses english then generate opening statement in english. Please use the same language as the user's input language. If user uses chinese then generate opening statement in chinese, if user uses english then generate opening statement in english.
Example Input: Example Input:
Provide customer support for an e-commerce website Provide customer support for an e-commerce website
Example Output: Example Output:
Welcome! I'm here to assist you with any questions or issues you might have with your shopping experience. Whether you're looking for product information, need help with your order, or have any other inquiries, feel free to ask. I'm friendly, helpful, and ready to support you in any way I can. Welcome! I'm here to assist you with any questions or issues you might have with your shopping experience. Whether you're looking for product information, need help with your order, or have any other inquiries, feel free to ask. I'm friendly, helpful, and ready to support you in any way I can.
<Task> <Task>
Here is the task description: {{INPUT_TEXT}} Here is the task description: {{INPUT_TEXT}}
@ -276,15 +276,15 @@ Your task is to convert simple user descriptions into properly formatted JSON Sc
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"email": { "email": {
"type": "string", "type": "string",
"format": "email" "format": "email"
}, },
"password": { "password": {
"type": "string", "type": "string",
"minLength": 8 "minLength": 8
}, },
"age": { "age": {
"type": "integer", "type": "integer",
"minimum": 18 "minimum": 18
} }

@ -307,4 +307,4 @@ Runtime Errors:
""" """
``` ```
For interface method details, see: [Interfaces](./interfaces.md). For specific implementations, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py). For interface method details, see: [Interfaces](./interfaces.md). For specific implementations, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).

@ -170,4 +170,4 @@ Runtime Errors:
""" """
``` ```
For interface method explanations, see: [Interfaces](./interfaces.md). For detailed implementation, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py). For interface method explanations, see: [Interfaces](./interfaces.md). For detailed implementation, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).

@ -294,4 +294,4 @@ provider_credential_schema:
""" """
``` ```
接口方法说明见:[Interfaces](./interfaces.md),具体实现可参考:[llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py)。 接口方法说明见:[Interfaces](./interfaces.md),具体实现可参考:[llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py)。

@ -169,4 +169,4 @@ pricing: # 价格信息
""" """
``` ```
接口方法说明见:[Interfaces](./interfaces.md),具体实现可参考:[llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py)。 接口方法说明见:[Interfaces](./interfaces.md),具体实现可参考:[llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py)。

@ -72,7 +72,7 @@ class PluginAppBackwardsInvocation(BaseBackwardsInvocation):
raise ValueError("missing query") raise ValueError("missing query")
return cls.invoke_chat_app(app, user, conversation_id, query, stream, inputs, files) return cls.invoke_chat_app(app, user, conversation_id, query, stream, inputs, files)
elif app.mode == AppMode.WORKFLOW.value: elif app.mode == AppMode.WORKFLOW:
return cls.invoke_workflow_app(app, user, stream, inputs, files) return cls.invoke_workflow_app(app, user, stream, inputs, files)
elif app.mode == AppMode.COMPLETION: elif app.mode == AppMode.COMPLETION:
return cls.invoke_completion_app(app, user, stream, inputs, files) return cls.invoke_completion_app(app, user, stream, inputs, files)

@ -239,8 +239,8 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation):
content = payload.text content = payload.text
SUMMARY_PROMPT = """You are a professional language researcher, you are interested in the language SUMMARY_PROMPT = """You are a professional language researcher, you are interested in the language
and you can quickly aimed at the main point of an webpage and reproduce it in your own words but and you can quickly aimed at the main point of an webpage and reproduce it in your own words but
retain the original meaning and keep the key points. retain the original meaning and keep the key points.
however, the text you got is too long, what you got is possible a part of the text. however, the text you got is too long, what you got is possible a part of the text.
Please summarize the text you got. Please summarize the text you got.

@ -10,4 +10,4 @@
], ],
"query_prompt": "\n\n用户{{#query#}}", "query_prompt": "\n\n用户{{#query#}}",
"stops": ["用户:"] "stops": ["用户:"]
} }

@ -6,4 +6,4 @@
], ],
"query_prompt": "{{#query#}}", "query_prompt": "{{#query#}}",
"stops": null "stops": null
} }

@ -6,4 +6,4 @@
], ],
"query_prompt": "{{#query#}}", "query_prompt": "{{#query#}}",
"stops": null "stops": null
} }

@ -156,8 +156,8 @@ class AnalyticdbVectorBySql:
values = [] values = []
id_prefix = str(uuid.uuid4()) + "_" id_prefix = str(uuid.uuid4()) + "_"
sql = f""" sql = f"""
INSERT INTO {self.table_name} INSERT INTO {self.table_name}
(id, ref_doc_id, vector, page_content, metadata_, to_tsvector) (id, ref_doc_id, vector, page_content, metadata_, to_tsvector)
VALUES (%s, %s, %s, %s, %s, to_tsvector('zh_cn', %s)); VALUES (%s, %s, %s, %s, %s, to_tsvector('zh_cn', %s));
""" """
for i, doc in enumerate(documents): for i, doc in enumerate(documents):
@ -242,7 +242,7 @@ class AnalyticdbVectorBySql:
where_clause += f"AND metadata_->>'document_id' IN ({document_ids})" where_clause += f"AND metadata_->>'document_id' IN ({document_ids})"
with self._get_cursor() as cur: with self._get_cursor() as cur:
cur.execute( cur.execute(
f"""SELECT id, vector, page_content, metadata_, f"""SELECT id, vector, page_content, metadata_,
ts_rank(to_tsvector, to_tsquery_from_text(%s, 'zh_cn'), 32) AS score ts_rank(to_tsvector, to_tsquery_from_text(%s, 'zh_cn'), 32) AS score
FROM {self.table_name} FROM {self.table_name}
WHERE to_tsvector@@to_tsquery_from_text(%s, 'zh_cn') {where_clause} WHERE to_tsvector@@to_tsquery_from_text(%s, 'zh_cn') {where_clause}

@ -27,8 +27,8 @@ class MilvusConfig(BaseModel):
uri: str # Milvus server URI uri: str # Milvus server URI
token: Optional[str] = None # Optional token for authentication token: Optional[str] = None # Optional token for authentication
user: str # Username for authentication user: Optional[str] = None # Username for authentication
password: str # Password for authentication password: Optional[str] = None # Password for authentication
batch_size: int = 100 # Batch size for operations batch_size: int = 100 # Batch size for operations
database: str = "default" # Database name database: str = "default" # Database name
enable_hybrid_search: bool = False # Flag to enable hybrid search enable_hybrid_search: bool = False # Flag to enable hybrid search
@ -43,10 +43,11 @@ class MilvusConfig(BaseModel):
""" """
if not values.get("uri"): if not values.get("uri"):
raise ValueError("config MILVUS_URI is required") raise ValueError("config MILVUS_URI is required")
if not values.get("user"): if not values.get("token"):
raise ValueError("config MILVUS_USER is required") if not values.get("user"):
if not values.get("password"): raise ValueError("config MILVUS_USER is required")
raise ValueError("config MILVUS_PASSWORD is required") if not values.get("password"):
raise ValueError("config MILVUS_PASSWORD is required")
return values return values
def to_milvus_params(self): def to_milvus_params(self):
@ -356,11 +357,14 @@ class MilvusVector(BaseVector):
) )
redis_client.set(collection_exist_cache_key, 1, ex=3600) redis_client.set(collection_exist_cache_key, 1, ex=3600)
def _init_client(self, config) -> MilvusClient: def _init_client(self, config: MilvusConfig) -> MilvusClient:
""" """
Initialize and return a Milvus client. Initialize and return a Milvus client.
""" """
client = MilvusClient(uri=config.uri, user=config.user, password=config.password, db_name=config.database) if config.token:
client = MilvusClient(uri=config.uri, token=config.token, db_name=config.database)
else:
client = MilvusClient(uri=config.uri, user=config.user, password=config.password, db_name=config.database)
return client return client

@ -203,7 +203,7 @@ class OceanBaseVector(BaseVector):
full_sql = f"""SELECT metadata, text, MATCH (text) AGAINST (:query) AS score full_sql = f"""SELECT metadata, text, MATCH (text) AGAINST (:query) AS score
FROM {self._collection_name} FROM {self._collection_name}
WHERE MATCH (text) AGAINST (:query) > 0 WHERE MATCH (text) AGAINST (:query) > 0
{where_clause} {where_clause}
ORDER BY score DESC ORDER BY score DESC
LIMIT {top_k}""" LIMIT {top_k}"""

@ -59,12 +59,12 @@ CREATE TABLE IF NOT EXISTS {table_name} (
""" """
SQL_CREATE_INDEX_PQ = """ SQL_CREATE_INDEX_PQ = """
CREATE INDEX IF NOT EXISTS embedding_{table_name}_pq_idx ON {table_name} CREATE INDEX IF NOT EXISTS embedding_{table_name}_pq_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64, enable_pq=on, pq_m={pq_m}); USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64, enable_pq=on, pq_m={pq_m});
""" """
SQL_CREATE_INDEX = """ SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_{table_name}_idx ON {table_name} CREATE INDEX IF NOT EXISTS embedding_cosine_{table_name}_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
""" """

@ -1,10 +1,9 @@
import json import json
import logging import logging
import ssl from typing import Any, Literal, Optional
from typing import Any, Optional
from uuid import uuid4 from uuid import uuid4
from opensearchpy import OpenSearch, helpers from opensearchpy import OpenSearch, Urllib3AWSV4SignerAuth, Urllib3HttpConnection, helpers
from opensearchpy.helpers import BulkIndexError from opensearchpy.helpers import BulkIndexError
from pydantic import BaseModel, model_validator from pydantic import BaseModel, model_validator
@ -24,9 +23,12 @@ logger = logging.getLogger(__name__)
class OpenSearchConfig(BaseModel): class OpenSearchConfig(BaseModel):
host: str host: str
port: int port: int
secure: bool = False
auth_method: Literal["basic", "aws_managed_iam"] = "basic"
user: Optional[str] = None user: Optional[str] = None
password: Optional[str] = None password: Optional[str] = None
secure: bool = False aws_region: Optional[str] = None
aws_service: Optional[str] = None
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
@ -35,24 +37,40 @@ class OpenSearchConfig(BaseModel):
raise ValueError("config OPENSEARCH_HOST is required") raise ValueError("config OPENSEARCH_HOST is required")
if not values.get("port"): if not values.get("port"):
raise ValueError("config OPENSEARCH_PORT is required") raise ValueError("config OPENSEARCH_PORT is required")
if values.get("auth_method") == "aws_managed_iam":
if not values.get("aws_region"):
raise ValueError("config OPENSEARCH_AWS_REGION is required for AWS_MANAGED_IAM auth method")
if not values.get("aws_service"):
raise ValueError("config OPENSEARCH_AWS_SERVICE is required for AWS_MANAGED_IAM auth method")
return values return values
def create_ssl_context(self) -> ssl.SSLContext: def create_aws_managed_iam_auth(self) -> Urllib3AWSV4SignerAuth:
ssl_context = ssl.create_default_context() import boto3 # type: ignore
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE # Disable Certificate Validation return Urllib3AWSV4SignerAuth(
return ssl_context credentials=boto3.Session().get_credentials(),
region=self.aws_region,
service=self.aws_service, # type: ignore[arg-type]
)
def to_opensearch_params(self) -> dict[str, Any]: def to_opensearch_params(self) -> dict[str, Any]:
params = { params = {
"hosts": [{"host": self.host, "port": self.port}], "hosts": [{"host": self.host, "port": self.port}],
"use_ssl": self.secure, "use_ssl": self.secure,
"verify_certs": self.secure, "verify_certs": self.secure,
"connection_class": Urllib3HttpConnection,
"pool_maxsize": 20,
} }
if self.user and self.password:
if self.auth_method == "basic":
logger.info("Using basic authentication for OpenSearch Vector DB")
params["http_auth"] = (self.user, self.password) params["http_auth"] = (self.user, self.password)
if self.secure: elif self.auth_method == "aws_managed_iam":
params["ssl_context"] = self.create_ssl_context() logger.info("Using AWS managed IAM role for OpenSearch Vector DB")
params["http_auth"] = self.create_aws_managed_iam_auth()
return params return params
@ -76,16 +94,23 @@ class OpenSearchVector(BaseVector):
action = { action = {
"_op_type": "index", "_op_type": "index",
"_index": self._collection_name.lower(), "_index": self._collection_name.lower(),
"_id": uuid4().hex,
"_source": { "_source": {
Field.CONTENT_KEY.value: documents[i].page_content, Field.CONTENT_KEY.value: documents[i].page_content,
Field.VECTOR.value: embeddings[i], # Make sure you pass an array here Field.VECTOR.value: embeddings[i], # Make sure you pass an array here
Field.METADATA_KEY.value: documents[i].metadata, Field.METADATA_KEY.value: documents[i].metadata,
}, },
} }
# See https://github.com/langchain-ai/langchainjs/issues/4346#issuecomment-1935123377
if self._client_config.aws_service not in ["aoss"]:
action["_id"] = uuid4().hex
actions.append(action) actions.append(action)
helpers.bulk(self._client, actions) helpers.bulk(
client=self._client,
actions=actions,
timeout=30,
max_retries=3,
)
def get_ids_by_metadata_field(self, key: str, value: str): def get_ids_by_metadata_field(self, key: str, value: str):
query = {"query": {"term": {f"{Field.METADATA_KEY.value}.{key}": value}}} query = {"query": {"term": {f"{Field.METADATA_KEY.value}.{key}": value}}}
@ -234,6 +259,7 @@ class OpenSearchVector(BaseVector):
}, },
} }
logger.info(f"Creating OpenSearch index {self._collection_name.lower()}")
self._client.indices.create(index=self._collection_name.lower(), body=index_body) self._client.indices.create(index=self._collection_name.lower(), body=index_body)
redis_client.set(collection_exist_cache_key, 1, ex=3600) redis_client.set(collection_exist_cache_key, 1, ex=3600)
@ -252,9 +278,12 @@ class OpenSearchVectorFactory(AbstractVectorFactory):
open_search_config = OpenSearchConfig( open_search_config = OpenSearchConfig(
host=dify_config.OPENSEARCH_HOST or "localhost", host=dify_config.OPENSEARCH_HOST or "localhost",
port=dify_config.OPENSEARCH_PORT, port=dify_config.OPENSEARCH_PORT,
secure=dify_config.OPENSEARCH_SECURE,
auth_method=dify_config.OPENSEARCH_AUTH_METHOD.value,
user=dify_config.OPENSEARCH_USER, user=dify_config.OPENSEARCH_USER,
password=dify_config.OPENSEARCH_PASSWORD, password=dify_config.OPENSEARCH_PASSWORD,
secure=dify_config.OPENSEARCH_SECURE, aws_region=dify_config.OPENSEARCH_AWS_REGION,
aws_service=dify_config.OPENSEARCH_AWS_SERVICE,
) )
return OpenSearchVector(collection_name=collection_name, config=open_search_config) return OpenSearchVector(collection_name=collection_name, config=open_search_config)

@ -59,8 +59,8 @@ CREATE TABLE IF NOT EXISTS {table_name} (
) )
""" """
SQL_CREATE_INDEX = """ SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS idx_docs_{table_name} ON {table_name}(text) CREATE INDEX IF NOT EXISTS idx_docs_{table_name} ON {table_name}(text)
INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS
('FILTER CTXSYS.NULL_FILTER SECTION GROUP CTXSYS.HTML_SECTION_GROUP LEXER world_lexer') ('FILTER CTXSYS.NULL_FILTER SECTION GROUP CTXSYS.HTML_SECTION_GROUP LEXER world_lexer')
""" """
@ -164,7 +164,7 @@ class OracleVector(BaseVector):
with conn.cursor() as cur: with conn.cursor() as cur:
try: try:
cur.execute( cur.execute(
f"""INSERT INTO {self.table_name} (id, text, meta, embedding) f"""INSERT INTO {self.table_name} (id, text, meta, embedding)
VALUES (:1, :2, :3, :4)""", VALUES (:1, :2, :3, :4)""",
value, value,
) )
@ -227,8 +227,8 @@ class OracleVector(BaseVector):
conn.outputtypehandler = self.output_type_handler conn.outputtypehandler = self.output_type_handler
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute( cur.execute(
f"""SELECT meta, text, vector_distance(embedding,(select to_vector(:1) from dual),cosine) f"""SELECT meta, text, vector_distance(embedding,(select to_vector(:1) from dual),cosine)
AS distance FROM {self.table_name} AS distance FROM {self.table_name}
{where_clause} ORDER BY distance fetch first {top_k} rows only""", {where_clause} ORDER BY distance fetch first {top_k} rows only""",
[numpy.array(query_vector)], [numpy.array(query_vector)],
) )
@ -290,7 +290,7 @@ class OracleVector(BaseVector):
document_ids = ", ".join(f"'{id}'" for id in document_ids_filter) document_ids = ", ".join(f"'{id}'" for id in document_ids_filter)
where_clause = f" AND metadata->>'document_id' in ({document_ids}) " where_clause = f" AND metadata->>'document_id' in ({document_ids}) "
cur.execute( cur.execute(
f"""select meta, text, embedding FROM {self.table_name} f"""select meta, text, embedding FROM {self.table_name}
WHERE CONTAINS(text, :kk, 1) > 0 {where_clause} WHERE CONTAINS(text, :kk, 1) > 0 {where_clause}
order by score(1) desc fetch first {top_k} rows only""", order by score(1) desc fetch first {top_k} rows only""",
kk=" ACCUM ".join(entities), kk=" ACCUM ".join(entities),

@ -61,7 +61,7 @@ CREATE TABLE IF NOT EXISTS {table_name} (
""" """
SQL_CREATE_INDEX = """ SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
""" """

@ -58,7 +58,7 @@ CREATE TABLE IF NOT EXISTS {table_name} (
""" """
SQL_CREATE_INDEX = """ SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding floatvector_cosine_ops) WITH (m = 16, ef_construction = 64); USING hnsw (embedding floatvector_cosine_ops) WITH (m = 16, ef_construction = 64);
""" """

@ -205,9 +205,9 @@ class TiDBVector(BaseVector):
with Session(self._engine) as session: with Session(self._engine) as session:
select_statement = sql_text(f""" select_statement = sql_text(f"""
SELECT meta, text, distance SELECT meta, text, distance
FROM ( FROM (
SELECT SELECT
meta, meta,
text, text,
{tidb_dist_func}(vector, :query_vector_str) AS distance {tidb_dist_func}(vector, :query_vector_str) AS distance

@ -52,14 +52,16 @@ class RerankModelRunner(BaseRerankRunner):
rerank_documents = [] rerank_documents = []
for result in rerank_result.docs: for result in rerank_result.docs:
# format document if score_threshold is None or result.score >= score_threshold:
rerank_document = Document( # format document
page_content=result.text, rerank_document = Document(
metadata=documents[result.index].metadata, page_content=result.text,
provider=documents[result.index].provider, metadata=documents[result.index].metadata,
) provider=documents[result.index].provider,
if rerank_document.metadata is not None: )
rerank_document.metadata["score"] = result.score if rerank_document.metadata is not None:
rerank_documents.append(rerank_document) rerank_document.metadata["score"] = result.score
rerank_documents.append(rerank_document)
return rerank_documents rerank_documents.sort(key=lambda x: x.metadata.get("score", 0.0), reverse=True)
return rerank_documents[:top_n] if top_n else rerank_documents

@ -50,7 +50,7 @@ You are a text metadata extract engine that extract text's metadata based on use
# Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator". # Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format ### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields. The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint ### Constraint
DO NOT include anything other than the JSON array in your response. DO NOT include anything other than the JSON array in your response.
### Example ### Example
Here is the chat example between human and assistant, inside <example></example> XML tags. Here is the chat example between human and assistant, inside <example></example> XML tags.
@ -59,7 +59,7 @@ User:{{"input_text": ["I want to know which companys email address test@examp
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}} Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}} User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}} Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
</example> </example>
### User Input ### User Input
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}} {{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
### Assistant Output ### Assistant Output

@ -6,8 +6,8 @@ from core.tools.entities.tool_entities import ToolProviderType
from core.tools.utils.model_invocation_utils import ModelInvocationUtils from core.tools.utils.model_invocation_utils import ModelInvocationUtils
_SUMMARY_PROMPT = """You are a professional language researcher, you are interested in the language _SUMMARY_PROMPT = """You are a professional language researcher, you are interested in the language
and you can quickly aimed at the main point of an webpage and reproduce it in your own words but and you can quickly aimed at the main point of an webpage and reproduce it in your own words but
retain the original meaning and keep the key points. retain the original meaning and keep the key points.
however, the text you got is too long, what you got is possible a part of the text. however, the text you got is too long, what you got is possible a part of the text.
Please summarize the text you got. Please summarize the text you got.
""" """

@ -246,7 +246,7 @@ class ToolEngine:
+ "you do not need to create it, just tell the user to check it now." + "you do not need to create it, just tell the user to check it now."
) )
elif response.type == ToolInvokeMessage.MessageType.JSON: elif response.type == ToolInvokeMessage.MessageType.JSON:
result = json.dumps( result += json.dumps(
cast(ToolInvokeMessage.JsonMessage, response.message).json_object, ensure_ascii=False cast(ToolInvokeMessage.JsonMessage, response.message).json_object, ensure_ascii=False
) )
else: else:

@ -11,6 +11,7 @@ import docx
import pandas as pd import pandas as pd
import pypandoc # type: ignore import pypandoc # type: ignore
import pypdfium2 # type: ignore import pypdfium2 # type: ignore
import webvtt # type: ignore
import yaml # type: ignore import yaml # type: ignore
from docx.document import Document from docx.document import Document
from docx.oxml.table import CT_Tbl from docx.oxml.table import CT_Tbl
@ -132,6 +133,10 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
return _extract_text_from_json(file_content) return _extract_text_from_json(file_content)
case "application/x-yaml" | "text/yaml": case "application/x-yaml" | "text/yaml":
return _extract_text_from_yaml(file_content) return _extract_text_from_yaml(file_content)
case "text/vtt":
return _extract_text_from_vtt(file_content)
case "text/properties":
return _extract_text_from_properties(file_content)
case _: case _:
raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}") raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")
@ -139,7 +144,7 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str: def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str:
"""Extract text from a file based on its file extension.""" """Extract text from a file based on its file extension."""
match file_extension: match file_extension:
case ".txt" | ".markdown" | ".md" | ".html" | ".htm" | ".xml" | ".vtt": case ".txt" | ".markdown" | ".md" | ".html" | ".htm" | ".xml":
return _extract_text_from_plain_text(file_content) return _extract_text_from_plain_text(file_content)
case ".json": case ".json":
return _extract_text_from_json(file_content) return _extract_text_from_json(file_content)
@ -165,6 +170,10 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str)
return _extract_text_from_eml(file_content) return _extract_text_from_eml(file_content)
case ".msg": case ".msg":
return _extract_text_from_msg(file_content) return _extract_text_from_msg(file_content)
case ".vtt":
return _extract_text_from_vtt(file_content)
case ".properties":
return _extract_text_from_properties(file_content)
case _: case _:
raise UnsupportedFileTypeError(f"Unsupported Extension Type: {file_extension}") raise UnsupportedFileTypeError(f"Unsupported Extension Type: {file_extension}")
@ -214,8 +223,8 @@ def _extract_text_from_doc(file_content: bytes) -> str:
""" """
from unstructured.partition.api import partition_via_api from unstructured.partition.api import partition_via_api
if not (dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY): if not dify_config.UNSTRUCTURED_API_URL:
raise TextExtractionError("UNSTRUCTURED_API_URL and UNSTRUCTURED_API_KEY must be set") raise TextExtractionError("UNSTRUCTURED_API_URL must be set")
try: try:
with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file: with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:
@ -226,7 +235,7 @@ def _extract_text_from_doc(file_content: bytes) -> str:
file=file, file=file,
metadata_filename=temp_file.name, metadata_filename=temp_file.name,
api_url=dify_config.UNSTRUCTURED_API_URL, api_url=dify_config.UNSTRUCTURED_API_URL,
api_key=dify_config.UNSTRUCTURED_API_KEY, api_key=dify_config.UNSTRUCTURED_API_KEY, # type: ignore
) )
os.unlink(temp_file.name) os.unlink(temp_file.name)
return "\n".join([getattr(element, "text", "") for element in elements]) return "\n".join([getattr(element, "text", "") for element in elements])
@ -462,3 +471,68 @@ def _extract_text_from_msg(file_content: bytes) -> str:
return "\n".join([str(element) for element in elements]) return "\n".join([str(element) for element in elements])
except Exception as e: except Exception as e:
raise TextExtractionError(f"Failed to extract text from MSG: {str(e)}") from e raise TextExtractionError(f"Failed to extract text from MSG: {str(e)}") from e
def _extract_text_from_vtt(vtt_bytes: bytes) -> str:
    """Convert WebVTT subtitle bytes into a speaker-annotated transcript.

    The payload is decoded with ``_extract_text_from_plain_text``, a leading
    byte-order mark is stripped, and the captions are parsed with
    ``webvtt.from_string``. Consecutive captions that share the same voice
    (speaker) are merged into one utterance joined by single spaces. Captions
    without a voice are never merged; each is emitted on its own line with its
    own text, in original order.

    Args:
        vtt_bytes: Raw content of the ``.vtt`` file.

    Returns:
        One line per utterance formatted as ``<speaker> "<text>"`` (the speaker
        part is empty when the caption has no voice), joined by newlines.
        An input without captions yields an empty string.
    """
    text = _extract_text_from_plain_text(vtt_bytes)

    # Remove a leading BOM before handing the text to the parser.
    text = text.lstrip("\ufeff")

    # Each entry is a (speaker, text) pair; speaker is None for voiceless captions.
    merged_results = []
    for caption in webvtt.from_string(text):
        speaker, utterance = caption.voice, caption.text
        same_named_speaker = bool(merged_results) and speaker is not None and merged_results[-1][0] == speaker
        if same_named_speaker:
            # Same speaker keeps talking: extend the previous utterance.
            previous_text = merged_results[-1][1]
            merged_results[-1] = (speaker, previous_text + " " + utterance)
        else:
            # Speaker changed, or the caption has no voice: start a new entry
            # carrying this caption's own text.
            merged_results.append((speaker, utterance))

    # Return the result in the specified format: Speaker "text" style
    formatted = [f'{spk or ""} "{txt}"' for spk, txt in merged_results]
    return "\n".join(formatted)
def _extract_text_from_properties(file_content: bytes) -> str:
    """Render a ``.properties`` file as readable ``key: value`` lines.

    The payload is decoded with ``_extract_text_from_plain_text`` and processed
    line by line. Every line is stripped of surrounding whitespace. Blank lines
    and comment lines (starting with ``#`` or ``!``) are kept as they are after
    stripping. Every other line is split at the first ``=`` or ``:`` —
    whichever occurs first — so that separator characters inside the value
    (for example the ``=`` in ``url: http://host/?a=b``) stay part of the
    value. A line without any separator is treated as a key with an empty
    value.

    Args:
        file_content: Raw content of the ``.properties`` file.

    Returns:
        The normalized lines joined by newlines.

    Raises:
        TextExtractionError: If decoding or processing the content fails.
    """
    try:
        text = _extract_text_from_plain_text(file_content)
        result = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            # Preserve comments and empty lines
            if not line or line.startswith("#") or line.startswith("!"):
                result.append(line)
                continue

            # The key ends at the earliest separator, not at the first "=" found
            # anywhere in the line.
            separator_positions = [pos for pos in (line.find("="), line.find(":")) if pos != -1]
            if separator_positions:
                split_at = min(separator_positions)
                key, value = line[:split_at], line[split_at + 1 :]
            else:
                key, value = line, ""

            result.append(f"{key.strip()}: {value.strip()}")

        return "\n".join(result)
    except Exception as e:
        raise TextExtractionError(f"Failed to extract text from properties file: {str(e)}") from e

@ -50,7 +50,7 @@ You are a text metadata extract engine that extract text's metadata based on use
# Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator". # Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format ### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields. The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint ### Constraint
DO NOT include anything other than the JSON array in your response. DO NOT include anything other than the JSON array in your response.
### Example ### Example
Here is the chat example between human and assistant, inside <example></example> XML tags. Here is the chat example between human and assistant, inside <example></example> XML tags.
@ -59,7 +59,7 @@ User:{{"input_text": ["I want to know which companys email address test@examp
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}} Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}} User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}} Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
</example> </example>
### User Input ### User Input
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}} {{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
### Assistant Output ### Assistant Output

@ -17,7 +17,7 @@ Some additional information is provided below. Always adhere to these instructio
</instruction> </instruction>
Steps: Steps:
1. Review the chat history provided within the <histories> tags. 1. Review the chat history provided within the <histories> tags.
2. Extract the relevant information based on the criteria given, output multiple values if there is multiple relevant information that match the criteria in the given text. 2. Extract the relevant information based on the criteria given, output multiple values if there is multiple relevant information that match the criteria in the given text.
3. Generate a well-formatted output using the defined functions and arguments. 3. Generate a well-formatted output using the defined functions and arguments.
4. Use the `extract_parameter` function to create structured outputs with appropriate parameters. 4. Use the `extract_parameter` function to create structured outputs with appropriate parameters.
5. Do not include any XML tags in your output. 5. Do not include any XML tags in your output.
@ -89,13 +89,13 @@ Some extra information are provided below, I should always follow the instructio
</instructions> </instructions>
### Extract parameter Workflow ### Extract parameter Workflow
I need to extract the following information from the input text. The <information to be extracted> tag specifies the 'type', 'description' and 'required' of the information to be extracted. I need to extract the following information from the input text. The <information to be extracted> tag specifies the 'type', 'description' and 'required' of the information to be extracted.
<information to be extracted> <information to be extracted>
{{ structure }} {{ structure }}
</information to be extracted> </information to be extracted>
Step 1: Carefully read the input and understand the structure of the expected output. Step 1: Carefully read the input and understand the structure of the expected output.
Step 2: Extract relevant parameters from the provided text based on the name and description of object. Step 2: Extract relevant parameters from the provided text based on the name and description of object.
Step 3: Structure the extracted parameters to JSON object as specified in <structure>. Step 3: Structure the extracted parameters to JSON object as specified in <structure>.
Step 4: Ensure that the JSON object is properly formatted and valid. The output should not contain any XML tags. Only the JSON object should be outputted. Step 4: Ensure that the JSON object is properly formatted and valid. The output should not contain any XML tags. Only the JSON object should be outputted.
@ -106,10 +106,10 @@ Here are the chat histories between human and assistant, inside <histories></his
</histories> </histories>
### Structure ### Structure
Here is the structure of the expected output, I should always follow the output structure. Here is the structure of the expected output, I should always follow the output structure.
{{γγγ {{γγγ
'properties1': 'relevant text extracted from input', 'properties1': 'relevant text extracted from input',
'properties2': 'relevant text extracted from input', 'properties2': 'relevant text extracted from input',
}}γγγ }}γγγ
### Input Text ### Input Text
@ -119,7 +119,7 @@ Inside <text></text> XML tags, there is a text that I should extract parameters
</text> </text>
### Answer ### Answer
I should always output a valid JSON object. Output nothing other than the JSON object. I should always output a valid JSON object. Output nothing other than the JSON object.
```JSON ```JSON
""" # noqa: E501 """ # noqa: E501

@ -55,7 +55,7 @@ You are a text classification engine that analyzes text data and assigns categor
Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output. Additionally, you need to extract the key words from the text that are related to the classification. Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output. Additionally, you need to extract the key words from the text that are related to the classification.
### Format ### Format
The input text is in the variable input_text. Categories are specified as a category list with two filed category_id and category_name in the variable categories. Classification instructions may be included to improve the classification accuracy. The input text is in the variable input_text. Categories are specified as a category list with two filed category_id and category_name in the variable categories. Classification instructions may be included to improve the classification accuracy.
### Constraint ### Constraint
DO NOT include anything other than the JSON array in your response. DO NOT include anything other than the JSON array in your response.
### Example ### Example
Here is the chat example between human and assistant, inside <example></example> XML tags. Here is the chat example between human and assistant, inside <example></example> XML tags.
@ -64,7 +64,7 @@ User:{{"input_text": ["I recently had a great experience with your company. The
Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}} Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"}},{{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"}},{{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}}], "classification_instructions": []}} User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"}},{{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"}},{{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}}], "classification_instructions": []}}
Assistant:{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Experience"}} Assistant:{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Experience"}}
</example> </example>
### Memory ### Memory
Here are the chat histories between human and assistant, inside <histories></histories> XML tags. Here are the chat histories between human and assistant, inside <histories></histories> XML tags.
<histories> <histories>

@ -9,6 +9,7 @@ from core.app.apps.base_app_queue_manager import GenerateTaskStoppedError
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from core.file.models import File from core.file.models import File
from core.workflow.callbacks import WorkflowCallback from core.workflow.callbacks import WorkflowCallback
from core.workflow.constants import ENVIRONMENT_VARIABLE_NODE_ID
from core.workflow.entities.variable_pool import VariablePool from core.workflow.entities.variable_pool import VariablePool
from core.workflow.errors import WorkflowNodeRunFailedError from core.workflow.errors import WorkflowNodeRunFailedError
from core.workflow.graph_engine.entities.event import GraphEngineEvent, GraphRunFailedEvent, InNodeEvent from core.workflow.graph_engine.entities.event import GraphEngineEvent, GraphRunFailedEvent, InNodeEvent
@ -364,4 +365,5 @@ class WorkflowEntry:
input_value = file_factory.build_from_mappings(mappings=input_value, tenant_id=tenant_id) input_value = file_factory.build_from_mappings(mappings=input_value, tenant_id=tenant_id)
# append variable and value to variable pool # append variable and value to variable pool
variable_pool.add([variable_node_id] + variable_key_list, input_value) if variable_node_id != ENVIRONMENT_VARIABLE_NODE_ID:
variable_pool.add([variable_node_id] + variable_key_list, input_value)

@ -20,7 +20,8 @@ if [[ "${MODE}" == "worker" ]]; then
CONCURRENCY_OPTION="-c ${CELERY_WORKER_AMOUNT:-1}" CONCURRENCY_OPTION="-c ${CELERY_WORKER_AMOUNT:-1}"
fi fi
exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION --loglevel ${LOG_LEVEL:-INFO} \ exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION \
--max-tasks-per-child ${MAX_TASK_PRE_CHILD:-50} --loglevel ${LOG_LEVEL:-INFO} \
-Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion} -Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion}
elif [[ "${MODE}" == "beat" ]]; then elif [[ "${MODE}" == "beat" ]]; then

@ -8,192 +8,197 @@ from typing import Union
from celery.signals import worker_init # type: ignore from celery.signals import worker_init # type: ignore
from flask_login import user_loaded_from_request, user_logged_in # type: ignore from flask_login import user_loaded_from_request, user_logged_in # type: ignore
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.instrumentation.flask import FlaskInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider
from opentelemetry.propagate import set_global_textmap
from opentelemetry.propagators.b3 import B3Format
from opentelemetry.propagators.composite import CompositePropagator
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
ConsoleSpanExporter,
)
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
from opentelemetry.semconv.resource import ResourceAttributes
from opentelemetry.trace import Span, get_current_span, get_tracer_provider, set_tracer_provider
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry.trace.status import StatusCode
from configs import dify_config from configs import dify_config
from dify_app import DifyApp from dify_app import DifyApp
class ExceptionLoggingHandler(logging.Handler):
"""Custom logging handler that creates spans for logging.exception() calls"""
def emit(self, record):
try:
if record.exc_info:
tracer = get_tracer_provider().get_tracer("dify.exception.logging")
with tracer.start_as_current_span(
"log.exception",
attributes={
"log.level": record.levelname,
"log.message": record.getMessage(),
"log.logger": record.name,
"log.file.path": record.pathname,
"log.file.line": record.lineno,
},
) as span:
span.set_status(StatusCode.ERROR)
span.record_exception(record.exc_info[1])
span.set_attribute("exception.type", record.exc_info[0].__name__)
span.set_attribute("exception.message", str(record.exc_info[1]))
except Exception:
pass
@user_logged_in.connect @user_logged_in.connect
@user_loaded_from_request.connect @user_loaded_from_request.connect
def on_user_loaded(_sender, user): def on_user_loaded(_sender, user):
if user:
current_span = get_current_span()
if current_span:
current_span.set_attribute("service.tenant.id", user.current_tenant_id)
current_span.set_attribute("service.user.id", user.id)
def init_app(app: DifyApp):
if dify_config.ENABLE_OTEL: if dify_config.ENABLE_OTEL:
setup_context_propagation() from opentelemetry.trace import get_current_span
# Initialize OpenTelemetry
# Follow Semantic Convertions 1.32.0 to define resource attributes if user:
resource = Resource( current_span = get_current_span()
attributes={ if current_span:
ResourceAttributes.SERVICE_NAME: dify_config.APPLICATION_NAME, current_span.set_attribute("service.tenant.id", user.current_tenant_id)
ResourceAttributes.SERVICE_VERSION: f"dify-{dify_config.CURRENT_VERSION}-{dify_config.COMMIT_SHA}", current_span.set_attribute("service.user.id", user.id)
ResourceAttributes.PROCESS_PID: os.getpid(),
ResourceAttributes.DEPLOYMENT_ENVIRONMENT: f"{dify_config.DEPLOY_ENV}-{dify_config.EDITION}",
ResourceAttributes.HOST_NAME: socket.gethostname(),
ResourceAttributes.HOST_ARCH: platform.machine(),
"custom.deployment.git_commit": dify_config.COMMIT_SHA,
ResourceAttributes.HOST_ID: platform.node(),
ResourceAttributes.OS_TYPE: platform.system().lower(),
ResourceAttributes.OS_DESCRIPTION: platform.platform(),
ResourceAttributes.OS_VERSION: platform.version(),
}
)
sampler = ParentBasedTraceIdRatio(dify_config.OTEL_SAMPLING_RATE)
provider = TracerProvider(resource=resource, sampler=sampler)
set_tracer_provider(provider)
exporter: Union[OTLPSpanExporter, ConsoleSpanExporter]
metric_exporter: Union[OTLPMetricExporter, ConsoleMetricExporter]
if dify_config.OTEL_EXPORTER_TYPE == "otlp":
exporter = OTLPSpanExporter(
endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/traces",
headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"},
)
metric_exporter = OTLPMetricExporter(
endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/metrics",
headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"},
)
else:
# Fallback to console exporter
exporter = ConsoleSpanExporter()
metric_exporter = ConsoleMetricExporter()
provider.add_span_processor(
BatchSpanProcessor(
exporter,
max_queue_size=dify_config.OTEL_MAX_QUEUE_SIZE,
schedule_delay_millis=dify_config.OTEL_BATCH_EXPORT_SCHEDULE_DELAY,
max_export_batch_size=dify_config.OTEL_MAX_EXPORT_BATCH_SIZE,
export_timeout_millis=dify_config.OTEL_BATCH_EXPORT_TIMEOUT,
)
)
reader = PeriodicExportingMetricReader(
metric_exporter,
export_interval_millis=dify_config.OTEL_METRIC_EXPORT_INTERVAL,
export_timeout_millis=dify_config.OTEL_METRIC_EXPORT_TIMEOUT,
)
set_meter_provider(MeterProvider(resource=resource, metric_readers=[reader]))
if not is_celery_worker():
init_flask_instrumentor(app)
CeleryInstrumentor(tracer_provider=get_tracer_provider(), meter_provider=get_meter_provider()).instrument()
instrument_exception_logging()
init_sqlalchemy_instrumentor(app)
atexit.register(shutdown_tracer)
def is_celery_worker(): def init_app(app: DifyApp):
return "celery" in sys.argv[0].lower() def is_celery_worker():
return "celery" in sys.argv[0].lower()
def instrument_exception_logging():
exception_handler = ExceptionLoggingHandler()
logging.getLogger().addHandler(exception_handler)
def instrument_exception_logging(): def init_flask_instrumentor(app: DifyApp):
exception_handler = ExceptionLoggingHandler() meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION)
logging.getLogger().addHandler(exception_handler) _http_response_counter = meter.create_counter(
"http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}"
)
def response_hook(span: Span, status: str, response_headers: list):
if span and span.is_recording():
if status.startswith("2"):
span.set_status(StatusCode.OK)
else:
span.set_status(StatusCode.ERROR, status)
status = status.split(" ")[0]
status_code = int(status)
status_class = f"{status_code // 100}xx"
_http_response_counter.add(1, {"status_code": status_code, "status_class": status_class})
instrumentor = FlaskInstrumentor()
if dify_config.DEBUG:
logging.info("Initializing Flask instrumentor")
instrumentor.instrument_app(app, response_hook=response_hook)
def init_sqlalchemy_instrumentor(app: DifyApp):
with app.app_context():
engines = list(app.extensions["sqlalchemy"].engines.values())
SQLAlchemyInstrumentor().instrument(enable_commenter=True, engines=engines)
def setup_context_propagation():
# Configure propagators
set_global_textmap(
CompositePropagator(
[
TraceContextTextMapPropagator(), # W3C trace context
B3Format(), # B3 propagation (used by many systems)
]
)
)
def init_flask_instrumentor(app: DifyApp): def shutdown_tracer():
meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION) provider = trace.get_tracer_provider()
_http_response_counter = meter.create_counter( if hasattr(provider, "force_flush"):
"http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}" provider.force_flush()
class ExceptionLoggingHandler(logging.Handler):
"""Custom logging handler that creates spans for logging.exception() calls"""
def emit(self, record):
try:
if record.exc_info:
tracer = get_tracer_provider().get_tracer("dify.exception.logging")
with tracer.start_as_current_span(
"log.exception",
attributes={
"log.level": record.levelname,
"log.message": record.getMessage(),
"log.logger": record.name,
"log.file.path": record.pathname,
"log.file.line": record.lineno,
},
) as span:
span.set_status(StatusCode.ERROR)
span.record_exception(record.exc_info[1])
span.set_attribute("exception.type", record.exc_info[0].__name__)
span.set_attribute("exception.message", str(record.exc_info[1]))
except Exception:
pass
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.instrumentation.flask import FlaskInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider
from opentelemetry.propagate import set_global_textmap
from opentelemetry.propagators.b3 import B3Format
from opentelemetry.propagators.composite import CompositePropagator
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
ConsoleSpanExporter,
) )
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
def response_hook(span: Span, status: str, response_headers: list): from opentelemetry.semconv.resource import ResourceAttributes
if span and span.is_recording(): from opentelemetry.trace import Span, get_tracer_provider, set_tracer_provider
if status.startswith("2"): from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
span.set_status(StatusCode.OK) from opentelemetry.trace.status import StatusCode
else:
span.set_status(StatusCode.ERROR, status) setup_context_propagation()
# Initialize OpenTelemetry
status = status.split(" ")[0] # Follow Semantic Convertions 1.32.0 to define resource attributes
status_code = int(status) resource = Resource(
status_class = f"{status_code // 100}xx" attributes={
_http_response_counter.add(1, {"status_code": status_code, "status_class": status_class}) ResourceAttributes.SERVICE_NAME: dify_config.APPLICATION_NAME,
ResourceAttributes.SERVICE_VERSION: f"dify-{dify_config.CURRENT_VERSION}-{dify_config.COMMIT_SHA}",
instrumentor = FlaskInstrumentor() ResourceAttributes.PROCESS_PID: os.getpid(),
if dify_config.DEBUG: ResourceAttributes.DEPLOYMENT_ENVIRONMENT: f"{dify_config.DEPLOY_ENV}-{dify_config.EDITION}",
logging.info("Initializing Flask instrumentor") ResourceAttributes.HOST_NAME: socket.gethostname(),
instrumentor.instrument_app(app, response_hook=response_hook) ResourceAttributes.HOST_ARCH: platform.machine(),
"custom.deployment.git_commit": dify_config.COMMIT_SHA,
ResourceAttributes.HOST_ID: platform.node(),
def init_sqlalchemy_instrumentor(app: DifyApp): ResourceAttributes.OS_TYPE: platform.system().lower(),
with app.app_context(): ResourceAttributes.OS_DESCRIPTION: platform.platform(),
engines = list(app.extensions["sqlalchemy"].engines.values()) ResourceAttributes.OS_VERSION: platform.version(),
SQLAlchemyInstrumentor().instrument(enable_commenter=True, engines=engines) }
)
sampler = ParentBasedTraceIdRatio(dify_config.OTEL_SAMPLING_RATE)
def setup_context_propagation(): provider = TracerProvider(resource=resource, sampler=sampler)
# Configure propagators set_tracer_provider(provider)
set_global_textmap( exporter: Union[OTLPSpanExporter, ConsoleSpanExporter]
CompositePropagator( metric_exporter: Union[OTLPMetricExporter, ConsoleMetricExporter]
[ if dify_config.OTEL_EXPORTER_TYPE == "otlp":
TraceContextTextMapPropagator(), # W3C trace context exporter = OTLPSpanExporter(
B3Format(), # B3 propagation (used by many systems) endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/traces",
] headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"},
)
metric_exporter = OTLPMetricExporter(
endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/metrics",
headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"},
)
else:
# Fallback to console exporter
exporter = ConsoleSpanExporter()
metric_exporter = ConsoleMetricExporter()
provider.add_span_processor(
BatchSpanProcessor(
exporter,
max_queue_size=dify_config.OTEL_MAX_QUEUE_SIZE,
schedule_delay_millis=dify_config.OTEL_BATCH_EXPORT_SCHEDULE_DELAY,
max_export_batch_size=dify_config.OTEL_MAX_EXPORT_BATCH_SIZE,
export_timeout_millis=dify_config.OTEL_BATCH_EXPORT_TIMEOUT,
) )
) )
reader = PeriodicExportingMetricReader(
metric_exporter,
export_interval_millis=dify_config.OTEL_METRIC_EXPORT_INTERVAL,
export_timeout_millis=dify_config.OTEL_METRIC_EXPORT_TIMEOUT,
)
set_meter_provider(MeterProvider(resource=resource, metric_readers=[reader]))
if not is_celery_worker():
init_flask_instrumentor(app)
CeleryInstrumentor(tracer_provider=get_tracer_provider(), meter_provider=get_meter_provider()).instrument()
instrument_exception_logging()
init_sqlalchemy_instrumentor(app)
atexit.register(shutdown_tracer)
@worker_init.connect(weak=False) def is_enabled():
def init_celery_worker(*args, **kwargs): return dify_config.ENABLE_OTEL
tracer_provider = get_tracer_provider()
metric_provider = get_meter_provider()
if dify_config.DEBUG:
logging.info("Initializing OpenTelemetry for Celery worker")
CeleryInstrumentor(tracer_provider=tracer_provider, meter_provider=metric_provider).instrument()
def shutdown_tracer(): @worker_init.connect(weak=False)
provider = trace.get_tracer_provider() def init_celery_worker(*args, **kwargs):
if hasattr(provider, "force_flush"): if dify_config.ENABLE_OTEL:
provider.force_flush() from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.metrics import get_meter_provider
from opentelemetry.trace import get_tracer_provider
tracer_provider = get_tracer_provider()
metric_provider = get_meter_provider()
if dify_config.DEBUG:
logging.info("Initializing OpenTelemetry for Celery worker")
CeleryInstrumentor(tracer_provider=tracer_provider, meter_provider=metric_provider).instrument()

@ -1,63 +0,0 @@
"""
Patch for OpenTelemetry context detach method to handle None tokens gracefully.
This patch addresses the issue where OpenTelemetry's context.detach() method raises a TypeError
when called with a None token. The error occurs in the contextvars_context.py file where it tries
to call reset() on a None token.
Related GitHub issue: https://github.com/langgenius/dify/issues/18496
Error being fixed:
```
Traceback (most recent call last):
File "opentelemetry/context/__init__.py", line 154, in detach
_RUNTIME_CONTEXT.detach(token)
File "opentelemetry/context/contextvars_context.py", line 50, in detach
self._current_context.reset(token) # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: expected an instance of Token, got None
```
Instead of modifying the third-party package directly, this patch monkey-patches the
context.detach method to gracefully handle None tokens.
"""
import logging
from functools import wraps
from opentelemetry import context
logger = logging.getLogger(__name__)
# Store the original detach method
original_detach = context.detach
# Create a patched version that handles None tokens
@wraps(original_detach)
def patched_detach(token):
"""
A patched version of context.detach that handles None tokens gracefully.
"""
if token is None:
logger.debug("Attempted to detach a None token, skipping")
return
return original_detach(token)
def is_enabled():
"""
Check if the extension is enabled.
Always enable this patch to prevent errors even when OpenTelemetry is disabled.
"""
return True
def init_app(app):
"""
Initialize the OpenTelemetry context patch.
"""
# Replace the original detach method with our patched version
context.detach = patched_detach
logger.info("OpenTelemetry context.detach patched to handle None tokens")

@ -21,14 +21,14 @@ def upgrade():
# ### commands auto generated by Alembic - please adjust! ### # ### commands auto generated by Alembic - please adjust! ###
# Get the database connection # Get the database connection
conn = op.get_bind() conn = op.get_bind()
# Use SQLAlchemy inspector to get the columns of the 'tool_files' table # Use SQLAlchemy inspector to get the columns of the 'tool_files' table
inspector = sa.inspect(conn) inspector = sa.inspect(conn)
columns = [col['name'] for col in inspector.get_columns('tool_files')] columns = [col['name'] for col in inspector.get_columns('tool_files')]
# If 'name' or 'size' columns already exist, exit the upgrade function # If 'name' or 'size' columns already exist, exit the upgrade function
if 'name' in columns or 'size' in columns: if 'name' in columns or 'size' in columns:
return return
with op.batch_alter_table('tool_files', schema=None) as batch_op: with op.batch_alter_table('tool_files', schema=None) as batch_op:
batch_op.add_column(sa.Column('name', sa.String(), nullable=True)) batch_op.add_column(sa.Column('name', sa.String(), nullable=True))

@ -35,4 +35,4 @@ def downgrade():
# batch_op.drop_column('retry_index') # batch_op.drop_column('retry_index')
pass pass
# ### end Alembic commands ### # ### end Alembic commands ###

@ -23,7 +23,7 @@ def upgrade():
conn = op.get_bind() conn = op.get_bind()
inspector = inspect(conn) inspector = inspect(conn)
has_column = 'retry_index' in [col['name'] for col in inspector.get_columns('workflow_node_executions')] has_column = 'retry_index' in [col['name'] for col in inspector.get_columns('workflow_node_executions')]
if has_column: if has_column:
with op.batch_alter_table('workflow_node_executions', schema=None) as batch_op: with op.batch_alter_table('workflow_node_executions', schema=None) as batch_op:
batch_op.drop_column('retry_index') batch_op.drop_column('retry_index')

@ -1,7 +1,7 @@
"""init """init
Revision ID: 64b051264f32 Revision ID: 64b051264f32
Revises: Revises:
Create Date: 2023-05-13 14:26:59.085018 Create Date: 2023-05-13 14:26:59.085018
""" """

@ -99,12 +99,12 @@ def upgrade():
id=id, id=id,
tenant_id=tenant_id, tenant_id=tenant_id,
user_id=user_id, user_id=user_id,
provider='google', provider='google',
encrypted_credentials=encrypted_credentials, encrypted_credentials=encrypted_credentials,
created_at=created_at, created_at=created_at,
updated_at=updated_at updated_at=updated_at
) )
# ### end Alembic commands ### # ### end Alembic commands ###

@ -1,6 +1,6 @@
[project] [project]
name = "dify-api" name = "dify-api"
version = "1.3.0" dynamic = ["version"]
requires-python = ">=3.11,<3.13" requires-python = ">=3.11,<3.13"
dependencies = [ dependencies = [
@ -81,15 +81,19 @@ dependencies = [
"tokenizers~=0.15.0", "tokenizers~=0.15.0",
"transformers~=4.35.0", "transformers~=4.35.0",
"unstructured[docx,epub,md,ppt,pptx]~=0.16.1", "unstructured[docx,epub,md,ppt,pptx]~=0.16.1",
"validators==0.21.0",
"weave~=0.51.34", "weave~=0.51.34",
"yarl~=1.18.3", "yarl~=1.18.3",
"webvtt-py~=0.5.1",
] ]
# Before adding new dependency, consider place it in # Before adding new dependency, consider place it in
# alphabet order (a-z) and suitable group. # alphabet order (a-z) and suitable group.
[tool.setuptools]
packages = []
[tool.uv] [tool.uv]
default-groups = ["storage", "tools", "vdb"] default-groups = ["storage", "tools", "vdb"]
package = false
[dependency-groups] [dependency-groups]
@ -191,6 +195,6 @@ vdb = [
"tidb-vector==0.0.9", "tidb-vector==0.0.9",
"upstash-vector==0.6.0", "upstash-vector==0.6.0",
"volcengine-compat~=1.0.156", "volcengine-compat~=1.0.156",
"weaviate-client~=3.21.0", "weaviate-client~=3.24.0",
"xinference-client~=1.2.2", "xinference-client~=1.2.2",
] ]

@ -2,9 +2,9 @@ import json
from copy import deepcopy from copy import deepcopy
from datetime import UTC, datetime from datetime import UTC, datetime
from typing import Any, Optional, Union, cast from typing import Any, Optional, Union, cast
from urllib.parse import urlparse
import httpx import httpx
import validators
from constants import HIDDEN_VALUE from constants import HIDDEN_VALUE
from core.helper import ssrf_proxy from core.helper import ssrf_proxy
@ -72,7 +72,9 @@ class ExternalDatasetService:
endpoint = f"{settings['endpoint']}/retrieval" endpoint = f"{settings['endpoint']}/retrieval"
api_key = settings["api_key"] api_key = settings["api_key"]
if not validators.url(endpoint, simple_host=True):
parsed_url = urlparse(endpoint)
if not all([parsed_url.scheme, parsed_url.netloc]):
if not endpoint.startswith("http://") and not endpoint.startswith("https://"): if not endpoint.startswith("http://") and not endpoint.startswith("https://"):
raise ValueError(f"invalid endpoint: {endpoint} must start with http:// or https://") raise ValueError(f"invalid endpoint: {endpoint} must start with http:// or https://")
else: else:

@ -86,9 +86,9 @@ limit 1000"""
update_retrieval_model_sql = ", retrieval_model = :retrieval_model" update_retrieval_model_sql = ", retrieval_model = :retrieval_model"
params["retrieval_model"] = json.dumps(retrieval_model) params["retrieval_model"] = json.dumps(retrieval_model)
sql = f"""update {table_name} sql = f"""update {table_name}
set {provider_column_name} = set {provider_column_name} =
concat('{DEFAULT_PLUGIN_ID}/', {provider_column_name}, '/', {provider_column_name}) concat('{DEFAULT_PLUGIN_ID}/', {provider_column_name}, '/', {provider_column_name})
{update_retrieval_model_sql} {update_retrieval_model_sql}
where id = :record_id""" where id = :record_id"""
conn.execute(db.text(sql), params) conn.execute(db.text(sql), params)
@ -131,10 +131,10 @@ limit 1000"""
while True: while True:
sql = f""" sql = f"""
SELECT id, {provider_column_name} AS provider_name SELECT id, {provider_column_name} AS provider_name
FROM {table_name} FROM {table_name}
WHERE {provider_column_name} NOT LIKE '%/%' WHERE {provider_column_name} NOT LIKE '%/%'
AND {provider_column_name} IS NOT NULL AND {provider_column_name} IS NOT NULL
AND {provider_column_name} != '' AND {provider_column_name} != ''
AND id > :last_id AND id > :last_id
ORDER BY id ASC ORDER BY id ASC
@ -183,8 +183,8 @@ limit 1000"""
if batch_updates: if batch_updates:
update_sql = f""" update_sql = f"""
UPDATE {table_name} UPDATE {table_name}
SET {provider_column_name} = :updated_value SET {provider_column_name} = :updated_value
WHERE id = :record_id WHERE id = :record_id
""" """
conn.execute(db.text(update_sql), [{"updated_value": u, "record_id": r} for u, r in batch_updates]) conn.execute(db.text(update_sql), [{"updated_value": u, "record_id": r} for u, r in batch_updates])

@ -77,7 +77,7 @@
<h1>Some Documents in Your Knowledge Base Have Been Disabled</h1> <h1>Some Documents in Your Knowledge Base Have Been Disabled</h1>
<p>Dear {{userName}},</p> <p>Dear {{userName}},</p>
<p> <p>
We're sorry for the inconvenience. To ensure optimal performance, documents We're sorry for the inconvenience. To ensure optimal performance, documents
that havent been updated or accessed in the past 30 days have been disabled in that havent been updated or accessed in the past 30 days have been disabled in
your knowledge bases: your knowledge bases:
</p> </p>
@ -97,4 +97,4 @@
</div> </div>
</div> </div>
</body> </body>
</html> </html>

@ -122,4 +122,4 @@
</div> </div>
</body> </body>
</html> </html>

@ -102,4 +102,4 @@
</div> </div>
</body> </body>
</html> </html>

@ -1 +1 @@
.env.test .env.test

@ -23,13 +23,70 @@ def setup_mock_redis():
ext_redis.redis_client.lock = MagicMock(return_value=mock_redis_lock) ext_redis.redis_client.lock = MagicMock(return_value=mock_redis_lock)
class TestOpenSearchConfig:
def test_to_opensearch_params(self):
config = OpenSearchConfig(
host="localhost",
port=9200,
secure=True,
user="admin",
password="password",
)
params = config.to_opensearch_params()
assert params["hosts"] == [{"host": "localhost", "port": 9200}]
assert params["use_ssl"] is True
assert params["verify_certs"] is True
assert params["connection_class"].__name__ == "Urllib3HttpConnection"
assert params["http_auth"] == ("admin", "password")
@patch("boto3.Session")
@patch("core.rag.datasource.vdb.opensearch.opensearch_vector.Urllib3AWSV4SignerAuth")
def test_to_opensearch_params_with_aws_managed_iam(
self, mock_aws_signer_auth: MagicMock, mock_boto_session: MagicMock
):
mock_credentials = MagicMock()
mock_boto_session.return_value.get_credentials.return_value = mock_credentials
mock_auth_instance = MagicMock()
mock_aws_signer_auth.return_value = mock_auth_instance
aws_region = "ap-southeast-2"
aws_service = "aoss"
host = f"aoss-endpoint.{aws_region}.aoss.amazonaws.com"
port = 9201
config = OpenSearchConfig(
host=host,
port=port,
secure=True,
auth_method="aws_managed_iam",
aws_region=aws_region,
aws_service=aws_service,
)
params = config.to_opensearch_params()
assert params["hosts"] == [{"host": host, "port": port}]
assert params["use_ssl"] is True
assert params["verify_certs"] is True
assert params["connection_class"].__name__ == "Urllib3HttpConnection"
assert params["http_auth"] is mock_auth_instance
mock_aws_signer_auth.assert_called_once_with(
credentials=mock_credentials, region=aws_region, service=aws_service
)
assert mock_boto_session.return_value.get_credentials.called
class TestOpenSearchVector: class TestOpenSearchVector:
def setup_method(self): def setup_method(self):
self.collection_name = "test_collection" self.collection_name = "test_collection"
self.example_doc_id = "example_doc_id" self.example_doc_id = "example_doc_id"
self.vector = OpenSearchVector( self.vector = OpenSearchVector(
collection_name=self.collection_name, collection_name=self.collection_name,
config=OpenSearchConfig(host="localhost", port=9200, user="admin", password="password", secure=False), config=OpenSearchConfig(host="localhost", port=9200, secure=False, user="admin", password="password"),
) )
self.vector._client = MagicMock() self.vector._client = MagicMock()

@ -1 +1 @@
.env.test .env.test

@ -864,10 +864,11 @@ def test_condition_parallel_correct_output(mock_close, mock_remove, app):
with patch.object(CodeNode, "_run", new=code_generator): with patch.object(CodeNode, "_run", new=code_generator):
generator = graph_engine.run() generator = graph_engine.run()
stream_content = "" stream_content = ""
res_content = "VAT:\ndify 123" wrong_content = ["Stamp Duty", "other"]
for item in generator: for item in generator:
if isinstance(item, NodeRunStreamChunkEvent): if isinstance(item, NodeRunStreamChunkEvent):
stream_content += f"{item.chunk_content}\n" stream_content += f"{item.chunk_content}\n"
if isinstance(item, GraphRunSucceededEvent): if isinstance(item, GraphRunSucceededEvent):
assert item.outputs == {"answer": res_content} assert item.outputs is not None
assert stream_content == res_content + "\n" answer = item.outputs["answer"]
assert all(rc not in answer for rc in wrong_content)

@ -1155,7 +1155,6 @@ wheels = [
[[package]] [[package]]
name = "dify-api" name = "dify-api"
version = "1.3.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "authlib" }, { name = "authlib" },
@ -1233,8 +1232,8 @@ dependencies = [
{ name = "tokenizers" }, { name = "tokenizers" },
{ name = "transformers" }, { name = "transformers" },
{ name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] }, { name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] },
{ name = "validators" },
{ name = "weave" }, { name = "weave" },
{ name = "webvtt-py" },
{ name = "yarl" }, { name = "yarl" },
] ]
@ -1403,8 +1402,8 @@ requires-dist = [
{ name = "tokenizers", specifier = "~=0.15.0" }, { name = "tokenizers", specifier = "~=0.15.0" },
{ name = "transformers", specifier = "~=4.35.0" }, { name = "transformers", specifier = "~=4.35.0" },
{ name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" },
{ name = "validators", specifier = "==0.21.0" },
{ name = "weave", specifier = "~=0.51.34" }, { name = "weave", specifier = "~=0.51.34" },
{ name = "webvtt-py", specifier = "~=0.5.1" },
{ name = "yarl", specifier = "~=1.18.3" }, { name = "yarl", specifier = "~=1.18.3" },
] ]
@ -1492,7 +1491,7 @@ vdb = [
{ name = "tidb-vector", specifier = "==0.0.9" }, { name = "tidb-vector", specifier = "==0.0.9" },
{ name = "upstash-vector", specifier = "==0.6.0" }, { name = "upstash-vector", specifier = "==0.6.0" },
{ name = "volcengine-compat", specifier = "~=1.0.156" }, { name = "volcengine-compat", specifier = "~=1.0.156" },
{ name = "weaviate-client", specifier = "~=3.21.0" }, { name = "weaviate-client", specifier = "~=3.24.0" },
{ name = "xinference-client", specifier = "~=1.2.2" }, { name = "xinference-client", specifier = "~=1.2.2" },
] ]
@ -6086,11 +6085,11 @@ wheels = [
[[package]] [[package]]
name = "validators" name = "validators"
version = "0.21.0" version = "0.34.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1f/c5/4095e7a5a6fecc2eca953ad058a3609135d833f986f84951f7e26790d651/validators-0.21.0.tar.gz", hash = "sha256:245b98ab778ed9352a7269c6a8f6c2a839bed5b2a7e3e60273ce399d247dd4b3", size = 20937 } sdist = { url = "https://files.pythonhosted.org/packages/64/07/91582d69320f6f6daaf2d8072608a4ad8884683d4840e7e4f3a9dbdcc639/validators-0.34.0.tar.gz", hash = "sha256:647fe407b45af9a74d245b943b18e6a816acf4926974278f6dd617778e1e781f", size = 70955 }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/ad/50/18dbf2ac594234ee6249bfe3425fa424c18eeb96f29dcd47f199ed6c51bc/validators-0.21.0-py3-none-any.whl", hash = "sha256:3470db6f2384c49727ee319afa2e97aec3f8fad736faa6067e0fd7f9eaf2c551", size = 27686 }, { url = "https://files.pythonhosted.org/packages/6e/78/36828a4d857b25896f9774c875714ba4e9b3bc8a92d2debe3f4df3a83d4f/validators-0.34.0-py3-none-any.whl", hash = "sha256:c804b476e3e6d3786fa07a30073a4ef694e617805eb1946ceee3fe5a9b8b1321", size = 43536 },
] ]
[[package]] [[package]]
@ -6220,17 +6219,16 @@ wheels = [
[[package]] [[package]]
name = "weaviate-client" name = "weaviate-client"
version = "3.21.0" version = "3.24.2"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "authlib" }, { name = "authlib" },
{ name = "requests" }, { name = "requests" },
{ name = "tqdm" },
{ name = "validators" }, { name = "validators" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/b4/a5/c6777a8507249d7a63f4f5d9696eb5f45beac87db0eddfa4438d408cc3b4/weaviate-client-3.21.0.tar.gz", hash = "sha256:ec94ac554883c765e94da8b2947c4f0fa4a0378ed3bbe9f3653df3a5b1745a6d", size = 186970 } sdist = { url = "https://files.pythonhosted.org/packages/1f/c1/3285a21d8885f2b09aabb65edb9a8e062a35c2d7175e1bb024fa096582ab/weaviate-client-3.24.2.tar.gz", hash = "sha256:6914c48c9a7e5ad0be9399271f9cb85d6f59ab77476c6d4e56a3925bf149edaa", size = 199332 }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/df/5b/57b55ad36eb071b57e79f1ea7fba5bfe6a2fe49702607f56726569665d60/weaviate_client-3.21.0-py3-none-any.whl", hash = "sha256:420444ded7106fb000f4f8b2321b5f5fa2387825aa7a303d702accf61026f9d2", size = 99944 }, { url = "https://files.pythonhosted.org/packages/ab/98/3136d05f93e30cf29e1db280eaadf766df18d812dfe7994bcced653b2340/weaviate_client-3.24.2-py3-none-any.whl", hash = "sha256:bc50ca5fcebcd48de0d00f66700b0cf7c31a97c4cd3d29b4036d77c5d1d9479b", size = 107968 },
] ]
[[package]] [[package]]
@ -6282,6 +6280,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7b/c8/d529f8a32ce40d98309f4470780631e971a5a842b60aec864833b3615786/websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b", size = 157416 }, { url = "https://files.pythonhosted.org/packages/7b/c8/d529f8a32ce40d98309f4470780631e971a5a842b60aec864833b3615786/websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b", size = 157416 },
] ]
[[package]]
name = "webvtt-py"
version = "0.5.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5e/f6/7c9c964681fb148e0293e6860108d378e09ccab2218f9063fd3eb87f840a/webvtt-py-0.5.1.tar.gz", hash = "sha256:2040dd325277ddadc1e0c6cc66cbc4a1d9b6b49b24c57a0c3364374c3e8a3dc1", size = 55128 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f3/ed/aad7e0f5a462d679f7b4d2e0d8502c3096740c883b5bbed5103146480937/webvtt_py-0.5.1-py3-none-any.whl", hash = "sha256:9d517d286cfe7fc7825e9d4e2079647ce32f5678eb58e39ef544ffbb932610b7", size = 19802 },
]
[[package]] [[package]]
name = "werkzeug" name = "werkzeug"
version = "3.1.3" version = "3.1.3"

@ -11,4 +11,4 @@ dev/pytest/pytest_tools.sh
dev/pytest/pytest_workflow.sh dev/pytest/pytest_workflow.sh
# Unit tests # Unit tests
dev/pytest/pytest_unit_tests.sh dev/pytest/pytest_unit_tests.sh

@ -10,4 +10,4 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/fireworks \ api/tests/integration_tests/model_runtime/fireworks \
api/tests/integration_tests/model_runtime/nomic \ api/tests/integration_tests/model_runtime/nomic \
api/tests/integration_tests/model_runtime/mixedbread \ api/tests/integration_tests/model_runtime/mixedbread \
api/tests/integration_tests/model_runtime/voyage api/tests/integration_tests/model_runtime/voyage

@ -39,6 +39,12 @@ APP_WEB_URL=
# File preview or download Url prefix. # File preview or download Url prefix.
# used to display File preview or download Url to the front-end or as Multi-model inputs; # used to display File preview or download Url to the front-end or as Multi-model inputs;
# Url is signed and has expiration time. # Url is signed and has expiration time.
# Setting FILES_URL is required for file processing plugins.
# - For https://example.com, use FILES_URL=https://example.com
# - For http://example.com, use FILES_URL=http://example.com
# Recommendation: use a dedicated domain (e.g., https://upload.example.com).
# Alternatively, use http://<your-ip>:5001 or http://api:5001,
# ensuring port 5001 is externally accessible (see docker-compose.yaml).
FILES_URL= FILES_URL=
# ------------------------------ # ------------------------------
@ -520,9 +526,13 @@ RELYT_DATABASE=postgres
# open search configuration, only available when VECTOR_STORE is `opensearch` # open search configuration, only available when VECTOR_STORE is `opensearch`
OPENSEARCH_HOST=opensearch OPENSEARCH_HOST=opensearch
OPENSEARCH_PORT=9200 OPENSEARCH_PORT=9200
OPENSEARCH_SECURE=true
OPENSEARCH_AUTH_METHOD=basic
OPENSEARCH_USER=admin OPENSEARCH_USER=admin
OPENSEARCH_PASSWORD=admin OPENSEARCH_PASSWORD=admin
OPENSEARCH_SECURE=true # If using AWS managed IAM, e.g. Managed Cluster or OpenSearch Serverless
OPENSEARCH_AWS_REGION=ap-southeast-1
OPENSEARCH_AWS_SERVICE=aoss
# tencent vector configurations, only available when VECTOR_STORE is `tencent` # tencent vector configurations, only available when VECTOR_STORE is `tencent`
TENCENT_VECTOR_DB_URL=http://127.0.0.1 TENCENT_VECTOR_DB_URL=http://127.0.0.1

@ -14,7 +14,6 @@ Welcome to the new `docker` directory for deploying Dify using Docker Compose. T
- **Unified Vector Database Services**: All vector database services are now managed from a single Docker Compose file `docker-compose.yaml`. You can switch between different vector databases by setting the `VECTOR_STORE` environment variable in your `.env` file. - **Unified Vector Database Services**: All vector database services are now managed from a single Docker Compose file `docker-compose.yaml`. You can switch between different vector databases by setting the `VECTOR_STORE` environment variable in your `.env` file.
- **Mandatory .env File**: A `.env` file is now required to run `docker compose up`. This file is crucial for configuring your deployment and for any custom settings to persist through upgrades. - **Mandatory .env File**: A `.env` file is now required to run `docker compose up`. This file is crucial for configuring your deployment and for any custom settings to persist through upgrades.
- **Legacy Support**: Previous deployment files are now located in the `docker-legacy` directory and will no longer be maintained.
### How to Deploy Dify with `docker-compose.yaml` ### How to Deploy Dify with `docker-compose.yaml`

@ -1,4 +1,4 @@
FROM couchbase/server:latest AS stage_base FROM couchbase/server:latest AS stage_base
# FROM couchbase:latest AS stage_base # FROM couchbase:latest AS stage_base
COPY init-cbserver.sh /opt/couchbase/init/ COPY init-cbserver.sh /opt/couchbase/init/
RUN chmod +x /opt/couchbase/init/init-cbserver.sh RUN chmod +x /opt/couchbase/init/init-cbserver.sh

@ -1,8 +1,8 @@
#!/bin/bash #!/bin/bash
# used to start couchbase server - can't get around this as docker compose only allows you to start one command - so we have to start couchbase like the standard couchbase Dockerfile would # used to start couchbase server - can't get around this as docker compose only allows you to start one command - so we have to start couchbase like the standard couchbase Dockerfile would
# https://github.com/couchbase/docker/blob/master/enterprise/couchbase-server/7.2.0/Dockerfile#L88 # https://github.com/couchbase/docker/blob/master/enterprise/couchbase-server/7.2.0/Dockerfile#L88
/entrypoint.sh couchbase-server & /entrypoint.sh couchbase-server &
# track if setup is complete so we don't try to setup again # track if setup is complete so we don't try to setup again
FILE=/opt/couchbase/init/setupComplete.txt FILE=/opt/couchbase/init/setupComplete.txt
@ -36,9 +36,9 @@ if ! [ -f "$FILE" ]; then
--bucket-ramsize $COUCHBASE_BUCKET_RAMSIZE \ --bucket-ramsize $COUCHBASE_BUCKET_RAMSIZE \
--bucket-type couchbase --bucket-type couchbase
# create file so we know that the cluster is setup and don't run the setup again # create file so we know that the cluster is setup and don't run the setup again
touch $FILE touch $FILE
fi fi
# docker compose will stop the container from running unless we do this # docker compose will stop the container from running unless we do this
# known issue and workaround # known issue and workaround
tail -f /dev/null tail -f /dev/null

@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
services: services:
# API service # API service
api: api:
image: langgenius/dify-api:1.3.0 image: langgenius/dify-api:1.3.1
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -31,7 +31,7 @@ services:
# worker service # worker service
# The Celery worker for processing the queue. # The Celery worker for processing the queue.
worker: worker:
image: langgenius/dify-api:1.3.0 image: langgenius/dify-api:1.3.1
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -57,7 +57,7 @@ services:
# Frontend web application. # Frontend web application.
web: web:
image: langgenius/dify-web:1.3.0 image: langgenius/dify-web:1.3.1
restart: always restart: always
environment: environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-} CONSOLE_API_URL: ${CONSOLE_API_URL:-}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 170 KiB

@ -225,9 +225,12 @@ x-shared-env: &shared-api-worker-env
RELYT_DATABASE: ${RELYT_DATABASE:-postgres} RELYT_DATABASE: ${RELYT_DATABASE:-postgres}
OPENSEARCH_HOST: ${OPENSEARCH_HOST:-opensearch} OPENSEARCH_HOST: ${OPENSEARCH_HOST:-opensearch}
OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200} OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200}
OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true}
OPENSEARCH_AUTH_METHOD: ${OPENSEARCH_AUTH_METHOD:-basic}
OPENSEARCH_USER: ${OPENSEARCH_USER:-admin} OPENSEARCH_USER: ${OPENSEARCH_USER:-admin}
OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin} OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin}
OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true} OPENSEARCH_AWS_REGION: ${OPENSEARCH_AWS_REGION:-ap-southeast-1}
OPENSEARCH_AWS_SERVICE: ${OPENSEARCH_AWS_SERVICE:-aoss}
TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-http://127.0.0.1} TENCENT_VECTOR_DB_URL: ${TENCENT_VECTOR_DB_URL:-http://127.0.0.1}
TENCENT_VECTOR_DB_API_KEY: ${TENCENT_VECTOR_DB_API_KEY:-dify} TENCENT_VECTOR_DB_API_KEY: ${TENCENT_VECTOR_DB_API_KEY:-dify}
TENCENT_VECTOR_DB_TIMEOUT: ${TENCENT_VECTOR_DB_TIMEOUT:-30} TENCENT_VECTOR_DB_TIMEOUT: ${TENCENT_VECTOR_DB_TIMEOUT:-30}
@ -488,7 +491,7 @@ x-shared-env: &shared-api-worker-env
services: services:
# API service # API service
api: api:
image: langgenius/dify-api:1.3.0 image: langgenius/dify-api:1.3.1
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -517,7 +520,7 @@ services:
# worker service # worker service
# The Celery worker for processing the queue. # The Celery worker for processing the queue.
worker: worker:
image: langgenius/dify-api:1.3.0 image: langgenius/dify-api:1.3.1
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -543,7 +546,7 @@ services:
# Frontend web application. # Frontend web application.
web: web:
image: langgenius/dify-web:1.3.0 image: langgenius/dify-web:1.3.1
restart: always restart: always
environment: environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-} CONSOLE_API_URL: ${CONSOLE_API_URL:-}

@ -144,4 +144,4 @@ PLUGIN_AZURE_BLOB_STORAGE_CONNECTION_STRING=
# Plugin oss tencent cos # Plugin oss tencent cos
PLUGIN_TENCENT_COS_SECRET_KEY= PLUGIN_TENCENT_COS_SECRET_KEY=
PLUGIN_TENCENT_COS_SECRET_ID= PLUGIN_TENCENT_COS_SECRET_ID=
PLUGIN_TENCENT_COS_REGION= PLUGIN_TENCENT_COS_REGION=

@ -39,4 +39,4 @@ envsubst "$env_vars" < /etc/nginx/proxy.conf.template > /etc/nginx/proxy.conf
envsubst "$env_vars" < /etc/nginx/conf.d/default.conf.template > /etc/nginx/conf.d/default.conf envsubst "$env_vars" < /etc/nginx/conf.d/default.conf.template > /etc/nginx/conf.d/default.conf
# Start Nginx using the default entrypoint # Start Nginx using the default entrypoint
exec nginx -g 'daemon off;' exec nginx -g 'daemon off;'

@ -6,4 +6,4 @@ ssl_certificate_key ${SSL_CERTIFICATE_KEY_PATH};
ssl_protocols ${NGINX_SSL_PROTOCOLS}; ssl_protocols ${NGINX_SSL_PROTOCOLS};
ssl_prefer_server_ciphers on; ssl_prefer_server_ciphers on;
ssl_session_cache shared:SSL:10m; ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m; ssl_session_timeout 10m;

@ -31,4 +31,4 @@ http {
client_max_body_size ${NGINX_CLIENT_MAX_BODY_SIZE}; client_max_body_size ${NGINX_CLIENT_MAX_BODY_SIZE};
include /etc/nginx/conf.d/*.conf; include /etc/nginx/conf.d/*.conf;
} }

@ -44,7 +44,7 @@ refresh_pattern . 0 20% 4320
# cache_dir ufs /var/spool/squid 100 16 256 # cache_dir ufs /var/spool/squid 100 16 256
# upstream proxy, set to your own upstream proxy IP to avoid SSRF attacks # upstream proxy, set to your own upstream proxy IP to avoid SSRF attacks
# cache_peer 172.1.1.1 parent 3128 0 no-query no-digest no-netdb-exchange default # cache_peer 172.1.1.1 parent 3128 0 no-query no-digest no-netdb-exchange default
################################## Reverse Proxy To Sandbox ################################ ################################## Reverse Proxy To Sandbox ################################
http_port ${REVERSE_PROXY_PORT} accel vhost http_port ${REVERSE_PROXY_PORT} accel vhost
@ -53,4 +53,4 @@ acl src_all src all
http_access allow src_all http_access allow src_all
# Unless the option's size is increased, an error will occur when uploading more than two files. # Unless the option's size is increased, an error will occur when uploading more than two files.
client_request_buffer_max_size 100 MB client_request_buffer_max_size 100 MB

@ -8,6 +8,6 @@ if [ -f ${DB_INITIALIZED} ]; then
exit exit
else else
echo 'File does not exist. Standards for first time Start up this DB' echo 'File does not exist. Standards for first time Start up this DB'
"$ORACLE_HOME"/bin/sqlplus -s "/ as sysdba" @"/opt/oracle/scripts/startup/init_user.script"; "$ORACLE_HOME"/bin/sqlplus -s "/ as sysdba" @"/opt/oracle/scripts/startup/init_user.script";
touch ${DB_INITIALIZED} touch ${DB_INITIALIZED}
fi fi

@ -1,5 +1,5 @@
show pdbs; show pdbs;
ALTER SYSTEM SET PROCESSES=500 SCOPE=SPFILE; ALTER SYSTEM SET PROCESSES=500 SCOPE=SPFILE;
alter session set container= freepdb1; alter session set container= freepdb1;
create user dify identified by dify DEFAULT TABLESPACE users quota unlimited on users; create user dify identified by dify DEFAULT TABLESPACE users quota unlimited on users;
grant DB_DEVELOPER_ROLE to dify; grant DB_DEVELOPER_ROLE to dify;

@ -1,4 +1,4 @@
# PD Configuration File reference: # PD Configuration File reference:
# https://docs.pingcap.com/tidb/stable/pd-configuration-file#pd-configuration-file # https://docs.pingcap.com/tidb/stable/pd-configuration-file#pd-configuration-file
[replication] [replication]
max-replicas = 1 max-replicas = 1

@ -14,4 +14,4 @@
<access_management>1</access_management> <access_management>1</access_management>
</default> </default>
</users> </users>
</clickhouse> </clickhouse>

@ -1 +1 @@
ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30; ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30;

@ -45,4 +45,4 @@ package-lock.json
.yarnrc.yml .yarnrc.yml
# pmpm # pmpm
pnpm-lock.yaml pnpm-lock.yaml

@ -26,7 +26,7 @@ export declare class DifyClient {
params?: Params, params?: Params,
stream?: boolean, stream?: boolean,
headerParams?: HeaderParams headerParams?: HeaderParams
): Promise<any>; ): Promise<any>;
messageFeedback(message_id: string, rating: number, user: User): Promise<any>; messageFeedback(message_id: string, rating: number, user: User): Promise<any>;
@ -64,9 +64,9 @@ export declare class ChatClient extends DifyClient {
getConversations( getConversations(
user: User, user: User,
first_id?: string | null, first_id?: string | null,
limit?: number | null, limit?: number | null,
pinned?: boolean | null pinned?: boolean | null
): Promise<any>; ): Promise<any>;
@ -80,7 +80,7 @@ export declare class ChatClient extends DifyClient {
renameConversation(conversation_id: string, name: string, user: User,auto_generate:boolean): Promise<any>; renameConversation(conversation_id: string, name: string, user: User,auto_generate:boolean): Promise<any>;
deleteConversation(conversation_id: string, user: User): Promise<any>; deleteConversation(conversation_id: string, user: User): Promise<any>;
audioToText(data: FormData): Promise<any>; audioToText(data: FormData): Promise<any>;
} }
@ -88,4 +88,4 @@ export declare class WorkflowClient extends DifyClient {
run(inputs: any, user: User, stream?: boolean,): Promise<any>; run(inputs: any, user: User, stream?: boolean,): Promise<any>;
stop(task_id: string, user: User): Promise<any>; stop(task_id: string, user: User): Promise<any>;
} }

@ -334,12 +334,12 @@ export class ChatClient extends DifyClient {
export class WorkflowClient extends DifyClient { export class WorkflowClient extends DifyClient {
run(inputs,user,stream) { run(inputs,user,stream) {
const data = { const data = {
inputs, inputs,
response_mode: stream ? "streaming" : "blocking", response_mode: stream ? "streaming" : "blocking",
user user
}; };
return this.sendRequest( return this.sendRequest(
routes.runWorkflow.method, routes.runWorkflow.method,
routes.runWorkflow.url(), routes.runWorkflow.url(),
@ -357,4 +357,4 @@ export class WorkflowClient extends DifyClient {
data data
); );
} }
} }

@ -62,4 +62,4 @@ describe('Send Requests', () => {
errorMessage errorMessage
) )
}) })
}) })

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save