Merge branch 'langgenius:main' into add-turbo-pack

pull/20696/head
GuanMu 11 months ago committed by GitHub
commit 61b606836c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field( CURRENT_VERSION: str = Field(
description="Dify version", description="Dify version",
default="1.4.1", default="1.4.2",
) )
COMMIT_SHA: str = Field( COMMIT_SHA: str = Field(

@ -29,7 +29,7 @@ from core.plugin.entities.request import (
RequestRequestUploadFile, RequestRequestUploadFile,
) )
from core.tools.entities.tool_entities import ToolProviderType from core.tools.entities.tool_entities import ToolProviderType
from libs.helper import compact_generate_response from libs.helper import length_prefixed_response
from models.account import Account, Tenant from models.account import Account, Tenant
from models.model import EndUser from models.model import EndUser
@ -44,7 +44,7 @@ class PluginInvokeLLMApi(Resource):
response = PluginModelBackwardsInvocation.invoke_llm(user_model.id, tenant_model, payload) response = PluginModelBackwardsInvocation.invoke_llm(user_model.id, tenant_model, payload)
return PluginModelBackwardsInvocation.convert_to_event_stream(response) return PluginModelBackwardsInvocation.convert_to_event_stream(response)
return compact_generate_response(generator()) return length_prefixed_response(0xF, generator())
class PluginInvokeTextEmbeddingApi(Resource): class PluginInvokeTextEmbeddingApi(Resource):
@ -101,7 +101,7 @@ class PluginInvokeTTSApi(Resource):
) )
return PluginModelBackwardsInvocation.convert_to_event_stream(response) return PluginModelBackwardsInvocation.convert_to_event_stream(response)
return compact_generate_response(generator()) return length_prefixed_response(0xF, generator())
class PluginInvokeSpeech2TextApi(Resource): class PluginInvokeSpeech2TextApi(Resource):
@ -162,7 +162,7 @@ class PluginInvokeToolApi(Resource):
), ),
) )
return compact_generate_response(generator()) return length_prefixed_response(0xF, generator())
class PluginInvokeParameterExtractorNodeApi(Resource): class PluginInvokeParameterExtractorNodeApi(Resource):
@ -228,7 +228,7 @@ class PluginInvokeAppApi(Resource):
files=payload.files, files=payload.files,
) )
return compact_generate_response(PluginAppBackwardsInvocation.convert_to_event_stream(response)) return length_prefixed_response(0xF, PluginAppBackwardsInvocation.convert_to_event_stream(response))
class PluginInvokeEncryptApi(Resource): class PluginInvokeEncryptApi(Resource):

@ -1,3 +1,4 @@
import logging
import time import time
from collections.abc import Generator, Mapping, Sequence from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional, Union
@ -33,6 +34,8 @@ from models.model import App, AppMode, Message, MessageAnnotation
if TYPE_CHECKING: if TYPE_CHECKING:
from core.file.models import File from core.file.models import File
_logger = logging.getLogger(__name__)
class AppRunner: class AppRunner:
def get_pre_calculate_rest_tokens( def get_pre_calculate_rest_tokens(
@ -298,7 +301,7 @@ class AppRunner:
) )
def _handle_invoke_result_stream( def _handle_invoke_result_stream(
self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
) -> None: ) -> None:
""" """
Handle invoke result Handle invoke result
@ -317,18 +320,28 @@ class AppRunner:
else: else:
queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER) queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)
text += result.delta.message.content message = result.delta.message
if isinstance(message.content, str):
text += message.content
elif isinstance(message.content, list):
for content in message.content:
if not isinstance(content, str):
# TODO(QuantumGhost): Add multimodal output support for easy ui.
_logger.warning("received multimodal output, type=%s", type(content))
text += content.data
else:
text += content # failback to str
if not model: if not model:
model = result.model model = result.model
if not prompt_messages: if not prompt_messages:
prompt_messages = result.prompt_messages prompt_messages = list(result.prompt_messages)
if result.delta.usage: if result.delta.usage:
usage = result.delta.usage usage = result.delta.usage
if not usage: if usage is None:
usage = LLMUsage.empty_usage() usage = LLMUsage.empty_usage()
llm_result = LLMResult( llm_result = LLMResult(

@ -48,6 +48,7 @@ from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import ( from core.model_runtime.entities.message_entities import (
AssistantPromptMessage, AssistantPromptMessage,
TextPromptMessageContent,
) )
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName from core.ops.entities.trace_entity import TraceTaskName
@ -309,6 +310,23 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
delta_text = chunk.delta.message.content delta_text = chunk.delta.message.content
if delta_text is None: if delta_text is None:
continue continue
if isinstance(chunk.delta.message.content, list):
delta_text = ""
for content in chunk.delta.message.content:
logger.debug(
"The content type %s in LLM chunk delta message content.: %r", type(content), content
)
if isinstance(content, TextPromptMessageContent):
delta_text += content.data
elif isinstance(content, str):
delta_text += content # failback to str
else:
logger.warning(
"Unsupported content type %s in LLM chunk delta message content.: %r",
type(content),
content,
)
continue
if not self._task_state.llm_result.prompt_messages: if not self._task_state.llm_result.prompt_messages:
self._task_state.llm_result.prompt_messages = chunk.prompt_messages self._task_state.llm_result.prompt_messages = chunk.prompt_messages

@ -11,14 +11,12 @@ class BaseBackwardsInvocation:
try: try:
for chunk in response: for chunk in response:
if isinstance(chunk, BaseModel | dict): if isinstance(chunk, BaseModel | dict):
yield BaseBackwardsInvocationResponse(data=chunk).model_dump_json().encode() + b"\n\n" yield BaseBackwardsInvocationResponse(data=chunk).model_dump_json().encode()
elif isinstance(chunk, str):
yield f"event: {chunk}\n\n".encode()
except Exception as e: except Exception as e:
error_message = BaseBackwardsInvocationResponse(error=str(e)).model_dump_json() error_message = BaseBackwardsInvocationResponse(error=str(e)).model_dump_json()
yield f"{error_message}\n\n".encode() yield error_message.encode()
else: else:
yield BaseBackwardsInvocationResponse(data=response).model_dump_json().encode() + b"\n\n" yield BaseBackwardsInvocationResponse(data=response).model_dump_json().encode()
T = TypeVar("T", bound=dict | Mapping | str | bool | int | BaseModel) T = TypeVar("T", bound=dict | Mapping | str | bool | int | BaseModel)

@ -6,7 +6,7 @@ import json
import logging import logging
from typing import Optional, Union from typing import Optional, Union
from sqlalchemy import select from sqlalchemy import func, select
from sqlalchemy.engine import Engine from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
@ -151,11 +151,11 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository):
existing = session.scalar(select(WorkflowRun).where(WorkflowRun.id == domain_model.id_)) existing = session.scalar(select(WorkflowRun).where(WorkflowRun.id == domain_model.id_))
if not existing: if not existing:
# For new records, get the next sequence number # For new records, get the next sequence number
stmt = select(WorkflowRun.sequence_number).where( stmt = select(func.max(WorkflowRun.sequence_number)).where(
WorkflowRun.app_id == self._app_id, WorkflowRun.app_id == self._app_id,
WorkflowRun.tenant_id == self._tenant_id, WorkflowRun.tenant_id == self._tenant_id,
) )
max_sequence = session.scalar(stmt.order_by(WorkflowRun.sequence_number.desc())) max_sequence = session.scalar(stmt)
db_model.sequence_number = (max_sequence or 0) + 1 db_model.sequence_number = (max_sequence or 0) + 1
else: else:
# For updates, keep the existing sequence number # For updates, keep the existing sequence number

@ -525,6 +525,8 @@ class LLMNode(BaseNode[LLMNodeData]):
# Set appropriate response format based on model capabilities # Set appropriate response format based on model capabilities
self._set_response_format(completion_params, model_schema.parameter_rules) self._set_response_format(completion_params, model_schema.parameter_rules)
model_config_with_cred.parameters = completion_params model_config_with_cred.parameters = completion_params
# NOTE(-LAN-): This line modify the `self.node_data.model`, which is used in `_invoke_llm()`.
node_data_model.completion_params = completion_params
return model, model_config_with_cred return model, model_config_with_cred
def _fetch_prompt_messages( def _fetch_prompt_messages(

@ -3,6 +3,7 @@ import logging
import re import re
import secrets import secrets
import string import string
import struct
import subprocess import subprocess
import time import time
import uuid import uuid
@ -14,6 +15,7 @@ from zoneinfo import available_timezones
from flask import Response, stream_with_context from flask import Response, stream_with_context
from flask_restful import fields from flask_restful import fields
from pydantic import BaseModel
from configs import dify_config from configs import dify_config
from core.app.features.rate_limiting.rate_limit import RateLimitGenerator from core.app.features.rate_limiting.rate_limit import RateLimitGenerator
@ -183,7 +185,7 @@ def generate_string(n):
def extract_remote_ip(request) -> str: def extract_remote_ip(request) -> str:
if request.headers.get("CF-Connecting-IP"): if request.headers.get("CF-Connecting-IP"):
return cast(str, request.headers.get("Cf-Connecting-Ip")) return cast(str, request.headers.get("CF-Connecting-IP"))
elif request.headers.getlist("X-Forwarded-For"): elif request.headers.getlist("X-Forwarded-For"):
return cast(str, request.headers.getlist("X-Forwarded-For")[0]) return cast(str, request.headers.getlist("X-Forwarded-For")[0])
else: else:
@ -206,6 +208,60 @@ def compact_generate_response(response: Union[Mapping, Generator, RateLimitGener
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream") return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
def length_prefixed_response(magic_number: int, response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
"""
This function is used to return a response with a length prefix.
Magic number is a one byte number that indicates the type of the response.
For a compatibility with latest plugin daemon https://github.com/langgenius/dify-plugin-daemon/pull/341
Avoid using line-based response, it leads a memory issue.
We uses following format:
| Field | Size | Description |
|---------------|----------|---------------------------------|
| Magic Number | 1 byte | Magic number identifier |
| Reserved | 1 byte | Reserved field |
| Header Length | 2 bytes | Header length (usually 0xa) |
| Data Length | 4 bytes | Length of the data |
| Reserved | 6 bytes | Reserved fields |
| Data | Variable | Actual data content |
| Reserved Fields | Header | Data |
|-----------------|----------|----------|
| 4 bytes total | Variable | Variable |
all data is in little endian
"""
def pack_response_with_length_prefix(response: bytes) -> bytes:
header_length = 0xA
data_length = len(response)
# | Magic Number 1byte | Reserved 1byte | Header Length 2bytes | Data Length 4bytes | Reserved 6bytes | Data
return struct.pack("<BBHI", magic_number, 0, header_length, data_length) + b"\x00" * 6 + response
if isinstance(response, dict):
return Response(
response=pack_response_with_length_prefix(json.dumps(jsonable_encoder(response)).encode("utf-8")),
status=200,
mimetype="application/json",
)
elif isinstance(response, BaseModel):
return Response(
response=pack_response_with_length_prefix(response.model_dump_json().encode("utf-8")),
status=200,
mimetype="application/json",
)
def generate() -> Generator:
for chunk in response:
if isinstance(chunk, str):
yield pack_response_with_length_prefix(chunk.encode("utf-8"))
else:
yield pack_response_with_length_prefix(chunk)
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
class TokenManager: class TokenManager:
@classmethod @classmethod
def generate_token( def generate_token(

@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
services: services:
# API service # API service
api: api:
image: langgenius/dify-api:1.4.1 image: langgenius/dify-api:1.4.2
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -31,7 +31,7 @@ services:
# worker service # worker service
# The Celery worker for processing the queue. # The Celery worker for processing the queue.
worker: worker:
image: langgenius/dify-api:1.4.1 image: langgenius/dify-api:1.4.2
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -57,7 +57,7 @@ services:
# Frontend web application. # Frontend web application.
web: web:
image: langgenius/dify-web:1.4.1 image: langgenius/dify-web:1.4.2
restart: always restart: always
environment: environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-} CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@ -142,7 +142,7 @@ services:
# plugin daemon # plugin daemon
plugin_daemon: plugin_daemon:
image: langgenius/dify-plugin-daemon:0.1.1-local image: langgenius/dify-plugin-daemon:0.1.2-local
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.

@ -71,7 +71,7 @@ services:
# plugin daemon # plugin daemon
plugin_daemon: plugin_daemon:
image: langgenius/dify-plugin-daemon:0.1.1-local image: langgenius/dify-plugin-daemon:0.1.2-local
restart: always restart: always
env_file: env_file:
- ./middleware.env - ./middleware.env

@ -508,7 +508,7 @@ x-shared-env: &shared-api-worker-env
services: services:
# API service # API service
api: api:
image: langgenius/dify-api:1.4.1 image: langgenius/dify-api:1.4.2
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -537,7 +537,7 @@ services:
# worker service # worker service
# The Celery worker for processing the queue. # The Celery worker for processing the queue.
worker: worker:
image: langgenius/dify-api:1.4.1 image: langgenius/dify-api:1.4.2
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.
@ -563,7 +563,7 @@ services:
# Frontend web application. # Frontend web application.
web: web:
image: langgenius/dify-web:1.4.1 image: langgenius/dify-web:1.4.2
restart: always restart: always
environment: environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-} CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@ -648,7 +648,7 @@ services:
# plugin daemon # plugin daemon
plugin_daemon: plugin_daemon:
image: langgenius/dify-plugin-daemon:0.1.1-local image: langgenius/dify-plugin-daemon:0.1.2-local
restart: always restart: always
environment: environment:
# Use the shared environment variables. # Use the shared environment variables.

@ -12,12 +12,18 @@ const Layout: FC<{
}> = ({ children }) => { }> = ({ children }) => {
const isGlobalPending = useGlobalPublicStore(s => s.isGlobalPending) const isGlobalPending = useGlobalPublicStore(s => s.isGlobalPending)
const setWebAppAccessMode = useGlobalPublicStore(s => s.setWebAppAccessMode) const setWebAppAccessMode = useGlobalPublicStore(s => s.setWebAppAccessMode)
const systemFeatures = useGlobalPublicStore(s => s.systemFeatures)
const pathname = usePathname() const pathname = usePathname()
const searchParams = useSearchParams() const searchParams = useSearchParams()
const redirectUrl = searchParams.get('redirect_url') const redirectUrl = searchParams.get('redirect_url')
const [isLoading, setIsLoading] = useState(true) const [isLoading, setIsLoading] = useState(true)
useEffect(() => { useEffect(() => {
(async () => { (async () => {
if (!systemFeatures.webapp_auth.enabled) {
setIsLoading(false)
return
}
let appCode: string | null = null let appCode: string | null = null
if (redirectUrl) if (redirectUrl)
appCode = redirectUrl?.split('/').pop() || null appCode = redirectUrl?.split('/').pop() || null

@ -14,7 +14,7 @@ const MarkdownButton = ({ node }: any) => {
size={size} size={size}
className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')} className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')}
onClick={() => { onClick={() => {
if (isValidUrl(link)) { if (link && isValidUrl(link)) {
window.open(link, '_blank') window.open(link, '_blank')
return return
} }

@ -1,6 +1,6 @@
{ {
"name": "dify-web", "name": "dify-web",
"version": "1.4.1", "version": "1.4.2",
"private": true, "private": true,
"engines": { "engines": {
"node": ">=v22.11.0" "node": ">=v22.11.0"

Loading…
Cancel
Save