Merge branch 'langgenius:main' into add-turbo-pack

pull/20696/head
GuanMu 11 months ago committed by GitHub
commit 61b606836c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
default="1.4.1",
default="1.4.2",
)
COMMIT_SHA: str = Field(

@ -29,7 +29,7 @@ from core.plugin.entities.request import (
RequestRequestUploadFile,
)
from core.tools.entities.tool_entities import ToolProviderType
from libs.helper import compact_generate_response
from libs.helper import length_prefixed_response
from models.account import Account, Tenant
from models.model import EndUser
@ -44,7 +44,7 @@ class PluginInvokeLLMApi(Resource):
response = PluginModelBackwardsInvocation.invoke_llm(user_model.id, tenant_model, payload)
return PluginModelBackwardsInvocation.convert_to_event_stream(response)
return compact_generate_response(generator())
return length_prefixed_response(0xF, generator())
class PluginInvokeTextEmbeddingApi(Resource):
@ -101,7 +101,7 @@ class PluginInvokeTTSApi(Resource):
)
return PluginModelBackwardsInvocation.convert_to_event_stream(response)
return compact_generate_response(generator())
return length_prefixed_response(0xF, generator())
class PluginInvokeSpeech2TextApi(Resource):
@ -162,7 +162,7 @@ class PluginInvokeToolApi(Resource):
),
)
return compact_generate_response(generator())
return length_prefixed_response(0xF, generator())
class PluginInvokeParameterExtractorNodeApi(Resource):
@ -228,7 +228,7 @@ class PluginInvokeAppApi(Resource):
files=payload.files,
)
return compact_generate_response(PluginAppBackwardsInvocation.convert_to_event_stream(response))
return length_prefixed_response(0xF, PluginAppBackwardsInvocation.convert_to_event_stream(response))
class PluginInvokeEncryptApi(Resource):

@ -1,3 +1,4 @@
import logging
import time
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union
@ -33,6 +34,8 @@ from models.model import App, AppMode, Message, MessageAnnotation
if TYPE_CHECKING:
from core.file.models import File
_logger = logging.getLogger(__name__)
class AppRunner:
def get_pre_calculate_rest_tokens(
@ -298,7 +301,7 @@ class AppRunner:
)
def _handle_invoke_result_stream(
self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
) -> None:
"""
Handle invoke result
@ -317,18 +320,28 @@ class AppRunner:
else:
queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)
text += result.delta.message.content
message = result.delta.message
if isinstance(message.content, str):
text += message.content
elif isinstance(message.content, list):
for content in message.content:
if not isinstance(content, str):
# TODO(QuantumGhost): Add multimodal output support for easy ui.
_logger.warning("received multimodal output, type=%s", type(content))
text += content.data
else:
text += content # failback to str
if not model:
model = result.model
if not prompt_messages:
prompt_messages = result.prompt_messages
prompt_messages = list(result.prompt_messages)
if result.delta.usage:
usage = result.delta.usage
if not usage:
if usage is None:
usage = LLMUsage.empty_usage()
llm_result = LLMResult(

@ -48,6 +48,7 @@ from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
TextPromptMessageContent,
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.ops.entities.trace_entity import TraceTaskName
@ -309,6 +310,23 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
delta_text = chunk.delta.message.content
if delta_text is None:
continue
if isinstance(chunk.delta.message.content, list):
delta_text = ""
for content in chunk.delta.message.content:
logger.debug(
"The content type %s in LLM chunk delta message content.: %r", type(content), content
)
if isinstance(content, TextPromptMessageContent):
delta_text += content.data
elif isinstance(content, str):
delta_text += content # failback to str
else:
logger.warning(
"Unsupported content type %s in LLM chunk delta message content.: %r",
type(content),
content,
)
continue
if not self._task_state.llm_result.prompt_messages:
self._task_state.llm_result.prompt_messages = chunk.prompt_messages

@ -11,14 +11,12 @@ class BaseBackwardsInvocation:
try:
for chunk in response:
if isinstance(chunk, BaseModel | dict):
yield BaseBackwardsInvocationResponse(data=chunk).model_dump_json().encode() + b"\n\n"
elif isinstance(chunk, str):
yield f"event: {chunk}\n\n".encode()
yield BaseBackwardsInvocationResponse(data=chunk).model_dump_json().encode()
except Exception as e:
error_message = BaseBackwardsInvocationResponse(error=str(e)).model_dump_json()
yield f"{error_message}\n\n".encode()
yield error_message.encode()
else:
yield BaseBackwardsInvocationResponse(data=response).model_dump_json().encode() + b"\n\n"
yield BaseBackwardsInvocationResponse(data=response).model_dump_json().encode()
T = TypeVar("T", bound=dict | Mapping | str | bool | int | BaseModel)

@ -6,7 +6,7 @@ import json
import logging
from typing import Optional, Union
from sqlalchemy import select
from sqlalchemy import func, select
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker
@ -151,11 +151,11 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository):
existing = session.scalar(select(WorkflowRun).where(WorkflowRun.id == domain_model.id_))
if not existing:
# For new records, get the next sequence number
stmt = select(WorkflowRun.sequence_number).where(
stmt = select(func.max(WorkflowRun.sequence_number)).where(
WorkflowRun.app_id == self._app_id,
WorkflowRun.tenant_id == self._tenant_id,
)
max_sequence = session.scalar(stmt.order_by(WorkflowRun.sequence_number.desc()))
max_sequence = session.scalar(stmt)
db_model.sequence_number = (max_sequence or 0) + 1
else:
# For updates, keep the existing sequence number

@ -525,6 +525,8 @@ class LLMNode(BaseNode[LLMNodeData]):
# Set appropriate response format based on model capabilities
self._set_response_format(completion_params, model_schema.parameter_rules)
model_config_with_cred.parameters = completion_params
# NOTE(-LAN-): This line modify the `self.node_data.model`, which is used in `_invoke_llm()`.
node_data_model.completion_params = completion_params
return model, model_config_with_cred
def _fetch_prompt_messages(

@ -3,6 +3,7 @@ import logging
import re
import secrets
import string
import struct
import subprocess
import time
import uuid
@ -14,6 +15,7 @@ from zoneinfo import available_timezones
from flask import Response, stream_with_context
from flask_restful import fields
from pydantic import BaseModel
from configs import dify_config
from core.app.features.rate_limiting.rate_limit import RateLimitGenerator
@ -183,7 +185,7 @@ def generate_string(n):
def extract_remote_ip(request) -> str:
if request.headers.get("CF-Connecting-IP"):
return cast(str, request.headers.get("Cf-Connecting-Ip"))
return cast(str, request.headers.get("CF-Connecting-IP"))
elif request.headers.getlist("X-Forwarded-For"):
return cast(str, request.headers.getlist("X-Forwarded-For")[0])
else:
@ -206,6 +208,60 @@ def compact_generate_response(response: Union[Mapping, Generator, RateLimitGener
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
def length_prefixed_response(magic_number: int, response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
"""
This function is used to return a response with a length prefix.
Magic number is a one byte number that indicates the type of the response.
For a compatibility with latest plugin daemon https://github.com/langgenius/dify-plugin-daemon/pull/341
Avoid using line-based response, it leads a memory issue.
We uses following format:
| Field | Size | Description |
|---------------|----------|---------------------------------|
| Magic Number | 1 byte | Magic number identifier |
| Reserved | 1 byte | Reserved field |
| Header Length | 2 bytes | Header length (usually 0xa) |
| Data Length | 4 bytes | Length of the data |
| Reserved | 6 bytes | Reserved fields |
| Data | Variable | Actual data content |
| Reserved Fields | Header | Data |
|-----------------|----------|----------|
| 4 bytes total | Variable | Variable |
all data is in little endian
"""
def pack_response_with_length_prefix(response: bytes) -> bytes:
header_length = 0xA
data_length = len(response)
# | Magic Number 1byte | Reserved 1byte | Header Length 2bytes | Data Length 4bytes | Reserved 6bytes | Data
return struct.pack("<BBHI", magic_number, 0, header_length, data_length) + b"\x00" * 6 + response
if isinstance(response, dict):
return Response(
response=pack_response_with_length_prefix(json.dumps(jsonable_encoder(response)).encode("utf-8")),
status=200,
mimetype="application/json",
)
elif isinstance(response, BaseModel):
return Response(
response=pack_response_with_length_prefix(response.model_dump_json().encode("utf-8")),
status=200,
mimetype="application/json",
)
def generate() -> Generator:
for chunk in response:
if isinstance(chunk, str):
yield pack_response_with_length_prefix(chunk.encode("utf-8"))
else:
yield pack_response_with_length_prefix(chunk)
return Response(stream_with_context(generate()), status=200, mimetype="text/event-stream")
class TokenManager:
@classmethod
def generate_token(

@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
services:
# API service
api:
image: langgenius/dify-api:1.4.1
image: langgenius/dify-api:1.4.2
restart: always
environment:
# Use the shared environment variables.
@ -31,7 +31,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:1.4.1
image: langgenius/dify-api:1.4.2
restart: always
environment:
# Use the shared environment variables.
@ -57,7 +57,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:1.4.1
image: langgenius/dify-web:1.4.2
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@ -142,7 +142,7 @@ services:
# plugin daemon
plugin_daemon:
image: langgenius/dify-plugin-daemon:0.1.1-local
image: langgenius/dify-plugin-daemon:0.1.2-local
restart: always
environment:
# Use the shared environment variables.

@ -71,7 +71,7 @@ services:
# plugin daemon
plugin_daemon:
image: langgenius/dify-plugin-daemon:0.1.1-local
image: langgenius/dify-plugin-daemon:0.1.2-local
restart: always
env_file:
- ./middleware.env

@ -508,7 +508,7 @@ x-shared-env: &shared-api-worker-env
services:
# API service
api:
image: langgenius/dify-api:1.4.1
image: langgenius/dify-api:1.4.2
restart: always
environment:
# Use the shared environment variables.
@ -537,7 +537,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:1.4.1
image: langgenius/dify-api:1.4.2
restart: always
environment:
# Use the shared environment variables.
@ -563,7 +563,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:1.4.1
image: langgenius/dify-web:1.4.2
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@ -648,7 +648,7 @@ services:
# plugin daemon
plugin_daemon:
image: langgenius/dify-plugin-daemon:0.1.1-local
image: langgenius/dify-plugin-daemon:0.1.2-local
restart: always
environment:
# Use the shared environment variables.

@ -12,12 +12,18 @@ const Layout: FC<{
}> = ({ children }) => {
const isGlobalPending = useGlobalPublicStore(s => s.isGlobalPending)
const setWebAppAccessMode = useGlobalPublicStore(s => s.setWebAppAccessMode)
const systemFeatures = useGlobalPublicStore(s => s.systemFeatures)
const pathname = usePathname()
const searchParams = useSearchParams()
const redirectUrl = searchParams.get('redirect_url')
const [isLoading, setIsLoading] = useState(true)
useEffect(() => {
(async () => {
if (!systemFeatures.webapp_auth.enabled) {
setIsLoading(false)
return
}
let appCode: string | null = null
if (redirectUrl)
appCode = redirectUrl?.split('/').pop() || null

@ -14,7 +14,7 @@ const MarkdownButton = ({ node }: any) => {
size={size}
className={cn('!h-auto min-h-8 select-none whitespace-normal !px-3')}
onClick={() => {
if (isValidUrl(link)) {
if (link && isValidUrl(link)) {
window.open(link, '_blank')
return
}

@ -1,6 +1,6 @@
{
"name": "dify-web",
"version": "1.4.1",
"version": "1.4.2",
"private": true,
"engines": {
"node": ">=v22.11.0"

Loading…
Cancel
Save