Feat: add DeepSeek support for Tongyi (#13445)

1 year ago · 75113c26c6
parent 939a9ecd21
commit 75113c26c6
6 changed files with 128 additions and 3 deletions
--- a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
@@ -1,3 +1,7 @@
 - deepseek-r1
 - deepseek-r1-distill-qwen-14b
 - deepseek-r1-distill-qwen-32b
 - deepseek-v3
 - qwen-vl-max-0809
 - qwen-vl-max-0201
 - qwen-vl-max
--- a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml
@@ -0,0 +1,21 @@
 model: deepseek-r1-distill-qwen-14b  # lowercase id, spelled like the _position.yaml entry (the file name uses "14B")
 label:
  zh_Hans: DeepSeek-R1-Distill-Qwen-14B
  en_US: DeepSeek-R1-Distill-Qwen-14B
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 32000  # presumably counted in tokens; confirm against the provider's model card
 parameter_rules:
  - name: max_tokens
    use_template: max_tokens  # assumed: the keys below override the shared template's values; verify in the loader
    min: 1
    max: 8192
    default: 4096
 pricing:
  input: "0.001"
  output: "0.003"
  unit: "0.001"  # NOTE(review): looks like a per-token multiplier (prices quoted per 1K tokens); confirm
  currency: RMB
--- a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml
@@ -0,0 +1,21 @@
 model: deepseek-r1-distill-qwen-32b  # lowercase id, spelled like the _position.yaml entry (the file name uses "32B")
 label:
  zh_Hans: DeepSeek-R1-Distill-Qwen-32B
  en_US: DeepSeek-R1-Distill-Qwen-32B
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 32000  # presumably counted in tokens; confirm against the provider's model card
 parameter_rules:
  - name: max_tokens
    use_template: max_tokens  # assumed: the keys below override the shared template's values; verify in the loader
    min: 1
    max: 8192
    default: 4096
 pricing:
  input: "0.002"
  output: "0.006"
  unit: "0.001"  # NOTE(review): looks like a per-token multiplier (prices quoted per 1K tokens); confirm
  currency: RMB
--- a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml
@@ -0,0 +1,21 @@
 model: deepseek-r1  # same spelling as the _position.yaml entry
 label:
  zh_Hans: DeepSeek-R1
  en_US: DeepSeek-R1
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 64000  # presumably counted in tokens; confirm against the provider's model card
 parameter_rules:
  - name: max_tokens
    use_template: max_tokens  # assumed: the keys below override the shared template's values; verify in the loader
    min: 1
    max: 8192
    default: 4096
 pricing:
  input: "0.004"
  output: "0.016"
  unit: "0.001"  # double-quoted like the sibling model files; NOTE(review): looks like a per-1K-token multiplier, confirm
  currency: RMB
--- a/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml
@@ -0,0 +1,52 @@
 model: deepseek-v3  # same spelling as the _position.yaml entry
 label:
  zh_Hans: DeepSeek-V3
  en_US: DeepSeek-V3
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 64000  # presumably counted in tokens; confirm against the provider's model card
 parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens  # assumed: the keys below override the shared template's values; verify in the loader
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: top_k  # no use_template on this rule, so label/type/help are spelled out inline
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: response_format  # no use_template on this rule either; fully declared inline
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:  # the choices listed for this parameter
      - text
      - json_object
 pricing:
  input: "0.002"
  output: "0.008"
  unit: "0.001"  # NOTE(review): looks like a per-token multiplier (prices quoted per 1K tokens); confirm
  currency: RMB
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        else:
            # nothing different between chat model and completion model in tongyi
            params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            response = Generation.call(**params, result_format="message", stream=stream)
+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
        if stream:
            return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
        """
        full_text = ""
        tool_calls = []
        is_reasoning_started = False
        # for index, response in enumerate(responses):
        index = 0
        for index, response in enumerate(responses):
            if response.status_code not in {200, HTTPStatus.OK}:
                raise ServiceUnavailableError(
@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
                    ),
                )
            else:
-                resp_content = response.output.choices[0].message.content
+                message = response.output.choices[0].message
                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
                    message, is_reasoning_started
                )
                if not resp_content:
                    if "tool_calls" in response.output.choices[0].message:
                        tool_calls = response.output.choices[0].message["tool_calls"]