diff --git a/api/.env.example b/api/.env.example index 6f1081a65a..5be5e28e38 100644 --- a/api/.env.example +++ b/api/.env.example @@ -19,9 +19,10 @@ FILES_URL=http://127.0.0.1:5001 # APO Config APO_BACKEND_URL=http://127.0.0.1:8080 -APO_VM_URL=http://127.0.0.1:8080 -WORKFLOW_DIR=./workflows +APO_VM_URL=http://127.0.0.1:8428 +WORKFLOW_DIR=./init_data/workflows INITIAL_LANGUAGE=en-US +OFFLINE_MODE=false # The time in seconds after the signature is rejected FILES_ACCESS_TIMEOUT=300 diff --git a/api/configs/apo/__init__.py b/api/configs/apo/__init__.py index 3398840850..e928716452 100644 --- a/api/configs/apo/__init__.py +++ b/api/configs/apo/__init__.py @@ -21,5 +21,9 @@ class APOConfig(BaseSettings): ) WORKFLOW_DIR: str = Field( description="Directory of workflows yaml file.", - default="./workflows" + default="./init_data/workflows" + ) + OFFLINE_MODE: bool = Field( + description="Offline mode", + default=False ) \ No newline at end of file diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 68f3c65a4b..67723ca592 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -2,6 +2,12 @@ set -e +if [[ "${MODE}" == "copy" ]]; then + mkdir -p /app/storage + cp -rf /app/api/init_data/plugins/storage/* /app/storage/ + exit 0 +fi + if [[ "${MIGRATION_ENABLED}" == "true" ]]; then echo "Running migrations" flask upgrade-db diff --git a/api/init_data/plugins/packages/langgenius-agent_0.0.11.difypkg b/api/init_data/plugins/packages/langgenius-agent_0.0.11.difypkg new file mode 100644 index 0000000000..eb7ac5f66e Binary files /dev/null and b/api/init_data/plugins/packages/langgenius-agent_0.0.11.difypkg differ diff --git a/api/init_data/plugins/packages/langgenius-deepseek_0.0.5.difypkg b/api/init_data/plugins/packages/langgenius-deepseek_0.0.5.difypkg new file mode 100644 index 0000000000..cbcbc0b37d Binary files /dev/null and b/api/init_data/plugins/packages/langgenius-deepseek_0.0.5.difypkg differ diff --git 
a/api/init_data/plugins/packages/langgenius-ollama_0.0.3.difypkg b/api/init_data/plugins/packages/langgenius-ollama_0.0.3.difypkg new file mode 100644 index 0000000000..4a6554f134 Binary files /dev/null and b/api/init_data/plugins/packages/langgenius-ollama_0.0.3.difypkg differ diff --git a/api/init_data/plugins/packages/langgenius-openai_api_compatible_0.0.11.difypkg b/api/init_data/plugins/packages/langgenius-openai_api_compatible_0.0.11.difypkg new file mode 100644 index 0000000000..151db5c4a6 Binary files /dev/null and b/api/init_data/plugins/packages/langgenius-openai_api_compatible_0.0.11.difypkg differ diff --git a/api/init_data/plugins/packages/langgenius-siliconflow_0.0.8.difypkg b/api/init_data/plugins/packages/langgenius-siliconflow_0.0.8.difypkg new file mode 100644 index 0000000000..4a5441cd85 Binary files /dev/null and b/api/init_data/plugins/packages/langgenius-siliconflow_0.0.8.difypkg differ diff --git a/api/init_data/plugins/packages/langgenius-tongyi_0.0.14.difypkg b/api/init_data/plugins/packages/langgenius-tongyi_0.0.14.difypkg new file mode 100644 index 0000000000..8532a7140e Binary files /dev/null and b/api/init_data/plugins/packages/langgenius-tongyi_0.0.14.difypkg differ diff --git a/api/init_data/plugins/storage/assets/0458e787b7706a3743846cc695088866360b67a5a7d6020065f8c7e5ac091bc9.png b/api/init_data/plugins/storage/assets/0458e787b7706a3743846cc695088866360b67a5a7d6020065f8c7e5ac091bc9.png new file mode 100644 index 0000000000..bd8f2762d1 Binary files /dev/null and b/api/init_data/plugins/storage/assets/0458e787b7706a3743846cc695088866360b67a5a7d6020065f8c7e5ac091bc9.png differ diff --git a/api/init_data/plugins/storage/assets/0dbcd12864243bfcd8c0c1898a3e0c37ea553c21a727800b0863a599a9660729.svg b/api/init_data/plugins/storage/assets/0dbcd12864243bfcd8c0c1898a3e0c37ea553c21a727800b0863a599a9660729.svg new file mode 100644 index 0000000000..ad6b384f7a --- /dev/null +++ 
b/api/init_data/plugins/storage/assets/0dbcd12864243bfcd8c0c1898a3e0c37ea553c21a727800b0863a599a9660729.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/api/init_data/plugins/storage/assets/450d471ec223ca027aa3de206d1aafcd67cd3fcefcbdc9c28eafac21513a9e4c.png b/api/init_data/plugins/storage/assets/450d471ec223ca027aa3de206d1aafcd67cd3fcefcbdc9c28eafac21513a9e4c.png new file mode 100644 index 0000000000..94de01136a Binary files /dev/null and b/api/init_data/plugins/storage/assets/450d471ec223ca027aa3de206d1aafcd67cd3fcefcbdc9c28eafac21513a9e4c.png differ diff --git a/api/init_data/plugins/storage/assets/49323fc47c82d3bf400280689f9ffa17708cfe465c7e1f3bbfb24f2a42ec0e2c.svg b/api/init_data/plugins/storage/assets/49323fc47c82d3bf400280689f9ffa17708cfe465c7e1f3bbfb24f2a42ec0e2c.svg new file mode 100644 index 0000000000..425494404f --- /dev/null +++ b/api/init_data/plugins/storage/assets/49323fc47c82d3bf400280689f9ffa17708cfe465c7e1f3bbfb24f2a42ec0e2c.svg @@ -0,0 +1,22 @@ + + + Created with Pixso. 
+ + + + + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/assets/69f80e7dfe36e036321767f69cad4755fe162cf581b206a8290dce0629d2e4f6.svg b/api/init_data/plugins/storage/assets/69f80e7dfe36e036321767f69cad4755fe162cf581b206a8290dce0629d2e4f6.svg new file mode 100644 index 0000000000..aa854a7504 --- /dev/null +++ b/api/init_data/plugins/storage/assets/69f80e7dfe36e036321767f69cad4755fe162cf581b206a8290dce0629d2e4f6.svg @@ -0,0 +1,3 @@ + + + diff --git a/api/init_data/plugins/storage/assets/758825b9b095f55a1e391b138694e0d3c1cb07fd5eef27d7e5915aa7e2718a97.svg b/api/init_data/plugins/storage/assets/758825b9b095f55a1e391b138694e0d3c1cb07fd5eef27d7e5915aa7e2718a97.svg new file mode 100644 index 0000000000..39d8a1ece6 --- /dev/null +++ b/api/init_data/plugins/storage/assets/758825b9b095f55a1e391b138694e0d3c1cb07fd5eef27d7e5915aa7e2718a97.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/assets/7d40d629e02c01404af94652a8684f9aaab0da105182fc16fafe0da4e183dd9e.png b/api/init_data/plugins/storage/assets/7d40d629e02c01404af94652a8684f9aaab0da105182fc16fafe0da4e183dd9e.png new file mode 100644 index 0000000000..c1aff40ee0 Binary files /dev/null and b/api/init_data/plugins/storage/assets/7d40d629e02c01404af94652a8684f9aaab0da105182fc16fafe0da4e183dd9e.png differ diff --git a/api/init_data/plugins/storage/assets/87afd76dc06bfd0ef344496e0a061e195bbfbcaf604606634fac00d5d2b83caf.svg b/api/init_data/plugins/storage/assets/87afd76dc06bfd0ef344496e0a061e195bbfbcaf604606634fac00d5d2b83caf.svg new file mode 100644 index 0000000000..16e406f030 --- /dev/null +++ b/api/init_data/plugins/storage/assets/87afd76dc06bfd0ef344496e0a061e195bbfbcaf604606634fac00d5d2b83caf.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/api/init_data/plugins/storage/assets/bfff83a66922c09cb5a5aa68829742b8b4f4e818579db42f53c7b8a30912cd8b.svg 
b/api/init_data/plugins/storage/assets/bfff83a66922c09cb5a5aa68829742b8b4f4e818579db42f53c7b8a30912cd8b.svg new file mode 100644 index 0000000000..f8482a96b9 --- /dev/null +++ b/api/init_data/plugins/storage/assets/bfff83a66922c09cb5a5aa68829742b8b4f4e818579db42f53c7b8a30912cd8b.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/assets/e74e644589f5d78cd6019be7b92050c2b54b2645139af705fe610649a73282cf.svg b/api/init_data/plugins/storage/assets/e74e644589f5d78cd6019be7b92050c2b54b2645139af705fe610649a73282cf.svg new file mode 100644 index 0000000000..c0d23316fe --- /dev/null +++ b/api/init_data/plugins/storage/assets/e74e644589f5d78cd6019be7b92050c2b54b2645139af705fe610649a73282cf.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/assets/fb1d7c05088e34ecabd0e087ebcfd929be9aad785b5e21e0577c2aa82acf7ae0.svg b/api/init_data/plugins/storage/assets/fb1d7c05088e34ecabd0e087ebcfd929be9aad785b5e21e0577c2aa82acf7ae0.svg new file mode 100644 index 0000000000..ce65af405f --- /dev/null +++ b/api/init_data/plugins/storage/assets/fb1d7c05088e34ecabd0e087ebcfd929be9aad785b5e21e0577c2aa82acf7ae0.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/.env.example b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/.env.example new file mode 100644 index 0000000000..2fa2ad9bf6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/.env.example @@ -0,0 +1,4 @@ +INSTALL_METHOD=remote +REMOTE_INSTALL_HOST=debug-plugin.dify.dev +REMOTE_INSTALL_PORT=5003 +REMOTE_INSTALL_KEY=ae1aa1c9-0af4-43db-b6d4-4fa9e6bfb646 diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/README.md b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/README.md new file mode 100644 index 0000000000..4e9b49148f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/README.md @@ -0,0 +1,24 @@ +# Overview +The Agent node in Dify Chatflow/Workflow lets LLMs autonomously use tools. This plugin features two official Dify Agent reasoning strategies, enabling LLMs to dynamically select and run tools during runtime for multi-step problem-solving. + +## Strategies + +### 1. Function Calling +Function Calling maps user commands to specific functions or tools. The LLM identifies the user's intent, decides which function to call, and extracts the required parameters. It is a straightforward mechanism for invoking external capabilities. + +![](./_assets/function_calling.png) + +#### Pros: +- **Precise:** Directly calls the right tool for defined tasks, avoiding complex reasoning. +- **Easy External Integration:** Integrates external APIs and tools as callable functions. +- **Structured Output:** Provides structured function call information for easy processing. + +### 2. ReAct (Reason + Act) +ReAct alternates between the LLM reasoning about the situation and taking actions. The LLM analyzes the current state and goal, selects and uses a tool, and then uses the tool's output for the next thought and action. This cycle repeats until the problem is resolved. + +![](./_assets/react.png) + +#### Pros: +- **Leverages External Information:** Effectively uses external tools to gather information for tasks the model cannot handle alone. +- **Explainable Reasoning:** Interwoven reasoning and action steps allow some tracking of the Agent's process. 
+- **Wide Applicability:** Suitable for tasks requiring external knowledge or specific actions, such as Q&A, information retrieval, and task execution. \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/function_calling.png b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/function_calling.png new file mode 100644 index 0000000000..f0ce6b0c41 Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/function_calling.png differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/icon.svg b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/icon.svg new file mode 100644 index 0000000000..c0d23316fe --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/icon.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/react.png b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/react.png new file mode 100644 index 0000000000..271d6b82f7 Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/_assets/react.png differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/main.py 
b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/main.py new file mode 100644 index 0000000000..08eb1bd963 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/main.py @@ -0,0 +1,10 @@ +import sys + +sys.path.append("../..") + +from dify_plugin import DifyPluginEnv, Plugin + +plugin = Plugin(DifyPluginEnv(MAX_REQUEST_TIMEOUT=240)) + +if __name__ == "__main__": + plugin.run() diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/manifest.yaml b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/manifest.yaml new file mode 100644 index 0000000000..d330737af3 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/manifest.yaml @@ -0,0 +1,34 @@ +version: 0.0.11 +type: plugin +author: "langgenius" +name: "agent" +label: + en_US: "Dify Agent Strategies" + zh_Hans: "Dify Agent 策略" +created_at: "2024-07-12T08:03:44.658609186Z" +icon: icon.svg +description: + en_US: Dify official Agent strategies collection + zh_Hans: Dify 官方 Agent 策略集合 +tags: + - "agent" +resource: + memory: 1048576 + permission: + tool: + enabled: true + model: + enabled: true + llm: true +plugins: + agent_strategies: + - "provider/agent.yaml" +meta: + version: 0.0.1 + arch: + - "amd64" + - "arm64" + runner: + language: "python" + version: "3.12" + entrypoint: "main" diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/output_parser/cot_output_parser.py b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/output_parser/cot_output_parser.py new file mode 
class CotAgentOutputParser:
    """Incremental parser for ReAct-style ("Thought / Action") LLM output."""

    @classmethod
    def handle_react_stream_output(
        cls, llm_response: Generator[LLMResultChunk, None, None], usage_dict: dict
    ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
        """Split a streamed LLM response into text fragments and parsed actions.

        The stream is scanned one character at a time. Plain text is yielded
        as ``str`` fragments. JSON found inside a triple-backtick code block,
        or as a bare ``{...}`` object outside one, is handed to
        ``parse_action`` and yielded either as an
        ``AgentScratchpadUnit.Action`` or, when it is not a usable action,
        as the raw JSON text. A complete ``action:`` / ``thought:`` label
        (case-insensitive) that starts at the beginning of the stream or
        right after a space/newline is dropped from the output.

        Args:
            llm_response: Stream of LLM result chunks. Chunks whose message
                content is not a ``str`` are skipped.
            usage_dict: Mutated in place; ``usage_dict["usage"]`` is set to
                the most recent truthy ``response.delta.usage`` seen.

        Yields:
            ``str`` fragments of free text, or ``AgentScratchpadUnit.Action``
            objects for recognised action payloads.
        """

        def parse_action(json_str):
            """Turn a JSON blob into an Action, or return the text unchanged.

            Any key containing ``"input"`` (case-insensitive) supplies the
            action input; any other key supplies the action name. If the
            payload cannot be decoded or does not yield both values, the
            original text (or ``""``) is returned so nothing is lost.
            """
            try:
                action = json.loads(json_str, strict=False)
                action_name = None
                action_input = None

                # cohere always returns a list
                if isinstance(action, list) and len(action) == 1:
                    action = action[0]

                # Anything that is not a JSON object cannot describe an
                # action; fall back to the raw text explicitly instead of
                # relying on an AttributeError from ``.items()``.
                if not isinstance(action, dict):
                    return json_str or ""

                for key, value in action.items():
                    if "input" in key.lower():
                        action_input = value
                    else:
                        action_name = value

                if action_name is not None and action_input is not None:
                    return AgentScratchpadUnit.Action(
                        action_name=action_name,
                        action_input=action_input,
                    )
                return json_str or ""
            except Exception:
                # Best-effort parsing: malformed JSON or an invalid Action is
                # passed through as text. ``Exception`` (not a bare except)
                # so KeyboardInterrupt / SystemExit still propagate.
                return json_str or ""

        def extra_json_from_code_block(
            code_block,
        ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
            """Yield one parsed action per fenced block found in ``code_block``."""
            code_blocks = re.findall(r"```(.*?)```", code_block, re.DOTALL)
            if not code_blocks:
                return
            for block in code_blocks:
                # Strip a leading language tag line such as "json\n".
                json_text = re.sub(
                    r"^[a-zA-Z]+\n", "", block.strip(), flags=re.MULTILINE
                )
                yield parse_action(json_text)

        # --- fenced code block state ---
        code_block_cache = ""
        code_block_delimiter_count = 0
        in_code_block = False
        # --- bare JSON object state ---
        json_cache = ""
        json_quote_count = 0  # current "{" nesting depth
        in_json = False
        got_json = False

        # --- "action:" label matcher ---
        action_cache = ""
        action_str = "action:"
        action_idx = 0

        # --- "thought:" label matcher ---
        thought_cache = ""
        thought_str = "thought:"
        thought_idx = 0

        last_character = ""

        for response in llm_response:
            if response.delta.usage:
                usage_dict["usage"] = response.delta.usage
            response_content = response.delta.message.content
            if not isinstance(response_content, str):
                continue

            # stream
            index = 0
            while index < len(response_content):
                steps = 1
                delta = response_content[index : index + steps]
                yield_delta = False

                if delta == "`":
                    last_character = delta
                    code_block_cache += delta
                    code_block_delimiter_count += 1
                else:
                    if not in_code_block:
                        if code_block_delimiter_count > 0:
                            # Fewer than three backticks followed by other
                            # text: not a fence, emit them as-is.
                            last_character = delta
                            yield code_block_cache
                        code_block_cache = ""
                    else:
                        last_character = delta
                        code_block_cache += delta
                    code_block_delimiter_count = 0

                if not in_code_block and not in_json:
                    if delta.lower() == action_str[action_idx] and action_idx == 0:
                        if last_character not in {"\n", " ", ""}:
                            # Mid-word "a": ordinary text, not a label start.
                            yield_delta = True
                        else:
                            last_character = delta
                            action_cache += delta
                            action_idx += 1
                            if action_idx == len(action_str):
                                action_cache = ""
                                action_idx = 0
                            index += steps
                            continue
                    elif delta.lower() == action_str[action_idx] and action_idx > 0:
                        last_character = delta
                        action_cache += delta
                        action_idx += 1
                        if action_idx == len(action_str):
                            # Full label matched: discard it.
                            action_cache = ""
                            action_idx = 0
                        index += steps
                        continue
                    else:
                        if action_cache:
                            # Partial label match failed: flush what was held.
                            last_character = delta
                            yield action_cache
                            action_cache = ""
                            action_idx = 0

                    if delta.lower() == thought_str[thought_idx] and thought_idx == 0:
                        if last_character not in {"\n", " ", ""}:
                            yield_delta = True
                        else:
                            last_character = delta
                            thought_cache += delta
                            thought_idx += 1
                            if thought_idx == len(thought_str):
                                thought_cache = ""
                                thought_idx = 0
                            index += steps
                            continue
                    elif delta.lower() == thought_str[thought_idx] and thought_idx > 0:
                        last_character = delta
                        thought_cache += delta
                        thought_idx += 1
                        if thought_idx == len(thought_str):
                            thought_cache = ""
                            thought_idx = 0
                        index += steps
                        continue
                    else:
                        if thought_cache:
                            last_character = delta
                            yield thought_cache
                            thought_cache = ""
                            thought_idx = 0

                    if yield_delta:
                        index += steps
                        last_character = delta
                        yield delta
                        continue

                if code_block_delimiter_count == 3:
                    if in_code_block:
                        # Closing fence: parse everything collected so far.
                        last_character = delta
                        yield from extra_json_from_code_block(code_block_cache)
                        code_block_cache = ""

                    in_code_block = not in_code_block
                    code_block_delimiter_count = 0

                if not in_code_block:
                    # handle single json
                    if delta == "{":
                        json_quote_count += 1
                        in_json = True
                        last_character = delta
                        json_cache += delta
                    elif delta == "}":
                        last_character = delta
                        json_cache += delta
                        if json_quote_count > 0:
                            json_quote_count -= 1
                            if json_quote_count == 0:
                                # Outermost object closed; it is emitted on
                                # the next character (or after the loop).
                                in_json = False
                                got_json = True
                                index += steps
                                continue
                    else:
                        if in_json:
                            last_character = delta
                            json_cache += delta

                    if got_json:
                        got_json = False
                        last_character = delta
                        yield parse_action(json_cache)
                        json_cache = ""
                        json_quote_count = 0
                        in_json = False

                if not in_code_block and not in_json:
                    last_character = delta
                    yield delta.replace("`", "")

                index += steps

        # Flush whatever is still buffered when the stream ends.
        if code_block_cache:
            yield code_block_cache

        if json_cache:
            yield parse_action(json_cache)
+Valid "action" values: "Final Answer" or {{tool_names}} + +Provide only ONE action per $JSON_BLOB, as shown: + +``` +{ + "action": $TOOL_NAME, + "action_input": $ACTION_INPUT +} +``` + +Follow this format: + +Question: input question to answer +Thought: consider previous and subsequent steps +Action: +``` +$JSON_BLOB +``` +Observation: action result +... (repeat Thought/Action/Observation N times) +Thought: I know what to respond +Action: +``` +{ + "action": "Final Answer", + "action_input": "Final response to human" +} +``` + +Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:. +{{historic_messages}} +Question: {{query}} +{{agent_scratchpad}} +Thought:""" # noqa: E501 + + +ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES = """Observation: {{observation}} +Thought:""" + +ENGLISH_REACT_CHAT_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. + +{{instruction}} + +You have access to the following tools: + +{{tools}} + +Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). +Valid "action" values: "Final Answer" or {{tool_names}} + +Provide only ONE action per $JSON_BLOB, as shown: + +``` +{ + "action": $TOOL_NAME, + "action_input": $ACTION_INPUT +} +``` + +Follow this format: + +Question: input question to answer +Thought: consider previous and subsequent steps +Action: +``` +$JSON_BLOB +``` +Observation: action result +... (repeat Thought/Action/Observation N times) +Thought: I know what to respond +Action: +``` +{ + "action": "Final Answer", + "action_input": "Final response to human" +} +``` + +Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:. 
+""" # noqa: E501 + + +ENGLISH_REACT_CHAT_AGENT_SCRATCHPAD_TEMPLATES = "" + +REACT_PROMPT_TEMPLATES = { + "english": { + "chat": { + "prompt": ENGLISH_REACT_CHAT_PROMPT_TEMPLATES, + "agent_scratchpad": ENGLISH_REACT_CHAT_AGENT_SCRATCHPAD_TEMPLATES, + }, + "completion": { + "prompt": ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES, + "agent_scratchpad": ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES, + }, + } +} diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/provider/agent.py b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/provider/agent.py new file mode 100644 index 0000000000..a3394298b6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/provider/agent.py @@ -0,0 +1,5 @@ +from dify_plugin.interfaces.agent import AgentProvider + + +class LanggeniusAgentProvider(AgentProvider): + pass diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/provider/agent.yaml b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/provider/agent.yaml new file mode 100644 index 0000000000..7be6dbbbce --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/provider/agent.yaml @@ -0,0 +1,18 @@ +identity: + author: langgenius + name: agent + label: + en_US: Agent + zh_Hans: Agent + pt_BR: Agent + description: + en_US: Agent + zh_Hans: Agent + pt_BR: Agent + icon: icon.svg +strategies: + - strategies/function_calling.yaml + - strategies/ReAct.yaml +extra: + python: + source: provider/agent.py diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/requirements.txt b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/requirements.txt new file mode 100644 index 0000000000..d9ba35abcf --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/requirements.txt @@ -0,0 +1 @@ +dify_plugin==0.0.1b74 \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/ReAct.py b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/ReAct.py new file mode 100644 index 0000000000..f1a2b5feb9 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/ReAct.py @@ -0,0 +1,642 @@ +import json +import time +from collections.abc import Generator, Mapping +from typing import Any, Optional, cast + +from dify_plugin.entities.agent import AgentInvokeMessage +from dify_plugin.entities.model.llm import LLMModelConfig, LLMUsage +from dify_plugin.entities.model.message import ( + AssistantPromptMessage, + PromptMessage, + SystemPromptMessage, + ToolPromptMessage, + UserPromptMessage, +) +from dify_plugin.entities.tool import ( + LogMetadata, + ToolInvokeMessage, + ToolParameter, + ToolProviderType, +) +from dify_plugin.interfaces.agent import ( + AgentModelConfig, + AgentScratchpadUnit, + AgentStrategy, + ToolEntity, +) +from output_parser.cot_output_parser import CotAgentOutputParser +from prompt.template import REACT_PROMPT_TEMPLATES +from pydantic import BaseModel, Field + +ignore_observation_providers = ["wenxin"] + + +class ReActParams(BaseModel): + query: str + instruction: 
class ToolInvokeMeta(BaseModel):
    """
    Metadata describing the outcome of a single tool invocation.
    """

    # Duration of the tool call (required field).
    time_cost: float = Field(..., description="The time cost of the tool invoke")
    # Error text when the call failed, otherwise None.
    error: Optional[str] = None
    # Configuration the tool was invoked with, if recorded.
    tool_config: Optional[dict] = None

    @classmethod
    def empty(cls) -> "ToolInvokeMeta":
        """
        Build a meta object with zero time cost, no error and an empty config.
        """
        blank_meta = cls(time_cost=0.0, error=None, tool_config={})
        return blank_meta

    @classmethod
    def error_instance(cls, error: str) -> "ToolInvokeMeta":
        """
        Build a meta object with zero time cost that carries ``error``.
        """
        failed_meta = cls(time_cost=0.0, error=error, tool_config={})
        return failed_meta

    def to_dict(self) -> dict:
        """
        Return the three fields as a plain dict, in declaration order.
        """
        exported_fields = ("time_cost", "error", "tool_config")
        return {name: getattr(self, name) for name in exported_fields}
self._instruction = self._fill_in_inputs_from_external_data_tools( + instruction, inputs + ) + + iteration_step = 1 + max_iteration_steps = react_params.maximum_iterations + + # convert tools into ModelRuntime Tool format + prompt_messages_tools = self._init_prompt_tools(tools) + self._prompt_messages_tools = prompt_messages_tools + + run_agent_state = True + llm_usage: dict[str, Optional[LLMUsage]] = {"usage": None} + final_answer = "" + prompt_messages = [] + while run_agent_state and iteration_step <= max_iteration_steps: + # continue to run until there is not any tool call + run_agent_state = False + round_started_at = time.perf_counter() + round_log = self.create_log_message( + label=f"ROUND {iteration_step}", + data={}, + metadata={ + LogMetadata.STARTED_AT: round_started_at, + }, + status=ToolInvokeMessage.LogMessage.LogStatus.START, + ) + yield round_log + if iteration_step == max_iteration_steps: + # the last iteration, remove all tools + self._prompt_messages_tools = [] + + message_file_ids: list[str] = [] + + # recalc llm max tokens + prompt_messages = self._organize_prompt_messages(agent_scratchpad, query) + if model.completion_params: + self.recalc_llm_max_tokens( + model.entity, prompt_messages, model.completion_params + ) + # invoke model + chunks = self.session.model.llm.invoke( + model_config=LLMModelConfig(**model.model_dump(mode="json")), + prompt_messages=prompt_messages, + stream=True, + stop=stop, + ) + + usage_dict = {} + react_chunks = CotAgentOutputParser.handle_react_stream_output( + chunks, usage_dict + ) + scratchpad = AgentScratchpadUnit( + agent_response="", + thought="", + action_str="", + observation="", + action=None, + ) + + model_started_at = time.perf_counter() + model_log = self.create_log_message( + label=f"{model.model} Thought", + data={}, + metadata={ + LogMetadata.STARTED_AT: model_started_at, + LogMetadata.PROVIDER: model.provider, + }, + parent=round_log, + status=ToolInvokeMessage.LogMessage.LogStatus.START, + ) + yield 
model_log + + for chunk in react_chunks: + if isinstance(chunk, AgentScratchpadUnit.Action): + action = chunk + # detect action + assert scratchpad.agent_response is not None + scratchpad.agent_response += json.dumps(chunk.model_dump()) + + scratchpad.action_str = json.dumps(chunk.model_dump()) + scratchpad.action = action + else: + scratchpad.agent_response = scratchpad.agent_response or "" + scratchpad.thought = scratchpad.thought or "" + scratchpad.agent_response += chunk + scratchpad.thought += chunk + scratchpad.thought = ( + scratchpad.thought.strip() + if scratchpad.thought + else "I am thinking about how to help you" + ) + agent_scratchpad.append(scratchpad) + + # get llm usage + if "usage" in usage_dict: + if usage_dict["usage"] is not None: + self.increase_usage(llm_usage, usage_dict["usage"]) + else: + usage_dict["usage"] = LLMUsage.empty_usage() + + action = ( + scratchpad.action.to_dict() + if scratchpad.action + else {"action": scratchpad.agent_response} + ) + + yield self.finish_log_message( + log=model_log, + data={"thought": scratchpad.thought, **action}, + metadata={ + LogMetadata.STARTED_AT: model_started_at, + LogMetadata.FINISHED_AT: time.perf_counter(), + LogMetadata.ELAPSED_TIME: time.perf_counter() - model_started_at, + LogMetadata.PROVIDER: model.provider, + LogMetadata.TOTAL_PRICE: usage_dict["usage"].total_price + if usage_dict["usage"] + else 0, + LogMetadata.CURRENCY: usage_dict["usage"].currency + if usage_dict["usage"] + else "", + LogMetadata.TOTAL_TOKENS: usage_dict["usage"].total_tokens + if usage_dict["usage"] + else 0, + }, + ) + if not scratchpad.action: + final_answer = scratchpad.thought + else: + if scratchpad.action.action_name.lower() == "final answer": + # action is final answer, return final answer directly + try: + if isinstance(scratchpad.action.action_input, dict): + final_answer = json.dumps(scratchpad.action.action_input) + elif isinstance(scratchpad.action.action_input, str): + final_answer = 
scratchpad.action.action_input + else: + final_answer = f"{scratchpad.action.action_input}" + except json.JSONDecodeError: + final_answer = f"{scratchpad.action.action_input}" + else: + run_agent_state = True + # action is tool call, invoke tool + tool_call_started_at = time.perf_counter() + tool_name = scratchpad.action.action_name + tool_call_log = self.create_log_message( + label=f"CALL {tool_name}", + data={}, + metadata={ + LogMetadata.STARTED_AT: time.perf_counter(), + LogMetadata.PROVIDER: tool_instances[ + tool_name + ].identity.provider + if tool_instances.get(tool_name) + else "", + }, + parent=round_log, + status=ToolInvokeMessage.LogMessage.LogStatus.START, + ) + yield tool_call_log + tool_invoke_response, tool_invoke_parameters = ( + self._handle_invoke_action( + action=scratchpad.action, + tool_instances=tool_instances, + message_file_ids=message_file_ids, + ) + ) + scratchpad.observation = tool_invoke_response + scratchpad.agent_response = tool_invoke_response + yield self.finish_log_message( + log=tool_call_log, + data={ + "tool_name": tool_name, + "tool_call_args": tool_invoke_parameters, + "output": tool_invoke_response, + }, + metadata={ + LogMetadata.STARTED_AT: tool_call_started_at, + LogMetadata.PROVIDER: tool_instances[ + tool_name + ].identity.provider + if tool_instances.get(tool_name) + else "", + LogMetadata.FINISHED_AT: time.perf_counter(), + LogMetadata.ELAPSED_TIME: time.perf_counter() + - tool_call_started_at, + }, + ) + + # update prompt tool message + for prompt_tool in self._prompt_messages_tools: + self.update_prompt_message_tool( + tool_instances[prompt_tool.name], prompt_tool + ) + yield self.finish_log_message( + log=round_log, + data={ + "action_name": scratchpad.action.action_name + if scratchpad.action + else "", + "action_input": scratchpad.action.action_input + if scratchpad.action + else "", + "thought": scratchpad.thought, + "observation": scratchpad.observation, + }, + metadata={ + LogMetadata.STARTED_AT: 
def _organize_prompt_messages(
    self, agent_scratchpad: list, query: str
) -> list[PromptMessage]:
    """
    Assemble the prompt messages for the next model call.

    The result is always ``[system, *historic, *query]``. When the scratchpad
    is non-empty, a single assistant message replaying every scratchpad unit
    and a trailing ``"continue"`` user message are appended as well.

    :param agent_scratchpad: scratchpad units produced by earlier rounds
    :param query: the user query
    :return: prompt messages in the order they are sent to the model
    """
    system_message = self._organize_system_prompt()

    # Replay all previous rounds as ONE assistant message.
    assistant_messages = []
    if agent_scratchpad:
        transcript: list[str] = []
        for step in agent_scratchpad:
            if step.is_final():
                transcript.append(f"Final Answer: {step.agent_response}")
                continue
            transcript.append(f"Thought: {step.thought}\n\n")
            if step.action_str:
                transcript.append(f"Action: {step.action_str}\n\n")
            if step.observation:
                transcript.append(f"Observation: {step.observation}\n\n")
        assistant_messages.append(
            AssistantPromptMessage(content="".join(transcript))
        )

    query_messages = self._organize_user_query(query, [])

    # First round: nothing to replay, so no "continue" nudge either.
    if not assistant_messages:
        historic_messages = self._organize_historic_prompt_messages(
            [system_message, *query_messages]
        )
        return [system_message, *historic_messages, *query_messages]

    historic_messages = self._organize_historic_prompt_messages(
        [
            system_message,
            *query_messages,
            *assistant_messages,
            UserPromptMessage(content="continue"),
        ]
    )
    return [
        system_message,
        *historic_messages,
        *query_messages,
        *assistant_messages,
        UserPromptMessage(content="continue"),
    ]
file ids + :param trace_manager: trace manager + :return: observation, meta + """ + # action is tool call, invoke tool + tool_call_name = action.action_name + tool_call_args = action.action_input + tool_instance = tool_instances.get(tool_call_name) + + if not tool_instance: + answer = f"there is not a tool named {tool_call_name}" + return answer, tool_call_args + + if isinstance(tool_call_args, str): + try: + tool_call_args = json.loads(tool_call_args) + except json.JSONDecodeError as e: + params = [ + param.name + for param in tool_instance.parameters + if param.form == ToolParameter.ToolParameterForm.LLM + ] + if len(params) > 1: + raise ValueError("tool call args is not a valid json string") from e + tool_call_args = {params[0]: tool_call_args} if len(params) == 1 else {} + + tool_invoke_parameters = {**tool_instance.runtime_parameters, **tool_call_args} + try: + tool_invoke_responses = self.session.tool.invoke( + provider_type=ToolProviderType(tool_instance.provider_type), + provider=tool_instance.identity.provider, + tool_name=tool_instance.identity.name, + parameters=tool_invoke_parameters, + ) + result = "" + for response in tool_invoke_responses: + if response.type == ToolInvokeMessage.MessageType.TEXT: + result += cast(ToolInvokeMessage.TextMessage, response.message).text + elif response.type == ToolInvokeMessage.MessageType.LINK: + result += ( + f"result link: {cast(ToolInvokeMessage.TextMessage, response.message).text}." + + " please tell user to check it." + ) + elif response.type in { + ToolInvokeMessage.MessageType.IMAGE_LINK, + ToolInvokeMessage.MessageType.IMAGE, + }: + result += ( + "image has been created and sent to user already, " + + "you do not need to create it, just tell the user to check it now." + ) + elif response.type == ToolInvokeMessage.MessageType.JSON: + text = json.dumps( + cast( + ToolInvokeMessage.JsonMessage, response.message + ).json_object, + ensure_ascii=False, + ) + result += f"tool response: {text}." 
+ else: + result += f"tool response: {response.message!r}." + except Exception as e: + result = f"tool invoke error: {str(e)}" + + return result, tool_invoke_parameters + + def _convert_dict_to_action(self, action: dict) -> AgentScratchpadUnit.Action: + """ + convert dict to action + """ + return AgentScratchpadUnit.Action( + action_name=action["action"], action_input=action["action_input"] + ) + + def _fill_in_inputs_from_external_data_tools( + self, instruction: str, inputs: Mapping[str, Any] + ) -> str: + """ + fill in inputs from external data tools + """ + for key, value in inputs.items(): + try: + instruction = instruction.replace(f"{{{{{key}}}}}", str(value)) + except Exception: + continue + + return instruction + + def _format_assistant_message( + self, agent_scratchpad: list[AgentScratchpadUnit] + ) -> str: + """ + format assistant message + """ + message = "" + for scratchpad in agent_scratchpad: + if scratchpad.is_final(): + message += f"Final Answer: {scratchpad.agent_response}" + else: + message += f"Thought: {scratchpad.thought}\n\n" + if scratchpad.action_str: + message += f"Action: {scratchpad.action_str}\n\n" + if scratchpad.observation: + message += f"Observation: {scratchpad.observation}\n\n" + + return message + + def _organize_historic_prompt_messages( + self, + history_prompt_messages: list[PromptMessage], + current_session_messages: list[PromptMessage] | None = None, + ) -> list[PromptMessage]: + """ + organize historic prompt messages + """ + result: list[PromptMessage] = [] + scratchpads: list[AgentScratchpadUnit] = [] + current_scratchpad: AgentScratchpadUnit | None = None + + for message in history_prompt_messages: + if isinstance(message, AssistantPromptMessage): + if not current_scratchpad: + assert isinstance(message.content, str) + current_scratchpad = AgentScratchpadUnit( + agent_response=message.content, + thought=message.content + or "I am thinking about how to help you", + action_str="", + action=None, + observation=None, + ) + 
scratchpads.append(current_scratchpad) + if message.tool_calls: + try: + current_scratchpad.action = AgentScratchpadUnit.Action( + action_name=message.tool_calls[0].function.name, + action_input=json.loads( + message.tool_calls[0].function.arguments + ), + ) + current_scratchpad.action_str = json.dumps( + current_scratchpad.action.to_dict() + ) + except Exception: + pass + elif isinstance(message, ToolPromptMessage): + if current_scratchpad: + assert isinstance(message.content, str) + current_scratchpad.observation = message.content + else: + raise NotImplementedError("expected str type") + elif isinstance(message, UserPromptMessage): + if scratchpads: + result.append( + AssistantPromptMessage( + content=self._format_assistant_message(scratchpads) + ) + ) + scratchpads = [] + current_scratchpad = None + + result.append(message) + + if scratchpads: + result.append( + AssistantPromptMessage( + content=self._format_assistant_message(scratchpads) + ) + ) + + return current_session_messages or [] diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/ReAct.yaml b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/ReAct.yaml new file mode 100644 index 0000000000..a6faffd7a6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/ReAct.yaml @@ -0,0 +1,58 @@ +identity: + name: ReAct + author: Dify + label: + en_US: ReAct + zh_Hans: ReAct + pt_BR: ReAct +description: + en_US: ReAct is a basic strategy for agent, model will use the tools provided to perform the task. + zh_Hans: ReAct 是一个基本的 Agent 策略,模型将使用提供的工具来执行任务。 + pt_BR: ReAct is a basic strategy for agent, model will use the tools provided to perform the task. 
+parameters: + - name: model + type: model-selector + scope: tool-call&llm + required: true + label: + en_US: Model + zh_Hans: 模型 + pt_BR: Model + - name: tools + type: array[tools] + required: true + label: + en_US: Tool list + zh_Hans: 工具列表 + pt_BR: Tool list + - name: instruction + type: string + required: true + label: + en_US: Instruction + zh_Hans: 指令 + pt_BR: Instruction + auto_generate: + type: prompt_instruction + template: + enabled: true + - name: query + type: string + required: true + label: + en_US: Query + zh_Hans: 查询 + pt_BR: Query + - name: maximum_iterations + type: number + required: true + label: + en_US: Maximum Iterations + zh_Hans: 最大迭代次数 + pt_BR: Maximum Iterations + default: 3 + min: 1 + max: 30 +extra: + python: + source: strategies/ReAct.py diff --git a/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/function_calling.py b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/function_calling.py new file mode 100644 index 0000000000..484dedaaad --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/agent-0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9/strategies/function_calling.py @@ -0,0 +1,546 @@ +import json +import time +from collections.abc import Generator +from copy import deepcopy +from typing import Any, Optional, cast + +from dify_plugin.entities.agent import AgentInvokeMessage +from dify_plugin.entities.model.llm import ( + LLMModelConfig, + LLMResult, + LLMResultChunk, + LLMUsage, +) +from dify_plugin.entities.model.message import ( + AssistantPromptMessage, + PromptMessage, + PromptMessageContentType, + PromptMessageRole, + SystemPromptMessage, + ToolPromptMessage, + UserPromptMessage, +) +from dify_plugin.entities.tool import LogMetadata, ToolInvokeMessage, ToolProviderType +from dify_plugin.interfaces.agent import 
class FunctionCallingAgentStrategy(AgentStrategy):
    """
    Agent strategy that drives tools through the model's function calling.

    Every round sends the instruction, the user query and everything produced
    in earlier rounds to the model, executes each tool call the model asks
    for and feeds the tool output back. The loop ends when the model stops
    calling tools or after ``maximum_iterations`` rounds.
    """

    def __init__(self, session):
        super().__init__(session)
        # Query of the current run; read by _organize_prompt_messages.
        self.query = ""

    def _invoke(self, parameters: dict[str, Any]) -> Generator[AgentInvokeMessage]:
        """
        Run FunctionCall agent application

        :param parameters: raw strategy parameters, validated through
            ``FunctionCallingParams``
        :return: generator of log messages (one per round, model call and
            tool call), text messages carrying the model output, and a
            closing JSON message with the accumulated usage metadata
        """
        fc_params = FunctionCallingParams(**parameters)
        query = fc_params.query
        self.query = query
        instruction = fc_params.instruction
        init_prompt_messages = [
            PromptMessage(role=PromptMessageRole.SYSTEM, content=instruction)
        ]
        tools = fc_params.tools
        tool_instances = {tool.identity.name: tool for tool in tools} if tools else {}
        model = fc_params.model
        stop = (
            fc_params.model.completion_params.get("stop", [])
            if fc_params.model.completion_params
            else []
        )
        # convert tools into ModelRuntime Tool format
        prompt_messages_tools = self._init_prompt_tools(tools)

        iteration_step = 1
        max_iteration_steps = fc_params.maximum_iterations
        current_thoughts: list[PromptMessage] = []
        # continue to run until there is not any tool call
        function_call_state = True
        llm_usage: dict[str, Optional[LLMUsage]] = {"usage": None}

        while function_call_state and iteration_step <= max_iteration_steps:
            function_call_state = False
            round_started_at = time.perf_counter()
            round_log = self.create_log_message(
                label=f"ROUND {iteration_step}",
                data={},
                metadata={
                    LogMetadata.STARTED_AT: round_started_at,
                },
                status=ToolInvokeMessage.LogMessage.LogStatus.START,
            )
            yield round_log
            if iteration_step == max_iteration_steps:
                # the last iteration, remove all tools
                prompt_messages_tools = []

            # recalc llm max tokens
            prompt_messages = self._organize_prompt_messages(
                history_prompt_messages=init_prompt_messages,
                current_thoughts=current_thoughts,
            )
            if model.completion_params:
                self.recalc_llm_max_tokens(
                    model.entity, prompt_messages, model.completion_params
                )
            # invoke model
            model_started_at = time.perf_counter()
            model_log = self.create_log_message(
                label=f"{model.model} Thought",
                data={},
                metadata={
                    LogMetadata.STARTED_AT: model_started_at,
                    LogMetadata.PROVIDER: model.provider,
                },
                parent=round_log,
                status=ToolInvokeMessage.LogMessage.LogStatus.START,
            )
            yield model_log
            chunks: Generator[LLMResultChunk, None, None] | LLMResult = (
                self.session.model.llm.invoke(
                    model_config=LLMModelConfig(**model.model_dump(mode="json")),
                    prompt_messages=prompt_messages,
                    stream=True,
                    stop=stop,
                    tools=prompt_messages_tools,
                )
            )

            tool_calls: list[tuple[str, str, dict[str, Any]]] = []

            # full text answer of the model for this round
            response = ""

            # ";"-joined names of the tools the model asked for
            tool_call_names = ""

            current_llm_usage = None

            if isinstance(chunks, Generator):
                for chunk in chunks:
                    # check if there is any tool call
                    if self.check_tool_calls(chunk):
                        function_call_state = True
                        tool_calls.extend(self.extract_tool_calls(chunk) or [])
                        tool_call_names = ";".join(
                            [tool_call[1] for tool_call in tool_calls]
                        )

                    if chunk.delta.message and chunk.delta.message.content:
                        # Text is only streamed to the caller when this round
                        # is not (so far) a tool-calling round, or when it is
                        # the last round.
                        if isinstance(chunk.delta.message.content, list):
                            for content in chunk.delta.message.content:
                                response += content.data
                                if (
                                    not function_call_state
                                    or iteration_step == max_iteration_steps
                                ):
                                    yield self.create_text_message(content.data)
                        else:
                            response += str(chunk.delta.message.content)
                            if (
                                not function_call_state
                                or iteration_step == max_iteration_steps
                            ):
                                yield self.create_text_message(
                                    str(chunk.delta.message.content)
                                )

                    if chunk.delta.usage:
                        self.increase_usage(llm_usage, chunk.delta.usage)
                        current_llm_usage = chunk.delta.usage

            else:
                llm_result = chunks
                # check if there is any tool call
                if self.check_blocking_tool_calls(llm_result):
                    function_call_state = True
                    tool_calls.extend(
                        self.extract_blocking_tool_calls(llm_result) or []
                    )
                    tool_call_names = ";".join(
                        [tool_call[1] for tool_call in tool_calls]
                    )

                if llm_result.usage:
                    self.increase_usage(llm_usage, llm_result.usage)
                    current_llm_usage = llm_result.usage

                if llm_result.message and llm_result.message.content:
                    if isinstance(llm_result.message.content, list):
                        for content in llm_result.message.content:
                            response += content.data
                    else:
                        response += str(llm_result.message.content)

                if not llm_result.message.content:
                    llm_result.message.content = ""
            yield self.finish_log_message(
                log=model_log,
                data={
                    "output": response,
                    "tool_name": tool_call_names,
                    "tool_input": {
                        tool_call[1]: tool_call[2] for tool_call in tool_calls
                    },
                },
                metadata={
                    LogMetadata.STARTED_AT: model_started_at,
                    LogMetadata.FINISHED_AT: time.perf_counter(),
                    LogMetadata.ELAPSED_TIME: time.perf_counter() - model_started_at,
                    LogMetadata.PROVIDER: model.provider,
                    LogMetadata.TOTAL_PRICE: current_llm_usage.total_price
                    if current_llm_usage
                    else 0,
                    LogMetadata.CURRENCY: current_llm_usage.currency
                    if current_llm_usage
                    else "",
                    LogMetadata.TOTAL_TOKENS: current_llm_usage.total_tokens
                    if current_llm_usage
                    else 0,
                },
            )
            assistant_message = AssistantPromptMessage(content="", tool_calls=[])
            if tool_calls:
                assistant_message.tool_calls = [
                    AssistantPromptMessage.ToolCall(
                        id=tool_call[0],
                        type="function",
                        function=AssistantPromptMessage.ToolCall.ToolCallFunction(
                            name=tool_call[1],
                            arguments=json.dumps(tool_call[2], ensure_ascii=False),
                        ),
                    )
                    for tool_call in tool_calls
                ]
            else:
                assistant_message.content = response

            current_thoughts.append(assistant_message)

            # call tools
            tool_responses = []
            for tool_call_id, tool_call_name, tool_call_args in tool_calls:
                # .get(): the model may ask for a tool that was never
                # configured; that must produce an error observation, not a
                # KeyError that aborts the whole run.
                tool_instance = tool_instances.get(tool_call_name)
                tool_provider = tool_instance.identity.provider if tool_instance else ""
                tool_call_started_at = time.perf_counter()
                tool_call_log = self.create_log_message(
                    label=f"CALL {tool_call_name}",
                    data={},
                    metadata={
                        LogMetadata.STARTED_AT: tool_call_started_at,
                        LogMetadata.PROVIDER: tool_provider,
                    },
                    parent=round_log,
                    status=ToolInvokeMessage.LogMessage.LogStatus.START,
                )
                yield tool_call_log
                if not tool_instance:
                    tool_response = {
                        "tool_call_id": tool_call_id,
                        "tool_call_name": tool_call_name,
                        "tool_response": f"there is not a tool named {tool_call_name}",
                        "meta": ToolInvokeMeta.error_instance(
                            f"there is not a tool named {tool_call_name}"
                        ).to_dict(),
                    }
                else:
                    # arguments chosen by the model override runtime defaults
                    tool_invoke_parameters = {
                        **tool_instance.runtime_parameters,
                        **tool_call_args,
                    }
                    # invoke tool; any failure becomes the observation text
                    try:
                        tool_result = self._stringify_tool_messages(
                            self.session.tool.invoke(
                                provider_type=ToolProviderType(
                                    tool_instance.provider_type
                                ),
                                provider=tool_instance.identity.provider,
                                tool_name=tool_instance.identity.name,
                                parameters=tool_invoke_parameters,
                            )
                        )
                    except Exception as e:
                        tool_result = f"tool invoke error: {str(e)}"
                    tool_response = {
                        "tool_call_id": tool_call_id,
                        "tool_call_name": tool_call_name,
                        "tool_call_input": tool_invoke_parameters,
                        "tool_response": tool_result,
                    }

                yield self.finish_log_message(
                    log=tool_call_log,
                    data={
                        "output": tool_response,
                    },
                    metadata={
                        LogMetadata.STARTED_AT: tool_call_started_at,
                        LogMetadata.PROVIDER: tool_provider,
                        LogMetadata.FINISHED_AT: time.perf_counter(),
                        LogMetadata.ELAPSED_TIME: time.perf_counter()
                        - tool_call_started_at,
                    },
                )
                tool_responses.append(tool_response)
                if tool_response["tool_response"] is not None:
                    current_thoughts.append(
                        ToolPromptMessage(
                            content=str(tool_response["tool_response"]),
                            tool_call_id=tool_call_id,
                            name=tool_call_name,
                        )
                    )

            # update prompt tool
            for prompt_tool in prompt_messages_tools:
                self.update_prompt_message_tool(
                    tool_instances[prompt_tool.name], prompt_tool
                )
            yield self.finish_log_message(
                log=round_log,
                data={
                    "output": {
                        # model text of this round (never a tool message)
                        "llm_response": response,
                        "tool_responses": tool_responses,
                    },
                },
                metadata={
                    LogMetadata.STARTED_AT: round_started_at,
                    LogMetadata.FINISHED_AT: time.perf_counter(),
                    LogMetadata.ELAPSED_TIME: time.perf_counter() - round_started_at,
                    LogMetadata.TOTAL_PRICE: current_llm_usage.total_price
                    if current_llm_usage
                    else 0,
                    LogMetadata.CURRENCY: current_llm_usage.currency
                    if current_llm_usage
                    else "",
                    LogMetadata.TOTAL_TOKENS: current_llm_usage.total_tokens
                    if current_llm_usage
                    else 0,
                },
            )
            iteration_step += 1

        yield self.create_json_message(
            {
                "execution_metadata": {
                    LogMetadata.TOTAL_PRICE: llm_usage["usage"].total_price
                    if llm_usage["usage"] is not None
                    else 0,
                    LogMetadata.CURRENCY: llm_usage["usage"].currency
                    if llm_usage["usage"] is not None
                    else "",
                    LogMetadata.TOTAL_TOKENS: llm_usage["usage"].total_tokens
                    if llm_usage["usage"] is not None
                    else 0,
                }
            }
        )

    @staticmethod
    def _stringify_tool_messages(tool_invoke_responses) -> str:
        """
        Flatten the messages of one tool invocation into a single string.

        :param tool_invoke_responses: iterable of tool invoke messages
        :return: concatenated, model-readable observation text
        """
        result = ""
        for tool_message in tool_invoke_responses:
            if tool_message.type == ToolInvokeMessage.MessageType.TEXT:
                result += cast(
                    ToolInvokeMessage.TextMessage, tool_message.message
                ).text
            elif tool_message.type == ToolInvokeMessage.MessageType.LINK:
                result += (
                    f"result link: {cast(ToolInvokeMessage.TextMessage, tool_message.message).text}."
                    + " please tell user to check it."
                )
            elif tool_message.type in {
                ToolInvokeMessage.MessageType.IMAGE_LINK,
                ToolInvokeMessage.MessageType.IMAGE,
            }:
                result += (
                    "image has been created and sent to user already, "
                    + "you do not need to create it, just tell the user to check it now."
                )
            elif tool_message.type == ToolInvokeMessage.MessageType.JSON:
                text = json.dumps(
                    cast(
                        ToolInvokeMessage.JsonMessage, tool_message.message
                    ).json_object,
                    ensure_ascii=False,
                )
                result += f"tool response: {text}."
            else:
                result += f"tool response: {tool_message.message!r}."
        return result

    @staticmethod
    def _parse_tool_calls(tool_calls) -> list[tuple[str, str, dict[str, Any]]]:
        """
        Convert tool-call objects into ``(id, name, args)`` tuples.

        An empty ``arguments`` string yields ``{}``; anything else is parsed
        as JSON.
        """
        parsed = []
        for tool_call in tool_calls:
            arguments = tool_call.function.arguments
            args = json.loads(arguments) if arguments != "" else {}
            parsed.append((tool_call.id, tool_call.function.name, args))
        return parsed

    def check_tool_calls(self, llm_result_chunk: LLMResultChunk) -> bool:
        """
        Check if there is any tool call in llm result chunk
        """
        return bool(llm_result_chunk.delta.message.tool_calls)

    def check_blocking_tool_calls(self, llm_result: LLMResult) -> bool:
        """
        Check if there is any blocking tool call in llm result
        """
        return bool(llm_result.message.tool_calls)

    def extract_tool_calls(
        self, llm_result_chunk: LLMResultChunk
    ) -> list[tuple[str, str, dict[str, Any]]]:
        """
        Extract tool calls from llm result chunk

        Returns:
            List[Tuple[str, str, Dict[str, Any]]]: [(tool_call_id, tool_call_name, tool_call_args)]
        """
        return self._parse_tool_calls(llm_result_chunk.delta.message.tool_calls)

    def extract_blocking_tool_calls(
        self, llm_result: LLMResult
    ) -> list[tuple[str, str, dict[str, Any]]]:
        """
        Extract blocking tool calls from llm result

        Returns:
            List[Tuple[str, str, Dict[str, Any]]]: [(tool_call_id, tool_call_name, tool_call_args)]
        """
        return self._parse_tool_calls(llm_result.message.tool_calls)

    def _init_system_message(
        self, prompt_template: str, prompt_messages: list[PromptMessage]
    ) -> list[PromptMessage]:
        """
        Initialize system message

        Returns a new single-element list when there are no messages yet;
        otherwise inserts the system message at the front of
        ``prompt_messages`` in place, unless it already starts with one.
        Nothing is added when ``prompt_template`` is empty.
        """
        if not prompt_messages and prompt_template:
            return [
                SystemPromptMessage(content=prompt_template),
            ]

        if (
            prompt_messages
            and not isinstance(prompt_messages[0], SystemPromptMessage)
            and prompt_template
        ):
            prompt_messages.insert(0, SystemPromptMessage(content=prompt_template))

        return prompt_messages or []

    def _organize_user_query(
        self, query: str, prompt_messages: list[PromptMessage]
    ) -> list[PromptMessage]:
        """
        Organize user query

        Appends the query as a user message to ``prompt_messages`` (in place)
        and returns that same list.
        """

        prompt_messages.append(UserPromptMessage(content=query))

        return prompt_messages

    def _clear_user_prompt_image_messages(
        self, prompt_messages: list[PromptMessage]
    ) -> list[PromptMessage]:
        """
        As for now, gpt supports both fc and vision at the first iteration.
        We need to remove the image messages from the prompt messages at the first iteration.

        Works on a deep copy: list-valued user message contents are collapsed
        to text, with "[image]" / "[file]" placeholders for non-text parts.
        """
        prompt_messages = deepcopy(prompt_messages)

        for prompt_message in prompt_messages:
            if isinstance(prompt_message, UserPromptMessage) and isinstance(
                prompt_message.content, list
            ):
                prompt_message.content = "\n".join(
                    [
                        content.data
                        if content.type == PromptMessageContentType.TEXT
                        else "[image]"
                        if content.type == PromptMessageContentType.IMAGE
                        else "[file]"
                        for content in prompt_message.content
                    ]
                )

        return prompt_messages

    def _organize_prompt_messages(
        self,
        current_thoughts: list[PromptMessage],
        history_prompt_messages: list[PromptMessage],
    ) -> list[PromptMessage]:
        """
        Build the prompt for one round: history, then the user query, then
        everything produced in earlier rounds of this run.
        """
        # No extra system template is injected here, so the history is
        # passed through unchanged.
        prompt_template = ""
        history_prompt_messages = self._init_system_message(
            prompt_template, history_prompt_messages
        )
        query_prompt_messages = self._organize_user_query(self.query or "", [])

        prompt_messages = [
            *history_prompt_messages,
            *query_prompt_messages,
            *current_thoughts,
        ]
        if len(current_thoughts) != 0:
            # clear messages after the first iteration
            prompt_messages = self._clear_user_prompt_image_messages(prompt_messages)
        return prompt_messages
+ zh_Hans: Function Calling 是一个基本的 Agent 策略,模型将使用提供的工具来执行任务。 + pt_BR: Function Calling is a basic strategy for agent, model will use the tools provided to perform the task. +parameters: + - name: model + type: model-selector + scope: tool-call&llm + required: true + label: + en_US: Model + zh_Hans: 模型 + pt_BR: Model + - name: tools + type: array[tools] + required: true + label: + en_US: Tool list + zh_Hans: 工具列表 + pt_BR: Tool list + - name: instruction + type: string + required: true + label: + en_US: Instruction + zh_Hans: 指令 + pt_BR: Instruction + auto_generate: + type: prompt_instruction + template: + enabled: true + - name: query + type: string + required: true + label: + en_US: Query + zh_Hans: 查询 + pt_BR: Query + - name: maximum_iterations + type: number + required: true + label: + en_US: Maximum Iterations + zh_Hans: 最大迭代次数 + pt_BR: Maximum Iterations + default: 3 + max: 30 + min: 1 +extra: + python: + source: strategies/function_calling.py diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/.env.example b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/.env.example new file mode 100644 index 0000000000..dbb75b858b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/.env.example @@ -0,0 +1,4 @@ +INSTALL_METHOD=remote +REMOTE_INSTALL_HOST=debug-plugin.dify.dev +REMOTE_INSTALL_PORT=5003 +REMOTE_INSTALL_KEY=********-****-****-****-************ diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/README.md b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/README.md new file mode 100644 index 0000000000..f9417d0874 --- /dev/null +++ 
b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/README.md @@ -0,0 +1,7 @@ +# Overview +DeepSeek provides advanced AI capabilities for chats and completions. This plugin enables developers to integrate DeepSeek's models, including text generation (deepseek-chat and deepseek-coder) via the API. + +# Configure +After installation, you need to get API keys from [DeepSeek](https://platform.deepseek.com/api_keys) and set them up in Settings -> Model Provider. + +![](_assets/deepseek.PNG) \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/_assets/deepseek.PNG b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/_assets/deepseek.PNG new file mode 100644 index 0000000000..89ed0fce0f Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/_assets/deepseek.PNG differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/_assets/icon_l_en.svg b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/_assets/icon_l_en.svg new file mode 100644 index 0000000000..425494404f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/_assets/icon_l_en.svg @@ -0,0 +1,22 @@ + + + Created with Pixso. 
# Plugin bootstrap script. The package manifest in this directory names this
# module ("main") as the runner entrypoint.
from dify_plugin import Plugin, DifyPluginEnv

# Build the plugin object from a default-constructed DifyPluginEnv.
plugin = Plugin(DifyPluginEnv())

# Start the plugin only when this file is executed as a script, so importing
# the module merely creates `plugin` without running it.
if __name__ == '__main__':
    plugin.run()
+ zh_Hans: 深度求索提供的模型,例如 deepseek-chat、deepseek-coder 。 +icon: icon_s_en.svg +label: + en_US: DeepSeek + zh_Hans: 深度求索 +meta: + arch: + - amd64 + - arm64 + runner: + entrypoint: main + language: python + version: '3.12' + version: 0.0.1 +name: deepseek +plugins: + models: + - provider/deepseek.yaml +resource: + memory: 268435456 + permission: + model: + enabled: true + llm: true + moderation: false + rerank: true + speech2text: false + text_embedding: true + tts: false + tool: + enabled: true +type: plugin +version: 0.0.5 diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/_position.yaml b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/_position.yaml new file mode 100644 index 0000000000..eeac8aa405 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/_position.yaml @@ -0,0 +1,3 @@ +- deepseek-chat +- deepseek-coder +- deepseek-reasoner diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-chat.yaml new file mode 100644 index 0000000000..4cf6eef757 --- /dev/null +++ 
b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-chat.yaml @@ -0,0 +1,83 @@ +model: deepseek-chat +label: + zh_Hans: deepseek-chat + en_US: deepseek-chat +model_type: llm +features: + - agent-thought + - tool-call + - multi-tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 1 + min: 0.0 + max: 2.0 + help: + zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。 + en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is. + - name: max_tokens + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + type: float + default: 1 + min: 0.01 + max: 1.00 + help: + zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。 + en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature. + - name: logprobs + label: + en_US: Logprobs + help: + zh_Hans: 是否返回所输出 token 的对数概率。如果为 true,则在 message 的 content 中返回每个输出 token 的对数概率。 + en_US: Whether to return the log probability of the output token. If true, returns the log probability of each output token in the content of message . 
+ type: boolean + - name: top_logprobs + label: + en_US: Top Logprobs + type: int + default: 0 + min: 0 + max: 20 + help: + zh_Hans: 一个介于 0 到 20 之间的整数 N,指定每个输出位置返回输出概率 top N 的 token,且返回这些 token 的对数概率。指定此参数时,logprobs 必须为 true。 + en_US: An integer N between 0 and 20, specifying that each output position returns the top N tokens with output probability, and returns the logarithmic probability of these tokens. When specifying this parameter, logprobs must be true. + - name: frequency_penalty + use_template: frequency_penalty + default: 0 + min: -2.0 + max: 2.0 + help: + zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。 + en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content. + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: "2" + output: "8" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-coder.yaml b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-coder.yaml new file mode 100644 index 0000000000..97310e76b9 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-coder.yaml @@ -0,0 +1,29 @@ +model: deepseek-coder +label: + zh_Hans: deepseek-coder + en_US: deepseek-coder +model_type: llm +features: + - agent-thought + - tool-call + - multi-tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 128000 
+parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-reasoner.yaml b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-reasoner.yaml new file mode 100644 index 0000000000..45ef70977b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/deepseek-reasoner.yaml @@ -0,0 +1,21 @@ +model: deepseek-reasoner +label: + zh_Hans: deepseek-reasoner + en_US: deepseek-reasoner +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "4" + output: "16" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/llm.py b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/llm.py new file mode 100644 index 0000000000..3af28fb157 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/models/llm/llm.py @@ -0,0 +1,33 @@ +from collections.abc import Generator +from typing import Optional, Union +from dify_plugin.entities.model.llm import LLMMode, LLMResult +from dify_plugin.entities.model.message import PromptMessage, PromptMessageTool +from yarl import URL +from dify_plugin import 
class DeepseekLargeLanguageModel(OAICompatLargeLanguageModel):
    """
    DeepSeek LLM served through the OpenAI-compatible base implementation.

    Every public entry point first normalises ``credentials`` in place (see
    ``_add_custom_parameters``) and then defers to the parent class.
    """

    # Endpoint used when the optional ``endpoint_url`` credential is missing or blank.
    _DEFAULT_ENDPOINT_URL = "https://api.deepseek.com"

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Invoke a DeepSeek model.

        :param model: model name
        :param credentials: model credentials; mutated in place with the DeepSeek defaults
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: whatever the parent ``_invoke`` returns (full result or chunk generator)
        """
        self._add_custom_parameters(credentials)
        # NOTE(review): ``user`` is accepted but not forwarded to the parent
        # call; confirm whether dropping it is intentional before passing it on.
        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate credentials using the parent implementation.

        :param model: model name
        :param credentials: model credentials; mutated in place with the DeepSeek defaults
        """
        self._add_custom_parameters(credentials)
        super().validate_credentials(model, credentials)

    @staticmethod
    def _add_custom_parameters(credentials) -> None:
        """
        Fill ``credentials`` in place with the fixed DeepSeek settings.

        ``endpoint_url`` is optional in the provider form, so the default
        endpoint is used when the key is absent *or* holds an empty/None value.
        A plain ``dict.get`` default only covers the absent-key case and would
        turn a blank value into an empty URL.

        :param credentials: credentials dict to update
        """
        endpoint_url = credentials.get("endpoint_url") or DeepseekLargeLanguageModel._DEFAULT_ENDPOINT_URL
        credentials["endpoint_url"] = str(URL(endpoint_url))
        credentials["mode"] = LLMMode.CHAT.value
        credentials["function_calling_type"] = "tool_call"
        credentials["stream_function_calling"] = "support"
provider credentials, credentials form defined in `provider_credential_schema`. + """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + model_instance.validate_credentials(model="deepseek-chat", credentials=credentials) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f"{self.get_provider_schema().provider} credentials validate failed") + raise ex diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/provider/deepseek.yaml b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/provider/deepseek.yaml new file mode 100644 index 0000000000..2750c35e06 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/provider/deepseek.yaml @@ -0,0 +1,51 @@ +background: '#c0cdff' +configurate_methods: +- predefined-model +description: + en_US: Models provided by deepseek, such as deepseek-reasoner、deepseek-chat、deepseek-coder. + zh_Hans: 深度求索提供的模型,例如 deepseek-reasoner、deepseek-chat、deepseek-coder 。 +extra: + python: + model_sources: + - models/llm/llm.py + provider_source: provider/deepseek.py +help: + title: + en_US: Get your API Key from deepseek + zh_Hans: 从深度求索获取 API Key + url: + en_US: https://platform.deepseek.com/api_keys +icon_large: + en_US: icon_l_en.svg +icon_small: + en_US: icon_s_en.svg +label: + en_US: deepseek + zh_Hans: 深度求索 +models: + llm: + position: models/llm/_position.yaml + predefined: + - models/llm/*.yaml +provider: deepseek +provider_credential_schema: + credential_form_schemas: + - label: + en_US: API Key + placeholder: + en_US: Enter your API Key + zh_Hans: 在此输入您的 API Key + required: true + type: secret-input + variable: api_key + - label: + en_US: Custom API endpoint URL + zh_Hans: 自定义 API endpoint 地址 + placeholder: + en_US: Base URL, e.g. 
https://api.deepseek.com/v1 or https://api.deepseek.com + zh_Hans: Base URL, e.g. https://api.deepseek.com/v1 or https://api.deepseek.com + required: false + type: text-input + variable: endpoint_url +supported_model_types: +- llm diff --git a/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/requirements.txt b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/requirements.txt new file mode 100644 index 0000000000..5e06c37aa1 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/deepseek-0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403/requirements.txt @@ -0,0 +1 @@ +dify_plugin==0.0.1b65 diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/.env.example b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/.env.example new file mode 100644 index 0000000000..dbb75b858b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/.env.example @@ -0,0 +1,4 @@ +INSTALL_METHOD=remote +REMOTE_INSTALL_HOST=debug-plugin.dify.dev +REMOTE_INSTALL_PORT=5003 +REMOTE_INSTALL_KEY=********-****-****-****-************ diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/README.md b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/README.md new file mode 100644 index 0000000000..c971d3ac5a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/README.md @@ -0,0 +1,50 @@ +## Overview + +Ollama is a cross-platform inference 
framework client (macOS, Windows, Linux) designed for seamless deployment of large language models (LLMs) such as Llama 2, Mistral, Llava, and more. With its one-click setup, Ollama enables local execution of LLMs, providing enhanced data privacy and security by keeping your data on your own machine. + +Dify supports integrating LLM and Text Embedding capabilities of large language models deployed with Ollama. + +## Configure + +#### 1. Download Ollama +Visit [Ollama download page](https://ollama.com/download) to download the Ollama client for your system. + +#### 2. Run Ollama and Chat with Llama 3.2 + +```` +ollama run llama3.2 +```` + +After successful launch, Ollama starts an API service on local port 11434, which can be accessed at `http://localhost:11434`. + +For other models, visit [Ollama Models](https://ollama.com/library) for more details. + +#### 3. Install Ollama Plugin +Go to the Dify marketplace and search for Ollama to install it. + +![](./_assets/ollama-01.png) + +#### 4. Integrate Ollama in Dify + +In `Settings > Model Providers > Ollama`, fill in: + +![](./_assets/ollama-02.png) + +- Model Name: `llama3.2` +- Base URL: `http://<your-ollama-endpoint-domain>:11434` +- Enter the base URL where the Ollama service is accessible. +- If Dify is deployed using Docker, consider using the local network IP address, e.g., `http://192.168.1.100:11434` or `http://host.docker.internal:11434` to access the service. +- For local source code deployment, use `http://localhost:11434`. +- Model Type: `Chat` +- Model Context Length: `4096` +- The maximum context length of the model. If unsure, use the default value of 4096. +- Maximum Token Limit: `4096` +- The maximum number of tokens returned by the model. If there are no specific requirements for the model, this can be consistent with the model context length. +- Support for Vision: `Yes` +- Check this option if the model supports image understanding (multimodal), like `llava`. 
+ +Click "Save" to use the model in the application after verifying that there are no errors. + +The integration method for Embedding models is similar to LLM, just change the model type to Text Embedding. + +For more detail, please check [Dify's official document](https://docs.dify.ai/development/models-integration/ollama). diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/icon_l_en.svg b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/icon_l_en.svg new file mode 100644 index 0000000000..39d8a1ece6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/icon_l_en.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/icon_s_en.svg b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/icon_s_en.svg new file mode 100644 index 0000000000..f8482a96b9 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/icon_s_en.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/ollama-01.png b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/ollama-01.png new file mode 100644 index 0000000000..854a0be798 Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/_assets/ollama-01.png differ 
# Plugin bootstrap script. The package manifest in this directory names this
# module ("main") as the runner entrypoint.
from dify_plugin import Plugin, DifyPluginEnv

# Build the plugin object from a default-constructed DifyPluginEnv.
plugin = Plugin(DifyPluginEnv())

# Start the plugin only when this file is executed as a script, so importing
# the module merely creates `plugin` without running it.
if __name__ == '__main__':
    plugin.run()
file mode 100644 index 0000000000..2e13c4c87a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/manifest.yaml @@ -0,0 +1,35 @@ +author: langgenius +created_at: '2024-09-20T00:13:50.29298939-04:00' +description: + en_US: Ollama +icon: icon_s_en.svg +label: + en_US: Ollama +meta: + arch: + - amd64 + - arm64 + runner: + entrypoint: main + language: python + version: '3.12' + version: 0.0.1 +name: ollama +plugins: + models: + - provider/ollama.yaml +resource: + memory: 268435456 + permission: + model: + enabled: true + llm: true + moderation: false + rerank: true + speech2text: false + text_embedding: true + tts: false + tool: + enabled: true +type: plugin +version: 0.0.3 diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/llm/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/llm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/llm/llm.py b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/llm/llm.py new file mode 100644 index 0000000000..2524a828bb --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/llm/llm.py @@ -0,0 +1,743 @@ +import json +import logging +import re +from collections.abc import Generator +from decimal import Decimal +from typing import Any, Optional, Union, cast +from urllib.parse import urljoin +import requests +from dify_plugin.entities.model import ( + AIModelEntity, + DefaultParameterName, + FetchFrom, + I18nObject, + ModelFeature, + 
ModelPropertyKey, + ModelType, + ParameterRule, + ParameterType, + PriceConfig, +) +from dify_plugin.entities.model.llm import ( + LLMMode, + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, +) +from dify_plugin.entities.model.message import ( + AssistantPromptMessage, + ImagePromptMessageContent, + PromptMessage, + PromptMessageContentType, + PromptMessageTool, + SystemPromptMessage, + TextPromptMessageContent, + ToolPromptMessage, + UserPromptMessage, +) +from dify_plugin.errors.model import ( + CredentialsValidateFailedError, + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from dify_plugin.interfaces.model.large_language_model import LargeLanguageModel + +logger = logging.getLogger(__name__) + + +class OllamaLargeLanguageModel(LargeLanguageModel): + """ + Model class for Ollama large language model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + return self._generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def get_num_tokens( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> int: + 
""" + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: + """ + model_mode = self.get_model_mode(model, credentials) + if model_mode == LLMMode.CHAT: + return self._num_tokens_from_messages(prompt_messages) + else: + first_prompt_message = prompt_messages[0] + if isinstance(first_prompt_message.content, str): + text = first_prompt_message.content + elif isinstance(first_prompt_message.content, list): + text = "" + for message_content in first_prompt_message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast( + TextPromptMessageContent, message_content + ) + text = message_content.data + break + return self._get_num_tokens_by_gpt2(text) + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + self._generate( + model=model, + credentials=credentials, + prompt_messages=[UserPromptMessage(content="ping")], + model_parameters={"num_predict": 5}, + stream=False, + ) + except InvokeError as ex: + raise CredentialsValidateFailedError( + f"An error occurred during credentials validation: {ex.description}" + ) + except Exception as ex: + raise CredentialsValidateFailedError( + f"An error occurred during credentials validation: {str(ex)}" + ) + + def _generate( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke llm completion model + + :param model: model name + :param credentials: credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param 
stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + headers = {"Content-Type": "application/json"} + endpoint_url = credentials["base_url"] + if not endpoint_url.endswith("/"): + endpoint_url += "/" + data = {"model": model, "stream": stream} + if "format" in model_parameters: + data["format"] = model_parameters["format"] + del model_parameters["format"] + if "keep_alive" in model_parameters: + data["keep_alive"] = model_parameters["keep_alive"] + del model_parameters["keep_alive"] + data["options"] = model_parameters or {} + if stop: + data["options"]["stop"] = stop + completion_type = LLMMode.value_of(credentials["mode"]) + if completion_type is LLMMode.CHAT: + endpoint_url = urljoin(endpoint_url, "api/chat") + data["messages"] = [ + self._convert_prompt_message_to_dict(m) for m in prompt_messages + ] + if tools: + data["tools"] = [ + self._convert_prompt_message_tool_to_dict(tool) for tool in tools + ] + else: + endpoint_url = urljoin(endpoint_url, "api/generate") + first_prompt_message = prompt_messages[0] + if isinstance(first_prompt_message, UserPromptMessage): + first_prompt_message = cast(UserPromptMessage, first_prompt_message) + if isinstance(first_prompt_message.content, str): + data["prompt"] = first_prompt_message.content + elif isinstance(first_prompt_message.content, list): + text = "" + images = [] + for message_content in first_prompt_message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast( + TextPromptMessageContent, message_content + ) + text = message_content.data + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast( + ImagePromptMessageContent, message_content + ) + image_data = re.sub( + "^data:image\\/[a-zA-Z]+;base64,", + "", + message_content.data, + ) + images.append(image_data) + data["prompt"] = text + data["images"] = images + response = requests.post( 
+ endpoint_url, headers=headers, json=data, timeout=(10, 300), stream=stream + ) + response.encoding = "utf-8" + if response.status_code != 200: + raise InvokeError( + f"API request failed with status code {response.status_code}: {response.text}" + ) + if stream: + return self._handle_generate_stream_response( + model, credentials, completion_type, response, prompt_messages + ) + return self._handle_generate_response( + model, credentials, completion_type, response, prompt_messages, tools + ) + + def _handle_generate_response( + self, + model: str, + credentials: dict, + completion_type: LLMMode, + response: requests.Response, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]], + ) -> LLMResult: + """ + Handle llm completion response + + :param model: model name + :param credentials: model credentials + :param completion_type: completion type + :param response: response + :param prompt_messages: prompt messages + :return: llm result + """ + response_json = response.json() + tool_calls = [] + if completion_type is LLMMode.CHAT: + message = response_json.get("message", {}) + response_content = message.get("content", "") + response_tool_calls = message.get("tool_calls", []) + tool_calls = [ + self._extract_response_tool_call(tool_call) + for tool_call in response_tool_calls + ] + else: + response_content = response_json["response"] + assistant_message = AssistantPromptMessage( + content=response_content, tool_calls=tool_calls + ) + if "prompt_eval_count" in response_json and "eval_count" in response_json: + prompt_tokens = response_json["prompt_eval_count"] + completion_tokens = response_json["eval_count"] + else: + prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content) + completion_tokens = self._get_num_tokens_by_gpt2(assistant_message.content) + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + result = LLMResult( + model=response_json["model"], + 
prompt_messages=prompt_messages, + message=assistant_message, + usage=usage, + ) + return result + + def _handle_generate_stream_response( + self, + model: str, + credentials: dict, + completion_type: LLMMode, + response: requests.Response, + prompt_messages: list[PromptMessage], + ) -> Generator: + """ + Handle llm completion stream response + + :param model: model name + :param credentials: model credentials + :param completion_type: completion type + :param response: response + :param prompt_messages: prompt messages + :return: llm response chunk generator result + """ + full_text = "" + chunk_index = 0 + + def create_final_llm_result_chunk( + index: int, message: AssistantPromptMessage, finish_reason: str + ) -> LLMResultChunk: + prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content) + completion_tokens = self._get_num_tokens_by_gpt2(full_text) + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + return LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=index, + message=message, + finish_reason=finish_reason, + usage=usage, + ), + ) + + for chunk in response.iter_lines(decode_unicode=True, delimiter="\n"): + if not chunk: + continue + try: + chunk_json = json.loads(chunk) + except json.JSONDecodeError as e: + yield create_final_llm_result_chunk( + index=chunk_index, + message=AssistantPromptMessage(content=""), + finish_reason="Non-JSON encountered.", + ) + chunk_index += 1 + break + if completion_type is LLMMode.CHAT: + if not chunk_json: + continue + if "message" not in chunk_json: + text = "" + else: + text = chunk_json.get("message").get("content", "") + else: + if not chunk_json: + continue + text = chunk_json["response"] + assistant_prompt_message = AssistantPromptMessage(content=text) + full_text += text + if chunk_json["done"]: + if "prompt_eval_count" in chunk_json: + prompt_tokens = chunk_json["prompt_eval_count"] + else: + 
prompt_message_content = prompt_messages[0].content + if isinstance(prompt_message_content, str): + prompt_tokens = self._get_num_tokens_by_gpt2( + prompt_message_content + ) + elif isinstance(prompt_message_content, list): + content_text = "" + for message_content in prompt_message_content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast( + TextPromptMessageContent, message_content + ) + content_text += message_content.data + prompt_tokens = self._get_num_tokens_by_gpt2(content_text) + completion_tokens = chunk_json.get( + "eval_count", self._get_num_tokens_by_gpt2(full_text) + ) + usage = self._calc_response_usage( + model, credentials, prompt_tokens, completion_tokens + ) + yield LLMResultChunk( + model=chunk_json["model"], + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=chunk_index, + message=assistant_prompt_message, + finish_reason="stop", + usage=usage, + ), + ) + else: + yield LLMResultChunk( + model=chunk_json["model"], + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=chunk_index, message=assistant_prompt_message + ), + ) + chunk_index += 1 + + def _convert_prompt_message_tool_to_dict(self, tool: PromptMessageTool) -> dict: + """ + Convert PromptMessageTool to dict for Ollama API + + :param tool: tool + :return: tool dict + """ + return { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters, + }, + } + + def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: + """ + Convert PromptMessage to dict for Ollama API + + :param message: prompt message + :return: message dict + """ + if isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + message_dict = {"role": "user", "content": message.content} + elif isinstance(message.content, list): + text = "" + images = [] + for message_content in message.content: + if 
message_content.type == PromptMessageContentType.TEXT: + message_content = cast( + TextPromptMessageContent, message_content + ) + text = message_content.data + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast( + ImagePromptMessageContent, message_content + ) + image_data = re.sub( + "^data:image\\/[a-zA-Z]+;base64,", "", message_content.data + ) + images.append(image_data) + message_dict = {"role": "user", "content": text, "images": images} + elif isinstance(message, AssistantPromptMessage): + message = cast(AssistantPromptMessage, message) + message_dict = {"role": "assistant", "content": message.content} + elif isinstance(message, SystemPromptMessage): + message = cast(SystemPromptMessage, message) + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, ToolPromptMessage): + message = cast(ToolPromptMessage, message) + message_dict = {"role": "tool", "content": message.content} + else: + raise ValueError(f"Got unknown type {message}") + return message_dict + + def _num_tokens_from_messages(self, messages: list[PromptMessage]) -> int: + """ + Calculate num tokens. 
+ + :param messages: messages + """ + num_tokens = 0 + messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages] + for message in messages_dict: + for key, value in message.items(): + num_tokens += self._get_num_tokens_by_gpt2(str(key)) + num_tokens += self._get_num_tokens_by_gpt2(str(value)) + return num_tokens + + def _extract_response_tool_call( + self, response_tool_call: dict + ) -> AssistantPromptMessage.ToolCall: + """ + Extract response tool call + """ + tool_call = None + if response_tool_call and "function" in response_tool_call: + arguments = response_tool_call.get("function", {}).get("arguments") + if isinstance(arguments, dict): + arguments = json.dumps(arguments) + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_tool_call.get("function", {}).get("name"), + arguments=arguments, + ) + tool_call = AssistantPromptMessage.ToolCall( + id=response_tool_call.get("function", {}).get("name"), + type="function", + function=function, + ) + return tool_call + + def get_customizable_model_schema( + self, model: str, credentials: dict + ) -> AIModelEntity: + """ + Get customizable model schema. 
+ + :param model: model name + :param credentials: credentials + + :return: model schema + """ + extras: dict[str, Any] = {"features": []} + if "vision_support" in credentials and credentials["vision_support"] == "true": + extras["features"].append(ModelFeature.VISION) + if ( + "function_call_support" in credentials + and credentials["function_call_support"] == "true" + ): + extras["features"].append(ModelFeature.TOOL_CALL) + extras["features"].append(ModelFeature.MULTI_TOOL_CALL) + entity = AIModelEntity( + model=model, + label=I18nObject(zh_Hans=model, en_US=model), + model_type=ModelType.LLM, + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.MODE: credentials.get("mode"), + ModelPropertyKey.CONTEXT_SIZE: int( + credentials.get("context_size", 4096) + ), + }, + parameter_rules=[ + ParameterRule( + name=DefaultParameterName.TEMPERATURE.value, + use_template=DefaultParameterName.TEMPERATURE.value, + label=I18nObject(en_US="Temperature", zh_Hans="温度"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)", + zh_Hans="模型的温度。增加温度将使模型的回答更具创造性。(默认值:0.8)", + ), + default=0.1, + min=0, + max=1, + ), + ParameterRule( + name=DefaultParameterName.TOP_P.value, + use_template=DefaultParameterName.TOP_P.value, + label=I18nObject(en_US="Top P", zh_Hans="Top P"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)", + zh_Hans="与top-k一起工作。较高的值(例如,0.95)会导致生成更多样化的文本,而较低的值(例如,0.5)会生成更专注和保守的文本。(默认值:0.9)", + ), + default=0.9, + min=0, + max=1, + ), + ParameterRule( + name="top_k", + label=I18nObject(en_US="Top K", zh_Hans="Top K"), + type=ParameterType.INT, + help=I18nObject( + en_US="Reduces the probability of generating nonsense. A higher value (e.g. 
100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)", + zh_Hans="减少生成无意义内容的可能性。较高的值(例如100)将提供更多样化的答案,而较低的值(例如10)将更为保守。(默认值:40)", + ), + min=1, + max=100, + ), + ParameterRule( + name="repeat_penalty", + label=I18nObject(en_US="Repeat Penalty"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)", + zh_Hans="设置对重复内容的惩罚强度。一个较高的值(例如,1.5)会更强地惩罚重复内容,而一个较低的值(例如,0.9)则会相对宽容。(默认值:1.1)", + ), + min=-2, + max=2, + ), + ParameterRule( + name="num_predict", + use_template="max_tokens", + label=I18nObject(en_US="Num Predict", zh_Hans="最大令牌数预测"), + type=ParameterType.INT, + help=I18nObject( + en_US="Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)", + zh_Hans="生成文本时预测的最大令牌数。(默认值:128,-1 = 无限生成,-2 = 填充上下文)", + ), + default=512 + if int(credentials.get("max_tokens", 4096)) >= 768 + else 128, + min=-2, + max=int(credentials.get("max_tokens", 4096)), + ), + ParameterRule( + name="mirostat", + label=I18nObject( + en_US="Mirostat sampling", zh_Hans="Mirostat 采样" + ), + type=ParameterType.INT, + help=I18nObject( + en_US="Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", + zh_Hans="启用 Mirostat 采样以控制困惑度。(默认值:0,0 = 禁用,1 = Mirostat,2 = Mirostat 2.0)", + ), + min=0, + max=2, + ), + ParameterRule( + name="mirostat_eta", + label=I18nObject(en_US="Mirostat Eta", zh_Hans="学习率"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. 
(Default: 0.1)", + zh_Hans="影响算法对生成文本反馈响应的速度。较低的学习率会导致调整速度变慢,而较高的学习率会使得算法更加灵敏。(默认值:0.1)", + ), + precision=1, + ), + ParameterRule( + name="mirostat_tau", + label=I18nObject(en_US="Mirostat Tau", zh_Hans="文本连贯度"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)", + zh_Hans="控制输出的连贯性和多样性之间的平衡。较低的值会导致更专注和连贯的文本。(默认值:5.0)", + ), + precision=1, + ), + ParameterRule( + name="num_ctx", + label=I18nObject( + en_US="Size of context window", zh_Hans="上下文窗口大小" + ), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets the size of the context window used to generate the next token. (Default: 2048)", + zh_Hans="设置用于生成下一个标记的上下文窗口大小。(默认值:2048)", + ), + default=2048, + min=1, + ), + ParameterRule( + name="num_gpu", + label=I18nObject(en_US="GPU Layers", zh_Hans="GPU 层数"), + type=ParameterType.INT, + help=I18nObject( + en_US="The number of layers to offload to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable.As long as a model fits into one gpu it stays in one. It does not set the number of GPU(s). ", + zh_Hans="加载到 GPU 的层数。在 macOS 上,默认为 1 以启用 Metal 支持,设置为 0 则禁用。只要模型适合一个 GPU,它就保留在其中。它不设置 GPU 的数量。", + ), + min=-1, + default=1, + ), + ParameterRule( + name="num_thread", + label=I18nObject(en_US="Num Thread", zh_Hans="线程数"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. 
It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores).", + zh_Hans="设置计算过程中使用的线程数。默认情况下,Ollama会检测以获得最佳性能。建议将此值设置为系统拥有的物理CPU核心数(而不是逻辑核心数)。", + ), + min=1, + ), + ParameterRule( + name="repeat_last_n", + label=I18nObject(en_US="Repeat last N", zh_Hans="回溯内容"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)", + zh_Hans="设置模型回溯多远的内容以防止重复。(默认值:64,0 = 禁用,-1 = num_ctx)", + ), + min=-1, + ), + ParameterRule( + name="tfs_z", + label=I18nObject(en_US="TFS Z", zh_Hans="减少标记影响"), + type=ParameterType.FLOAT, + help=I18nObject( + en_US="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)", + zh_Hans="用于减少输出中不太可能的标记的影响。较高的值(例如,2.0)会更多地减少这种影响,而1.0的值则会禁用此设置。(默认值:1)", + ), + precision=1, + ), + ParameterRule( + name="seed", + label=I18nObject(en_US="Seed", zh_Hans="随机数种子"), + type=ParameterType.INT, + help=I18nObject( + en_US="Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)", + zh_Hans="设置用于生成的随机数种子。将此设置为特定数字将使模型对相同的提示生成相同的文本。(默认值:0)", + ), + ), + ParameterRule( + name="keep_alive", + label=I18nObject(en_US="Keep Alive", zh_Hans="模型存活时间"), + type=ParameterType.STRING, + help=I18nObject( + en_US="Sets how long the model is kept in memory after generating a response. This must be a duration string with a unit (e.g., '10m' for 10 minutes or '24h' for 24 hours). A negative number keeps the model loaded indefinitely, and '0' unloads the model immediately after generating a response. Valid time units are 's','m','h'. 
(Default: 5m)", + zh_Hans="设置模型在生成响应后在内存中保留的时间。这必须是一个带有单位的持续时间字符串(例如,'10m' 表示10分钟,'24h' 表示24小时)。负数表示无限期地保留模型,'0'表示在生成响应后立即卸载模型。有效的时间单位有 's'(秒)、'm'(分钟)、'h'(小时)。(默认值:5m)", + ), + ), + ParameterRule( + name="format", + label=I18nObject(en_US="Format", zh_Hans="返回格式"), + type=ParameterType.STRING, + help=I18nObject( + en_US="the format to return a response in. Currently the only accepted value is json.", + zh_Hans="返回响应的格式。目前唯一接受的值是json。", + ), + options=["json"], + ), + ], + pricing=PriceConfig( + input=Decimal(credentials.get("input_price", 0)), + output=Decimal(credentials.get("output_price", 0)), + unit=Decimal(credentials.get("unit", 0)), + currency=credentials.get("currency", "USD"), + ), + **extras, + ) + return entity + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. 
+ + :return: Invoke error mapping + """ + return { + InvokeAuthorizationError: [requests.exceptions.InvalidHeader], + InvokeBadRequestError: [ + requests.exceptions.HTTPError, + requests.exceptions.InvalidURL, + ], + InvokeRateLimitError: [requests.exceptions.RetryError], + InvokeServerUnavailableError: [ + requests.exceptions.ConnectionError, + requests.exceptions.HTTPError, + ], + InvokeConnectionError: [ + requests.exceptions.ConnectTimeout, + requests.exceptions.ReadTimeout, + ], + } diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/text_embedding/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/text_embedding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/text_embedding/text_embedding.py b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/text_embedding/text_embedding.py new file mode 100644 index 0000000000..0056fe870c --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/models/text_embedding/text_embedding.py @@ -0,0 +1,200 @@ +import json +import logging +import time +from decimal import Decimal +from typing import Optional +from urllib.parse import urljoin +from dify_plugin import TextEmbeddingModel +import numpy as np +import requests +from dify_plugin.entities.model import ( + AIModelEntity, + EmbeddingInputType, + FetchFrom, + I18nObject, + ModelPropertyKey, + ModelType, + PriceConfig, + PriceType, +) +from dify_plugin.entities.model.text_embedding import ( + EmbeddingUsage, + TextEmbeddingResult, +) +from dify_plugin.errors.model import ( + 
CredentialsValidateFailedError, + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + +logger = logging.getLogger(__name__) + + +class OllamaEmbeddingModel(TextEmbeddingModel): + """ + Model class for an Ollama text embedding model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, + ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ + headers = {"Content-Type": "application/json"} + endpoint_url = credentials.get("base_url", "") + if endpoint_url and not endpoint_url.endswith("/"): + endpoint_url += "/" + endpoint_url = urljoin(endpoint_url, "api/embed") + context_size = self._get_context_size(model, credentials) + inputs = [] + used_tokens = 0 + for text in texts: + num_tokens = self._get_num_tokens_by_gpt2(text) + if num_tokens >= context_size: + cutoff = int(np.floor(len(text) * (context_size / num_tokens))) + inputs.append(text[0:cutoff]) + else: + inputs.append(text) + payload = {"input": inputs, "model": model, "options": {"use_mmap": True}} + response = requests.post( + endpoint_url, headers=headers, data=json.dumps(payload), timeout=(10, 300) + ) + response.raise_for_status() + response_data = response.json() + embeddings = response_data["embeddings"] + embedding_used_tokens = self.get_num_tokens(model, credentials, inputs) + used_tokens += sum(embedding_used_tokens) + usage = self._calc_response_usage( + model=model, credentials=credentials, tokens=used_tokens + ) + return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model) + + def get_num_tokens( + self, model: str, credentials: dict, texts: list[str] + ) -> 
list[int]: + """ + Approximate number of tokens for given messages using GPT2 tokenizer + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :return: + """ + return [self._get_num_tokens_by_gpt2(text) for text in texts] + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + self._invoke(model=model, credentials=credentials, texts=["ping"]) + except InvokeError as ex: + raise CredentialsValidateFailedError( + f"An error occurred during credentials validation: {ex.description}" + ) + except Exception as ex: + raise CredentialsValidateFailedError( + f"An error occurred during credentials validation: {str(ex)}" + ) + + def get_customizable_model_schema( + self, model: str, credentials: dict + ) -> AIModelEntity: + """ + generate custom model entities from credentials + """ + entity = AIModelEntity( + model=model, + label=I18nObject(en_US=model), + model_type=ModelType.TEXT_EMBEDDING, + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.CONTEXT_SIZE: int( + credentials.get("context_size", 512) + ), + ModelPropertyKey.MAX_CHUNKS: 1, + }, + parameter_rules=[], + pricing=PriceConfig( + input=Decimal(credentials.get("input_price", 0)), + unit=Decimal(credentials.get("unit", 0)), + currency=credentials.get("currency", "USD"), + ), + ) + return entity + + def _calc_response_usage( + self, model: str, credentials: dict, tokens: int + ) -> EmbeddingUsage: + """ + Calculate response usage + + :param model: model name + :param credentials: model credentials + :param tokens: input tokens + :return: usage + """ + input_price_info = self.get_price( + model=model, + credentials=credentials, + price_type=PriceType.INPUT, + tokens=tokens, + ) + usage = EmbeddingUsage( + tokens=tokens, + total_tokens=tokens, + unit_price=input_price_info.unit_price, + 
price_unit=input_price_info.unit, + total_price=input_price_info.total_amount, + currency=input_price_info.currency, + latency=time.perf_counter() - self.started_at, + ) + return usage + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. + + :return: Invoke error mapping + """ + return { + InvokeAuthorizationError: [requests.exceptions.InvalidHeader], + InvokeBadRequestError: [ + requests.exceptions.HTTPError, + requests.exceptions.InvalidURL, + ], + InvokeRateLimitError: [requests.exceptions.RetryError], + InvokeServerUnavailableError: [ + requests.exceptions.ConnectionError, + requests.exceptions.HTTPError, + ], + InvokeConnectionError: [ + requests.exceptions.ConnectTimeout, + requests.exceptions.ReadTimeout, + ], + } diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/provider/ollama.py b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/provider/ollama.py new file mode 100644 index 0000000000..ba7607c2ff --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/provider/ollama.py @@ -0,0 +1,15 @@ +import logging +from dify_plugin import ModelProvider + +logger = logging.getLogger(__name__) + + +class OpenAIProvider(ModelProvider): + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. 
+ """ + pass diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/provider/ollama.yaml b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/provider/ollama.yaml new file mode 100644 index 0000000000..3d255df16c --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/provider/ollama.yaml @@ -0,0 +1,123 @@ +background: '#F9FAFB' +configurate_methods: +- customizable-model +extra: + python: + model_sources: + - models/llm/llm.py + - models/text_embedding/text_embedding.py + provider_source: provider/ollama.py +help: + title: + en_US: How to integrate with Ollama + zh_Hans: 如何集成 Ollama + url: + en_US: https://docs.dify.ai/tutorials/model-configuration/ollama +icon_large: + en_US: icon_l_en.svg +icon_small: + en_US: icon_s_en.svg +label: + en_US: Ollama +model_credential_schema: + credential_form_schemas: + - label: + en_US: Base URL + zh_Hans: 基础 URL + placeholder: + en_US: Base url of Ollama server, e.g. 
http://192.168.1.100:11434 + zh_Hans: Ollama server 的基础 URL,例如 http://192.168.1.100:11434 + required: true + type: text-input + variable: base_url + - default: chat + label: + en_US: Completion mode + zh_Hans: 模型类型 + options: + - label: + en_US: Completion + zh_Hans: 补全 + value: completion + - label: + en_US: Chat + zh_Hans: 对话 + value: chat + placeholder: + en_US: Select completion mode + zh_Hans: 选择对话类型 + required: true + show_on: + - value: llm + variable: __model_type + type: select + variable: mode + - default: '4096' + label: + en_US: Model context size + zh_Hans: 模型上下文长度 + placeholder: + en_US: Enter your Model context size + zh_Hans: 在此输入您的模型上下文长度 + required: true + type: text-input + variable: context_size + - default: '4096' + label: + en_US: Upper bound for max tokens + zh_Hans: 最大 token 上限 + required: true + show_on: + - value: llm + variable: __model_type + type: text-input + variable: max_tokens + - default: 'false' + label: + en_US: Vision support + zh_Hans: 是否支持 Vision + options: + - label: + en_US: 'Yes' + zh_Hans: 是 + value: 'true' + - label: + en_US: 'No' + zh_Hans: 否 + value: 'false' + required: false + show_on: + - value: llm + variable: __model_type + type: radio + variable: vision_support + - default: 'false' + label: + en_US: Function call support + zh_Hans: 是否支持函数调用 + options: + - label: + en_US: 'Yes' + zh_Hans: 是 + value: 'true' + - label: + en_US: 'No' + zh_Hans: 否 + value: 'false' + required: false + show_on: + - value: llm + variable: __model_type + type: radio + variable: function_call_support + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter your model name + zh_Hans: 输入模型名称 +provider: ollama +supported_model_types: +- llm +- text-embedding diff --git a/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/requirements.txt 
b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/requirements.txt new file mode 100644 index 0000000000..268b497a18 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/ollama-0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7/requirements.txt @@ -0,0 +1,2 @@ +dify_plugin==0.0.1b65 +numpy~=2.1.3 diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/README.md b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/README.md new file mode 100644 index 0000000000..a06958301a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/README.md @@ -0,0 +1,9 @@ +## Overview + +This plugin provides access to models that are OpenAI-compatible, including LLMs, reranking, text embedding, speech-to-text (STT), and text-to-speech(TTS) models. Developers can easily add models by providing configuration parameters such as the model name and API key. + +## Configure + +Configure the OpenAI-API-compatible model by providing its core details (Type, Name, API Key, URL) and adjusting further options like completion, context, and token limits, as well as streaming and vision settings. Save when done. 
+ +![](./_assets/openai_api_compatible-01.png) diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/audio.mp3 b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/audio.mp3 new file mode 100644 index 0000000000..7c86e02e16 Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/audio.mp3 differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/icon.svg b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/icon.svg new file mode 100644 index 0000000000..ce65af405f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/openai_api_compatible-01.png b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/openai_api_compatible-01.png new file mode 100644 index 0000000000..4189f8de9b Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/_assets/openai_api_compatible-01.png differ diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/main.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/main.py new file mode 100644 index 0000000000..37eb41b64f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/main.py @@ -0,0 +1,6 @@ +from dify_plugin import Plugin, DifyPluginEnv + +plugin = Plugin(DifyPluginEnv()) + +if __name__ == "__main__": + plugin.run() diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/manifest.yaml b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/manifest.yaml new file mode 100644 index 0000000000..1eed23d797 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/manifest.yaml @@ -0,0 +1,31 @@ +version: 0.0.11 +type: plugin +author: "langgenius" +name: "openai_api_compatible" +description: + en_US: Model providers compatible with OpenAI's API standard, such as LM Studio. 
+ zh_Hans: 兼容 OpenAI API 的模型供应商,例如 LM Studio 。 +label: + en_US: "OpenAI-API-compatible" +created_at: "2024-07-12T08:03:44.658609186Z" +icon: icon.svg +resource: + memory: 1048576 + permission: + tool: + enabled: true + model: + enabled: true + llm: true +plugins: + models: + - "provider/openai_api_compatible.yaml" +meta: + version: 0.0.1 + arch: + - "amd64" + - "arm64" + runner: + language: "python" + version: "3.12" + entrypoint: "main" diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/common_openai.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/common_openai.py new file mode 100644 index 0000000000..282828eb98 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/common_openai.py @@ -0,0 +1,53 @@ +from collections.abc import Mapping + +import openai +from httpx import Timeout + +from dify_plugin.errors.model import InvokeAuthorizationError, InvokeBadRequestError, InvokeConnectionError, InvokeError, InvokeRateLimitError, InvokeServerUnavailableError + + +class _CommonOpenAI: + def _to_credential_kwargs(self, credentials: Mapping) -> dict: + """ + Transform credentials to kwargs for model instance + + :param credentials: + :return: + """ + credentials_kwargs = { + "api_key": credentials['openai_api_key'], + "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0), + "max_retries": 1, + } + + if credentials.get("openai_api_base"): + openai_api_base = credentials["openai_api_base"].rstrip("/") + credentials_kwargs["base_url"] = openai_api_base + "/v1" + + if 'openai_organization' in credentials: + credentials_kwargs['organization'] = credentials['openai_organization'] + + return credentials_kwargs + + @property + def 
_invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. + + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError], + InvokeServerUnavailableError: [openai.InternalServerError], + InvokeRateLimitError: [openai.RateLimitError], + InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError], + InvokeBadRequestError: [ + openai.BadRequestError, + openai.NotFoundError, + openai.UnprocessableEntityError, + openai.APIError, + ], + } diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/llm/llm.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/llm/llm.py new file mode 100644 index 0000000000..b3e2755866 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/llm/llm.py @@ -0,0 +1,31 @@ +from typing import Mapping + +from dify_plugin.entities.model import ( + AIModelEntity, + I18nObject, + ModelFeature +) + +from dify_plugin.interfaces.model.openai_compatible.llm import ( + OAICompatLargeLanguageModel, +) + + +class OpenAILargeLanguageModel(OAICompatLargeLanguageModel): + def get_customizable_model_schema(self, model: str, credentials: Mapping) -> AIModelEntity: + entity = super().get_customizable_model_schema(model, credentials) + + agent_though_support = credentials.get("agent_though_support", "not_supported") + if agent_though_support == "supported": + try: + entity.features.index(ModelFeature.AGENT_THOUGHT) + except ValueError: + 
entity.features.append(ModelFeature.AGENT_THOUGHT) + + if "display_name" in credentials and credentials["display_name"] != "": + entity.label= I18nObject( + en_US=credentials["display_name"], + zh_Hans=credentials["display_name"] + ) + + return entity diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/rerank/rerank.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/rerank/rerank.py new file mode 100644 index 0000000000..050b8415d2 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/rerank/rerank.py @@ -0,0 +1,50 @@ +from typing import Mapping + +from dify_plugin.entities.model import ( + AIModelEntity, + I18nObject +) + +from dify_plugin.interfaces.model.openai_compatible.rerank import ( + OAICompatRerankModel, +) +from dify_plugin.errors.model import ( + CredentialsValidateFailedError, +) + +class OpenAIRerankModel(OAICompatRerankModel): + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + self._invoke( + model=model, + credentials=credentials, + query="What is the capital of the United States?", + docs=[ + "Carson City is the capital city of the American state of Nevada. At the 2010 United States " + "Census, Carson City had a population of 55,274.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that " + "are a political division controlled by the United States. 
Its capital is Saipan.", + ], + score_threshold=0.8, + top_n=3, + ) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) from ex + + def get_customizable_model_schema(self, model: str, credentials: Mapping) -> AIModelEntity: + entity = super().get_customizable_model_schema(model, credentials) + + if "display_name" in credentials and credentials["display_name"] != "": + entity.label= I18nObject( + en_US=credentials["display_name"], + zh_Hans=credentials["display_name"] + ) + + return entity diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/speech2text/speech2text.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/speech2text/speech2text.py new file mode 100644 index 0000000000..0dc949dfab --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/speech2text/speech2text.py @@ -0,0 +1,28 @@ +from typing import Optional +from dify_plugin.entities.model import AIModelEntity, FetchFrom, I18nObject, ModelType +from dify_plugin.interfaces.model.openai_compatible.speech2text import ( + OAICompatSpeech2TextModel, +) + + +class OpenAISpeech2TextModel(OAICompatSpeech2TextModel): + def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]: + """ + used to define customizable model schema + """ + entity = AIModelEntity( + model=model, + label=I18nObject(en_US=model), + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_type=ModelType.SPEECH2TEXT, + model_properties={}, + parameter_rules=[], + ) + + if "display_name" in credentials and credentials["display_name"] != "": + entity.label= I18nObject( + en_US=credentials["display_name"], + zh_Hans=credentials["display_name"] + ) + + return entity diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/text_embedding/text_embedding.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/text_embedding/text_embedding.py new file mode 100644 index 0000000000..d3fd211b40 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/text_embedding/text_embedding.py @@ -0,0 +1,24 @@ +from typing import Mapping + +from dify_plugin.entities.model import ( + AIModelEntity, + I18nObject +) + +from dify_plugin.interfaces.model.openai_compatible.text_embedding import ( + OAICompatEmbeddingModel, +) + + +class OpenAITextEmbeddingModel(OAICompatEmbeddingModel): + + def get_customizable_model_schema(self, model: str, credentials: Mapping) -> AIModelEntity: + entity = super().get_customizable_model_schema(model, credentials) + + if "display_name" in credentials and credentials["display_name"] != "": + entity.label= I18nObject( + en_US=credentials["display_name"], + zh_Hans=credentials["display_name"] + ) + + return entity diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/tts/tts.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/tts/tts.py new file mode 100644 index 0000000000..2201dff8a5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/models/tts/tts.py @@ -0,0 +1,22 @@ +from typing import Mapping + +from dify_plugin.entities.model import ( + AIModelEntity, + I18nObject +) + +from 
dify_plugin.interfaces.model.openai_compatible.tts import OAICompatText2SpeechModel + + +class OpenAIText2SpeechModel(OAICompatText2SpeechModel): + + def get_customizable_model_schema(self, model: str, credentials: Mapping) -> AIModelEntity: + entity = super().get_customizable_model_schema(model, credentials) + + if "display_name" in credentials and credentials["display_name"] != "": + entity.label= I18nObject( + en_US=credentials["display_name"], + zh_Hans=credentials["display_name"] + ) + + return entity diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/provider/openai_api_compatible.py b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/provider/openai_api_compatible.py new file mode 100644 index 0000000000..57e28ffbc2 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/provider/openai_api_compatible.py @@ -0,0 +1,17 @@ +import logging +from collections.abc import Mapping + +from dify_plugin import ModelProvider + +logger = logging.getLogger(__name__) + + +class OpenAIProvider(ModelProvider): + def validate_provider_credentials(self, credentials: Mapping) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. 
+ """ + pass \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/provider/openai_api_compatible.yaml b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/provider/openai_api_compatible.yaml new file mode 100644 index 0000000000..193ce61976 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/provider/openai_api_compatible.yaml @@ -0,0 +1,240 @@ +provider: openai_api_compatible +label: + en_US: OpenAI-API-compatible +description: + en_US: Model providers compatible with OpenAI's API standard, such as LM Studio. + zh_Hans: 兼容 OpenAI API 的模型供应商,例如 LM Studio 。 +icon_small: + en_US: icon.svg +supported_model_types: + - llm + - rerank + - text-embedding + - speech2text + - tts +configurate_methods: + - customizable-model +model_credential_schema: + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter full model name + zh_Hans: 输入模型全称 + credential_form_schemas: + - variable: display_name + label: + en_US: Model display name + zh_Hans: 模型显示名称 + type: text-input + required: false + placeholder: + zh_Hans: 模型在界面的显示名称 + en_US: The display name of the model in the interface. + - variable: api_key + label: + en_US: API Key + type: secret-input + required: false + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key + - variable: endpoint_url + label: + zh_Hans: API endpoint URL + en_US: API endpoint URL + type: text-input + required: true + placeholder: + zh_Hans: Base URL, e.g. https://api.openai.com/v1 + en_US: Base URL, e.g. 
https://api.openai.com/v1 + - variable: endpoint_model_name + label: + zh_Hans: API endpoint中的模型名称 + en_US: model name for API endpoint + type: text-input + required: false + placeholder: + zh_Hans: endpoint model name, e.g. chatgpt4.0 + en_US: endpoint model name, e.g. chatgpt4.0 + - variable: mode + show_on: + - variable: __model_type + value: llm + label: + en_US: Completion mode + type: select + required: false + default: chat + placeholder: + zh_Hans: 选择对话类型 + en_US: Select completion mode + options: + - value: completion + label: + en_US: Completion + zh_Hans: 补全 + - value: chat + label: + en_US: Chat + zh_Hans: 对话 + - variable: context_size + label: + zh_Hans: 模型上下文长度 + en_US: Model context size + required: true + show_on: + - variable: __model_type + value: llm + type: text-input + default: "4096" + placeholder: + zh_Hans: 在此输入您的模型上下文长度 + en_US: Enter your Model context size + - variable: context_size + label: + zh_Hans: 模型上下文长度 + en_US: Model context size + required: true + show_on: + - variable: __model_type + value: text-embedding + type: text-input + default: "4096" + placeholder: + zh_Hans: 在此输入您的模型上下文长度 + en_US: Enter your Model context size + - variable: context_size + label: + zh_Hans: 模型上下文长度 + en_US: Model context size + required: true + show_on: + - variable: __model_type + value: rerank + type: text-input + default: "4096" + placeholder: + zh_Hans: 在此输入您的模型上下文长度 + en_US: Enter your Model context size + - variable: max_tokens_to_sample + label: + zh_Hans: 最大 token 上限 + en_US: Upper bound for max tokens + show_on: + - variable: __model_type + value: llm + default: "4096" + type: text-input + - variable: agent_though_support + show_on: + - variable: __model_type + value: llm + label: + en_US: Agent Thought + type: select + required: false + default: not_supported + options: + - value: supported + label: + en_US: Support + zh_Hans: 支持 + - value: not_supported + label: + en_US: Not Support + zh_Hans: 不支持 + - variable: function_calling_type + show_on: 
+ - variable: __model_type + value: llm + label: + en_US: Function calling + type: select + required: false + default: no_call + options: + - value: function_call + label: + en_US: Function Call + zh_Hans: Function Call + - value: tool_call + label: + en_US: Tool Call + zh_Hans: Tool Call + - value: no_call + label: + en_US: Not Support + zh_Hans: 不支持 + - variable: stream_function_calling + show_on: + - variable: __model_type + value: llm + label: + en_US: Stream function calling + type: select + required: false + default: not_supported + options: + - value: supported + label: + en_US: Support + zh_Hans: 支持 + - value: not_supported + label: + en_US: Not Support + zh_Hans: 不支持 + - variable: vision_support + show_on: + - variable: __model_type + value: llm + label: + zh_Hans: Vision 支持 + en_US: Vision Support + type: select + required: false + default: no_support + options: + - value: support + label: + en_US: Support + zh_Hans: 支持 + - value: no_support + label: + en_US: Not Support + zh_Hans: 不支持 + - variable: stream_mode_delimiter + label: + zh_Hans: 流模式返回结果的分隔符 + en_US: Delimiter for streaming results + show_on: + - variable: __model_type + value: llm + default: '\n\n' + type: text-input + - variable: voices + show_on: + - variable: __model_type + value: tts + label: + en_US: Available Voices (comma-separated) + zh_Hans: 可用声音(用英文逗号分隔) + type: text-input + required: false + default: "alloy" + placeholder: + en_US: "alloy,echo,fable,onyx,nova,shimmer" + zh_Hans: "alloy,echo,fable,onyx,nova,shimmer" + help: + en_US: "List voice names separated by commas. First voice will be used as default." 
+ zh_Hans: "用英文逗号分隔的声音列表。第一个声音将作为默认值。" +extra: + python: + provider_source: provider/openai_api_compatible.py + model_sources: + - "models/llm/llm.py" + - "models/text_embedding/text_embedding.py" + - "models/rerank/rerank.py" + - "models/speech2text/speech2text.py" + - "models/tts/tts.py" diff --git a/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/requirements.txt b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/requirements.txt new file mode 100644 index 0000000000..dd23658227 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/openai_api_compatible-0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7/requirements.txt @@ -0,0 +1,2 @@ +dify_plugin==0.0.1b73 +openai~=1.66.3 \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/.env.example b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/.env.example new file mode 100644 index 0000000000..dbb75b858b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/.env.example @@ -0,0 +1,4 @@ +INSTALL_METHOD=remote +REMOTE_INSTALL_HOST=debug-plugin.dify.dev +REMOTE_INSTALL_PORT=5003 +REMOTE_INSTALL_KEY=********-****-****-****-************ diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/README.md b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/README.md new file mode 100644 index 0000000000..a27b2c9121 --- /dev/null +++ 
b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/README.md @@ -0,0 +1,15 @@ +## Overview + +SiliconCloud (MaaS) simplifies AI model deployment with robust performance. This plugin provides access to various models (LLMs, text embedding, reranking, STT, TTS), configurable via model name, API key, and other parameters. + +## Configure + +Install the SiliconFlow plugin, then configure it by entering your API Key. Get your API Key from [SiliconFlow](https://cloud.siliconflow.cn/account/ak) and save. + +![](./_assets/siliconflow-01.png) + +## About SiliconFlow  + +SiliconFlow is committed to building a scalable, standardized, and high-performance AI Infra platform. It offers SiliconCloud (the model cloud service platform), SiliconLLM (the LLM inference engine), and OneDiff (the high-performance text-to-image/video acceleration library). These solutions help enterprises and individual users deploy AI models efficiently and cost-effectively. 
+ +[Website](https://siliconflow.cn/) | [SiliconCloud Quick Start](https://docs.siliconflow.cn/quickstart) \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow-01.png b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow-01.png new file mode 100644 index 0000000000..d930db0b39 Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow-01.png differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow.svg b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow.svg new file mode 100644 index 0000000000..16e406f030 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow_square.svg b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow_square.svg new file mode 100644 index 0000000000..ad6b384f7a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/_assets/siliconflow_square.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/main.py b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/main.py new file mode 100644 index 0000000000..f2bfcdfc0e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/main.py @@ -0,0 +1,6 @@ +from dify_plugin import Plugin, DifyPluginEnv + +plugin = Plugin(DifyPluginEnv()) + +if __name__ == '__main__': + plugin.run() diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/manifest.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/manifest.yaml new file mode 100644 index 0000000000..4262fe5aa5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/manifest.yaml @@ -0,0 +1,29 @@ +meta: + arch: + - amd64 + - arm64 + runner: + entrypoint: main + language: python + version: "3.12" + version: 0.0.1 +name: siliconflow +author: langgenius +label: + en_US: SiliconFlow + zh_Hans: 硅基流动 +description: + en_US: SiliconFlow provides access to various models (LLMs, text embedding, reranking, STT, TTS), configurable via model name, API key, and other parameters. 
+ zh_Hans: 硅基流动提供对各种模型(LLM、文本嵌入、重排序、STT、TTS)的访问,可通过模型名称、API密钥和其他参数进行配置。 +icon: siliconflow_square.svg +plugins: + models: + - provider/siliconflow.yaml +resource: + memory: 268435456 + permission: + model: + enabled: false +type: plugin +version: 0.0.8 +created_at: 2024-09-20T00:13:50.29298939-04:00 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/Internvl2-26b.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/Internvl2-26b.yaml new file mode 100644 index 0000000000..f7b03e1254 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/Internvl2-26b.yaml @@ -0,0 +1,84 @@ +model: OpenGVLab/InternVL2-26B +label: + en_US: OpenGVLab/InternVL2-26B +model_type: llm +features: + - vision +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '21' + output: '21' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/Internvl2-8b.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/Internvl2-8b.yaml new file mode 100644 index 0000000000..1e858bb4be --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/Internvl2-8b.yaml @@ -0,0 +1,84 @@ +model: Pro/OpenGVLab/InternVL2-8B +label: + en_US: Pro/OpenGVLab/InternVL2-8B +model_type: llm +features: + - vision +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '21' + output: '21' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/_position.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/_position.yaml new file mode 100644 index 0000000000..ffa16f8bb4 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/_position.yaml @@ -0,0 +1,44 @@ +- Pro/deepseek-ai/DeepSeek-R1 +- Pro/deepseek-ai/DeepSeek-V3 +- deepseek-ai/DeepSeek-R1 +- deepseek-ai/DeepSeek-V3 +- deepseek-ai/DeepSeek-V2.5 +- deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +- deepseek-ai/DeepSeek-R1-Distill-Qwen-14B +- deepseek-ai/DeepSeek-R1-Distill-Qwen-7B +- deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +- deepseek-ai/DeepSeek-R1-Distill-Llama-70B +- deepseek-ai/DeepSeek-R1-Distill-Llama-8B +- deepseek-ai/DeepSeek-V2.5 +- deepseek-ai/Janus-Pro-7B +- Qwen/QwQ-32B-Preview +- Qwen/QVQ-72B-Preview +- Qwen/Qwen2.5-72B-Instruct +- Qwen/Qwen2.5-72B-Instruct-128K +- Qwen/Qwen2.5-32B-Instruct +- Qwen/Qwen2.5-14B-Instruct +- Qwen/Qwen2.5-7B-Instruct +- Qwen/Qwen2.5-Coder-32B-Instruct +- Qwen/Qwen2.5-Coder-7B-Instruct +- Qwen/Qwen2-VL-72B-Instruct +- Qwen/Qwen2-1.5B-Instruct +- Qwen/Qwen2.5-72B-Instruct-128K +- Vendor-A/Qwen/Qwen2.5-72B-Instruct +- Pro/Qwen/Qwen2-VL-7B-Instruct +- OpenGVLab/InternVL2-26B +- Pro/OpenGVLab/InternVL2-8B +- Vendor-A/Qwen/Qwen2.5-72B-Instruct +- Pro/Qwen/Qwen2-VL-7B-Instruct +- Pro/OpenGVLab/InternVL2-8B +- OpenGVLab/InternVL2-26B +- THUDM/glm-4-9b-chat +- 
01-ai/Yi-1.5-34B-Chat-16K +- 01-ai/Yi-1.5-9B-Chat-16K +- 01-ai/Yi-1.5-6B-Chat +- internlm/internlm2_5-20b-chat +- internlm/internlm2_5-7b-chat +- meta-llama/Llama-3.3-70B-Instruct +- meta-llama/Meta-Llama-3.1-70B-Instruct +- meta-llama/Meta-Llama-3.1-8B-Instruct +- google/gemma-2-27b-it +- google/gemma-2-9b-it diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepdeek-coder-v2-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepdeek-coder-v2-instruct.yaml new file mode 100644 index 0000000000..b13a2a751c --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepdeek-coder-v2-instruct.yaml @@ -0,0 +1,40 @@ +model: deepseek-ai/DeepSeek-Coder-V2-Instruct +label: + en_US: deepseek-ai/DeepSeek-Coder-V2-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.33' + output: '1.33' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-llama-70B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-llama-70B.yaml new file mode 100644 index 0000000000..59e0b4d68e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-llama-70B.yaml @@ -0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B +label: + zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Llama-70B + en_US: deepseek-ai/DeepSeek-R1-Distill-Llama-70B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "4.3" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-llama-8B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-llama-8B.yaml new file mode 100644 index 0000000000..f3256aa5a0 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-llama-8B.yaml @@ -0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B +label: + zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Llama-8B + en_US: 
deepseek-ai/DeepSeek-R1-Distill-Llama-8B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "0.00" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-1.5B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-1.5B.yaml new file mode 100644 index 0000000000..7297278654 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-1.5B.yaml @@ -0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +label: + zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B + en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "1.26" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-14B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-14B.yaml new file mode 100644 index 0000000000..24b5c89ebf --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-14B.yaml @@ 
-0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B +label: + zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B + en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "0.70" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-32B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-32B.yaml new file mode 100644 index 0000000000..2a8cce1f96 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-32B.yaml @@ -0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +label: + zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B + en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "1.26" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-7B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-7B.yaml new file mode 100644 index 0000000000..cde1c14aae --- /dev/null +++ 
b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-distill-qwen-7B.yaml @@ -0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B +label: + zh_Hans: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B + en_US: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "0.00" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-pro.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-pro.yaml new file mode 100644 index 0000000000..d9d34ab2a1 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1-pro.yaml @@ -0,0 +1,21 @@ +model: Pro/deepseek-ai/DeepSeek-R1 +label: + zh_Hans: Pro/deepseek-ai/DeepSeek-R1 + en_US: Pro/deepseek-ai/DeepSeek-R1 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 16384 + default: 16384 +pricing: + input: "4" + output: "16" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1.yaml new file mode 100644 index 0000000000..1c382f26ea --- /dev/null 
+++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-r1.yaml @@ -0,0 +1,21 @@ +model: deepseek-ai/DeepSeek-R1 +label: + zh_Hans: deepseek-ai/DeepSeek-R1 + en_US: deepseek-ai/DeepSeek-R1 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 16384 + default: 16384 +pricing: + input: "4" + output: "16" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v2-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v2-chat.yaml new file mode 100644 index 0000000000..00bdb0ddf5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v2-chat.yaml @@ -0,0 +1,40 @@ +model: deepseek-ai/DeepSeek-V2-Chat +label: + en_US: deepseek-ai/DeepSeek-V2-Chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.33' + output: '1.33' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v2.5.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v2.5.yaml new file mode 100644 index 0000000000..511cc1df9f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v2.5.yaml @@ -0,0 +1,53 @@ +model: deepseek-ai/DeepSeek-V2.5 +label: + en_US: deepseek-ai/DeepSeek-V2.5 +model_type: llm +features: + - agent-thought + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.33' + output: '1.33' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v3-pro.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v3-pro.yaml new file mode 100644 index 0000000000..3d37734045 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v3-pro.yaml @@ -0,0 +1,53 @@ +model: Pro/deepseek-ai/DeepSeek-V3 +label: + en_US: Pro/deepseek-ai/DeepSeek-V3 +model_type: llm +features: + - agent-thought + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: "1" + output: "2" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v3.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v3.yaml new file mode 100644 index 0000000000..ed1a5f0099 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/deepseek-v3.yaml @@ -0,0 +1,53 @@ +model: deepseek-ai/DeepSeek-V3 +label: + en_US: deepseek-ai/DeepSeek-V3 +model_type: llm +features: + - agent-thought + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: "1" + output: "2" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/gemma-2-27b-it.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/gemma-2-27b-it.yaml new file mode 100644 index 0000000000..7fd7adb753 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/gemma-2-27b-it.yaml @@ -0,0 +1,51 @@ +model: google/gemma-2-27b-it +label: + en_US: google/gemma-2-27b-it +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8196 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/gemma-2-9b-it.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/gemma-2-9b-it.yaml new file mode 100644 index 0000000000..ab548e4409 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/gemma-2-9b-it.yaml @@ -0,0 +1,51 @@ +model: google/gemma-2-9b-it +label: + en_US: google/gemma-2-9b-it +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8196 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/glm4-9b-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/glm4-9b-chat.yaml new file mode 100644 index 0000000000..43c5b344a0 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/glm4-9b-chat.yaml @@ -0,0 +1,51 @@ +model: THUDM/glm-4-9b-chat +label: + en_US: THUDM/glm-4-9b-chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/hunyuan-a52b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/hunyuan-a52b-instruct.yaml new file mode 100644 index 0000000000..51d6c024ff --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/hunyuan-a52b-instruct.yaml @@ -0,0 +1,85 @@ +model: Tencent/Hunyuan-A52B-Instruct +label: + en_US: Tencent/Hunyuan-A52B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '21' + output: '21' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internlm2_5-20b-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internlm2_5-20b-chat.yaml new file mode 100644 index 0000000000..a5ae3674c9 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internlm2_5-20b-chat.yaml @@ -0,0 +1,51 @@ +model: internlm/internlm2_5-20b-chat +label: + en_US: internlm/internlm2_5-20b-chat +model_type: llm +features: + - 
agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1' + output: '1' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internlm2_5-7b-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internlm2_5-7b-chat.yaml new file mode 100644 index 0000000000..dec856ed2e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internlm2_5-7b-chat.yaml @@ -0,0 +1,51 @@ +model: internlm/internlm2_5-7b-chat +label: + en_US: internlm/internlm2_5-7b-chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 
指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internvl2-llama3-76b.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internvl2-llama3-76b.yaml new file mode 100644 index 0000000000..b5443df18c --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/internvl2-llama3-76b.yaml @@ -0,0 +1,85 @@ +model: OpenGVLab/InternVL2-Llama3-76B +label: + en_US: OpenGVLab/InternVL2-Llama3-76B +model_type: llm +features: + - vision +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '21' + output: '21' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/janus-pro-7B.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/janus-pro-7B.yaml new file mode 100644 index 0000000000..dabbd745e5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/janus-pro-7B.yaml @@ -0,0 +1,22 @@ +model: deepseek-ai/Janus-Pro-7B +label: + zh_Hans: deepseek-ai/Janus-Pro-7B + en_US: deepseek-ai/Janus-Pro-7B +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.00" + output: "0.00" + unit: "0.000001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/llm.py b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/llm.py new file mode 100644 index 0000000000..b26f30de1e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/llm.py @@ -0,0 +1,90 @@ +from collections.abc import Generator +from typing import Optional, Union +from dify_plugin import OAICompatLargeLanguageModel +from 
class SiliconflowLargeLanguageModel(OAICompatLargeLanguageModel):
    """SiliconFlow chat model provider built on the OpenAI-compatible base.

    Every entry point first pins the credentials to SiliconFlow's chat
    endpoint (see ``_add_custom_parameters``) and then delegates to the
    parent implementation.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        """Invoke the model through the parent OpenAI-compatible client.

        Args:
            model: Model identifier to call.
            credentials: Provider credentials; mutated in place to carry the
                SiliconFlow endpoint and chat mode.
            prompt_messages: Conversation to send.
            model_parameters: Sampling / generation parameters.
            tools: Optional tool definitions for tool calling.
            stop: Optional stop sequences.
            stream: Whether a streaming result is requested.
            user: Accepted for interface compatibility (see note below).

        Returns:
            Whatever the parent ``_invoke`` returns for these arguments.
        """
        self._add_custom_parameters(credentials)
        # NOTE(review): ``user`` is accepted but intentionally not forwarded,
        # matching the existing behavior; confirm whether the upstream API
        # accepts a ``user`` field before passing it through.
        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """Validate credentials against the SiliconFlow endpoint.

        The credentials dict is mutated in place with the fixed endpoint and
        mode before the parent validation runs.
        """
        self._add_custom_parameters(credentials)
        super().validate_credentials(model, credentials)

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """Force chat mode and the SiliconFlow base URL onto ``credentials``."""
        credentials["mode"] = "chat"
        credentials["endpoint_url"] = "https://api.siliconflow.cn/v1"

    def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
        """Build the schema for a user-defined (customizable) model.

        Args:
            model: Model identifier; also used as the display label.
            credentials: Reads ``function_calling_type``, ``context_size`` and
                ``max_tokens``.

        Returns:
            An ``AIModelEntity`` describing the model and its tunable
            parameters.
        """
        # Tool-calling features are only advertised when explicitly enabled.
        if credentials.get("function_calling_type") == "tool_call":
            features = [ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
        else:
            features = []

        # ``or`` fallback: a key that is present but blank/None (e.g. an empty
        # form field) would otherwise make ``int()`` raise.
        context_size = int(credentials.get("context_size") or 8000)
        max_tokens_limit = int(credentials.get("max_tokens") or 16384)

        return AIModelEntity(
            model=model,
            label=I18nObject(en_US=model, zh_Hans=model),
            model_type=ModelType.LLM,
            features=features,
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: context_size,
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
            },
            parameter_rules=[
                ParameterRule(
                    name="temperature",
                    use_template="temperature",
                    label=I18nObject(en_US="Temperature", zh_Hans="温度"),
                    type=ParameterType.FLOAT,
                ),
                ParameterRule(
                    name="max_tokens",
                    use_template="max_tokens",
                    default=4096,
                    min=1,
                    max=max_tokens_limit,
                    label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
                    type=ParameterType.INT,
                ),
                ParameterRule(
                    name="top_p",
                    use_template="top_p",
                    label=I18nObject(en_US="Top P", zh_Hans="Top P"),
                    type=ParameterType.FLOAT,
                ),
                ParameterRule(
                    name="top_k",
                    use_template="top_k",
                    label=I18nObject(en_US="Top K", zh_Hans="Top K"),
                    # top_k is a count of candidate tokens, so it is an
                    # integer (the predefined model YAMLs use ``type: int``).
                    type=ParameterType.INT,
                ),
                ParameterRule(
                    name="frequency_penalty",
                    use_template="frequency_penalty",
                    label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
                    type=ParameterType.FLOAT,
                ),
            ],
        )
+ - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3-70b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3-70b-instruct.yaml new file mode 100644 index 0000000000..9825090759 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3-70b-instruct.yaml @@ -0,0 +1,40 @@ +model: meta-llama/Meta-Llama-3-70B-Instruct +label: + en_US: meta-llama/Meta-Llama-3-70B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3-8b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3-8b-instruct.yaml new file mode 100644 index 0000000000..0133fd15d4 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3-8b-instruct.yaml @@ -0,0 +1,40 @@ +model: meta-llama/Meta-Llama-3-8B-Instruct +label: + en_US: meta-llama/Meta-Llama-3-8B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-405b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-405b-instruct.yaml new file mode 100644 index 0000000000..ceebbac515 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-405b-instruct.yaml @@ -0,0 +1,51 @@ +model: meta-llama/Meta-Llama-3.1-405B-Instruct +label: + en_US: meta-llama/Meta-Llama-3.1-405B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '21' + output: '21' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-70b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-70b-instruct.yaml new file mode 100644 index 0000000000..f1fbb74cbd --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-70b-instruct.yaml @@ -0,0 +1,51 @@ +model: meta-llama/Meta-Llama-3.1-70B-Instruct +label: + en_US: meta-llama/Meta-Llama-3.1-70B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-8b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-8b-instruct.yaml new file mode 100644 index 0000000000..a9a43545e8 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/meta-mlama-3.1-8b-instruct.yaml @@ -0,0 +1,51 @@ +model: meta-llama/Meta-Llama-3.1-8B-Instruct +label: + en_US: meta-llama/Meta-Llama-3.1-8B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/mistral-7b-instruct-v0.2.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/mistral-7b-instruct-v0.2.yaml new file mode 100644 index 0000000000..a71d8688a8 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/mistral-7b-instruct-v0.2.yaml @@ -0,0 +1,40 @@ +model: mistralai/Mistral-7B-Instruct-v0.2 +label: + en_US: mistralai/Mistral-7B-Instruct-v0.2 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/mistral-8x7b-instruct-v0.1.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/mistral-8x7b-instruct-v0.1.yaml new file mode 100644 index 0000000000..db45a75c6d --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/mistral-8x7b-instruct-v0.1.yaml @@ -0,0 +1,40 @@ +model: mistralai/Mixtral-8x7B-Instruct-v0.1 +label: + en_US: mistralai/Mixtral-8x7B-Instruct-v0.1 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen-qvq-72B-preview.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen-qvq-72B-preview.yaml new file mode 100644 index 0000000000..dada6bb803 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen-qvq-72B-preview.yaml @@ -0,0 +1,54 @@ +model: Qwen/QVQ-72B-Preview +label: + en_US: Qwen/QVQ-72B-Preview +model_type: llm +features: + - agent-thought + - tool-call + - stream-tool-call + - vision +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 16384 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '9.90' + output: '9.90' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen-qwq-32B-preview.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen-qwq-32B-preview.yaml new file mode 100644 index 0000000000..e73c5d2030 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen-qwq-32B-preview.yaml @@ -0,0 +1,53 @@ +model: Qwen/QwQ-32B-Preview +label: + en_US: Qwen/QwQ-32B-Preview +model_type: llm +features: + - agent-thought + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-1.5b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-1.5b-instruct.yaml new file mode 100644 index 0000000000..bec5d37c57 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-1.5b-instruct.yaml @@ -0,0 +1,39 @@ +model: Qwen/Qwen2-1.5B-Instruct +label: + en_US: Qwen/Qwen2-1.5B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-57b-a14b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-57b-a14b-instruct.yaml new file mode 100644 index 0000000000..0f56d16d9b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-57b-a14b-instruct.yaml @@ -0,0 +1,40 @@ +model: Qwen/Qwen2-57B-A14B-Instruct +label: + en_US: Qwen/Qwen2-57B-A14B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-72b-instruct.yaml new file mode 100644 index 0000000000..af65cfb8ed --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-72b-instruct.yaml @@ -0,0 +1,40 @@ +model: Qwen/Qwen2-72B-Instruct +label: + en_US: Qwen/Qwen2-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-7b-instruct.yaml new file mode 100644 index 0000000000..f0f10ae625 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-7b-instruct.yaml @@ -0,0 +1,40 @@ +model: Qwen/Qwen2-7B-Instruct +label: + en_US: Qwen/Qwen2-7B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-vl-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-vl-72b-instruct.yaml new file mode 100644 index 0000000000..f5180b41f1 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-vl-72b-instruct.yaml @@ -0,0 +1,84 @@ +model: Qwen/Qwen2-VL-72B-Instruct +label: + en_US: Qwen/Qwen2-VL-72B-Instruct +model_type: llm +features: + - vision +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-vl-7b-Instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-vl-7b-Instruct.yaml new file mode 100644 index 0000000000..0ffbaee383 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2-vl-7b-Instruct.yaml @@ -0,0 +1,84 @@ +model: Pro/Qwen/Qwen2-VL-7B-Instruct +label: + en_US: Pro/Qwen/Qwen2-VL-7B-Instruct +model_type: llm +features: + - vision +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and 
diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0.35' + output: '0.35' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-14b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-14b-instruct.yaml new file mode 100644 index 0000000000..8a045b818a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-14b-instruct.yaml @@ -0,0 +1,51 @@ +model: Qwen/Qwen2.5-14B-Instruct +label: + en_US: Qwen/Qwen2.5-14B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0.7' + output: '0.7' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-32b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-32b-instruct.yaml new file mode 100644 index 0000000000..9c308d2ab4 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-32b-instruct.yaml @@ -0,0 +1,51 @@ +model: Qwen/Qwen2.5-32B-Instruct +label: + en_US: Qwen/Qwen2.5-32B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct-128k.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct-128k.yaml new file mode 100644 index 0000000000..79f94da376 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct-128k.yaml @@ -0,0 +1,51 @@ +model: Qwen/Qwen2.5-72B-Instruct-128K +label: + en_US: Qwen/Qwen2.5-72B-Instruct-128K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct-vendorA.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct-vendorA.yaml new file mode 100644 index 0000000000..fdbe38ff21 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct-vendorA.yaml @@ -0,0 +1,51 @@ +model: Vendor-A/Qwen/Qwen2.5-72B-Instruct +label: + en_US: Vendor-A/Qwen/Qwen2.5-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.00' + output: '1.00' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct.yaml new file mode 100644 index 0000000000..de9d9d97bf --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-72b-instruct.yaml @@ -0,0 +1,51 @@ +model: Qwen/Qwen2.5-72B-Instruct +label: + en_US: Qwen/Qwen2.5-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-7b-instruct.yaml new file mode 100644 index 0000000000..bb85dbe948 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-7b-instruct.yaml @@ -0,0 +1,51 @@ +model: Qwen/Qwen2.5-7B-Instruct +label: + en_US: Qwen/Qwen2.5-7B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 8192 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-coder-32b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-coder-32b-instruct.yaml new file mode 100644 index 0000000000..de2224a67b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-coder-32b-instruct.yaml @@ -0,0 +1,84 @@ +model: Qwen/Qwen2.5-Coder-32B-Instruct +label: + en_US: Qwen/Qwen2.5-Coder-32B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-coder-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-coder-7b-instruct.yaml new file mode 100644 index 0000000000..c31a338cdd --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-coder-7b-instruct.yaml @@ -0,0 +1,84 @@ +model: Qwen/Qwen2.5-Coder-7B-Instruct +label: + en_US: Qwen/Qwen2.5-Coder-7B-Instruct +model_type: llm +features: + - 
agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. 
+ - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-math-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-math-72b-instruct.yaml new file mode 100644 index 0000000000..40c9ab48ca --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/qwen2.5-math-72b-instruct.yaml @@ -0,0 +1,85 @@ +model: Qwen/Qwen2.5-Math-72B-Instruct +label: + en_US: Qwen/Qwen2.5-Math-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-34b-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-34b-chat.yaml new file mode 100644 index 0000000000..3e25f82369 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-34b-chat.yaml @@ -0,0 +1,39 @@ +model: 01-ai/Yi-1.5-34B-Chat +label: + en_US: 01-ai/Yi-1.5-34B-Chat-16K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 16384 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. 
+ - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '1.26' + output: '1.26' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-6b-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-6b-chat.yaml new file mode 100644 index 0000000000..827b2ce1e5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-6b-chat.yaml @@ -0,0 +1,39 @@ +model: 01-ai/Yi-1.5-6B-Chat +label: + en_US: 01-ai/Yi-1.5-6B-Chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-9b-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-9b-chat.yaml new file mode 100644 index 0000000000..112fcbfe97 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/llm/yi-1.5-9b-chat.yaml @@ -0,0 +1,39 @@ +model: 01-ai/Yi-1.5-9B-Chat-16K +label: + en_US: 01-ai/Yi-1.5-9B-Chat-16K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 16384 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty +pricing: + input: '0' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/bce-reranker-base_v1.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/bce-reranker-base_v1.yaml new file mode 100644 index 0000000000..ff3635bfeb --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/bce-reranker-base_v1.yaml @@ -0,0 +1,4 @@ +model: netease-youdao/bce-reranker-base_v1 +model_type: rerank +model_properties: + context_size: 512 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/bge-reranker-v2-m3.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/bge-reranker-v2-m3.yaml new file mode 100644 index 0000000000..807f531b08 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/rerank/bge-reranker-v2-m3.yaml @@ -0,0 +1,4 @@ +model: BAAI/bge-reranker-v2-m3 +model_type: rerank +model_properties: + context_size: 8192 diff --git 
from typing import Optional

import httpx
from dify_plugin.entities.model.rerank import RerankDocument, RerankResult
from dify_plugin.errors.model import (
    CredentialsValidateFailedError,
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from dify_plugin.interfaces.model.rerank_model import RerankModel


class SiliconflowRerankModel(RerankModel):
    """Rerank model that calls the ``/rerank`` endpoint of a SiliconFlow-style HTTP API."""

    # Endpoint used when the credentials carry no (or an empty) ``base_url``.
    _DEFAULT_BASE_URL = "https://api.siliconflow.cn/v1"

    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Rerank ``docs`` against ``query`` with a single POST to ``<base_url>/rerank``.

        :param model: model name sent in the request body
        :param credentials: dict read for ``base_url`` (optional) and ``api_key``
        :param query: search query sent in the request body
        :param docs: documents to rerank; an empty list short-circuits without any request
        :param score_threshold: when set, results whose ``relevance_score`` is below it are dropped
        :param top_n: forwarded unchanged as ``top_n`` in the request body
        :param user: not used by this implementation
        :return: rerank result holding the retained documents in the order the API returned them
        :raises InvokeAuthorizationError: the API answered 401 or 403
        :raises InvokeRateLimitError: the API answered 429
        :raises InvokeBadRequestError: the API answered with any other 4xx status
        :raises InvokeServerUnavailableError: any other status rejected by ``raise_for_status``
        """
        if not docs:
            return RerankResult(model=model, docs=[])

        # ``or`` (rather than a ``.get`` default) also covers a present-but-empty
        # or ``None`` base_url, which would otherwise produce a relative URL or
        # an AttributeError.
        base_url = (credentials.get("base_url") or self._DEFAULT_BASE_URL).rstrip("/")

        try:
            response = httpx.post(
                f"{base_url}/rerank",
                json={"model": model, "query": query, "documents": docs, "top_n": top_n, "return_documents": True},
                headers={"Authorization": f"Bearer {credentials.get('api_key')}"},
            )
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            # Keep the original exception as the cause so the HTTP details are not lost.
            raise self._error_for_status(e) from e

        results = response.json()
        # Apply the threshold before building RerankDocument objects so that
        # discarded results are never instantiated.
        rerank_documents = [
            RerankDocument(index=item["index"], text=item["document"]["text"], score=item["relevance_score"])
            for item in results["results"]
            if score_threshold is None or item["relevance_score"] >= score_threshold
        ]
        return RerankResult(model=model, docs=rerank_documents)

    @staticmethod
    def _error_for_status(error: httpx.HTTPStatusError) -> InvokeError:
        """Translate an HTTP status failure into the closest unified invoke error."""
        status = error.response.status_code
        message = str(error)
        if status in (401, 403):
            return InvokeAuthorizationError(message)
        if status == 429:
            return InvokeRateLimitError(message)
        if 400 <= status < 500:
            return InvokeBadRequestError(message)
        # Everything else keeps the previous catch-all classification.
        return InvokeServerUnavailableError(message)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate credentials by issuing one small rerank request.

        :param model: model name to invoke
        :param credentials: credentials passed through to ``_invoke``
        :raises CredentialsValidateFailedError: if the probe request raises any exception
        """
        try:
            self._invoke(
                model=model,
                credentials=credentials,
                query="What is the capital of the United States?",
                docs=[
                    "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.",
                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
                ],
                score_threshold=0.8,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error

        Keys are the unified error types; each value lists the httpx exception
        types associated with that key. ``_invoke`` already converts
        ``httpx.HTTPStatusError`` by status code before it can propagate.
        """
        return {
            InvokeConnectionError: [httpx.ConnectError],
            InvokeServerUnavailableError: [httpx.RemoteProtocolError],
            InvokeRateLimitError: [],
            InvokeAuthorizationError: [httpx.HTTPStatusError],
            InvokeBadRequestError: [httpx.RequestError],
        }
class SiliconflowSpeech2TextModel(OAICompatSpeech2TextModel):
    """
    SiliconFlow speech-to-text model; delegates to OAICompatSpeech2TextModel
    after pointing the credentials at the SiliconFlow endpoint.
    """

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """Set ``endpoint_url`` on ``credentials`` (the dict is mutated in place)."""
        credentials.update(endpoint_url="https://api.siliconflow.cn/v1")

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate credentials through the parent class.

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is overwritten
        """
        self._add_custom_parameters(credentials)
        outcome = super().validate_credentials(model, credentials)
        return outcome

    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Transcribe an audio file.

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is overwritten
        :param file: audio file
        :param user: unique user id (not forwarded to the parent call)
        :return: text for given audio file
        """
        self._add_custom_parameters(credentials)
        transcript = super()._invoke(model, credentials, file)
        return transcript
b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-large-en-v1.5.yaml @@ -0,0 +1,5 @@ +model: BAAI/bge-large-en-v1.5 +model_type: text-embedding +model_properties: + context_size: 512 + max_chunks: 1 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-large-zh-v1.5.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-large-zh-v1.5.yaml new file mode 100644 index 0000000000..5248375d0b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-large-zh-v1.5.yaml @@ -0,0 +1,5 @@ +model: BAAI/bge-large-zh-v1.5 +model_type: text-embedding +model_properties: + context_size: 512 + max_chunks: 1 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-m3.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-m3.yaml new file mode 100644 index 0000000000..f0b12dd420 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/bge-m3.yaml @@ -0,0 +1,5 @@ +model: BAAI/bge-m3 +model_type: text-embedding +model_properties: + context_size: 8192 + max_chunks: 1 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/text_embedding/text_embedding.py 
class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
    """
    SiliconFlow text embedding model; delegates to OAICompatEmbeddingModel
    after pointing the credentials at the SiliconFlow endpoint.
    """

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """Set ``endpoint_url`` on ``credentials`` (the dict is mutated in place)."""
        credentials.update(endpoint_url="https://api.siliconflow.cn/v1")

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Count tokens for ``texts`` through the parent class.

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is overwritten
        :param texts: texts to count
        :return: token count reported by the parent implementation
        """
        self._add_custom_parameters(credentials)
        token_count = super().get_num_tokens(model, credentials, texts)
        return token_count

    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Embed ``texts``.

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is overwritten
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type (accepted but not forwarded to the parent call)
        :return: embeddings result
        """
        self._add_custom_parameters(credentials)
        embeddings = super()._invoke(model, credentials, texts, user)
        return embeddings

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate credentials through the parent class.

        :param model: model name
        :param credentials: model credentials; ``endpoint_url`` is overwritten
        """
        self._add_custom_parameters(credentials)
        super().validate_credentials(model, credentials)
b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/cosyvoice2-0.5b.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/cosyvoice2-0.5b.yaml new file mode 100644 index 0000000000..d2e02ed21b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/cosyvoice2-0.5b.yaml @@ -0,0 +1,38 @@ +model: FunAudioLLM/CosyVoice2-0.5B +model_type: tts +model_properties: + default_voice: 'FunAudioLLM/CosyVoice2-0.5B:alex' + voices: + - mode: "FunAudioLLM/CosyVoice2-0.5B:alex" + name: "Alex(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:benjamin" + name: "Benjamin(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:charles" + name: "Charles(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:david" + name: "David(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:anna" + name: "Anna(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:bella" + name: "Bella(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:claire" + name: "Claire(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "FunAudioLLM/CosyVoice2-0.5B:diana" + name: "Diana(女声)" + language: [ "zh-Hans", "en-US" ] + audio_type: 'mp3' + max_workers: 5 + # stream: false +pricing: + input: '50' + output: '0' + unit: '0.000001' + currency: RMB + diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/fish-speech-1.4.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/fish-speech-1.4.yaml new file mode 100644 index 0000000000..d8af252241 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/fish-speech-1.4.yaml @@ -0,0 +1,37 @@ +model: fishaudio/fish-speech-1.4 +model_type: tts +model_properties: + default_voice: 'fishaudio/fish-speech-1.4:alex' + voices: + - mode: "fishaudio/fish-speech-1.4:alex" + name: "Alex(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:benjamin" + name: "Benjamin(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:charles" + name: "Charles(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:david" + name: "David(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:anna" + name: "Anna(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:bella" + name: "Bella(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:claire" + name: "Claire(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.4:diana" + name: "Diana(女声)" + language: [ "zh-Hans", "en-US" ] + audio_type: 'mp3' + max_workers: 5 + # stream: false +pricing: + input: '105' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/fish-speech-1.5.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/fish-speech-1.5.yaml new file mode 100644 index 
0000000000..df09c19526 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/fish-speech-1.5.yaml @@ -0,0 +1,37 @@ +model: fishaudio/fish-speech-1.5 +model_type: tts +model_properties: + default_voice: 'fishaudio/fish-speech-1.5:alex' + voices: + - mode: "fishaudio/fish-speech-1.5:alex" + name: "Alex(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:benjamin" + name: "Benjamin(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:charles" + name: "Charles(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:david" + name: "David(男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:anna" + name: "Anna(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:bella" + name: "Bella(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:claire" + name: "Claire(女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "fishaudio/fish-speech-1.5:diana" + name: "Diana(女声)" + language: [ "zh-Hans", "en-US" ] + audio_type: 'mp3' + max_workers: 5 + # stream: false +pricing: + input: '105' + output: '0' + unit: '0.000001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/gpt-sovits.yaml b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/gpt-sovits.yaml new file mode 100644 index 0000000000..66b1dc99c2 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/models/tts/gpt-sovits.yaml @@ -0,0 +1,38 @@ +model: RVC-Boss/GPT-SoVITS +model_type: tts +model_properties: + default_voice: 'RVC-Boss/GPT-SoVITS:alex' + voices: + - mode: 
class SiliconFlowText2SpeechModel(_CommonOaiApiCompat, TTSModel):
    """
    Model class for SiliconFlow text to speech model.
    """

    def _invoke(
        self,
        model: str,
        tenant_id: str,
        credentials: dict,
        content_text: str,
        voice: str,
        user: Optional[str] = None,
    ) -> Any:
        """
        _invoke text2speech model

        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param content_text: text content to be translated
        :param voice: model timbre; replaced by the model default when empty
            or not among the model's voices
        :param user: unique user id
        :return: generator of mp3 audio byte chunks
        """
        voices = self.get_tts_model_voices(model=model, credentials=credentials) or []
        if not voice or voice not in [d.get("value") for d in voices]:
            voice = self._get_model_default_voice(model, credentials)
        return self._tts_invoke_streaming(
            model=model, credentials=credentials, content_text=content_text, voice=voice
        )

    def validate_credentials(self, model: str, credentials: Mapping) -> None:
        """
        validate credentials text2speech model

        :param model: model name
        :param credentials: model credentials
        :raises CredentialsValidateFailedError: if synthesizing a short probe
            text fails for any reason
        """
        try:
            stream = self._tts_invoke_streaming(
                model=model,
                credentials=credentials,
                content_text="Hello SiliconFlow!",
                voice=self._get_model_default_voice(model, credentials),
            )
            # _tts_invoke_streaming is a generator function: nothing inside it
            # runs until it is iterated. Pull the first chunk so the request is
            # really issued, then close the generator to release the response.
            try:
                next(stream, None)
            finally:
                stream.close()
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex

    def _tts_invoke_streaming(
        self, model: str, credentials: Mapping, content_text: str, voice: str
    ) -> Any:
        """
        _tts_invoke_streaming text2speech model

        This is a generator: the body runs only while the result is iterated.

        :param model: model name
        :param credentials: model credentials (copied; the caller's mapping is
            not modified)
        :param content_text: text content to be translated
        :param voice: model timbre
        :return: generator of mp3 audio byte chunks
        :raises InvokeBadRequestError: wraps any exception raised while
            preparing or streaming the audio
        """
        credentials = dict(credentials)
        try:
            self._add_custom_parameters(credentials)
            credentials_kwargs = self._to_credential_kwargs(credentials)
            client = OpenAI(**credentials_kwargs)
            voices = (
                self.get_tts_model_voices(model=model, credentials=credentials) or []
            )
            model_support_voice = [x.get("value") for x in voices]
            if not voice or voice not in model_support_voice:
                voice = self._get_model_default_voice(model, credentials)
            if len(content_text) > 4096:
                sentences = self._split_text_into_sentences(
                    content_text, max_length=4096
                )
                # The context manager shuts the pool down once streaming ends
                # (or the generator is closed) instead of leaking its threads.
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=min(3, len(sentences))
                ) as executor:
                    futures = [
                        executor.submit(
                            client.audio.speech.with_streaming_response.create,
                            model=model,
                            response_format="mp3",
                            input=sentence,
                            voice=voice,
                        )
                        for sentence in sentences
                    ]
                    for future in futures:
                        # Enter through ``with`` so each response is closed
                        # after its bytes have been relayed.
                        with future.result() as response:
                            yield from response.iter_bytes(1024)
            else:
                with client.audio.speech.with_streaming_response.create(
                    model=model,
                    voice=voice,
                    response_format="mp3",
                    input=content_text.strip(),
                ) as response:
                    yield from response.iter_bytes(1024)
        except Exception as ex:
            raise InvokeBadRequestError(str(ex)) from ex

    @classmethod
    def _add_custom_parameters(cls, credentials: dict) -> None:
        """Fill in the OpenAI-style keys read by ``_to_credential_kwargs``.

        Mutates ``credentials`` in place; raises ``KeyError`` when ``api_key``
        is missing.
        """
        credentials["openai_api_base"] = "https://api.siliconflow.cn"
        credentials["openai_api_key"] = credentials["api_key"]

    def _to_credential_kwargs(self, credentials: Mapping) -> dict:
        """
        Transform credentials to kwargs for model instance

        :param credentials: mapping holding ``openai_api_key`` and optionally
            ``openai_api_base`` / ``openai_organization``
        :return: keyword arguments passed to ``OpenAI(...)``
        """
        credentials_kwargs = {
            "api_key": credentials["openai_api_key"],
            "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
            "max_retries": 1,
        }

        if credentials.get("openai_api_base"):
            openai_api_base = credentials["openai_api_base"].rstrip("/")
            credentials_kwargs["base_url"] = openai_api_base + "/v1"

        if "openai_organization" in credentials:
            credentials_kwargs["organization"] = credentials["openai_organization"]

        return credentials_kwargs
class SiliconflowProvider(ModelProvider):
    """Model provider entry point for SiliconFlow."""

    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Check provider credentials by validating them against an LLM model.

        Any failure is propagated to the caller; failures other than
        CredentialsValidateFailedError are logged with a traceback first.

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        try:
            llm = self.get_model_instance(ModelType.LLM)
            llm.validate_credentials(model="deepseek-ai/DeepSeek-V2.5", credentials=credentials)
        except CredentialsValidateFailedError:
            # Already the error type callers expect; pass it through unlogged.
            raise
        except Exception:
            logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
            raise
models/text_embedding/text_embedding.py + - models/tts/tts.py + - models/speech2text/speech2text.py + provider_source: provider/siliconflow.py +help: + title: + en_US: Get your API Key from SiliconFlow + zh_Hans: 从 SiliconFlow 获取 API Key + url: + en_US: https://cloud.siliconflow.cn/account/ak +icon_large: + en_US: siliconflow.svg +icon_small: + en_US: siliconflow_square.svg +label: + en_US: SiliconFlow + zh_Hans: 硅基流动 +model_credential_schema: + credential_form_schemas: + - label: + en_US: API Key + placeholder: + en_US: Enter your API Key + zh_Hans: 在此输入您的 API Key + required: true + type: secret-input + variable: api_key + - default: "4096" + label: + en_US: Model context size + zh_Hans: 模型上下文长度 + placeholder: + en_US: Enter your Model context size + zh_Hans: 在此输入您的模型上下文长度 + required: true + type: text-input + variable: context_size + - default: "4096" + label: + en_US: Upper bound for max tokens + zh_Hans: 最大 token 上限 + show_on: + - value: llm + variable: __model_type + type: text-input + variable: max_tokens + - default: no_call + label: + en_US: Function calling + options: + - label: + en_US: Not Support + zh_Hans: 不支持 + value: no_call + - label: + en_US: Support + zh_Hans: 支持 + value: function_call + required: false + show_on: + - value: llm + variable: __model_type + type: select + variable: function_calling_type + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter your model name + zh_Hans: 输入模型名称 +models: + llm: + position: models/llm/_position.yaml + predefined: + - models/llm/*.yaml + rerank: + predefined: + - models/rerank/*.yaml + speech2text: + predefined: + - models/speech2text/*.yaml + text_embedding: + predefined: + - models/text_embedding/*.yaml + tts: + predefined: + - models/tts/*.yaml +provider: siliconflow +provider_credential_schema: + credential_form_schemas: + - label: + en_US: API Key + placeholder: + en_US: Enter your API Key + zh_Hans: 在此输入您的 API Key + required: true + type: secret-input + variable: api_key 
+supported_model_types: + - llm + - text-embedding + - rerank + - speech2text + - tts diff --git a/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/requirements.txt b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/requirements.txt new file mode 100644 index 0000000000..555a9390ba --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/siliconflow-0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3/requirements.txt @@ -0,0 +1,3 @@ +dify_plugin==0.0.1b73 +httpx~=0.27.2 +openai~=1.57.0 diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/.env.example b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/.env.example new file mode 100644 index 0000000000..dbb75b858b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/.env.example @@ -0,0 +1,4 @@ +INSTALL_METHOD=remote +REMOTE_INSTALL_HOST=debug-plugin.dify.dev +REMOTE_INSTALL_PORT=5003 +REMOTE_INSTALL_KEY=********-****-****-****-************ diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/README.md b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/README.md new file mode 100644 index 0000000000..54bc105805 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/README.md @@ -0,0 +1,7 @@ +# Overview +Tongyi Qwen, developed by Alibaba Cloud, is a sophisticated series of LLMs. 
It includes multiple variants, such as Qwen for text processing, Qwen-VL for vision-language tasks, and Qwen-Audio for audio understanding. The models are notable for their impressive scale, with the flagship Qwen-72B model featuring 72 billion parameters and trained on over 3 trillion tokens. + +# Configure +After installation, you need to get API keys from [Alibaba Cloud](https://bailian.console.aliyun.com/?apiKey=1#/api-key) and setup in Settings -> Model Provider. + +![](_assets/tongyi.PNG) diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_l_en.png b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_l_en.png new file mode 100644 index 0000000000..94de01136a Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_l_en.png differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_l_zh.png b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_l_zh.png new file mode 100644 index 0000000000..bd8f2762d1 Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_l_zh.png differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_s_en.png b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_s_en.png new file mode 100644 index 0000000000..c1aff40ee0 Binary files /dev/null and 
b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/icon_s_en.png differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/tongyi.PNG b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/tongyi.PNG new file mode 100644 index 0000000000..ff8806744b Binary files /dev/null and b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/_assets/tongyi.PNG differ diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/main.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/main.py new file mode 100644 index 0000000000..f2bfcdfc0e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/main.py @@ -0,0 +1,6 @@ +from dify_plugin import Plugin, DifyPluginEnv + +plugin = Plugin(DifyPluginEnv()) + +if __name__ == '__main__': + plugin.run() diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/manifest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/manifest.yaml new file mode 100644 index 0000000000..e1cb6ef016 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/manifest.yaml @@ -0,0 +1,29 @@ +meta: + arch: + - amd64 + - arm64 + runner: + entrypoint: main + language: python + version: "3.12" + version: 0.0.1 +name: tongyi +author: langgenius +label: + en_US: 
class _CommonTongyi:
    """Shared helpers: credential kwargs and the DashScope-to-invoke error mapping."""

    @staticmethod
    def _to_credential_kwargs(credentials: dict) -> dict:
        """Return a dict holding only ``dashscope_api_key`` taken from ``credentials``.

        Raises ``KeyError`` when the key is absent.
        """
        api_key = credentials["dashscope_api_key"]
        return {"dashscope_api_key": api_key}

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error.

        Keys are the unified error types raised to the caller; each value lists
        the model-side exception types that are converted into that key.

        :return: Invoke error mapping
        """
        mapping: dict[type[InvokeError], list[type[Exception]]] = {}
        mapping[InvokeConnectionError] = [RequestFailure]
        mapping[InvokeServerUnavailableError] = [ServiceUnavailableError]
        mapping[InvokeRateLimitError] = []
        mapping[InvokeAuthorizationError] = [AuthenticationError]
        mapping[InvokeBadRequestError] = [
            InvalidParameter,
            UnsupportedModel,
            UnsupportedHTTPMethod,
        ]
        return mapping
qwen-turbo-0919 +- qwen-turbo-0624 +- qwen-turbo-0206 +- qwen-turbo-chat +- qwen-turbo +- qwen2.5-72b-instruct +- qwen2.5-32b-instruct +- qwen2.5-14b-instruct-1m +- qwen2.5-14b-instruct +- qwen2.5-7b-instruct-1m +- qwen2.5-7b-instruct +- qwen2.5-3b-instruct +- qwen2.5-1.5b-instruct +- qwen2.5-0.5b-instruct +- qwen2.5-coder-7b-instruct +- qwen2-math-72b-instruct +- qwen2-math-7b-instruct +- qwen2-math-1.5b-instruct +- qwen-long +- qwen-math-plus-latest +- qwen-math-plus-0919 +- qwen-math-plus-0816 +- qwen-math-plus +- qwen-math-turbo-latest +- qwen-math-turbo-0919 +- qwen-math-turbo +- qwen-coder-turbo-latest +- qwen-coder-turbo-0919 +- qwen-coder-turbo +- farui-plus diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1-distill-qwen-14B.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1-distill-qwen-14B.yaml new file mode 100644 index 0000000000..2bce8805c6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1-distill-qwen-14B.yaml @@ -0,0 +1,21 @@ +model: deepseek-r1-distill-qwen-14b +label: + zh_Hans: DeepSeek-R1-Distill-Qwen-14B + en_US: DeepSeek-R1-Distill-Qwen-14B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.001" + output: "0.003" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1-distill-qwen-32B.yaml 
b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1-distill-qwen-32B.yaml new file mode 100644 index 0000000000..dfc155ff6a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1-distill-qwen-32B.yaml @@ -0,0 +1,21 @@ +model: deepseek-r1-distill-qwen-32b +label: + zh_Hans: DeepSeek-R1-Distill-Qwen-32B + en_US: DeepSeek-R1-Distill-Qwen-32B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.002" + output: "0.006" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1.yaml new file mode 100644 index 0000000000..b97356e641 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-r1.yaml @@ -0,0 +1,21 @@ +model: deepseek-r1 +label: + zh_Hans: DeepSeek-R1 + en_US: DeepSeek-R1 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: max_tokens + use_template: max_tokens + min: 1 + max: 8192 + default: 4096 +pricing: + input: "0.004" + output: "0.016" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-v3.yaml 
b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-v3.yaml new file mode 100644 index 0000000000..23f38d60d3 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/deepseek-v3.yaml @@ -0,0 +1,52 @@ +model: deepseek-v3 +label: + zh_Hans: DeepSeek-V3 + en_US: DeepSeek-V3 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 64000 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: "0.002" + output: "0.008" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/farui-plus.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/farui-plus.yaml new file mode 100644 index 0000000000..34a57d1fc0 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/farui-plus.yaml @@ -0,0 +1,77 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: farui-plus +label: + en_US: farui-plus +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 12288 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/llm.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/llm.py new file mode 100644 index 0000000000..df3b80f12b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/llm.py @@ -0,0 +1,699 @@ +import base64 +import os +import tempfile +import uuid +from collections.abc import Generator +from http import HTTPStatus +from pathlib import Path +from typing import Optional, Union, cast + +import requests +from dashscope import Generation, MultiModalConversation, get_tokenizer +from dashscope.api_entities.dashscope_response import GenerationResponse +from 
dashscope.common.error import ( + AuthenticationError, + InvalidParameter, + RequestFailure, + ServiceUnavailableError, + UnsupportedHTTPMethod, + UnsupportedModel, +) +from dify_plugin.entities.model import ( + AIModelEntity, + FetchFrom, + I18nObject, + ModelFeature, + ModelPropertyKey, + ModelType, + ParameterRule, + ParameterType, +) +from dify_plugin.entities.model.llm import ( + LLMMode, + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, +) +from dify_plugin.entities.model.message import ( + AssistantPromptMessage, + DocumentPromptMessageContent, + ImagePromptMessageContent, + PromptMessage, + PromptMessageContentType, + PromptMessageTool, + SystemPromptMessage, + TextPromptMessageContent, + ToolPromptMessage, + UserPromptMessage, + VideoPromptMessageContent, +) +from dify_plugin.errors.model import ( + CredentialsValidateFailedError, + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from dify_plugin.interfaces.model.large_language_model import LargeLanguageModel +from openai import OpenAI + + +class TongyiLargeLanguageModel(LargeLanguageModel): + tokenizers = {} + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + return self._generate( + model, + credentials, + prompt_messages, + model_parameters, + tools, + stop, + 
def get_num_tokens(
    self,
    model: str,
    credentials: dict,
    prompt_messages: list[PromptMessage],
    tools: Optional[list[PromptMessageTool]] = None,
) -> int:
    """
    Count the tokens of the given prompt messages.

    :param model: model name
    :param credentials: model credentials
    :param prompt_messages: prompt messages
    :param tools: tools for tool calling (not used in the count)
    :return: 0 when a customizable model schema exists for the model,
        otherwise the length of the tokenizer encoding of the flattened prompt
    """
    # NOTE(review): the get_customizable_model_schema override visible in this
    # class always returns an entity, so this guard looks like it always
    # short-circuits to 0 -- confirm that is intended.
    custom_schema = self.get_customizable_model_schema(model, credentials)
    if custom_schema is not None:
        return 0

    # Translate the names exposed by the plugin into the names the DashScope
    # tokenizer lookup is called with.
    tokenizer_names = {
        "qwen-turbo-chat": "qwen-turbo",
        "qwen-plus-chat": "qwen-plus",
        "farui-plus": "qwen-farui-plus",
    }
    model = tokenizer_names.get(model, model)

    # Tokenizers are cached on the shared ``tokenizers`` dict, one per model name.
    if model not in self.tokenizers:
        self.tokenizers[model] = get_tokenizer(model)
    prompt_text = self._convert_messages_to_prompt(prompt_messages)
    return len(self.tokenizers[model].encode(prompt_text))
response chunk generator result + """ + credentials_kwargs = self._to_credential_kwargs(credentials) + mode = self.get_model_mode(model, credentials) + if model in {"qwen-turbo-chat", "qwen-plus-chat"}: + model = model.replace("-chat", "") + extra_model_kwargs = {} + if tools: + extra_model_kwargs["tools"] = self._convert_tools(tools) + if stop: + extra_model_kwargs["stop"] = stop + params = { + "model": model, + **model_parameters, + **credentials_kwargs, + **extra_model_kwargs, + } + model_schema = self.get_model_schema(model, credentials) + if ModelFeature.VISION in (model_schema.features or []): + params["messages"] = self._convert_prompt_messages_to_tongyi_messages( + credentials, prompt_messages, rich_content=True + ) + response = MultiModalConversation.call(**params, stream=stream) + else: + params["messages"] = self._convert_prompt_messages_to_tongyi_messages( + credentials, prompt_messages + ) + response = Generation.call( + **params, + result_format="message", + stream=stream, + incremental_output=False if tools else stream, + ) + if stream: + return self._handle_generate_stream_response( + model, credentials, response, prompt_messages + ) + return self._handle_generate_response( + model, credentials, response, prompt_messages + ) + + def _handle_generate_response( + self, + model: str, + credentials: dict, + response: GenerationResponse, + prompt_messages: list[PromptMessage], + ) -> LLMResult: + """ + Handle llm response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :return: llm response + """ + if response.status_code not in {200, HTTPStatus.OK}: + raise ServiceUnavailableError(response.message) + resp_content = response.output.choices[0].message.content + # special for qwen-vl + if isinstance(resp_content, list): + resp_content = resp_content[0]["text"] + assistant_prompt_message = AssistantPromptMessage(content=resp_content) + usage = self._calc_response_usage( + 
model, + credentials, + response.usage.input_tokens, + response.usage.output_tokens, + ) + result = LLMResult( + model=model, + message=assistant_prompt_message, + prompt_messages=prompt_messages, + usage=usage, + ) + return result + + def _handle_tool_call_stream(self, response, tool_calls): + tool_calls_stream = response.output.choices[0].message["tool_calls"] + for tool_call_stream in tool_calls_stream: + idx = tool_call_stream.get('index') + if idx >= len(tool_calls): + tool_calls.append(tool_call_stream) + else: + if tool_call_stream.get('function'): + func_name = tool_call_stream.get('function').get('name') + tool_call_obj = tool_calls[idx] + if func_name: + tool_call_obj['function']['name'] += func_name + args = tool_call_stream.get('function').get('arguments') + if args: + tool_call_obj['function']['arguments'] += args + + def _handle_generate_stream_response( + self, + model: str, + credentials: dict, + responses: Generator[GenerationResponse, None, None], + prompt_messages: list[PromptMessage], + ) -> Generator: + """ + Handle llm stream response + + :param model: model name + :param credentials: credentials + :param responses: response + :param prompt_messages: prompt messages + :return: llm response chunk generator result + """ + is_reasoning = False + full_text = "" + tool_calls = [] + for index, response in enumerate(responses): + if response.status_code not in {200, HTTPStatus.OK}: + raise ServiceUnavailableError( + f"Failed to invoke model {model}, status code: {response.status_code}, message: {response.message}" + ) + resp_finish_reason = response.output.choices[0].finish_reason + if resp_finish_reason is not None and resp_finish_reason != "null": + resp_content = response.output.choices[0].message.content + assistant_prompt_message = AssistantPromptMessage(content="") + if "tool_calls" in response.output.choices[0].message: + self._handle_tool_call_stream(response, tool_calls) + elif resp_content: + if isinstance(resp_content, list): + 
resp_content = resp_content[0]["text"] + assistant_prompt_message.content = resp_content.replace( + full_text, "", 1 + ) + full_text = resp_content + if tool_calls: + message_tool_calls = [] + for tool_call_obj in tool_calls: + message_tool_call = AssistantPromptMessage.ToolCall( + id=tool_call_obj["function"]["name"], + type="function", + function=AssistantPromptMessage.ToolCall.ToolCallFunction( + name=tool_call_obj["function"]["name"], + arguments=tool_call_obj["function"]["arguments"], + ), + ) + message_tool_calls.append(message_tool_call) + assistant_prompt_message.tool_calls = message_tool_calls + usage = response.usage + usage = self._calc_response_usage( + model, credentials, usage.input_tokens, usage.output_tokens + ) + yield LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=index, + message=assistant_prompt_message, + finish_reason=resp_finish_reason, + usage=usage, + ), + ) + else: + message = response.output.choices[0].message + + resp_content, is_reasoning = self._wrap_thinking_by_reasoning_content( + message, is_reasoning + ) + if not resp_content: + if "tool_calls" in response.output.choices[0].message: + self._handle_tool_call_stream(response, tool_calls) + continue + if isinstance(resp_content, list): + resp_content = resp_content[0]["text"] + assistant_prompt_message = AssistantPromptMessage( + content=resp_content.replace(full_text, "", 1) + ) + full_text = resp_content + yield LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=index, message=assistant_prompt_message + ), + ) + + def _to_credential_kwargs(self, credentials: dict) -> dict: + """ + Transform credentials to kwargs for model instance + + :param credentials: + :return: + """ + credentials_kwargs = {"api_key": credentials["dashscope_api_key"]} + return credentials_kwargs + + def _convert_one_message_to_text(self, message: PromptMessage) -> str: + """ + Convert a single message to a 
string. + + :param message: PromptMessage to convert. + :return: String representation of the message. + """ + human_prompt = "\n\nHuman:" + ai_prompt = "\n\nAssistant:" + content = message.content + if isinstance(message, UserPromptMessage): + if isinstance(content, str): + message_text = f"{human_prompt} {content}" + else: + message_text = "" + for sub_message in content: + if sub_message.type == PromptMessageContentType.TEXT: + message_text = f"{human_prompt} {sub_message.data}" + break + elif isinstance(message, AssistantPromptMessage): + message_text = f"{ai_prompt} {content}" + elif isinstance(message, SystemPromptMessage | ToolPromptMessage): + message_text = content + else: + raise ValueError(f"Got unknown type {message}") + return message_text + + def _convert_messages_to_prompt(self, messages: list[PromptMessage]) -> str: + """ + Format a list of messages into a full prompt for the Anthropic model + + :param messages: List of PromptMessage to combine. + :return: Combined string with necessary human_prompt and ai_prompt tags. 
+ """ + messages = messages.copy() + text = "".join( + (self._convert_one_message_to_text(message) for message in messages) + ) + return text.rstrip() + + def _convert_prompt_messages_to_tongyi_messages( + self, + credentials: dict, + prompt_messages: list[PromptMessage], + rich_content: bool = False, + ) -> list[dict]: + """ + Convert prompt messages to tongyi messages + + :param prompt_messages: prompt messages + :return: tongyi messages + """ + tongyi_messages = [] + for prompt_message in prompt_messages: + if isinstance(prompt_message, SystemPromptMessage): + tongyi_messages.append( + { + "role": "system", + "content": ( + prompt_message.content + if not rich_content + else [{"text": prompt_message.content}] + ), + } + ) + elif isinstance(prompt_message, UserPromptMessage): + if isinstance(prompt_message.content, str): + tongyi_messages.append( + { + "role": "user", + "content": ( + prompt_message.content + if not rich_content + else [{"text": prompt_message.content}] + ), + } + ) + else: + user_messages = [] + file_id_list = [] + for message_content in prompt_message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast( + TextPromptMessageContent, message_content + ) + sub_message_dict = {"text": message_content.data} + user_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast( + ImagePromptMessageContent, message_content + ) + image_url = message_content.data + if message_content.data.startswith("data:"): + image_url = self._save_base64_image_to_file( + message_content.data + ) + sub_message_dict = {"image": image_url} + user_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.VIDEO: + message_content = cast( + VideoPromptMessageContent, message_content + ) + video_url = message_content.data + if message_content.data.startswith("data:"): + raise InvokeError( + "not support base64, please set 
def _upload_file_to_tongyi(
    self, credentials: dict, message_content: DocumentPromptMessageContent
) -> str:
    """
    Upload a document to Tongyi through the OpenAI-compatible files endpoint.

    The document bytes come from ``message_content.base64_data`` when set,
    otherwise they are downloaded from ``message_content.url``. They are
    staged in a temporary file that is removed once the upload finishes or
    fails.

    :param credentials: credentials for Tongyi; must contain ``dashscope_api_key``
    :param message_content: message content to upload
    :return: file ID in Tongyi
    :raises KeyError: if ``dashscope_api_key`` is missing from the credentials
    :raises ValueError: if the document cannot be fetched from its URL
    """
    client = OpenAI(
        # credentials is a plain dict: attribute access would raise
        # AttributeError, so the key is read by subscript.
        api_key=credentials["dashscope_api_key"],
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    # delete=False keeps the file available by name; cleanup is done
    # explicitly in the finally clause below.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        with temp_file:
            if message_content.base64_data:
                temp_file.write(base64.b64decode(message_content.base64_data))
            else:
                try:
                    response = requests.get(message_content.url, timeout=60)
                    response.raise_for_status()
                    temp_file.write(response.content)
                except Exception as ex:
                    raise ValueError(
                        f"Failed to fetch data from url {message_content.url}, {ex}"
                    ) from ex
            temp_file.flush()
            # Rewind so the upload reads from the start of the file however
            # the client consumes the handle.
            temp_file.seek(0)
            uploaded = client.files.create(file=temp_file, purpose="file-extract")
    finally:
        # The handle is closed by now; remove the staged file so repeated
        # uploads do not pile up in the temp directory.
        os.remove(temp_file.name)
    return uploaded.id
def get_customizable_model_schema(
    self, model: str, credentials: dict
) -> Optional[AIModelEntity]:
    """
    Build the schema for a customizable (user-defined) model.

    Tool-call features are enabled only when the credentials set
    ``function_calling_type`` to ``tool_call``. Context size and the upper
    bound of ``max_tokens`` are read from the credentials, defaulting to
    8000 and 1024.

    :param model: model name
    :param credentials: model credentials
    :return: AIModelEntity describing the model
    """
    supports_tool_call = credentials.get("function_calling_type") == "tool_call"
    max_tokens_limit = int(credentials.get("max_tokens", 1024))
    return AIModelEntity(
        model=model,
        label=I18nObject(en_US=model, zh_Hans=model),
        model_type=ModelType.LLM,
        features=(
            [
                ModelFeature.TOOL_CALL,
                ModelFeature.MULTI_TOOL_CALL,
                ModelFeature.STREAM_TOOL_CALL,
            ]
            if supports_tool_call
            else []
        ),
        fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
        model_properties={
            ModelPropertyKey.CONTEXT_SIZE: int(
                credentials.get("context_size", 8000)
            ),
            ModelPropertyKey.MODE: LLMMode.CHAT.value,
        },
        parameter_rules=[
            ParameterRule(
                name="temperature",
                use_template="temperature",
                label=I18nObject(en_US="Temperature", zh_Hans="温度"),
                type=ParameterType.FLOAT,
            ),
            ParameterRule(
                name="max_tokens",
                use_template="max_tokens",
                # Keep the default inside [min, max] when the configured
                # limit is below 512.
                default=min(512, max_tokens_limit),
                min=1,
                max=max_tokens_limit,
                label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
                type=ParameterType.INT,
            ),
            ParameterRule(
                name="top_p",
                use_template="top_p",
                label=I18nObject(en_US="Top P", zh_Hans="Top P"),
                type=ParameterType.FLOAT,
            ),
            ParameterRule(
                name="top_k",
                use_template="top_k",
                label=I18nObject(en_US="Top K", zh_Hans="Top K"),
                # top_k is a candidate count; the predefined model YAML rules
                # in this plugin declare it as an int as well.
                type=ParameterType.INT,
            ),
            ParameterRule(
                name="frequency_penalty",
                use_template="frequency_penalty",
                label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
                type=ParameterType.FLOAT,
            ),
        ],
    )
degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, so the generated results are more deterministic. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-coder-turbo-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-coder-turbo-latest.yaml new file mode 100644 index 0000000000..a4c93f7047 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-coder-turbo-latest.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-coder-turbo-latest +label: + en_US: qwen-coder-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-coder-turbo.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-coder-turbo.yaml new file mode 100644 index 0000000000..ff68faed80 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-coder-turbo.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-coder-turbo +label: + en_US: qwen-coder-turbo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-long.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-long.yaml new file mode 100644 index 0000000000..be0a522709 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-long.yaml @@ -0,0 +1,78 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-long +label: + en_US: qwen-long +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call + - document +model_properties: + mode: chat + context_size: 10000000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 6000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: "0.0005" + output: "0.002" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-0816.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-0816.yaml new file mode 100644 index 0000000000..42fe1f6862 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-0816.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-plus-0816 +label: + en_US: qwen-math-plus-0816 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-0919.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-0919.yaml new file mode 100644 index 0000000000..9b6567b8cd --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-0919.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-plus-0919 +label: + en_US: qwen-math-plus-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-latest.yaml new file mode 100644 index 0000000000..b2a2393b36 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus-latest.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-plus-latest +label: + en_US: qwen-math-plus-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus.yaml new file mode 100644 index 0000000000..63f4b7ff0a --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-plus.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-plus +label: + en_US: qwen-math-plus +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo-0919.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo-0919.yaml new file mode 100644 index 0000000000..4da90eec3e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo-0919.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-turbo-0919 +label: + en_US: qwen-math-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
 Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, so the generated results are more deterministic. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set.
 The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how strongly repetition is penalized during generation. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo-latest.yaml new file mode 100644 index 0000000000..d29f8851dd --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo-latest.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-turbo-latest +label: + en_US: qwen-math-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo.yaml new file mode 100644 index 0000000000..2a8f7f725e --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-math-turbo.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-math-turbo +label: + en_US: qwen-math-turbo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
 Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, so the generated results are more deterministic. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set.
 The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how strongly repetition is penalized during generation. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0107.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0107.yaml new file mode 100644 index 0000000000..661311f178 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0107.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-0107 +label: + en_US: qwen-max-0107 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0403.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0403.yaml new file mode 100644 index 0000000000..76b739a92b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0403.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max-0403, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-0403 +label: + en_US: qwen-max-0403 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0428.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0428.yaml new file mode 100644 index 0000000000..334b41257f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0428.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max-0428, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-0428 +label: + en_US: qwen-max-0428 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0919.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0919.yaml new file mode 100644 index 0000000000..bfa70ca935 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-0919.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-0919 +label: + en_US: qwen-max-0919 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-1201.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-1201.yaml new file mode 100644 index 0000000000..83c5732cd6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-1201.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-1201 +label: + en_US: qwen-max-1201 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-latest.yaml new file mode 100644 index 0000000000..538bfb0448 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-latest.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-latest +label: + en_US: qwen-max-latest +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-longcontext.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-longcontext.yaml new file mode 100644 index 0000000000..52bd758702 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max-longcontext.yaml @@ -0,0 +1,88 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max-longcontext +label: + en_US: qwen-max-longcontext +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max.yaml new file mode 100644 index 0000000000..4af4822e86 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-max.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwen-max +label: + en_US: qwen-max +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0206.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0206.yaml new file mode 100644 index 0000000000..d37052f560 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0206.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-plus-0206, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-0206 +label: + en_US: qwen-plus-0206 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0624.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0624.yaml new file mode 100644 index 0000000000..0025db9bd1 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0624.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-plus-0624, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-0624 +label: + en_US: qwen-plus-0624 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0723.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0723.yaml new file mode 100644 index 0000000000..401a15b690 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0723.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-plus-0723, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-0723 +label: + en_US: qwen-plus-0723 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8000 + min: 1 + max: 8000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0806.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0806.yaml new file mode 100644 index 0000000000..39154708a1 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0806.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-plus-0806, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-0806 +label: + en_US: qwen-plus-0806 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0919.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0919.yaml new file mode 100644 index 0000000000..2ed2949a45 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-0919.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-plus-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-0919 +label: + en_US: qwen-plus-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-chat.yaml new file mode 100644 index 0000000000..d891796bac --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-chat.yaml @@ -0,0 +1,88 @@ +# this model corresponds to qwen-plus, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-chat +label: + en_US: qwen-plus-chat +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-latest.yaml new file mode 100644 index 0000000000..7ef5c04975 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus-latest.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-plus-latest, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus-latest +label: + en_US: qwen-plus-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus.yaml new file mode 100644 index 0000000000..529a29b1b5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-plus.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-plus, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) +model: qwen-plus +label: + en_US: qwen-plus +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0206.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0206.yaml new file mode 100644 index 0000000000..09bc797465 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0206.yaml @@ -0,0 +1,86 @@ +# this model corresponds to qwen-turbo-0206, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) + +model: qwen-turbo-0206 +label: + en_US: qwen-turbo-0206 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0624.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0624.yaml new file mode 100644 index 0000000000..036a96d1ae --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0624.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-turbo-0624, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) +model: qwen-turbo-0624 +label: + en_US: qwen-turbo-0624 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234.
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how repetitive the generated text is. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0919.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0919.yaml new file mode 100644 index 0000000000..866efa3248 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-0919.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-turbo-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) +model: qwen-turbo-0919 +label: + en_US: qwen-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234.
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how repetitive the generated text is. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.0003' + output: '0.0006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-chat.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-chat.yaml new file mode 100644 index 0000000000..98c78ba8ac --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-chat.yaml @@ -0,0 +1,88 @@ +# this model corresponds to qwen-turbo, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) +model: qwen-turbo-chat +label: + en_US: qwen-turbo-chat +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 1500 + min: 1 + max: 1500 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234.
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how repetitive the generated text is. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-latest.yaml new file mode 100644 index 0000000000..e1193aeb7f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo-latest.yaml @@ -0,0 +1,85 @@ +# this model corresponds to qwen-turbo-latest, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) +model: qwen-turbo-latest +label: + en_US: qwen-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234.
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how repetitive the generated text is. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.0006' + output: '0.0003' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo.yaml new file mode 100644 index 0000000000..a0c4ba6820 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-turbo.yaml @@ -0,0 +1,87 @@ +# this model corresponds to qwen-turbo, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) +model: qwen-turbo +label: + en_US: qwen-turbo +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234.
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Controls how repetitive the generated text is. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty. + - name: enable_search + type: boolean + default: false + label: + zh_Hans: 联网搜索 + en_US: Web Search + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format + use_template: response_format +pricing: + input: '0.0006' + output: '0.0003' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-0201.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-0201.yaml new file mode 100644 index 0000000000..d80168ffc3 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-0201.yaml @@ -0,0 +1,49 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-max-0201 +label: + en_US: qwen-vl-max-0201 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. 
For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB +deprecated: true diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-0809.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-0809.yaml new file mode 100644 index 0000000000..94b6666d05 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-0809.yaml @@ -0,0 +1,80 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-max-0809 +label: + en_US: qwen-vl-max-0809 +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 
用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-2025-01-25.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-2025-01-25.yaml new file mode 100644 index 0000000000..f458f7b252 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-2025-01-25.yaml @@ -0,0 +1,78 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-max-2025-01-25 +label: + en_US: qwen-vl-max-2025-01-25 +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.003' + output: '0.009' + unit: '0.001' + currency: RMB \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-latest.yaml new file mode 100644 index 0000000000..73b90ad8b0 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max-latest.yaml @@ -0,0 +1,78 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-max-latest +label: + en_US: qwen-vl-max-latest +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. 
It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.003' + output: '0.009' + unit: '0.001' + currency: RMB \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max.yaml new file mode 100644 index 0000000000..b6172c1cbc --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-max.yaml @@ -0,0 +1,80 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-max +label: + en_US: qwen-vl-max +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-0809.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-0809.yaml new file mode 100644 index 0000000000..0be4b68f4f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-0809.yaml @@ -0,0 +1,80 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-plus-0809 +label: + en_US: qwen-vl-plus-0809 +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. 
It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.008' + output: '0.008' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-2025-01-02.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-2025-01-02.yaml new file mode 100644 index 0000000000..c586fb6693 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-2025-01-02.yaml @@ -0,0 +1,78 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-plus-2025-01-02 +label: + en_US: qwen-vl-plus-2025-01-02 +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 1024 + min: 1 + max: 2048 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.0015' + output: '0.0045' + unit: '0.001' + currency: RMB \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-2025-01-25.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-2025-01-25.yaml new file mode 100644 index 0000000000..f30b3749e1 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-2025-01-25.yaml @@ -0,0 +1,78 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-plus-2025-01-25 +label: + en_US: qwen-vl-plus-2025-01-25 +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. 
It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.003' + output: '0.009' + unit: '0.001' + currency: RMB \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-latest.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-latest.yaml new file mode 100644 index 0000000000..5787c45f4b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus-latest.yaml @@ -0,0 +1,78 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-plus-latest +label: + en_US: qwen-vl-plus-latest +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 4096 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.003' + output: '0.009' + unit: '0.001' + currency: RMB \ No newline at end of file diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus.yaml new file mode 100644 index 0000000000..6c8a8121c6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen-vl-plus.yaml @@ -0,0 +1,80 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen-vl-plus +label: + en_US: qwen-vl-plus +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. 
It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.008' + output: '0.008' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-1.5b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-1.5b-instruct.yaml new file mode 100644 index 0000000000..ea157f42de --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-1.5b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2-math-1.5b-instruct +label: + en_US: qwen2-math-1.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-72b-instruct.yaml new file mode 100644 index 0000000000..37052a9233 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-72b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2-math-72b-instruct +label: + en_US: qwen2-math-72b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-7b-instruct.yaml new file mode 100644 index 0000000000..e182f1c27f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2-math-7b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2-math-7b-instruct +label: + en_US: qwen2-math-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-0.5b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-0.5b-instruct.yaml new file mode 100644 index 0000000000..9e75ccc1f2 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-0.5b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-0.5b-instruct +label: + en_US: qwen2.5-0.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-1.5b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-1.5b-instruct.yaml new file mode 100644 index 0000000000..67c9d31243 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-1.5b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-1.5b-instruct +label: + en_US: qwen2.5-1.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-14b-instruct-1m.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-14b-instruct-1m.yaml new file mode 100644 index 0000000000..87c83bf0bb --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-14b-instruct-1m.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-14b-instruct-1m +label: + en_US: qwen2.5-14b-instruct-1m +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 1000000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: "0.001" + output: "0.003" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-14b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-14b-instruct.yaml new file mode 100644 index 0000000000..2a38be921c --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-14b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-14b-instruct +label: + en_US: qwen2.5-14b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-32b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-32b-instruct.yaml new file mode 100644 index 0000000000..e6e4fbf978 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-32b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-32b-instruct +label: + en_US: qwen2.5-32b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0035' + output: '0.007' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-3b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-3b-instruct.yaml new file mode 100644 index 0000000000..8f250379a7 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-3b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-3b-instruct +label: + en_US: qwen2.5-3b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-72b-instruct.yaml new file mode 100644 index 0000000000..bb3cdd6141 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-72b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-72b-instruct +label: + en_US: qwen2.5-72b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-7b-instruct-1m.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-7b-instruct-1m.yaml new file mode 100644 index 0000000000..fee32794b4 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-7b-instruct-1m.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-7b-instruct-1m +label: + en_US: qwen2.5-7b-instruct-1m +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 1000000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: "0.0005" + output: "0.001" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-7b-instruct.yaml new file mode 100644 index 0000000000..fdcd3d4275 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-7b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-7b-instruct +label: + en_US: qwen2.5-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-coder-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-coder-7b-instruct.yaml new file mode 100644 index 0000000000..7ebeec3953 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-coder-7b-instruct.yaml @@ -0,0 +1,75 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-coder-7b-instruct +label: + en_US: qwen2.5-coder-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-3b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-3b-instruct.yaml new file mode 100644 index 0000000000..02c5d3f2d0 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-3b-instruct.yaml @@ -0,0 +1,77 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-vl-3b-instruct +label: + en_US: qwen2.5-vl-3b-instruct +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0012' + output: '0.0036' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-72b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-72b-instruct.yaml new file mode 100644 index 0000000000..4fb5e2d2c9 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-72b-instruct.yaml @@ -0,0 +1,77 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-vl-72b-instruct +label: + en_US: qwen2.5-vl-72b-instruct +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: response_format + use_template: response_format +pricing: + input: '0.016' + output: '0.048' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-7b-instruct.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-7b-instruct.yaml new file mode 100644 index 0000000000..4284ba0563 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwen2.5-vl-7b-instruct.yaml @@ -0,0 +1,77 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models +model: qwen2.5-vl-7b-instruct +label: + en_US: qwen2.5-vl-7b-instruct +model_type: llm +features: + - vision + - agent-thought + - video +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + zh_Hans: 重复惩罚 + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.005' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-32b.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-32b.yaml new file mode 100644 index 0000000000..e36599fd47 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-32b.yaml @@ -0,0 +1,48 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwq-32b +label: + en_US: qwq-32b + zh_Hans: 通义千问QWQ-32B +model_type: llm +features: + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. +pricing: + input: '0.012' + output: '0.036' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-plus-0305.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-plus-0305.yaml new file mode 100644 index 0000000000..fcb52ed26b --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-plus-0305.yaml @@ -0,0 +1,48 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwq-plus-0305 +label: + en_US: qwq-plus-0305 + zh_Hans: 通义千问QWQ-Plus-0305 +model_type: llm +features: + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. 
+pricing: + input: '0.0016' + output: '0.004' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-plus.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-plus.yaml new file mode 100644 index 0000000000..48d58997a3 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/llm/qwq-plus.yaml @@ -0,0 +1,48 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) +model: qwq-plus +label: + en_US: qwq-plus + zh_Hans: 通义千问QWQ-Plus +model_type: llm +features: + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. +pricing: + input: '0.0016' + output: '0.004' + unit: '0.001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/_position.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/_position.yaml new file mode 100644 index 0000000000..439afda992 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/_position.yaml @@ -0,0 +1 @@ +- gte-rerank diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/gte-rerank.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/gte-rerank.yaml new file mode 100644 index 0000000000..44d51b9b0d --- /dev/null +++ 
b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/gte-rerank.yaml @@ -0,0 +1,4 @@ +model: gte-rerank +model_type: rerank +model_properties: + context_size: 4000 diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/rerank.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/rerank.py new file mode 100644 index 0000000000..c82bcadfa9 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/rerank/rerank.py @@ -0,0 +1,110 @@ +from typing import Optional +import dashscope +from dashscope.common.error import ( + AuthenticationError, + InvalidParameter, + RequestFailure, + ServiceUnavailableError, + UnsupportedHTTPMethod, + UnsupportedModel, +) +from dify_plugin.entities.model.rerank import RerankDocument, RerankResult +from dify_plugin.errors.model import ( + CredentialsValidateFailedError, + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from dify_plugin.interfaces.model.rerank_model import RerankModel + + +class GTERerankModel(RerankModel): + """ + Model class for GTE rerank model. 
+ """ + + def _invoke( + self, + model: str, + credentials: dict, + query: str, + docs: list[str], + score_threshold: Optional[float] = None, + top_n: Optional[int] = None, + user: Optional[str] = None, + ) -> RerankResult: + """ + Invoke rerank model + + :param model: model name + :param credentials: model credentials + :param query: search query + :param docs: docs for reranking + :param score_threshold: score threshold + :param top_n: top n + :param user: unique user id + :return: rerank result + """ + if len(docs) == 0: + return RerankResult(model=model, docs=docs) + dashscope.api_key = credentials["dashscope_api_key"] + response = dashscope.TextReRank.call( + query=query, documents=docs, model=model, top_n=top_n, return_documents=True + ) + rerank_documents = [] + if not response.output: + return RerankResult(model=model, docs=rerank_documents) + for _, result in enumerate(response.output.results): + rerank_document = RerankDocument( + index=result.index, score=result.relevance_score, text=result["document"]["text"] + ) + if score_threshold is not None: + if result.relevance_score >= score_threshold: + rerank_documents.append(rerank_document) + else: + rerank_documents.append(rerank_document) + return RerankResult(model=model, docs=rerank_documents) + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + self.invoke( + model=model, + credentials=credentials, + query="What is the capital of the United States?", + docs=[ + "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. 
Its capital is Saipan.", + ], + score_threshold=0.8, + ) + except Exception as ex: + print(ex) + raise CredentialsValidateFailedError(str(ex)) + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. + + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [RequestFailure], + InvokeServerUnavailableError: [ServiceUnavailableError], + InvokeRateLimitError: [], + InvokeAuthorizationError: [AuthenticationError], + InvokeBadRequestError: [InvalidParameter, UnsupportedModel, UnsupportedHTTPMethod], + } diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v1.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v1.yaml new file mode 100644 index 0000000000..52e35d8b50 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v1.yaml @@ -0,0 +1,10 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw +model: text-embedding-v1 +model_type: text-embedding +model_properties: + context_size: 2048 + max_chunks: 25 
+pricing: + input: "0.0007" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v2.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v2.yaml new file mode 100644 index 0000000000..5bb6a8f424 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v2.yaml @@ -0,0 +1,10 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw +model: text-embedding-v2 +model_type: text-embedding +model_properties: + context_size: 2048 + max_chunks: 25 +pricing: + input: "0.0007" + unit: "0.001" + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v3.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v3.yaml new file mode 100644 index 0000000000..d8af0e2b63 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text-embedding-v3.yaml @@ -0,0 +1,10 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw +model: text-embedding-v3 +model_type: text-embedding +model_properties: + context_size: 8192 + max_chunks: 25 +pricing: + input: "0.0007" + unit: "0.001" + currency: RMB diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text_embedding.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text_embedding.py new file mode 100644 index 0000000000..b5e12c4c81 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/text_embedding/text_embedding.py @@ -0,0 +1,167 @@ +import time +from typing import Optional +import dashscope +import numpy as np +from dify_plugin.entities.model import EmbeddingInputType, PriceType +from dify_plugin.entities.model.text_embedding import EmbeddingUsage, TextEmbeddingResult +from dify_plugin.errors.model import CredentialsValidateFailedError +from dify_plugin.interfaces.model.text_embedding_model import TextEmbeddingModel +from models._common import _CommonTongyi + + +class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel): + """ + Model class for Tongyi text embedding model. 
+ """ + + def _invoke( + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, + ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ + credentials_kwargs = self._to_credential_kwargs(credentials) + context_size = self._get_context_size(model, credentials) + max_chunks = self._get_max_chunks(model, credentials) + inputs = [] + indices = [] + used_tokens = 0 + for i, text in enumerate(texts): + num_tokens = self._get_num_tokens_by_gpt2(text) + if num_tokens >= context_size: + cutoff = int(np.floor(len(text) * (context_size / num_tokens))) + inputs.append(text[0:cutoff]) + else: + inputs.append(text) + indices += [i] + batched_embeddings = [] + _iter = range(0, len(inputs), max_chunks) + for i in _iter: + (embeddings_batch, embedding_used_tokens) = self.embed_documents( + credentials_kwargs=credentials_kwargs, model=model, texts=inputs[i : i + max_chunks] + ) + used_tokens += embedding_used_tokens + batched_embeddings += embeddings_batch + usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens) + return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model) + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> list[int]: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :return: + """ + if len(texts) == 0: + return [] + tokens = [] + for text in texts: + tokens.append(self._get_num_tokens_by_gpt2(text)) + return tokens + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: 
model credentials + :return: + """ + try: + credentials_kwargs = self._to_credential_kwargs(credentials) + self.embed_documents(credentials_kwargs=credentials_kwargs, model=model, texts=["ping"]) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + @staticmethod + def embed_documents(credentials_kwargs: dict, model: str, texts: list[str]) -> tuple[list[list[float]], int]: + """Call out to Tongyi's embedding endpoint. + + Args: + credentials_kwargs: The credentials to use for the call. + model: The model to use for embedding. + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text, and tokens usage. + """ + embeddings = [] + embedding_used_tokens = 0 + + def call_embedding_api(text): + try: + return dashscope.TextEmbedding.call( + api_key=credentials_kwargs["dashscope_api_key"], + model=model, + input=text, + text_type="document" + ) + except Exception as e: + # Return the exception to be handled by the caller + return e + + for text in texts: + # First attempt + response = call_embedding_api(text) + + # Handle rate limit error (429) + # Check if response is an exception with rate limit info + if hasattr(response, 'status_code') and response.status_code == 429: + print(f"Rate limit exceeded (429). 
Response: {response}") + import time + time.sleep(10) + # Retry once after sleeping + response = call_embedding_api(text) + + # Process response + if hasattr(response, 'output') and response.output and "embeddings" in response.output and response.output["embeddings"]: + data = response.output["embeddings"][0] + if "embedding" in data: + embeddings.append(data["embedding"]) + else: + raise ValueError(f"Embedding data is missing in the response: {response}") + else: + raise ValueError(f"Response output is missing or does not contain embeddings: {response}") + + if hasattr(response, 'usage') and response.usage and "total_tokens" in response.usage: + embedding_used_tokens += response.usage["total_tokens"] + else: + raise ValueError(f"Response usage is missing or does not contain total tokens: {response}") + + return ([list(map(float, e)) for e in embeddings], embedding_used_tokens) + + def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage: + """ + Calculate response usage + + :param model: model name + :param tokens: input tokens + :return: usage + """ + input_price_info = self.get_price( + model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens + ) + usage = EmbeddingUsage( + tokens=tokens, + total_tokens=tokens, + unit_price=input_price_info.unit_price, + price_unit=input_price_info.unit, + total_price=input_price_info.total_amount, + currency=input_price_info.currency, + latency=time.perf_counter() - self.started_at, + ) + return usage diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/__init__.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/tts-1.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/tts-1.yaml new file mode 100644 index 0000000000..4eaa0ff361 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/tts-1.yaml @@ -0,0 +1,139 @@ +model: tts-1 +model_type: tts +model_properties: + default_voice: 'sambert-zhiru-v1' + voices: + - mode: "sambert-zhinan-v1" + name: "知楠(广告男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiqi-v1" + name: "知琪(温柔女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhichu-v1" + name: "知厨(新闻播报)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhide-v1" + name: "知德(新闻男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhijia-v1" + name: "知佳(标准女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiru-v1" + name: "知茹(新闻女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiqian-v1" + name: "知倩(配音解说、新闻播报)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhixiang-v1" + name: "知祥(配音解说)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiwei-v1" + name: "知薇(萝莉女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhihao-v1" + name: "知浩(咨询男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhijing-v1" + name: "知婧(严厉女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiming-v1" + name: "知茗(诙谐男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhimo-v1" + name: "知墨(情感男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhina-v1" + name: "知娜(浙普女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhishu-v1" + name: "知树(资讯男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhistella-v1" + name: "知莎(知性女声)" + language: [ "zh-Hans", "en-US" ] + - mode: 
"sambert-zhiting-v1" + name: "知婷(电台女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhixiao-v1" + name: "知笑(资讯女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiya-v1" + name: "知雅(严厉女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiye-v1" + name: "知晔(青年男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiying-v1" + name: "知颖(软萌童声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhiyuan-v1" + name: "知媛(知心姐姐)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhigui-v1" + name: "知柜(直播女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhishuo-v1" + name: "知硕(自然男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhimiao-emo-v1" + name: "知妙(多种情感女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhimao-v1" + name: "知猫(直播女声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhilun-v1" + name: "知伦(悬疑解说)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhifei-v1" + name: "知飞(激昂解说)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-zhida-v1" + name: "知达(标准男声)" + language: [ "zh-Hans", "en-US" ] + - mode: "sambert-camila-v1" + name: "Camila(西班牙语女声)" + language: [ "es-ES" ] + - mode: "sambert-perla-v1" + name: "Perla(意大利语女声)" + language: [ "it-IT" ] + - mode: "sambert-indah-v1" + name: "Indah(印尼语女声)" + language: [ "id-ID" ] + - mode: "sambert-clara-v1" + name: "Clara(法语女声)" + language: [ "fr-FR" ] + - mode: "sambert-hanna-v1" + name: "Hanna(德语女声)" + language: [ "de-DE" ] + - mode: "sambert-beth-v1" + name: "Beth(咨询女声)" + language: [ "en-US" ] + - mode: "sambert-betty-v1" + name: "Betty(客服女声)" + language: [ "en-US" ] + - mode: "sambert-cally-v1" + name: "Cally(自然女声)" + language: [ "en-US" ] + - mode: "sambert-cindy-v1" + name: "Cindy(对话女声)" + language: [ "en-US" ] + - mode: "sambert-eva-v1" + name: "Eva(陪伴女声)" + language: [ "en-US" ] + - mode: "sambert-donna-v1" + name: "Donna(教育女声)" + language: [ "en-US" ] + - mode: "sambert-brian-v1" + name: "Brian(客服男声)" + language: [ "en-US" ] + - 
mode: "sambert-waan-v1" + name: "Waan(泰语女声)" + language: [ "th-TH" ] + word_limit: 7000 + audio_type: 'mp3' + max_workers: 5 +pricing: + input: '1' + output: '0' + unit: '0.0001' + currency: RMB diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/tts.py b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/tts.py new file mode 100644 index 0000000000..78787ad62f --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/models/tts/tts.py @@ -0,0 +1,146 @@ +import threading +from queue import Queue +from typing import Any, Optional +import dashscope +from dashscope import SpeechSynthesizer +from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse +from dashscope.audio.tts import ResultCallback, SpeechSynthesisResult +from dify_plugin.errors.model import CredentialsValidateFailedError, InvokeBadRequestError +from dify_plugin.interfaces.model.tts_model import TTSModel +from models._common import _CommonTongyi + + +class TongyiText2SpeechModel(_CommonTongyi, TTSModel): + """ + Model class for Tongyi Speech to text model. 
+ """ + + def _invoke( + self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, user: Optional[str] = None + ) -> Any: + """ + _invoke text2speech model + + :param model: model name + :param tenant_id: user tenant id + :param credentials: model credentials + :param voice: model timbre + :param content_text: text content to be translated + :param user: unique user id + :return: text translated to audio file + """ + if not voice or voice not in [ + d["value"] for d in self.get_tts_model_voices(model=model, credentials=credentials) + ]: + voice = self._get_model_default_voice(model, credentials) + return self._tts_invoke_streaming(model=model, credentials=credentials, content_text=content_text, voice=voice) + + def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None: + """ + validate credentials text2speech model + + :param model: model name + :param credentials: model credentials + :param user: unique user id + :return: text translated to audio file + """ + try: + self._tts_invoke_streaming( + model=model, + credentials=credentials, + content_text="Hello Dify!", + voice=self._get_model_default_voice(model, credentials), + ) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> Any: + """ + _tts_invoke_streaming text2speech model + + :param model: model name + :param credentials: model credentials + :param voice: model timbre + :param content_text: text content to be translated + :return: text translated to audio file + """ + word_limit = self._get_model_word_limit(model, credentials) + audio_type = self._get_model_audio_type(model, credentials) + try: + audio_queue: Queue = Queue() + callback = Callback(queue=audio_queue) + + def invoke_remote(content, v, api_key, cb, at, wl): + if len(content) < word_limit: + sentences = [content] + else: + sentences = 
list(self._split_text_into_sentences(org_text=content, max_length=wl)) + for sentence in sentences: + SpeechSynthesizer.call( + model=v, + sample_rate=16000, + api_key=api_key, + text=sentence.strip(), + callback=cb, + format=at, + word_timestamp_enabled=True, + phoneme_timestamp_enabled=True, + ) + + threading.Thread( + target=invoke_remote, + args=(content_text, voice, credentials.get("dashscope_api_key"), callback, audio_type, word_limit), + ).start() + while True: + audio = audio_queue.get() + if audio is None: + break + yield audio + except Exception as ex: + raise InvokeBadRequestError(str(ex)) + + @staticmethod + def _process_sentence(sentence: str, credentials: dict, voice: str, audio_type: str): + """ + _tts_invoke Tongyi text2speech model api + + :param credentials: model credentials + :param sentence: text content to be translated + :param voice: model timbre + :param audio_type: audio file type + :return: text translated to audio file + """ + response = dashscope.audio.tts.SpeechSynthesizer.call( + model=voice, + sample_rate=48000, + api_key=credentials.get("dashscope_api_key"), + text=sentence.strip(), + format=audio_type, + ) + if isinstance(response.get_audio_data(), bytes): + return response.get_audio_data() + + +class Callback(ResultCallback): + def __init__(self, queue: Queue): + self._queue = queue + + def on_open(self): + pass + + def on_complete(self): + self._queue.put(None) + self._queue.task_done() + + def on_error(self, response: SpeechSynthesisResponse): + self._queue.put(None) + self._queue.task_done() + + def on_close(self): + self._queue.put(None) + self._queue.task_done() + + def on_event(self, result: SpeechSynthesisResult): + ad = result.get_audio_frame() + if ad: + self._queue.put(ad) diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/provider/tongyi.py 
b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/provider/tongyi.py new file mode 100644 index 0000000000..bb9df97fb7 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/provider/tongyi.py @@ -0,0 +1,25 @@ +import logging +from dify_plugin.entities.model import ModelType +from dify_plugin.errors.model import CredentialsValidateFailedError +from dify_plugin import ModelProvider + +logger = logging.getLogger(__name__) + + +class TongyiProvider(ModelProvider): + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. + """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + model_instance.validate_credentials(model="qwen-turbo", credentials=credentials) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f"{self.get_provider_schema().provider} credentials validate failed") + raise ex diff --git a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/provider/tongyi.yaml b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/provider/tongyi.yaml new file mode 100644 index 0000000000..2c925c02b6 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/provider/tongyi.yaml @@ -0,0 +1,111 @@ +background: '#EFF1FE' +configurate_methods: +- predefined-model +- customizable-model +extra: + python: + model_sources: + - models/llm/llm.py + - models/rerank/rerank.py + - models/text_embedding/text_embedding.py + - models/tts/tts.py + 
provider_source: provider/tongyi.py +help: + title: + en_US: Get your API key from AliCloud + zh_Hans: 从阿里云百炼获取 API Key + url: + en_US: https://bailian.console.aliyun.com/?apiKey=1#/api-key +icon_large: + en_US: icon_l_en.png + zh_Hans: icon_l_zh.png +icon_small: + en_US: icon_s_en.png +label: + en_US: TONGYI + zh_Hans: 通义千问 +model_credential_schema: + credential_form_schemas: + - label: + en_US: API Key + placeholder: + en_US: Enter your API Key + zh_Hans: 在此输入您的 API Key + required: true + type: secret-input + variable: dashscope_api_key + - default: '4096' + label: + en_US: Model context size + zh_Hans: 模型上下文长度 + placeholder: + en_US: Enter your Model context size + zh_Hans: 在此输入您的模型上下文长度 + required: true + type: text-input + variable: context_size + - default: '4096' + label: + en_US: Upper bound for max tokens + zh_Hans: 最大 token 上限 + show_on: + - value: llm + variable: __model_type + type: text-input + variable: max_tokens + - default: no_call + label: + en_US: Function calling + options: + - label: + en_US: Not Support + zh_Hans: 不支持 + value: no_call + - label: + en_US: Support + zh_Hans: 支持 + value: function_call + required: false + show_on: + - value: llm + variable: __model_type + type: select + variable: function_calling_type + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter your model name + zh_Hans: 输入模型名称 +models: + llm: + position: models/llm/_position.yaml + predefined: + - models/llm/*.yaml + rerank: + position: models/rerank/_position.yaml + predefined: + - models/rerank/*.yaml + text_embedding: + predefined: + - models/text_embedding/*.yaml + tts: + predefined: + - models/tts/*.yaml +provider: tongyi +provider_credential_schema: + credential_form_schemas: + - label: + en_US: API Key + placeholder: + en_US: Enter your API Key + zh_Hans: 在此输入您的 API Key + required: true + type: secret-input + variable: dashscope_api_key +supported_model_types: +- llm +- tts +- text-embedding +- rerank diff --git 
a/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/requirements.txt b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/requirements.txt new file mode 100644 index 0000000000..07a3a40cd5 --- /dev/null +++ b/api/init_data/plugins/storage/cwd/langgenius/tongyi-0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb/requirements.txt @@ -0,0 +1,4 @@ +dify_plugin==0.0.1b73 +numpy~=2.2.3 +dashscope~=1.22.2 +openai~=1.65.4 diff --git a/api/init_data/plugins/storage/plugin/langgenius/agent0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9 b/api/init_data/plugins/storage/plugin/langgenius/agent0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9 new file mode 100644 index 0000000000..eb7ac5f66e Binary files /dev/null and b/api/init_data/plugins/storage/plugin/langgenius/agent0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9 differ diff --git a/api/init_data/plugins/storage/plugin/langgenius/deepseek0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403 b/api/init_data/plugins/storage/plugin/langgenius/deepseek0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403 new file mode 100644 index 0000000000..cbcbc0b37d Binary files /dev/null and b/api/init_data/plugins/storage/plugin/langgenius/deepseek0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403 differ diff --git a/api/init_data/plugins/storage/plugin/langgenius/ollama0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7 b/api/init_data/plugins/storage/plugin/langgenius/ollama0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7 new file mode 100644 index 0000000000..4a6554f134 Binary files /dev/null and 
b/api/init_data/plugins/storage/plugin/langgenius/ollama0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7 differ diff --git a/api/init_data/plugins/storage/plugin/langgenius/openai_api_compatible0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7 b/api/init_data/plugins/storage/plugin/langgenius/openai_api_compatible0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7 new file mode 100644 index 0000000000..151db5c4a6 Binary files /dev/null and b/api/init_data/plugins/storage/plugin/langgenius/openai_api_compatible0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7 differ diff --git a/api/init_data/plugins/storage/plugin/langgenius/siliconflow0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3 b/api/init_data/plugins/storage/plugin/langgenius/siliconflow0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3 new file mode 100644 index 0000000000..4a5441cd85 Binary files /dev/null and b/api/init_data/plugins/storage/plugin/langgenius/siliconflow0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3 differ diff --git a/api/init_data/plugins/storage/plugin/langgenius/tongyi0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb b/api/init_data/plugins/storage/plugin/langgenius/tongyi0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb new file mode 100644 index 0000000000..8532a7140e Binary files /dev/null and b/api/init_data/plugins/storage/plugin/langgenius/tongyi0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb differ diff --git a/api/init_data/plugins/storage/plugin_packages/langgenius/agent0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9 b/api/init_data/plugins/storage/plugin_packages/langgenius/agent0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9 new file mode 100644 index 0000000000..eb7ac5f66e Binary files /dev/null and 
b/api/init_data/plugins/storage/plugin_packages/langgenius/agent0.0.11@6b60e6812d1428f06a4dee70a1e9c7dd2af217bf123103758fb40483069c12c9 differ diff --git a/api/init_data/plugins/storage/plugin_packages/langgenius/deepseek0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403 b/api/init_data/plugins/storage/plugin_packages/langgenius/deepseek0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403 new file mode 100644 index 0000000000..cbcbc0b37d Binary files /dev/null and b/api/init_data/plugins/storage/plugin_packages/langgenius/deepseek0.0.5@fd6efd37c2a931911de8ab9ca3ba2da303bef146d45ee87ad896b04b36d09403 differ diff --git a/api/init_data/plugins/storage/plugin_packages/langgenius/ollama0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7 b/api/init_data/plugins/storage/plugin_packages/langgenius/ollama0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7 new file mode 100644 index 0000000000..4a6554f134 Binary files /dev/null and b/api/init_data/plugins/storage/plugin_packages/langgenius/ollama0.0.3@9ded90ac00e8510119a24be7396ba77191c9610d5e1e29f59d68fa1229822fc7 differ diff --git a/api/init_data/plugins/storage/plugin_packages/langgenius/openai_api_compatible0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7 b/api/init_data/plugins/storage/plugin_packages/langgenius/openai_api_compatible0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7 new file mode 100644 index 0000000000..151db5c4a6 Binary files /dev/null and b/api/init_data/plugins/storage/plugin_packages/langgenius/openai_api_compatible0.0.11@410445eba2fa0f693d26dea2c3b9ffe51ad0777e021146ff877af6098412efc7 differ diff --git a/api/init_data/plugins/storage/plugin_packages/langgenius/siliconflow0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3 
b/api/init_data/plugins/storage/plugin_packages/langgenius/siliconflow0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3 new file mode 100644 index 0000000000..4a5441cd85 Binary files /dev/null and b/api/init_data/plugins/storage/plugin_packages/langgenius/siliconflow0.0.8@217f973bd7ced1b099c2f0c669f1356bdf4cc38b8372fd58d7874f9940b95de3 differ diff --git a/api/init_data/plugins/storage/plugin_packages/langgenius/tongyi0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb b/api/init_data/plugins/storage/plugin_packages/langgenius/tongyi0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb new file mode 100644 index 0000000000..8532a7140e Binary files /dev/null and b/api/init_data/plugins/storage/plugin_packages/langgenius/tongyi0.0.14@d333909fe8d75a335b44da4b878d5120779b8703f12c21cf4730135d6864afbb differ diff --git a/api/workflows/en/Container CPU usage analysis.yml b/api/init_data/workflows/en/Container CPU usage analysis.yml similarity index 100% rename from api/workflows/en/Container CPU usage analysis.yml rename to api/init_data/workflows/en/Container CPU usage analysis.yml diff --git a/api/workflows/en/Container memory analysis.yml b/api/init_data/workflows/en/Container memory analysis.yml similarity index 100% rename from api/workflows/en/Container memory analysis.yml rename to api/init_data/workflows/en/Container memory analysis.yml diff --git a/api/workflows/en/Host CPU IO Wait.yml b/api/init_data/workflows/en/Host CPU IO Wait.yml similarity index 100% rename from api/workflows/en/Host CPU IO Wait.yml rename to api/init_data/workflows/en/Host CPU IO Wait.yml diff --git a/api/workflows/en/Host CPU usage analysis.yml b/api/init_data/workflows/en/Host CPU usage analysis.yml similarity index 100% rename from api/workflows/en/Host CPU usage analysis.yml rename to api/init_data/workflows/en/Host CPU usage analysis.yml diff --git a/api/workflows/en/Host disk usage.yml b/api/init_data/workflows/en/Host 
disk usage.yml similarity index 100% rename from api/workflows/en/Host disk usage.yml rename to api/init_data/workflows/en/Host disk usage.yml diff --git a/api/workflows/en/Host memory analysis.yml b/api/init_data/workflows/en/Host memory analysis.yml similarity index 100% rename from api/workflows/en/Host memory analysis.yml rename to api/init_data/workflows/en/Host memory analysis.yml diff --git a/api/workflows/en/Log analysis.yml b/api/init_data/workflows/en/Log analysis.yml similarity index 100% rename from api/workflows/en/Log analysis.yml rename to api/init_data/workflows/en/Log analysis.yml diff --git a/api/workflows/en/Network anomalies analysis.yml b/api/init_data/workflows/en/Network anomalies analysis.yml similarity index 100% rename from api/workflows/en/Network anomalies analysis.yml rename to api/init_data/workflows/en/Network anomalies analysis.yml diff --git a/api/workflows/en/Service alert event.yml b/api/init_data/workflows/en/Service alert event.yml similarity index 100% rename from api/workflows/en/Service alert event.yml rename to api/init_data/workflows/en/Service alert event.yml diff --git a/api/workflows/en/alert events topology root cause analysis.yml b/api/init_data/workflows/en/alert events topology root cause analysis.yml similarity index 100% rename from api/workflows/en/alert events topology root cause analysis.yml rename to api/init_data/workflows/en/alert events topology root cause analysis.yml diff --git a/api/workflows/en/alert events vaild function calling.yml b/api/init_data/workflows/en/alert events vaild function calling.yml similarity index 100% rename from api/workflows/en/alert events vaild function calling.yml rename to api/init_data/workflows/en/alert events vaild function calling.yml diff --git a/api/workflows/en/alert root cause another function.yml b/api/init_data/workflows/en/alert root cause another function.yml similarity index 100% rename from api/workflows/en/alert root cause another function.yml rename to 
api/init_data/workflows/en/alert root cause another function.yml diff --git a/api/workflows/en/alert simple root cause analysis.yml b/api/init_data/workflows/en/alert simple root cause analysis.yml similarity index 100% rename from api/workflows/en/alert simple root cause analysis.yml rename to api/init_data/workflows/en/alert simple root cause analysis.yml diff --git a/api/workflows/en/alert validity confirmation.yml b/api/init_data/workflows/en/alert validity confirmation.yml similarity index 100% rename from api/workflows/en/alert validity confirmation.yml rename to api/init_data/workflows/en/alert validity confirmation.yml diff --git a/api/workflows/en/service cpu analysis.yml b/api/init_data/workflows/en/service cpu analysis.yml similarity index 100% rename from api/workflows/en/service cpu analysis.yml rename to api/init_data/workflows/en/service cpu analysis.yml diff --git a/api/workflows/zh/CPU异常分析确认.yml b/api/init_data/workflows/zh/CPU异常分析确认.yml similarity index 100% rename from api/workflows/zh/CPU异常分析确认.yml rename to api/init_data/workflows/zh/CPU异常分析确认.yml diff --git a/api/workflows/zh/主机CPU使用率分析.yml b/api/init_data/workflows/zh/主机CPU使用率分析.yml similarity index 100% rename from api/workflows/zh/主机CPU使用率分析.yml rename to api/init_data/workflows/zh/主机CPU使用率分析.yml diff --git a/api/workflows/zh/主机内存异常分析.yml b/api/init_data/workflows/zh/主机内存异常分析.yml similarity index 100% rename from api/workflows/zh/主机内存异常分析.yml rename to api/init_data/workflows/zh/主机内存异常分析.yml diff --git a/api/workflows/zh/告警单节点根因分析方法2.yml b/api/init_data/workflows/zh/告警单节点根因分析方法2.yml similarity index 100% rename from api/workflows/zh/告警单节点根因分析方法2.yml rename to api/init_data/workflows/zh/告警单节点根因分析方法2.yml diff --git a/api/workflows/zh/告警有效性确认.yml b/api/init_data/workflows/zh/告警有效性确认.yml similarity index 100% rename from api/workflows/zh/告警有效性确认.yml rename to api/init_data/workflows/zh/告警有效性确认.yml diff --git a/api/workflows/zh/告警有效性确认_通过functionCalling实现.yml 
b/api/init_data/workflows/zh/告警有效性确认_通过functionCalling实现.yml similarity index 100% rename from api/workflows/zh/告警有效性确认_通过functionCalling实现.yml rename to api/init_data/workflows/zh/告警有效性确认_通过functionCalling实现.yml diff --git a/api/workflows/zh/告警简单根因分析.yml b/api/init_data/workflows/zh/告警简单根因分析.yml similarity index 100% rename from api/workflows/zh/告警简单根因分析.yml rename to api/init_data/workflows/zh/告警简单根因分析.yml diff --git a/api/workflows/zh/复杂告警上下游根因分析.yml b/api/init_data/workflows/zh/复杂告警上下游根因分析.yml similarity index 100% rename from api/workflows/zh/复杂告警上下游根因分析.yml rename to api/init_data/workflows/zh/复杂告警上下游根因分析.yml diff --git a/api/workflows/zh/容器CPU使用率分析.yml b/api/init_data/workflows/zh/容器CPU使用率分析.yml similarity index 100% rename from api/workflows/zh/容器CPU使用率分析.yml rename to api/init_data/workflows/zh/容器CPU使用率分析.yml diff --git a/api/workflows/zh/容器内存异常分析.yml b/api/init_data/workflows/zh/容器内存异常分析.yml similarity index 100% rename from api/workflows/zh/容器内存异常分析.yml rename to api/init_data/workflows/zh/容器内存异常分析.yml diff --git a/api/workflows/zh/宿主机CPU IO Wait情况.yml b/api/init_data/workflows/zh/宿主机CPU IO Wait情况.yml similarity index 100% rename from api/workflows/zh/宿主机CPU IO Wait情况.yml rename to api/init_data/workflows/zh/宿主机CPU IO Wait情况.yml diff --git a/api/workflows/zh/宿主机磁盘使用情况.yml b/api/init_data/workflows/zh/宿主机磁盘使用情况.yml similarity index 100% rename from api/workflows/zh/宿主机磁盘使用情况.yml rename to api/init_data/workflows/zh/宿主机磁盘使用情况.yml diff --git a/api/workflows/zh/日志分析.yml b/api/init_data/workflows/zh/日志分析.yml similarity index 100% rename from api/workflows/zh/日志分析.yml rename to api/init_data/workflows/zh/日志分析.yml diff --git a/api/workflows/zh/服务告警事件确认.yml b/api/init_data/workflows/zh/服务告警事件确认.yml similarity index 100% rename from api/workflows/zh/服务告警事件确认.yml rename to api/init_data/workflows/zh/服务告警事件确认.yml diff --git a/api/workflows/zh/网络异常分析确认.yml b/api/init_data/workflows/zh/网络异常分析确认.yml similarity index 100% rename from api/workflows/zh/网络异常分析确认.yml 
rename to api/init_data/workflows/zh/网络异常分析确认.yml diff --git a/api/initializer/__init__.py b/api/initializer/__init__.py index 8a6c278d5a..39754e2acb 100644 --- a/api/initializer/__init__.py +++ b/api/initializer/__init__.py @@ -1,7 +1,8 @@ from flask import Flask from . import ( account, - workflow + workflow, + plugin ) from .decorator import _initializers diff --git a/api/initializer/plugin.py b/api/initializer/plugin.py new file mode 100644 index 0000000000..1a753dfbfc --- /dev/null +++ b/api/initializer/plugin.py @@ -0,0 +1,97 @@ +import os +import logging +import asyncio +import threading + +from functools import partial +from configs import dify_config +from services.plugin.plugin_service import PluginService +from .admin import get_admin +from .decorator import initializer + +PLUGIN_CHECK_INTERVAL = 180 + +plugin_ids = [] +plugin_unique_identifiers = [] + +@initializer(priority=4) +def init_plugin(): + plugin_dir = './init_data/plugins/packages' + + if not os.path.isdir(plugin_dir): + logging.error(f"Invalid directory: {plugin_dir}") + return + + admin = get_admin() + + for file_entry in os.scandir(plugin_dir): + if not file_entry.name.endswith('.difypkg') or file_entry.name.startswith('.'): + continue + + try: + with open(file_entry.path, 'rb') as file: + file_size = os.fstat(file.fileno()).st_size + if file_size > dify_config.PLUGIN_MAX_PACKAGE_SIZE: + logging.error(f"File size exceeds the limit: {file_entry.path}") + continue + response = PluginService.upload_pkg(admin.current_tenant_id, file.read()) + + plugin_id = _get_plugin_id(response.unique_identifier) + installations = PluginService.list_installations_from_ids(admin.current_tenant_id, [plugin_id]) + if len(installations) > 0: + # Plugin already installed + continue + + plugin_ids.append(plugin_id) + plugin_unique_identifiers.append(response.unique_identifier) + + PluginService.install_from_local_pkg(admin.current_tenant_id, [response.unique_identifier]) + + except Exception as e: + 
logging.error(f"Failed to install plugin: {file_entry.path} {str(e)}") + + if not dify_config.OFFLINE_MODE: + threading.Thread( + target=partial(_run_async_activation, admin.current_tenant_id), + daemon=True + ).start() + +def _get_plugin_id(plugin_unique_identifier: str) -> str: + return plugin_unique_identifier.split(':')[0] + +def _run_async_activation(tenant_id: str): + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(_activate_plugin(tenant_id)) + except Exception as e: + logging.error(f"Async activation crashed: {str(e)}") + +async def _activate_plugin(tenant_id: str): + max_retries = 10 + retry_count = 0 + + while retry_count < max_retries: + try: + await asyncio.sleep(PLUGIN_CHECK_INTERVAL) + + installations = PluginService.list_installations_from_ids(tenant_id, plugin_ids) + if len(installations) == len(plugin_ids): + logging.info("All plugins activated") + return + + for id in plugin_unique_identifiers: + installed = False + for installation in installations: + if installation.plugin_unique_identifier == id: + installed = True + break + if not installed: + PluginService.install_from_local_pkg(tenant_id, [id]) + + retry_count += 1 + + except Exception as e: + logging.error(f"Activation failed: {str(e)}") + + logging.error("Plugin activation timeout") \ No newline at end of file