|
|
|
|
@ -1,5 +1,4 @@
|
|
|
|
|
import logging
|
|
|
|
|
import re
|
|
|
|
|
from collections.abc import Generator
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
@ -231,6 +230,17 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
|
|
|
|
|
return _handle_chat_response()
|
|
|
|
|
return _handle_stream_chat_response()
|
|
|
|
|
|
|
|
|
|
def wrap_thinking(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
|
|
|
|
|
content = ""
|
|
|
|
|
reasoning_content = None
|
|
|
|
|
if hasattr(delta, "content"):
|
|
|
|
|
content = delta.content
|
|
|
|
|
if hasattr(delta, "reasoning_content"):
|
|
|
|
|
reasoning_content = delta.reasoning_content
|
|
|
|
|
return self._wrap_thinking_by_reasoning_content(
|
|
|
|
|
{"content": content, "reasoning_content": reasoning_content}, is_reasoning
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _generate_v3(
|
|
|
|
|
self,
|
|
|
|
|
model: str,
|
|
|
|
|
@ -253,22 +263,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
|
|
|
|
|
content = ""
|
|
|
|
|
if chunk.choices:
|
|
|
|
|
delta = chunk.choices[0].delta
|
|
|
|
|
if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
|
|
|
|
|
content = ""
|
|
|
|
|
elif hasattr(delta, "reasoning_content"):
|
|
|
|
|
if not is_reasoning_started:
|
|
|
|
|
is_reasoning_started = True
|
|
|
|
|
content = "> 💭 " + delta.reasoning_content
|
|
|
|
|
else:
|
|
|
|
|
content = delta.reasoning_content
|
|
|
|
|
|
|
|
|
|
if "\n" in content:
|
|
|
|
|
content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
|
|
|
|
|
elif is_reasoning_started:
|
|
|
|
|
content = "\n\n" + delta.content
|
|
|
|
|
is_reasoning_started = False
|
|
|
|
|
else:
|
|
|
|
|
content = delta.content
|
|
|
|
|
content, is_reasoning_started = self.wrap_thinking(delta, is_reasoning_started)
|
|
|
|
|
|
|
|
|
|
yield LLMResultChunk(
|
|
|
|
|
model=model,
|
|
|
|
|
|