|
|
|
|
@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
|
|
|
|
|
|
|
|
|
|
if chunk_json["done"]:
|
|
|
|
|
# calculate num tokens
|
|
|
|
|
if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
|
|
|
|
|
# transform usage
|
|
|
|
|
if "prompt_eval_count" in chunk_json:
|
|
|
|
|
prompt_tokens = chunk_json["prompt_eval_count"]
|
|
|
|
|
completion_tokens = chunk_json["eval_count"]
|
|
|
|
|
else:
|
|
|
|
|
# calculate num tokens
|
|
|
|
|
prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
|
|
|
|
|
completion_tokens = self._get_num_tokens_by_gpt2(full_text)
|
|
|
|
|
prompt_message_content = prompt_messages[0].content
|
|
|
|
|
if isinstance(prompt_message_content, str):
|
|
|
|
|
prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
|
|
|
|
|
else:
|
|
|
|
|
content_text = ""
|
|
|
|
|
for message_content in prompt_message_content:
|
|
|
|
|
if message_content.type == PromptMessageContentType.TEXT:
|
|
|
|
|
message_content = cast(TextPromptMessageContent, message_content)
|
|
|
|
|
content_text += message_content.data
|
|
|
|
|
prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
|
|
|
|
|
|
|
|
|
|
completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
|
|
|
|
|
|
|
|
|
|
# transform usage
|
|
|
|
|
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
|
|
|
|
|