From c50f72f7bd6374889087cfd17c84c9ee2ce68a54 Mon Sep 17 00:00:00 2001 From: Davide Delbianco Date: Mon, 7 Jul 2025 16:56:50 +0200 Subject: [PATCH] chore/improve token usage access in trace-providers --- api/core/ops/aliyun_trace/aliyun_trace.py | 7 ++--- .../arize_phoenix_trace.py | 26 ++++++++++++------- api/core/ops/langfuse_trace/langfuse_trace.py | 9 +++---- .../ops/langsmith_trace/langsmith_trace.py | 9 +++---- api/core/ops/opik_trace/opik_trace.py | 12 +++------ 5 files changed, 30 insertions(+), 33 deletions(-) diff --git a/api/core/ops/aliyun_trace/aliyun_trace.py b/api/core/ops/aliyun_trace/aliyun_trace.py index 163b5d0307..b18a6905fe 100644 --- a/api/core/ops/aliyun_trace/aliyun_trace.py +++ b/api/core/ops/aliyun_trace/aliyun_trace.py @@ -372,6 +372,7 @@ class AliyunDataTrace(BaseTraceInstance): ) -> SpanData: process_data = node_execution.process_data or {} outputs = node_execution.outputs or {} + usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) return SpanData( trace_id=trace_id, parent_span_id=workflow_span_id, @@ -385,9 +386,9 @@ class AliyunDataTrace(BaseTraceInstance): GEN_AI_FRAMEWORK: "dify", GEN_AI_MODEL_NAME: process_data.get("model_name", ""), GEN_AI_SYSTEM: process_data.get("model_provider", ""), - GEN_AI_USAGE_INPUT_TOKENS: str(outputs.get("usage", {}).get("prompt_tokens", 0)), - GEN_AI_USAGE_OUTPUT_TOKENS: str(outputs.get("usage", {}).get("completion_tokens", 0)), - GEN_AI_USAGE_TOTAL_TOKENS: str(outputs.get("usage", {}).get("total_tokens", 0)), + GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)), + GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)), + GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)), GEN_AI_PROMPT: json.dumps(process_data.get("prompts", []), ensure_ascii=False), GEN_AI_COMPLETION: str(outputs.get("text", "")), GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason", ""), diff --git a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py index 0b6834acf3..ffda0885d4 100644 --- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py +++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py @@ -213,11 +213,12 @@ class ArizePhoenixDataTrace(BaseTraceInstance): if model: node_metadata["ls_model_name"] = model - usage = json.loads(node_execution.outputs).get("usage", {}) if node_execution.outputs else {} - if usage: - node_metadata["total_tokens"] = usage.get("total_tokens", 0) - node_metadata["prompt_tokens"] = usage.get("prompt_tokens", 0) - node_metadata["completion_tokens"] = usage.get("completion_tokens", 0) + outputs = json.loads(node_execution.outputs).get("usage", {}) + usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + if usage_data: + node_metadata["total_tokens"] = usage_data.get("total_tokens", 0) + node_metadata["prompt_tokens"] = usage_data.get("prompt_tokens", 0) + node_metadata["completion_tokens"] = usage_data.get("completion_tokens", 0) elif node_execution.node_type == "dataset_retrieval": span_kind = OpenInferenceSpanKindValues.RETRIEVER.value elif node_execution.node_type == "tool": @@ -246,14 +247,19 @@ class ArizePhoenixDataTrace(BaseTraceInstance): if model: node_span.set_attribute(SpanAttributes.LLM_MODEL_NAME, model) - usage = json.loads(node_execution.outputs).get("usage", {}) if node_execution.outputs else {} - if usage: - node_span.set_attribute(SpanAttributes.LLM_TOKEN_COUNT_TOTAL, usage.get("total_tokens", 0)) + outputs = json.loads(node_execution.outputs).get("usage", {}) + usage_data = ( + process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + ) + if usage_data: + node_span.set_attribute( + SpanAttributes.LLM_TOKEN_COUNT_TOTAL, usage_data.get("total_tokens", 0) + ) node_span.set_attribute( - SpanAttributes.LLM_TOKEN_COUNT_PROMPT, usage.get("prompt_tokens", 0) + SpanAttributes.LLM_TOKEN_COUNT_PROMPT, usage_data.get("prompt_tokens", 0) ) node_span.set_attribute( - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, usage.get("completion_tokens", 0) + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, usage_data.get("completion_tokens", 0) ) finally: node_span.end(end_time=datetime_to_nanos(finished_at)) diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index 8646c2db3f..a3dbce0e59 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -181,12 +181,9 @@ class LangFuseDataTrace(BaseTraceInstance): prompt_tokens = 0 completion_tokens = 0 try: - if process_data.get("usage"): - prompt_tokens = process_data.get("usage", {}).get("prompt_tokens", 0) - completion_tokens = process_data.get("usage", {}).get("completion_tokens", 0) - else: - prompt_tokens = outputs.get("usage", {}).get("prompt_tokens", 0) - completion_tokens = outputs.get("usage", {}).get("completion_tokens", 0) + usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + prompt_tokens = usage_data.get("prompt_tokens", 0) + completion_tokens = usage_data.get("completion_tokens", 0) except Exception: logger.error("Failed to extract usage", exc_info=True) diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index e025220208..f94e5e49d7 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -206,12 +206,9 @@ class LangSmithDataTrace(BaseTraceInstance): prompt_tokens = 0 completion_tokens = 0 try: - if process_data.get("usage"): - prompt_tokens = process_data.get("usage", {}).get("prompt_tokens", 0) - completion_tokens = process_data.get("usage", {}).get("completion_tokens", 0) - else: - prompt_tokens = outputs.get("usage", {}).get("prompt_tokens", 0) - completion_tokens = outputs.get("usage", {}).get("completion_tokens", 0) + usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + prompt_tokens = usage_data.get("prompt_tokens", 0) + completion_tokens = usage_data.get("completion_tokens", 0) except Exception: logger.error("Failed to extract usage", exc_info=True) diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index b7e49c2650..8bedea20fb 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -222,14 +222,10 @@ class OpikDataTrace(BaseTraceInstance): ) try: - if process_data.get("usage"): - total_tokens = process_data.get("usage", {}).get("total_tokens", 0) - prompt_tokens = process_data.get("usage", {}).get("prompt_tokens", 0) - completion_tokens = process_data.get("usage", {}).get("completion_tokens", 0) - else: - total_tokens = outputs.get("usage", {}).get("total_tokens", 0) - prompt_tokens = outputs.get("usage", {}).get("prompt_tokens", 0) - completion_tokens = outputs.get("usage", {}).get("completion_tokens", 0) + usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + total_tokens = usage_data.get("total_tokens", 0) + prompt_tokens = usage_data.get("prompt_tokens", 0) + completion_tokens = usage_data.get("completion_tokens", 0) except Exception: logger.error("Failed to extract usage", exc_info=True)