Feat(tools) add tavily extract tool and enhance tavily search implementation (#10786)
parent
6d532bfc02
commit
6de1f8c770
@ -0,0 +1,145 @@
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
from core.tools.tool.builtin_tool import BuiltinTool
|
||||
|
||||
# Base URL of the Tavily REST API; endpoint paths such as "/extract" are appended to it.
TAVILY_API_URL = "https://api.tavily.com"
|
||||
|
||||
|
||||
class TavilyExtract:
    """
    A small client for extracting content from web pages using the Tavily Extract API.

    Args:
        api_key (str): The API key for accessing the Tavily Extract API.
        timeout (float): Seconds to wait for the API before giving up. Defaults to 30.

    Methods:
        extract_content: Retrieves extracted content from the Tavily Extract API.
    """

    def __init__(self, api_key: str, timeout: float = 30.0) -> None:
        self.api_key = api_key
        self.timeout = timeout

    def extract_content(self, params: dict[str, Any]) -> dict:
        """
        Retrieves extracted content from the Tavily Extract API.

        The caller's ``params`` dict is left untouched; the request payload
        (including the API key) is built in a separate dict.

        Args:
            params (Dict[str, Any]): The extraction parameters. Must contain
                'urls'; may contain 'api_key' to override the instance key.

        Returns:
            dict: The decoded JSON body returned by the API.

        Raises:
            ValueError: If 'urls' is missing or has an unsupported type.
            requests.RequestException: If the request fails or times out, or
                the API answers with an error status (requests.HTTPError).
        """
        # _process_params returns a fresh dict, so the secret key never leaks
        # into the dict owned by the caller.
        payload = self._process_params(params)

        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(f"{TAVILY_API_URL}/extract", json=payload, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _process_params(self, params: dict[str, Any]) -> dict:
        """
        Processes and validates the extraction parameters.

        Args:
            params (Dict[str, Any]): The extraction parameters.

        Returns:
            dict: A new dict holding only the normalized 'urls' list and the 'api_key'.

        Raises:
            ValueError: If 'urls' is missing/None, is neither a string nor a
                list/tuple, or contains a non-string item.
        """
        urls = params.get("urls")
        if urls is None:
            raise ValueError("The 'urls' parameter is required.")

        if isinstance(urls, str):
            # Accept comma- and/or whitespace-separated URLs in a single string.
            candidates = urls.replace(",", " ").split()
        elif isinstance(urls, (list, tuple)):
            candidates = list(urls)
        else:
            # Previously such values were dropped silently and the request was sent without URLs.
            raise ValueError("The 'urls' parameter must be a string or a list of strings.")

        if not all(isinstance(url, str) for url in candidates):
            raise ValueError("The 'urls' parameter must be a string or a list of strings.")

        return {
            # Trim each entry and discard blanks left over by stray separators.
            "urls": [url.strip() for url in candidates if url.strip()],
            # A per-call key wins; fall back to the instance key when it is absent or empty.
            "api_key": params.get("api_key") or self.api_key,
        }
|
||||
|
||||
|
||||
class TavilyExtractTool(BuiltinTool):
    """
    A tool for extracting content from web pages using Tavily Extract.
    """

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Invokes the Tavily Extract tool with the given user ID and tool parameters.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (Dict[str, Any]): The parameters for the Tavily Extract tool.
                Only 'urls' is read here; the dict itself is not modified.

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: A single text message when the
                API key or URLs are missing, the request fails, or nothing was extracted;
                otherwise a JSON message with the raw response followed by a markdown
                text message.
        """
        urls = tool_parameters.get("urls", "")
        api_key = self.runtime.credentials.get("tavily_api_key")
        if not api_key:
            return self.create_text_message(
                "Tavily API key is missing. Please set the 'tavily_api_key' in credentials."
            )
        if not urls:
            return self.create_text_message("Please input at least one URL to extract.")

        tavily_extract = TavilyExtract(api_key)
        try:
            # Hand over a copy so the client can never write the API key into
            # the caller-owned tool_parameters dict.
            raw_results = tavily_extract.extract_content(dict(tool_parameters))
        except (requests.RequestException, ValueError) as e:
            # RequestException covers HTTP error statuses as well as connection
            # failures and timeouts; ValueError covers rejected parameters and
            # response bodies that are not valid JSON.
            return self.create_text_message(f"Error occurred while extracting content: {str(e)}")

        if not raw_results.get("results"):
            return self.create_text_message("No content could be extracted from the provided URLs.")

        # Always return a JSON message with all data, plus a readable markdown rendering.
        json_message = self.create_json_message(raw_results)
        text_message = self.create_text_message(text=self._format_results_as_text(raw_results))
        return [json_message, text_message]

    def _format_results_as_text(self, raw_results: dict) -> str:
        """
        Formats the raw extraction results into markdown text.

        Each entry of 'results' becomes a numbered section with its URL and raw
        content; entries of 'failed_results', if any, are listed afterwards.

        Args:
            raw_results (dict): The raw extraction results.

        Returns:
            str: The formatted markdown text.
        """
        output_lines = []

        for idx, result in enumerate(raw_results.get("results", []), 1):
            url = result.get("url", "")
            # A null raw_content would otherwise be rendered as the text "None".
            raw_content = result.get("raw_content") or ""

            output_lines.append(f"## Extracted Content {idx}: {url}\n")
            output_lines.append(f"**Raw Content:**\n{raw_content}\n")
            output_lines.append("---\n")

        failed_results = raw_results.get("failed_results")
        if failed_results:
            output_lines.append("## Failed URLs:\n")
            for failed in failed_results:
                url = failed.get("url", "")
                error = failed.get("error", "Unknown error")
                output_lines.append(f"- {url}: {error}\n")

        return "\n".join(output_lines)
|
||||
@ -0,0 +1,23 @@
|
||||
# Tool manifest for the Tavily Extract tool.
identity:
  name: tavily_extract
  author: Kalo Chin
  label:
    en_US: Tavily Extract
    zh_Hans: Tavily Extract

# Descriptions for human users (localized) and for the LLM.
description:
  human:
    en_US: A web extraction tool built specifically for AI agents (LLMs), delivering raw content from web pages.
    zh_Hans: 专为人工智能代理 (LLM) 构建的网页提取工具,提供网页的原始内容。
  llm: A tool for extracting raw content from web pages, designed for AI agents (LLMs).

# The single input: the URLs to extract, passed as one string.
parameters:
  - name: urls
    type: string
    required: true
    label:
      en_US: URLs
      zh_Hans: URLs
    human_description:
      en_US: A comma-separated list of URLs to extract content from.
      zh_Hans: 要从中提取内容的 URL 的逗号分隔列表。
    llm_description: A comma-separated list of URLs to extract content from.
    form: llm
|
||||
Loading…
Reference in New Issue