huggingface_ai_final / browser_tools.py
alfulanny's picture
Rename smolagents_agent.py to browser_tools.py
5ccf326 verified
raw
history blame
1.71 kB
"""
Simple browser tool to fetch and extract textual content from a webpage.
Provides `visit_page(url)` which returns a short cleaned text excerpt.
"""
import requests
import logging
from typing import Optional
logger = logging.getLogger(__name__)
try:
from bs4 import BeautifulSoup
except Exception:
BeautifulSoup = None
try:
from smolagents import tool
except Exception:
tool = None
def visit_page(url: str, max_chars: int = 2000) -> str:
    """Fetch ``url`` over HTTP and return a plain-text excerpt of the page.

    When BeautifulSoup is importable, ``<script>``, ``<style>`` and
    ``<noscript>`` elements are removed and the remaining text is returned
    as its non-empty, stripped lines. Without BeautifulSoup the raw response
    text is returned instead.

    Args:
        url: Address of the page to fetch.
        max_chars: Upper bound on the length of the returned excerpt.
            Negative values are treated as 0.

    Returns:
        The excerpt truncated to ``max_chars`` characters;
        ``"(no url provided)"`` when ``url`` is empty or blank; or a
        ``"(visit_page error) ..."`` message when fetching or parsing
        fails. Failures are logged and reported in the return value rather
        than raised.
    """
    # A blank string is as unusable as an empty one; answer without
    # attempting a request (and without logging a spurious warning).
    if not url or (isinstance(url, str) and not url.strip()):
        return "(no url provided)"

    # A negative bound would slice from the end of the text and return almost
    # the whole page. None is tolerated and means "no limit", as before.
    limit = None if max_chars is None else max(max_chars, 0)

    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()

        # Without an explicit charset, requests decodes text/* responses as
        # ISO-8859-1, which garbles UTF-8 pages. Use the encoding detected
        # from the body in that case.
        if "charset" not in resp.headers.get("Content-Type", "").lower():
            resp.encoding = resp.apparent_encoding
        text = resp.text

        if BeautifulSoup is None:
            # fallback: return first chunk of raw HTML (not ideal)
            return text[:limit]

        soup = BeautifulSoup(text, "html.parser")
        # Drop elements whose content is never visible page text.
        for tag in soup(["script", "style", "noscript"]):
            tag.extract()
        body = soup.get_text(separator=" \n")
        lines = [line.strip() for line in body.splitlines() if line.strip()]
        return " \n".join(lines)[:limit]
    except Exception as e:
        # Deliberately broad: callers get a string back for any failure
        # (network, HTTP status, parsing) instead of an exception.
        logger.warning("visit_page failed for %s: %s", url, e)
        return f"(visit_page error) {e}"
# If smolagents is available, expose a decorated tool usable by CodeAgent.
# smolagents' @tool derives the tool schema from the signature and docstring,
# so every parameter needs an entry in an `Args:` section; without one the
# decorator raises and only the undecorated fallback below would be exported.
if tool is not None:
    try:

        @tool
        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
            """Fetch a web page and return a cleaned plain-text excerpt of it.

            Args:
                url: Address of the web page to fetch.
                max_chars: Maximum number of characters to return.
            """
            return visit_page(url, max_chars)

    except Exception as exc:
        # Best-effort: keep the module importable, but make the downgrade
        # visible instead of silently exporting the plain function.
        logger.warning(
            "could not wrap visit_page as a smolagents tool: %s", exc
        )
        visit_page_tool = visit_page
else:
    visit_page_tool = visit_page

__all__ = ["visit_page", "visit_page_tool"]