"""Simple browser tool to fetch and extract textual content from a webpage.

Provides `visit_page(url)`, which returns a short cleaned text excerpt, and
`visit_page_tool`, the same function wrapped as a smolagents tool when the
`smolagents` package is importable (otherwise an alias of `visit_page`).
"""
import logging
from typing import Optional

import requests

logger = logging.getLogger(__name__)

# Optional dependency: without BeautifulSoup, visit_page falls back to
# returning raw HTML instead of extracted text.
try:
    from bs4 import BeautifulSoup
except Exception:
    BeautifulSoup = None

# Optional dependency: without smolagents, visit_page_tool is a plain alias.
try:
    from smolagents import tool
except Exception:
    tool = None


def visit_page(url: str, max_chars: int = 2000) -> str:
    """Fetch *url* and return at most *max_chars* characters of its content.

    When BeautifulSoup is available, ``<script>``, ``<style>`` and
    ``<noscript>`` elements are dropped, the remaining text is extracted,
    blank lines are removed, and the non-empty lines are joined with
    ``" \\n"``. Otherwise the raw response body is returned, truncated.

    This function never raises for fetch/parse problems; failures are
    reported through the returned string so callers can surface them as-is.

    Args:
        url: Address of the page to fetch.
        max_chars: Maximum number of characters to return.

    Returns:
        The text excerpt, ``"(no url provided)"`` when *url* is empty, or
        ``"(visit_page error) <details>"`` when fetching or parsing fails.
    """
    if not url:
        return "(no url provided)"

    try:
        resp = requests.get(url, timeout=10)
        # Treat HTTP 4xx/5xx as failures rather than excerpting error pages.
        resp.raise_for_status()
        text = resp.text

        if BeautifulSoup is None:
            # Fallback: return the first chunk of raw HTML (not ideal).
            return text[:max_chars]

        soup = BeautifulSoup(text, "html.parser")
        # Remove elements whose contents are code/markup, not readable text.
        for element in soup(["script", "style", "noscript"]):
            element.extract()

        body = soup.get_text(separator=" \n")
        stripped_lines = (line.strip() for line in body.splitlines())
        cleaned = " \n".join(line for line in stripped_lines if line)
        return cleaned[:max_chars]
    except Exception as e:
        # Deliberately broad: any failure is logged and reported in-band.
        logger.warning("visit_page failed for %s: %s", url, e)
        return f"(visit_page error) {e}"


# If smolagents is available, expose a decorated tool usable by CodeAgent.
if tool is not None:
    try:

        @tool
        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
            """Fetch a web page and return a cleaned plain-text excerpt of it.

            Args:
                url: Address of the web page to fetch.
                max_chars: Maximum number of characters of text to return.
            """
            # The Args section above is required: the smolagents decorator
            # builds the tool schema from it and rejects undocumented
            # arguments.
            return visit_page(url, max_chars)

    except Exception as exc:
        # Keep the module importable, but record why the wrapper is missing
        # instead of silently degrading to the undecorated function.
        logger.warning(
            "could not wrap visit_page as a smolagents tool: %s", exc
        )
        visit_page_tool = visit_page
else:
    visit_page_tool = visit_page

__all__ = ["visit_page", "visit_page_tool"]